1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
     // NOTE(review): the reg_defs in this file give RBX, RSI and RDI a register
     // save type of SOC, so the last sentence above looks stale -- confirm.
  69
     // Each general register is described by a pair of reg_defs: NAME is bound
     // to the register's VMReg and NAME_H to the following slot (->next()).
     // Both entries of a pair carry the same encoding number.
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78
     // RBX: SOC as register save type, SOE as C convention save type.
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81
     // RSP is the only register defined as NS (No-Save) in both columns.
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
     // With _WIN64 defined, RSI and RDI get SOE as their C convention save type.
  90
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96
  97 #else
     // Otherwise RSI and RDI are SOC in the C convention column as well.
  98
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118
     // R12-R15 keep SOC as register save type but use SOE as their
     // C convention save type.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141
     // chunk0 lists all sixteen general registers, each as its NAME, NAME_H
     // pair, starting with R10 and ending with RSP.
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Empty register class: its member list contains no registers.
 170 reg_class no_reg();
 171 
 172 // Class for all pointer/long registers: every general register, RSP and
     // R15 included, listed as its NAME, NAME_H pair.  reg_mask_init() copies
     // the generated _ALL_REG_mask and derives its subset masks from it.
 173 reg_class all_reg(RAX, RAX_H,
 174                   RDX, RDX_H,
 175                   RBP, RBP_H,
 176                   RDI, RDI_H,
 177                   RSI, RSI_H,
 178                   RCX, RCX_H,
 179                   RBX, RBX_H,
 180                   RSP, RSP_H,
 181                   R8,  R8_H,
 182                   R9,  R9_H,
 183                   R10, R10_H,
 184                   R11, R11_H,
 185                   R12, R12_H,
 186                   R13, R13_H,
 187                   R14, R14_H,
 188                   R15, R15_H);
 189 
 190 // Class for all int registers.  Only the low halves (no _H entries) are
     // listed; RSP and R15 are not members.  reg_mask_init() starts from the
     // generated _ALL_INT_REG_mask when it builds the int masks.
 191 reg_class all_int_reg(RAX,
 192                       RDX,
 193                       RBP,
 194                       RDI,
 195                       RSI,
 196                       RCX,
 197                       RBX,
 198                       R8,
 199                       R9,
 200                       R10,
 201                       R11,
 202                       R12,
 203                       R13,
 204                       R14);
 205 
 206 // Class for any general register.  Backed by _ANY_REG_mask, which
     // reg_mask_init() derives from _ALL_REG_mask by removing RBP when
     // PreserveFramePointer is set and R12 when need_r12_heapbase() is true.
     // All pointer and long masks below are derived from it and inherit those
     // two conditional removals; the int masks apply the same two conditions
     // to _ALL_INT_REG_mask.
 207 reg_class any_reg %{
 208   return _ANY_REG_mask;
 209 %}
 210
 211 // Class for all pointer registers (excluding RSP and R15)
 212 reg_class ptr_reg %{
 213   return _PTR_REG_mask;
 214 %}
 215
 216 // Class for all pointer registers (excluding RSP, R15 and RBP)
 217 reg_class ptr_reg_no_rbp %{
 218   return _PTR_REG_NO_RBP_mask;
 219 %}
 220
 221 // Class for all pointer registers (excluding RAX, RSP and R15)
 222 reg_class ptr_no_rax_reg %{
 223   return _PTR_NO_RAX_REG_mask;
 224 %}
 225
 226 // Class for all pointer registers (excluding RAX, RBX, RSP and R15)
 227 reg_class ptr_no_rax_rbx_reg %{
 228   return _PTR_NO_RAX_RBX_REG_mask;
 229 %}
 230
 231 // Class for all long registers (excluding RSP and R15); reg_mask_init()
     // copies this mask from _PTR_REG_mask
 232 reg_class long_reg %{
 233   return _LONG_REG_mask;
 234 %}
 235
 236 // Class for all long registers (excluding RAX, RDX, RSP and R15)
 237 reg_class long_no_rax_rdx_reg %{
 238   return _LONG_NO_RAX_RDX_REG_mask;
 239 %}
 240
 241 // Class for all long registers (excluding RCX, RSP and R15)
 242 reg_class long_no_rcx_reg %{
 243   return _LONG_NO_RCX_REG_mask;
 244 %}
 245
 246 // Class for all int registers (excluding RSP and R15, which all_int_reg
     // does not list)
 247 reg_class int_reg %{
 248   return _INT_REG_mask;
 249 %}
 250
 251 // Class for all int registers (excluding RAX, RDX, RSP and R15)
 252 reg_class int_no_rax_rdx_reg %{
 253   return _INT_NO_RAX_RDX_REG_mask;
 254 %}
 255
 256 // Class for all int registers (excluding RCX, RSP and R15)
 257 reg_class int_no_rcx_reg %{
 258   return _INT_NO_RCX_REG_mask;
 259 %}
 260 
 261 // Singleton class for RAX pointer register
 262 reg_class ptr_rax_reg(RAX, RAX_H);
 263
 264 // Singleton class for RBX pointer register
 265 reg_class ptr_rbx_reg(RBX, RBX_H);
 266
 267 // Singleton class for RSI pointer register
 268 reg_class ptr_rsi_reg(RSI, RSI_H);
 269
 270 // Singleton class for RDI pointer register
 271 reg_class ptr_rdi_reg(RDI, RDI_H);
 272
 273 // Singleton class for stack pointer
 274 reg_class ptr_rsp_reg(RSP, RSP_H);
 275
 276 // Singleton class for TLS pointer
 277 reg_class ptr_r15_reg(R15, R15_H);
 278
 279 // Singleton class for RAX long register
 280 reg_class long_rax_reg(RAX, RAX_H);
 281
 282 // Singleton class for RCX long register
 283 reg_class long_rcx_reg(RCX, RCX_H);
 284
 285 // Singleton class for RDX long register
 286 reg_class long_rdx_reg(RDX, RDX_H);
 287
 288 // Singleton class for RAX int register
 289 reg_class int_rax_reg(RAX);
 290
 291 // Singleton class for RBX int register
 292 reg_class int_rbx_reg(RBX);
 293
 294 // Singleton class for RCX int register
 295 reg_class int_rcx_reg(RCX);
 296
 297 // Singleton class for RDX int register
 298 reg_class int_rdx_reg(RDX);
 299
 300 // Singleton class for RDI int register
 301 reg_class int_rdi_reg(RDI);
 302 
 303 // Singleton class for instruction pointer
 304 // reg_class ip_reg(RIP);
 305 
 306 %}
 307 
 308 //----------SOURCE BLOCK-------------------------------------------------------
 309 // This is a block of C++ code which provides values, functions, and
 310 // definitions necessary in the rest of the architecture description
 311 source_hpp %{
 312
     // Register masks filled in at runtime by reg_mask_init() (defined in the
     // source block).  The reg_class bodies of the form "return _XXX_mask;"
     // in the register block return these objects.
 313 extern RegMask _ANY_REG_mask;
 314 extern RegMask _PTR_REG_mask;
 315 extern RegMask _PTR_REG_NO_RBP_mask;
 316 extern RegMask _PTR_NO_RAX_REG_mask;
 317 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 318 extern RegMask _LONG_REG_mask;
 319 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 320 extern RegMask _LONG_NO_RCX_REG_mask;
 321 extern RegMask _INT_REG_mask;
 322 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 323 extern RegMask _INT_NO_RCX_REG_mask;
 324
     // The pointer, long and int masks OR-ed with STACK_OR_STACK_SLOTS_mask();
     // also filled in by reg_mask_init().
 325 extern RegMask _STACK_OR_PTR_REG_mask;
 326 extern RegMask _STACK_OR_LONG_REG_mask;
 327 extern RegMask _STACK_OR_INT_REG_mask;
 328
     // Accessors returning the three stack-or-register masks by const reference.
 329 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 330 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 331 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 332
 333 %}
 334 
 335 source %{
 336 #define   RELOC_IMM64    Assembler::imm_operand
 337 #define   RELOC_DISP32   Assembler::disp32_operand
     // The two macros above are short names for Assembler operand format
     // constants; RELOC_DISP32 is passed as the 'format' argument of the
     // relocation helpers in this block.
 338
     // Shorthand used by the MacroAssembler-based helpers: "__ foo()" expands
     // to "_masm.foo()", so a variable named _masm must be in scope.
 339 #define __ _masm.
 340
     // Definitions of the register masks declared extern in the source_hpp
     // block; reg_mask_init() assigns their contents.
 341 RegMask _ANY_REG_mask;
 342 RegMask _PTR_REG_mask;
 343 RegMask _PTR_REG_NO_RBP_mask;
 344 RegMask _PTR_NO_RAX_REG_mask;
 345 RegMask _PTR_NO_RAX_RBX_REG_mask;
 346 RegMask _LONG_REG_mask;
 347 RegMask _LONG_NO_RAX_RDX_REG_mask;
 348 RegMask _LONG_NO_RCX_REG_mask;
 349 RegMask _INT_REG_mask;
 350 RegMask _INT_NO_RAX_RDX_REG_mask;
 351 RegMask _INT_NO_RCX_REG_mask;
 352 RegMask _STACK_OR_PTR_REG_mask;
 353 RegMask _STACK_OR_LONG_REG_mask;
 354 RegMask _STACK_OR_INT_REG_mask;
 355 
 356 static bool need_r12_heapbase() {
 357   // True when compressed oops or compressed class pointers are enabled;
 358   // reg_mask_init() then removes R12 from the masks it builds.
       if (UseCompressedOops) {
         return true;
       }
       return UseCompressedClassPointers;
     }
 359 
 360 // Clears the slot bound to reg's own VMReg (the low half) in mask.
 361 static void mask_remove_low(RegMask& mask, Register reg) {
 362   mask.Remove(OptoReg::as_OptoReg(reg->as_VMReg()));
 363 }
 364
 365 // Clears both slots of reg in mask: its own VMReg and the following one.
 366 static void mask_remove_pair(RegMask& mask, Register reg) {
 367   mask_remove_low(mask, reg);
 368   mask.Remove(OptoReg::as_OptoReg(reg->as_VMReg()->next()));
 369 }
 370
 371 // Fills in the runtime register masks.  The pointer/long masks are subsets
 372 // of _ALL_REG_mask (generated by adlc from the all_reg register class);
 373 // the int masks are subsets of _ALL_INT_REG_mask.
 374 void reg_mask_init() {
 375   const bool reserve_rbp = PreserveFramePointer;
 376   const bool reserve_r12 = need_r12_heapbase();
 377
 378   // Any register: everything except the conditionally reserved RBP and R12.
 379   _ANY_REG_mask = _ALL_REG_mask;
 380   if (reserve_rbp) {
 381     mask_remove_pair(_ANY_REG_mask, rbp);
 382   }
 383   if (reserve_r12) {
 384     mask_remove_pair(_ANY_REG_mask, r12);
 385   }
 386
 387   // Pointer registers: additionally without RSP and R15.
 388   _PTR_REG_mask = _ANY_REG_mask;
 389   mask_remove_pair(_PTR_REG_mask, rsp);
 390   mask_remove_pair(_PTR_REG_mask, r15);
 391
 392   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
 393   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
 394
 395   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
 396   mask_remove_pair(_PTR_REG_NO_RBP_mask, rbp);
 397
 398   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
 399   mask_remove_pair(_PTR_NO_RAX_REG_mask, rax);
 400
 401   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
 402   mask_remove_pair(_PTR_NO_RAX_RBX_REG_mask, rbx);
 403
 404   // Long registers start out identical to the pointer registers.
 405   _LONG_REG_mask = _PTR_REG_mask;
 406   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
 407   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
 408
 409   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
 410   mask_remove_pair(_LONG_NO_RAX_RDX_REG_mask, rax);
 411   mask_remove_pair(_LONG_NO_RAX_RDX_REG_mask, rdx);
 412
 413   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
 414   mask_remove_pair(_LONG_NO_RCX_REG_mask, rcx);
 415
 416   // Int registers: only the low-half slot of each register is involved.
 417   _INT_REG_mask = _ALL_INT_REG_mask;
 418   if (reserve_rbp) {
 419     mask_remove_low(_INT_REG_mask, rbp);
 420   }
 421   if (reserve_r12) {
 422     mask_remove_low(_INT_REG_mask, r12);
 423   }
 424
 425   _STACK_OR_INT_REG_mask = _INT_REG_mask;
 426   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());

       _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
       mask_remove_low(_INT_NO_RAX_RDX_REG_mask, rax);
       mask_remove_low(_INT_NO_RAX_RDX_REG_mask, rdx);

       _INT_NO_RCX_REG_mask = _INT_REG_mask;
       mask_remove_low(_INT_NO_RCX_REG_mask, rcx);
     }
 427 
 428 static bool generate_vzeroupper(Compile* C) {
 429   // A vzeroupper is generated only when the CPU supports it and the
 430   // compilation uses vectors wider than 16 bytes or asks for the upper
       // AVX state to be cleared.
       if (!VM_Version::supports_vzeroupper()) {
         return false;
       }
       return C->max_vector_size() > 16 || C->clear_upper_avx() == true;
     }
 431 
 432 static int clear_avx_size() {
 433   // Byte size of the vzeroupper instruction (3) if the current compilation
 434   // generates one, otherwise 0.
       if (generate_vzeroupper(Compile::current())) {
         return 3;
       }
       return 0;
     }
 435 
 436 // !!!!! Special hack: every kind of call node reports the byte offset from
 437 //       the start of the call sequence to the point its return address
 438 //       will refer to.
 439 int MachCallStaticJavaNode::ret_addr_offset()
 440 {
 441   // 5 bytes from start of call to where return address points, plus the
 442   // vzeroupper bytes when one is generated.
 443   return 5 + clear_avx_size();
 444 }
 445 
 446 int MachCallDynamicJavaNode::ret_addr_offset()
 447 {
 448   // 15 bytes from start of call to where return address points, plus the
 449   // vzeroupper bytes when one is generated.
 450   return 15 + clear_avx_size();
 451 }
 452 
 453 int MachCallRuntimeNode::ret_addr_offset() {
 454   // 13 bytes for "movq r10,#addr; callq (r10)", plus the vzeroupper bytes
 455   // when one is generated.
 456   return 13 + clear_avx_size();
 457 }
 458 
 459 // Indicate if the safepoint node needs the polling page as an input:
 460 // it does when thread-local polling is in use, or when the polling page
 461 // is more than disp32 away.
 462 bool SafePointNode::needs_polling_address_input()
 463 {
 464   if (SafepointMechanism::uses_thread_local_poll()) {
         return true;
       }
       return Assembler::is_polling_page_far();
     }
 465 
 466 //
 467 // Compute padding required for nodes which need alignment
 468 //
 469 
 470 // Padding to insert before a static Java call so that the position right
 471 // after the call opcode byte is aligned to alignment_required().  Per the
 472 // original note this keeps the patchable part of the call from spanning a
 473 // cache line.
 474 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 475 {
 476   // Offset just past the optional vzeroupper and the call opcode byte.
 477   const int patch_offset = current_offset + clear_avx_size() + 1;
       return align_up(patch_offset, alignment_required()) - patch_offset;
     }
 478 
 479 // Padding to insert before a dynamic Java call so that the position right
 480 // after the movq instruction and the call opcode byte is aligned to
 481 // alignment_required().  Per the original note this keeps the patchable
 482 // part of the call from spanning a cache line.
 483 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 484 {
 485   // Offset just past the optional vzeroupper, the movq and the call opcode byte.
 486   const int patch_offset = current_offset + clear_avx_size() + 11;
       return align_up(patch_offset, alignment_required()) - patch_offset;
     }
 487 
 488 // EMIT_RM(): emits one byte with f1 shifted to bit 6, f2 shifted to bit 3
 489 // and f3 in the low bits (used here for R/M and SIB bytes)
 490 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 491   const int packed = (f1 << 6) | (f2 << 3) | f3;
 492   cbuf.insts()->emit_int8((unsigned char) packed);
     }
 493 
 494 // EMIT_CC(): emits a single byte formed by OR-ing f1 and f2
 495 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 496   const int merged = f1 | f2;
 497   cbuf.insts()->emit_int8((unsigned char) merged);
 498 }
 499 
 500 // EMIT_OPCODE(): emits 'code', truncated to 8 bits, as one instruction byte
 501 void emit_opcode(CodeBuffer &cbuf, int code) {
 502   const unsigned char opcode_byte = (unsigned char) code;
 503   cbuf.insts()->emit_int8(opcode_byte);
     }
 504 
 505 // EMIT_OPCODE() w/ relocation information: records a relocation at
 506 // insts_mark() + offset, then emits the opcode byte
 507 void emit_opcode(CodeBuffer &cbuf,
 508                  int code, relocInfo::relocType reloc, int offset, int format)
 509 {
 510   address reloc_pc = cbuf.insts_mark() + offset;
 511   cbuf.relocate(reloc_pc, reloc, format);
       emit_opcode(cbuf, code);
     }
 512 
 513 // EMIT_D8(): emits d8, truncated to 8 bits, as one byte
 514 void emit_d8(CodeBuffer &cbuf, int d8) {
 515   const unsigned char low_byte = (unsigned char) d8;
 516   cbuf.insts()->emit_int8(low_byte);
     }
 517 
 518 // EMIT_D16(): appends d16 to the instruction section via emit_int16
 519 void emit_d16(CodeBuffer &cbuf, int d16) {
 520   CodeSection* insts = cbuf.insts();
 521   insts->emit_int16(d16);
     }
 522 
 523 // EMIT_D32(): appends d32 to the instruction section via emit_int32
 524 void emit_d32(CodeBuffer &cbuf, int d32) {
 525   CodeSection* insts = cbuf.insts();
 526   insts->emit_int32(d32);
     }
 527 
 528 // EMIT_D64(): appends d64 to the instruction section via emit_int64
 529 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 530   CodeSection* insts = cbuf.insts();
 531   insts->emit_int64(d64);
     }
 532 
 533 // emit 32 bit value and construct relocation entry from relocInfo::relocType
     //   The relocation is recorded at cbuf.insts_mark() before d32 is appended.
     //   external_word_type is rejected by the assert, whose message points to
     //   the 2-arg emit_d32_reloc(cbuf, addr) overload for that case.
 534 void emit_d32_reloc(CodeBuffer& cbuf,
 535                     int d32,
 536                     relocInfo::relocType reloc,
 537                     int format)
 538 {
 539   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 540   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 541   cbuf.insts()->emit_int32(d32);
 542 }
 543 
 544 // emit 32 bit value and construct relocation entry from RelocationHolder
     //   The relocation is recorded at cbuf.insts_mark() before d32 is appended.
 545 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 546 #ifdef ASSERT
       // ASSERT builds only: for an oop relocation whose value is neither 0 nor
       // the non-oop word, check that the value lies in the heap and is an oop.
 547   if (rspec.reloc()->type() == relocInfo::oop_type &&
 548       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 549     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
 550     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
 551   }
 552 #endif
 553   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 554   cbuf.insts()->emit_int32(d32);
 555 }
 556 
 557 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 558   // Emits the 32-bit distance from the end of the 4-byte field about to be
 559   // written to addr, with an external_word relocation for addr.
 560   const address after_disp = cbuf.insts_end() + 4;
 561   const int rel32 = (int) (addr - after_disp);
 562   emit_d32_reloc(cbuf, rel32, external_word_Relocation::spec(addr), RELOC_DISP32);
     }
 563 
 564 
 565 // emit 64 bit value and construct relocation entry from relocInfo::relocType
     //   The relocation is recorded at cbuf.insts_mark() before d64 is appended.
 566 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 567   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 568   cbuf.insts()->emit_int64(d64);
 569 }
 570 
 571 // emit 64 bit value and construct relocation entry from RelocationHolder
     //   The relocation is recorded at cbuf.insts_mark() before d64 is appended.
 572 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 573 #ifdef ASSERT
       // ASSERT builds only: for an oop relocation whose value is neither 0 nor
       // the non-oop word, check that the value lies in the heap and is an oop.
 574   if (rspec.reloc()->type() == relocInfo::oop_type &&
 575       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 576     assert(Universe::heap()->is_in((address)d64), "should be real oop");
 577     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
 578   }
 579 #endif
 580   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 581   cbuf.insts()->emit_int64(d64);
 582 }
 583 
 584 // Access stack slot for load or store: emits 'opcode' followed by an
 585 // [RSP + disp] memory operand (R/M byte, SIB byte, then an 8-bit or
 586 // 32-bit displacement depending on whether disp fits in a signed byte)
 587 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 588 {
 589   const bool disp_fits_byte = (-0x80 <= disp && disp < 0x80);
 590
 591   emit_opcode(cbuf, opcode);                    // (e.g., FILD   [RSP+src])
 592   emit_rm(cbuf, disp_fits_byte ? 0x01 : 0x02,
 593           rm_field, RSP_enc);                   // R/M byte
 594   emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);        // SIB byte
 595   if (disp_fits_byte) {
 596     emit_d8(cbuf, disp);                        // 8-bit displacement
 597   } else {
         emit_d32(cbuf, disp);                       // 32-bit displacement
       }
     }
 598 
 599    // rRegI ereg, memory mem) %{    // emit_reg_mem
     // Emits the R/M byte, an optional SIB byte and an optional displacement
     // for a register + memory operand.
     //   reg         - register number; only its low 3 bits are emitted here
     //   base, index - address registers; only their low 3 bits are emitted.
     //                 index == 0x4 together with scale == 0 means "no index";
     //                 base == -1 flags an absolute address
     //   scale       - placed in the top field of the SIB byte
     //   disp        - displacement: omitted, 8-bit or 32-bit (see below)
     //   disp_reloc  - asserted to be relocInfo::none
     // NOTE(review): because of that assert, the "disp_reloc != none" branches
     // below can only run when asserts are compiled out, and they always use
     // relocInfo::oop_type -- confirm whether they are still needed.
 600 void encode_RegMem(CodeBuffer &cbuf,
 601                    int reg,
 602                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 603 {
 604   assert(disp_reloc == relocInfo::none, "cannot have disp");
 605   int regenc = reg & 7;
 606   int baseenc = base & 7;
 607   int indexenc = index & 7;
 608
 609   // There is no index & no scale, use form without SIB byte
       // (not possible for base RSP or R12, which take the SIB path below)
 610   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 611     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 612     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 613       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 614     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 615       // If 8-bit displacement, mode 0x1
 616       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 617       emit_d8(cbuf, disp);
 618     } else {
 619       // If 32-bit displacement
 620       if (base == -1) { // Special flag for absolute address
 621         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 622         if (disp_reloc != relocInfo::none) {
 623           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 624         } else {
 625           emit_d32(cbuf, disp);
 626         }
 627       } else {
 628         // Normal base + offset
 629         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 630         if (disp_reloc != relocInfo::none) {
 631           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 632         } else {
 633           emit_d32(cbuf, disp);
 634         }
 635       }
 636     }
 637   } else {
 638     // Else, encode with the SIB byte
         // (the R/M field is 0x4 in every SIB form below)
 639     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 640     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 641       // If no displacement
 642       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 643       emit_rm(cbuf, scale, indexenc, baseenc);
 644     } else {
 645       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 646         // If 8-bit displacement, mode 0x1
 647         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 648         emit_rm(cbuf, scale, indexenc, baseenc);
 649         emit_d8(cbuf, disp);
 650       } else {
 651         // If 32-bit displacement
             // Both arms below emit the same bytes when base == 0x04, since
             // baseenc is then 0x04 as well.
 652         if (base == 0x04 ) {
 653           emit_rm(cbuf, 0x2, regenc, 0x4);
 654           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 655         } else {
 656           emit_rm(cbuf, 0x2, regenc, 0x4);
 657           emit_rm(cbuf, scale, indexenc, baseenc); // *
 658         }
 659         if (disp_reloc != relocInfo::none) {
 660           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 661         } else {
 662           emit_d32(cbuf, disp);
 663         }
 664       }
 665     }
 666   }
 667 }
 668 
 669 // This could be in MacroAssembler but it's fairly C2 specific
     // Emits a flag fixup to follow a comiss/ucomiss-style compare: if the
     // parity flag is clear nothing is done; otherwise the flags are pushed,
     // masked in place on the stack and popped back.
 670 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 671   Label exit;
       // Skip the fixup when PF is clear.
 672   __ jccb(Assembler::noParity, exit);
 673   __ pushf();
 674   //
 675   // comiss/ucomiss instructions set ZF,PF,CF flags and
 676   // zero OF,AF,SF for NaN values.
 677   // Fixup flags by zeroing ZF,PF so that compare of NaN
 678   // values returns 'less than' result (CF is set).
 679   // Leave the rest of flags unchanged.
 680   //
 681   //    7 6 5 4 3 2 1 0
 682   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 683   //    0 0 1 0 1 0 1 1   (0x2B)
 684   //
       // Mask the pushed flags word at [rsp]; the low byte of the mask is the
       // 0x2B pattern from the table above.
 685   __ andq(Address(rsp, 0), 0xffffff2b);
 686   __ popf();
 687   __ bind(exit);
 688 }
 689 
 690 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
       // Turns the flags of a preceding floating-point compare into an int in
       // dst: -1 when the parity or below condition holds, otherwise the
       // result of the notEqual test zero-extended (0 for equal, 1 if not).
 691   Label done;
       // Preload -1; movl is emitted before the jumps, so they still test the
       // flags set by the compare.
 692   __ movl(dst, -1);
 693   __ jcc(Assembler::parity, done);
 694   __ jcc(Assembler::below, done);
       // Neither parity nor below: set the low byte from notEqual and widen it.
 695   __ setb(Assembler::notEqual, dst);
 696   __ movzbl(dst, dst);
 697   __ bind(done);
 698 }
 699 
 700 // Math.min()    # Math.max()
 701 // --------------------------
 702 // ucomis[s/d]   #
 703 // ja   -> b     # a
 704 // jp   -> NaN   # NaN
 705 // jb   -> a     # b
 706 // je            #
 707 // |-jz -> a | b # a & b
 708 // |    -> a     #
     //
     // Emits the instruction sequence for a floating-point min or max.
     //   dst    - receives the result
     //   a, b   - the two operands
     //   xmmt   - XMM temporary; zeroed and compared against a on the equal path
     //   rt     - general-purpose temporary; loaded with the NaN bit pattern
     //   min    - true selects the min column of the table above, false the max
     //   single - true uses the float instructions (ucomiss/movflt/movdl),
     //            false the double ones (ucomisd/movdbl/movdq)
 709 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
 710                      XMMRegister a, XMMRegister b,
 711                      XMMRegister xmmt, Register rt,
 712                      bool min, bool single) {
 713
 714   Label nan, zero, below, above, done;
 715
 716   if (single)
 717     __ ucomiss(a, b);
 718   else
 719     __ ucomisd(a, b);
 720
       // For "above" the result is (min ? b : a).  If dst already is that
       // register no move is needed, so jump straight to done.
 721   if (dst->encoding() != (min ? b : a)->encoding())
 722     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 723   else
 724     __ jccb(Assembler::above, done);
 725
 726   __ jccb(Assembler::parity, nan);  // PF=1
 727   __ jccb(Assembler::below, below); // CF=1
 728
 729   // equal
       // Zero xmmt and compare a against it: if a compares equal to zero take
       // the "zero" path, otherwise the result is simply a.
 730   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 731   if (single) {
 732     __ ucomiss(a, xmmt);
 733     __ jccb(Assembler::equal, zero);
 734
 735     __ movflt(dst, a);
 736     __ jmp(done);
 737   }
 738   else {
 739     __ ucomisd(a, xmmt);
 740     __ jccb(Assembler::equal, zero);
 741
 742     __ movdbl(dst, a);
 743     __ jmp(done);
 744   }
 745
       // a and b compared equal and a compared equal to zero: combine the bit
       // patterns, OR for min and AND for max (the "a | b # a & b" table row).
       // NOTE(review): presumably this resolves +0.0 versus -0.0 -- confirm.
 746   __ bind(zero);
 747   if (min)
 748     __ vpor(dst, a, b, Assembler::AVX_128bit);
 749   else
 750     __ vpand(dst, a, b, Assembler::AVX_128bit);
 751
 752   __ jmp(done);
 753
 754   __ bind(above);
 755   if (single)
 756     __ movflt(dst, min ? b : a);
 757   else
 758     __ movdbl(dst, min ? b : a);
 759
 760   __ jmp(done);
 761
       // Parity was set: load the NaN constant into dst through rt.
 762   __ bind(nan);
 763   if (single) {
 764     __ movl(rt, 0x7fc00000); // Float.NaN
 765     __ movdl(dst, rt);
 766   }
 767   else {
 768     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 769     __ movdq(dst, rt);
 770   }
 771   __ jmp(done);
 772
       // For "below" the result is (min ? a : b); falls through to done.
 773   __ bind(below);
 774   if (single)
 775     __ movflt(dst, min ? a : b);
 776   else
 777     __ movdbl(dst, min ? a : b);
 778
 779   __ bind(done);
 780 }
 781 
 782 //=============================================================================
     // The constant table base node has an empty output register mask here.
 783 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 784
     // Returns the offset of the constant table base; always 0 here.
 785 int Compile::ConstantTable::calculate_table_base_offset() const {
 786   return 0;  // absolute addressing, no offset
 787 }
 788
     // No post-allocation expansion is requested, so postalloc_expand() is not
     // expected to be called.
 789 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 790 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 791   ShouldNotReachHere();
 792 }
 793
     // Emits nothing for the constant table base.
 794 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 795   // Empty encoding
 796 }
 797
     // Reports a size of 0, matching the empty encoding in emit().
 798 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 799   return 0;
 800 }
 801
 802 #ifndef PRODUCT
     // Non-PRODUCT builds only: prints a one-line description of the node to st.
 803 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 804   st->print("# MachConstantBaseNode (empty encoding)");
 805 }
 806 #endif
 807 
 808 
 809 //=============================================================================
 810 #ifndef PRODUCT
     // Non-PRODUCT builds only: prints a textual description of the method
     // prolog to st.  The output depends on the frame size, on whether a stack
     // bang is needed, and on PreserveFramePointer and VerifyStackAtCalls.
     // The local 'framesize' is decremented step by step so that it can be
     // printed as the frame size or stack offset of each pseudo-instruction.
 811 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 812   Compile* C = ra_->C;
 813
 814   int framesize = C->frame_size_in_bytes();
 815   int bangsize = C->bang_size_in_bytes();
 816   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 817   // Remove wordSize for return addr which is already pushed.
 818   framesize -= wordSize;
 819
 820   if (C->need_stack_bang(bangsize)) {
         // Stack bang variant: rbp is pushed, so one more word is taken off
         // before the remaining frame size is printed.
 821     framesize -= wordSize;
 822     st->print("# stack bang (%d bytes)", bangsize);
 823     st->print("\n\t");
 824     st->print("pushq   rbp\t# Save rbp");
 825     if (PreserveFramePointer) {
 826         st->print("\n\t");
 827         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 828     }
 829     if (framesize) {
 830       st->print("\n\t");
 831       st->print("subq    rsp, #%d\t# Create frame",framesize);
 832     }
 833   } else {
         // No stack bang: the whole frame is created with one subq and rbp is
         // stored into the word at the top of the new frame.
 834     st->print("subq    rsp, #%d\t# Create frame",framesize);
 835     st->print("\n\t");
 836     framesize -= wordSize;
 837     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 838     if (PreserveFramePointer) {
 839       st->print("\n\t");
 840       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 841       if (framesize > 0) {
 842         st->print("\n\t");
 843         st->print("addq    rbp, #%d", framesize);
 844       }
 845     }
 846   }
 847
 848   if (VerifyStackAtCalls) {
         // The cookie is printed one word below the slot used so far.
 849     st->print("\n\t");
 850     framesize -= wordSize;
 851     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 852 #ifdef ASSERT
 853     st->print("\n\t");
 854     st->print("# stack alignment check");
 855 #endif
 856   }
       // Printed only for compilations with a stub function when the barrier
       // set provides an nmethod barrier.
       // NOTE(review): confirm this condition matches where the prolog code
       // actually emits the nmethod entry barrier.
 857   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 858     st->print("\n\t");
 859     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
 860     st->print("\n\t");
 861     st->print("je      fast_entry\t");
 862     st->print("\n\t");
 863     st->print("call    #nmethod_entry_barrier_stub\t");
 864     st->print("\n\tfast_entry:");
 865   }
 866   st->cr();
 867 }
 868 #endif
 869 
// Emits the machine code for the method prolog into 'cbuf'.
//   cbuf - code buffer receiving the instructions
//   ra_  - register allocator; ra_->C supplies the current Compile
// Statement order is significant: the optional class-initialization barrier
// precedes the verified entry sequence, and the frame-complete offset is
// recorded right after that sequence.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Load the holder klass and run the class-init barrier; the barrier is
    // handed L_skip_barrier as its fast-path label.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Emit the verified entry sequence, then bind this node's verified-entry
  // label at the position that follows it.
  __ verified_entry(C);
  __ bind(*_verified_entry);

  // Record the current instruction offset as the frame-complete point.
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 901 
 902 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 903 {
 904   return MachNode::size(ra_); // too many variables; just compute it
 905                               // the hard way
 906 }
 907 
 908 int MachPrologNode::reloc() const
 909 {
 910   return 0; // a large enough number
 911 }
 912 
 913 //=============================================================================
 914 #ifndef PRODUCT
 915 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 916 {
 917   Compile* C = ra_->C;
 918   if (generate_vzeroupper(C)) {
 919     st->print("vzeroupper");
 920     st->cr(); st->print("\t");
 921   }
 922 
 923   int framesize = C->frame_size_in_bytes();
 924   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 925   // Remove word for return adr already pushed
 926   // and RBP
 927   framesize -= 2*wordSize;
 928 
 929   if (framesize) {
 930     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 931     st->print("\t");
 932   }
 933 
 934   st->print_cr("popq    rbp");
 935   if (do_polling() && C->is_method_compilation()) {
 936     st->print("\t");
 937     if (SafepointMechanism::uses_thread_local_poll()) {
 938       st->print_cr("movq    rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
 939                    "testl   rax, [rscratch1]\t"
 940                    "# Safepoint: poll for GC");
 941     } else if (Assembler::is_polling_page_far()) {
 942       st->print_cr("movq    rscratch1, #polling_page_address\n\t"
 943                    "testl   rax, [rscratch1]\t"
 944                    "# Safepoint: poll for GC");
 945     } else {
 946       st->print_cr("testl   rax, [rip + #offset_to_poll_page]\t"
 947                    "# Safepoint: poll for GC");
 948     }
 949   }
 950 }
 951 #endif
 952 
// Emits the machine code for the method epilog into 'cbuf'.
//   cbuf - code buffer receiving the instructions
//   ra_  - register allocator; ra_->C supplies the current Compile
// Sequence: optional vzeroupper, stack restore, optional reserved-stack
// check, and finally the return safepoint poll when this node polls and a
// method (not a stub) is being compiled.
void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  __ restore_stack(C);


  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // NOTE(review): this declaration shadows the outer _masm built on the
    // same buffer; it looks redundant - confirm before removing.
    MacroAssembler _masm(&cbuf);
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Load the polling page address from the thread, then touch it.
      __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
      __ relocate(relocInfo::poll_return_type);
      __ testl(rax, Address(rscratch1, 0));
    } else {
      AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
      if (Assembler::is_polling_page_far()) {
        // Far page: materialize the address in rscratch1 first.
        __ lea(rscratch1, polling_page);
        __ relocate(relocInfo::poll_return_type);
        __ testl(rax, Address(rscratch1, 0));
      } else {
        __ testl(rax, polling_page);
      }
    }
  }
}
 989 
 990 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 991 {
 992   return MachNode::size(ra_); // too many variables; just compute it
 993                               // the hard way
 994 }
 995 
 996 int MachEpilogNode::reloc() const
 997 {
 998   return 2; // a large enough number
 999 }
1000 
1001 const Pipeline* MachEpilogNode::pipeline() const
1002 {
1003   return MachNode::pipeline_class();
1004 }
1005 
1006 int MachEpilogNode::safepoint_offset() const
1007 {
1008   return 0;
1009 }
1010 
1011 //=============================================================================
1012 
// Coarse register classes used to pick a spill-copy strategy.
// Values are spelled out; they equal the implicit enumeration order.
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general-purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1019 
1020 static enum RC rc_class(OptoReg::Name reg)
1021 {
1022   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1023 
1024   if (OptoReg::is_stack(reg)) return rc_stack;
1025 
1026   VMReg r = OptoReg::as_VMReg(reg);
1027 
1028   if (r->is_Register()) return rc_int;
1029 
1030   assert(r->is_XMMRegister(), "must be");
1031   return rc_float;
1032 }
1033 
1034 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
1035 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1036                           int src_hi, int dst_hi, uint ireg, outputStream* st);
1037 
1038 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1039                             int stack_offset, int reg, uint ireg, outputStream* st);
1040 
1041 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
1042                                       int dst_offset, uint ireg, outputStream* st) {
1043   if (cbuf) {
1044     MacroAssembler _masm(cbuf);
1045     switch (ireg) {
1046     case Op_VecS:
1047       __ movq(Address(rsp, -8), rax);
1048       __ movl(rax, Address(rsp, src_offset));
1049       __ movl(Address(rsp, dst_offset), rax);
1050       __ movq(rax, Address(rsp, -8));
1051       break;
1052     case Op_VecD:
1053       __ pushq(Address(rsp, src_offset));
1054       __ popq (Address(rsp, dst_offset));
1055       break;
1056     case Op_VecX:
1057       __ pushq(Address(rsp, src_offset));
1058       __ popq (Address(rsp, dst_offset));
1059       __ pushq(Address(rsp, src_offset+8));
1060       __ popq (Address(rsp, dst_offset+8));
1061       break;
1062     case Op_VecY:
1063       __ vmovdqu(Address(rsp, -32), xmm0);
1064       __ vmovdqu(xmm0, Address(rsp, src_offset));
1065       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1066       __ vmovdqu(xmm0, Address(rsp, -32));
1067       break;
1068     case Op_VecZ:
1069       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1070       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1071       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1072       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1073       break;
1074     default:
1075       ShouldNotReachHere();
1076     }
1077 #ifndef PRODUCT
1078   } else {
1079     switch (ireg) {
1080     case Op_VecS:
1081       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1082                 "movl    rax, [rsp + #%d]\n\t"
1083                 "movl    [rsp + #%d], rax\n\t"
1084                 "movq    rax, [rsp - #8]",
1085                 src_offset, dst_offset);
1086       break;
1087     case Op_VecD:
1088       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1089                 "popq    [rsp + #%d]",
1090                 src_offset, dst_offset);
1091       break;
1092      case Op_VecX:
1093       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
1094                 "popq    [rsp + #%d]\n\t"
1095                 "pushq   [rsp + #%d]\n\t"
1096                 "popq    [rsp + #%d]",
1097                 src_offset, dst_offset, src_offset+8, dst_offset+8);
1098       break;
1099     case Op_VecY:
1100       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1101                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1102                 "vmovdqu [rsp + #%d], xmm0\n\t"
1103                 "vmovdqu xmm0, [rsp - #32]",
1104                 src_offset, dst_offset);
1105       break;
1106     case Op_VecZ:
1107       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1108                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1109                 "vmovdqu [rsp + #%d], xmm0\n\t"
1110                 "vmovdqu xmm0, [rsp - #64]",
1111                 src_offset, dst_offset);
1112       break;
1113     default:
1114       ShouldNotReachHere();
1115     }
1116 #endif
1117   }
1118 }
1119 
// Shared worker behind MachSpillCopyNode::emit() and ::format().
//   cbuf    - when non-NULL, the copy is emitted into this buffer
//   ra_     - register allocator, queried for the source (in(1)) and
//             destination (this) register pairs and their stack offsets
//   do_size - not read in this function
//   st      - when cbuf is NULL (non-PRODUCT builds), the copy is printed here
// Returns 0 on every path.
//
// The copy is dispatched on the register class (stack / gpr / xmm) of the
// first source and destination registers. Within each pairing, a 64-bit move
// is chosen when both the source and the destination form an aligned pair
// (first register even, second == first + 1); otherwise a 32-bit move is used.
// Vector-typed copies are handled up front by the vector helpers.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector copies: delegate to the vector helpers, keyed on the ideal
  // register kind. GPRs are never involved in a vector copy.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // go through rax, parking its old value at [rsp - 8] meanwhile.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    }
  }

  // No source/destination class pairing matched above.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
1485 
1486 #ifndef PRODUCT
1487 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1488   implementation(NULL, ra_, false, st);
1489 }
1490 #endif
1491 
1492 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1493   implementation(&cbuf, ra_, false, NULL);
1494 }
1495 
1496 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1497   return MachNode::size(ra_);
1498 }
1499 
1500 //=============================================================================
1501 #ifndef PRODUCT
1502 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1503 {
1504   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1505   int reg = ra_->get_reg_first(this);
1506   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1507             Matcher::regName[reg], offset);
1508 }
1509 #endif
1510 
1511 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1512 {
1513   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1514   int reg = ra_->get_encode(this);
1515   if (offset >= 0x80) {
1516     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1517     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1518     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1519     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1520     emit_d32(cbuf, offset);
1521   } else {
1522     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1523     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1524     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1525     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1526     emit_d8(cbuf, offset);
1527   }
1528 }
1529 
1530 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1531 {
1532   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1533   return (offset < 0x80) ? 5 : 8; // REX
1534 }
1535 
1536 //=============================================================================
1537 #ifndef PRODUCT
1538 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1539 {
1540   st->print_cr("MachVEPNode");
1541 }
1542 #endif
1543 
1544 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1545 {
1546   MacroAssembler masm(&cbuf);
1547   if (!_verified) {  
1548     uint insts_size = cbuf.insts_size();
1549     if (UseCompressedClassPointers) {
1550       masm.load_klass(rscratch1, j_rarg0);
1551       masm.cmpptr(rax, rscratch1);
1552     } else {
1553       masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1554     }
1555     masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1556   } else {
1557     // Unpack value type args passed as oop and then jump to
1558     // the verified entry point (skipping the unverified entry).
1559     masm.unpack_value_args(ra_->C, _receiver_only);
1560     masm.jmp(*_verified_entry);
1561   }
1562 }
1563 
1564 uint MachVEPNode::size(PhaseRegAlloc* ra_) const
1565 {
1566   return MachNode::size(ra_); // too many variables; just compute it the hard way
1567 }
1568 
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573   if (UseCompressedClassPointers) {
1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1576     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1577   } else {
1578     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1579                  "# Inline cache check");
1580   }
1581   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1582   st->print_cr("\tnop\t# nops to align entry point");
1583 }
1584 #endif
1585 
1586 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1587 {
1588   MacroAssembler masm(&cbuf);
1589   uint insts_size = cbuf.insts_size();
1590   if (UseCompressedClassPointers) {
1591     masm.load_klass(rscratch1, j_rarg0);
1592     masm.cmpptr(rax, rscratch1);
1593   } else {
1594     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1595   }
1596 
1597   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1598 
1599   /* WARNING these NOPs are critical so that verified entry point is properly
1600      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1601   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1602   if (OptoBreakpoint) {
1603     // Leave space for int3
1604     nops_cnt -= 1;
1605   }
1606   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1607   if (nops_cnt > 0)
1608     masm.nop(nops_cnt);
1609 }
1610 
1611 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1612 {
1613   return MachNode::size(ra_); // too many variables; just compute it
1614                               // the hard way
1615 }
1616 
1617 
1618 //=============================================================================
1619 
1620 int Matcher::regnum_to_fpu_offset(int regnum)
1621 {
1622   return regnum - 32; // The FP registers are in the second chunk
1623 }
1624 
1625 // This is UltraSparc specific, true just means we have fast l2f conversion
1626 const bool Matcher::convL2FSupported(void) {
1627   return true;
1628 }
1629 
1630 // Is this branch offset short enough that a short branch can be used?
1631 //
1632 // NOTE: If the platform does not provide any short branch variants, then
1633 //       this method should return false for offset 0.
1634 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1635   // The passed offset is relative to address of the branch.
1636   // On 86 a branch displacement is calculated relative to address
1637   // of a next instruction.
1638   offset -= br_size;
1639 
1640   // the short version of jmpConUCF2 contains multiple branches,
1641   // making the reach slightly less
1642   if (rule == jmpConUCF2_rule)
1643     return (-126 <= offset && offset <= 125);
1644   return (-128 <= offset && offset <= 127);
1645 }
1646 
1647 const bool Matcher::isSimpleConstant64(jlong value) {
1648   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1649   //return value == (int) value;  // Cf. storeImmL and immL32.
1650 
1651   // Probably always true, even if a temp register is required.
1652   return true;
1653 }
1654 
1655 // The ecx parameter to rep stosq for the ClearArray node is in words.
1656 const bool Matcher::init_array_count_is_in_bytes = false;
1657 
1658 // No additional cost for CMOVL.
1659 const int Matcher::long_cmove_cost() { return 0; }
1660 
1661 // No CMOVF/CMOVD with SSE2
1662 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1663 
// Does the CPU require late expand (see block.cpp for a description of
// late expand)?  This port answers no.
const bool Matcher::require_postalloc_expand = false;
1666 
// Do we need to mask the count passed to shift instructions, or does
// the CPU only look at the lower 5/6 bits anyway?  This port does not
// request explicit masking.
const bool Matcher::need_masked_shift_count = false;
1670 
1671 bool Matcher::narrow_oop_use_complex_address() {
1672   assert(UseCompressedOops, "only for compressed oops code");
1673   return (LogMinObjAlignmentInBytes <= 3);
1674 }
1675 
1676 bool Matcher::narrow_klass_use_complex_address() {
1677   assert(UseCompressedClassPointers, "only for compressed klass code");
1678   return (LogKlassAlignmentInBytes <= 3);
1679 }
1680 
1681 bool Matcher::const_oop_prefer_decode() {
1682   // Prefer ConN+DecodeN over ConP.
1683   return true;
1684 }
1685 
1686 bool Matcher::const_klass_prefer_decode() {
1687   // TODO: Either support matching DecodeNKlass (heap-based) in operand
1688   //       or condisider the following:
1689   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
1690   //return CompressedKlassPointers::base() == NULL;
1691   return true;
1692 }
1693 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from the stack.
const bool Matcher::rematerialize_float_constants = true; // XXX
1700 
// If the CPU can load and store misaligned doubles directly then no
// fixup is needed.  Otherwise the double is split into 2 integer pieces
// and moved piece by piece.  This only happens when passing doubles into
// C code, as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1706 
1707 // No-op on amd64
1708 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1709 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  This port advertises that it does.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1713 
1714 // Are floats conerted to double when stored to stack during deoptimization?
1715 // On x64 it is stored without convertion so we can use normal access.
1716 bool Matcher::float_in_double() { return false; }
1717 
// Do ints take an entire long register or just half?
// Here they take the entire register.
const bool Matcher::int_in_long = true;
1720 
1721 // Return whether or not this register is ever used as an argument.
1722 // This function is used on startup to build the trampoline stubs in
1723 // generateOptoStub.  Registers not mentioned will be killed by the VM
1724 // call in the trampoline, and arguments in those registers not be
1725 // available to the callee.
1726 bool Matcher::can_be_java_arg(int reg)
1727 {
1728   return
1729     reg ==  RDI_num || reg == RDI_H_num ||
1730     reg ==  RSI_num || reg == RSI_H_num ||
1731     reg ==  RDX_num || reg == RDX_H_num ||
1732     reg ==  RCX_num || reg == RCX_H_num ||
1733     reg ==   R8_num || reg ==  R8_H_num ||
1734     reg ==   R9_num || reg ==  R9_H_num ||
1735     reg ==  R12_num || reg == R12_H_num ||
1736     reg == XMM0_num || reg == XMM0b_num ||
1737     reg == XMM1_num || reg == XMM1b_num ||
1738     reg == XMM2_num || reg == XMM2b_num ||
1739     reg == XMM3_num || reg == XMM3b_num ||
1740     reg == XMM4_num || reg == XMM4b_num ||
1741     reg == XMM5_num || reg == XMM5b_num ||
1742     reg == XMM6_num || reg == XMM6b_num ||
1743     reg == XMM7_num || reg == XMM7b_num;
1744 }
1745 
1746 bool Matcher::is_spillable_arg(int reg)
1747 {
1748   return can_be_java_arg(reg);
1749 }
1750 
1751 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1752   // In 64 bit mode a code which use multiply when
1753   // devisor is constant is faster than hardware
1754   // DIV instruction (it uses MulHiL).
1755   return false;
1756 }
1757 
1758 // Register for DIVI projection of divmodI
1759 RegMask Matcher::divI_proj_mask() {
1760   return INT_RAX_REG_mask();
1761 }
1762 
1763 // Register for MODI projection of divmodI
1764 RegMask Matcher::modI_proj_mask() {
1765   return INT_RDX_REG_mask();
1766 }
1767 
1768 // Register for DIVL projection of divmodL
1769 RegMask Matcher::divL_proj_mask() {
1770   return LONG_RAX_REG_mask();
1771 }
1772 
1773 // Register for MODL projection of divmodL
1774 RegMask Matcher::modL_proj_mask() {
1775   return LONG_RDX_REG_mask();
1776 }
1777 
1778 // Register for saving SP into on method handle invokes. Not used on x86_64.
1779 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1780     return NO_REG_mask();
1781 }
1782 
1783 %}
1784 
1785 //----------ENCODING BLOCK-----------------------------------------------------
1786 // This block specifies the encoding classes used by the compiler to
1787 // output byte streams.  Encoding classes are parameterized macros
1788 // used by Machine Instruction Nodes in order to generate the bit
1789 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
1793 // which returns its register number when queried.  CONST_INTER causes
1794 // an operand to generate a function which returns the value of the
1795 // constant when queried.  MEMORY_INTER causes an operand to generate
1796 // four functions which return the Base Register, the Index Register,
1797 // the Scale Value, and the Offset Value of the operand when queried.
1798 // COND_INTER causes an operand to generate six functions which return
1799 // the encoding code (ie - encoding bits for the instruction)
1800 // associated with each basic boolean condition for a conditional
1801 // instruction.
1802 //
1803 // Instructions specify two basic values for encoding.  Again, a
1804 // function is available to check if the constant displacement is an
1805 // oop. They use the ins_encode keyword to specify their encoding
1806 // classes (which must be a sequence of enc_class names, and their
1807 // parameters, specified in the encoding block), and they use the
1808 // opcode keyword to specify, in order, their primary, secondary, and
1809 // tertiary opcode.  Only the opcode sections which a particular
1810 // instruction needs for encoding need to be specified.
1811 encode %{
1812   // Build emit functions for each basic byte or larger field in the
1813   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1814   // from C++ code in the enc_class source block.  Emit functions will
1815   // live in the main source block for now.  In future, we can
1816   // generalize this by adding a syntax that specifies the sizes of
1817   // fields in an order, so that the adlc can build the emit functions
1818   // automagically
1819 
1820   // Emit primary opcode
1821   enc_class OpcP
1822   %{
1823     emit_opcode(cbuf, $primary);
1824   %}
1825 
1826   // Emit secondary opcode
1827   enc_class OpcS
1828   %{
1829     emit_opcode(cbuf, $secondary);
1830   %}
1831 
1832   // Emit tertiary opcode
1833   enc_class OpcT
1834   %{
1835     emit_opcode(cbuf, $tertiary);
1836   %}
1837 
1838   // Emit opcode directly
1839   enc_class Opcode(immI d8)
1840   %{
1841     emit_opcode(cbuf, $d8$$constant);
1842   %}
1843 
1844   // Emit size prefix
1845   enc_class SizePrefix
1846   %{
1847     emit_opcode(cbuf, 0x66);
1848   %}
1849 
1850   enc_class reg(rRegI reg)
1851   %{
1852     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1853   %}
1854 
1855   enc_class reg_reg(rRegI dst, rRegI src)
1856   %{
1857     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1858   %}
1859 
1860   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1861   %{
1862     emit_opcode(cbuf, $opcode$$constant);
1863     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1864   %}
1865 
1866   enc_class cdql_enc(no_rax_rdx_RegI div)
1867   %{
1868     // Full implementation of Java idiv and irem; checks for
1869     // special case as described in JVM spec., p.243 & p.271.
1870     //
1871     //         normal case                           special case
1872     //
1873     // input : rax: dividend                         min_int
1874     //         reg: divisor                          -1
1875     //
1876     // output: rax: quotient  (= rax idiv reg)       min_int
1877     //         rdx: remainder (= rax irem reg)       0
1878     //
1879     //  Code sequnce:
1880     //
1881     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1882     //    5:   75 07/08                jne    e <normal>
1883     //    7:   33 d2                   xor    %edx,%edx
1884     //  [div >= 8 -> offset + 1]
1885     //  [REX_B]
1886     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1887     //    c:   74 03/04                je     11 <done>
1888     // 000000000000000e <normal>:
1889     //    e:   99                      cltd
1890     //  [div >= 8 -> offset + 1]
1891     //  [REX_B]
1892     //    f:   f7 f9                   idiv   $div
1893     // 0000000000000011 <done>:
1894 
1895     // cmp    $0x80000000,%eax
1896     emit_opcode(cbuf, 0x3d);
1897     emit_d8(cbuf, 0x00);
1898     emit_d8(cbuf, 0x00);
1899     emit_d8(cbuf, 0x00);
1900     emit_d8(cbuf, 0x80);
1901 
1902     // jne    e <normal>
1903     emit_opcode(cbuf, 0x75);
1904     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
1905 
1906     // xor    %edx,%edx
1907     emit_opcode(cbuf, 0x33);
1908     emit_d8(cbuf, 0xD2);
1909 
1910     // cmp    $0xffffffffffffffff,%ecx
1911     if ($div$$reg >= 8) {
1912       emit_opcode(cbuf, Assembler::REX_B);
1913     }
1914     emit_opcode(cbuf, 0x83);
1915     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1916     emit_d8(cbuf, 0xFF);
1917 
1918     // je     11 <done>
1919     emit_opcode(cbuf, 0x74);
1920     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
1921 
1922     // <normal>
1923     // cltd
1924     emit_opcode(cbuf, 0x99);
1925 
1926     // idivl (note: must be emitted by the user of this rule)
1927     // <done>
1928   %}
1929 
1930   enc_class cdqq_enc(no_rax_rdx_RegL div)
1931   %{
1932     // Full implementation of Java ldiv and lrem; checks for
1933     // special case as described in JVM spec., p.243 & p.271.
1934     //
1935     //         normal case                           special case
1936     //
1937     // input : rax: dividend                         min_long
1938     //         reg: divisor                          -1
1939     //
1940     // output: rax: quotient  (= rax idiv reg)       min_long
1941     //         rdx: remainder (= rax irem reg)       0
1942     //
1943     //  Code sequnce:
1944     //
1945     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1946     //    7:   00 00 80
1947     //    a:   48 39 d0                cmp    %rdx,%rax
1948     //    d:   75 08                   jne    17 <normal>
1949     //    f:   33 d2                   xor    %edx,%edx
1950     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1951     //   15:   74 05                   je     1c <done>
1952     // 0000000000000017 <normal>:
1953     //   17:   48 99                   cqto
1954     //   19:   48 f7 f9                idiv   $div
1955     // 000000000000001c <done>:
1956 
1957     // mov    $0x8000000000000000,%rdx
1958     emit_opcode(cbuf, Assembler::REX_W);
1959     emit_opcode(cbuf, 0xBA);
1960     emit_d8(cbuf, 0x00);
1961     emit_d8(cbuf, 0x00);
1962     emit_d8(cbuf, 0x00);
1963     emit_d8(cbuf, 0x00);
1964     emit_d8(cbuf, 0x00);
1965     emit_d8(cbuf, 0x00);
1966     emit_d8(cbuf, 0x00);
1967     emit_d8(cbuf, 0x80);
1968 
1969     // cmp    %rdx,%rax
1970     emit_opcode(cbuf, Assembler::REX_W);
1971     emit_opcode(cbuf, 0x39);
1972     emit_d8(cbuf, 0xD0);
1973 
1974     // jne    17 <normal>
1975     emit_opcode(cbuf, 0x75);
1976     emit_d8(cbuf, 0x08);
1977 
1978     // xor    %edx,%edx
1979     emit_opcode(cbuf, 0x33);
1980     emit_d8(cbuf, 0xD2);
1981 
1982     // cmp    $0xffffffffffffffff,$div
1983     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1984     emit_opcode(cbuf, 0x83);
1985     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1986     emit_d8(cbuf, 0xFF);
1987 
1988     // je     1e <done>
1989     emit_opcode(cbuf, 0x74);
1990     emit_d8(cbuf, 0x05);
1991 
1992     // <normal>
1993     // cqto
1994     emit_opcode(cbuf, Assembler::REX_W);
1995     emit_opcode(cbuf, 0x99);
1996 
1997     // idivq (note: must be emitted by the user of this rule)
1998     // <done>
1999   %}
2000 
2001   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2002   enc_class OpcSE(immI imm)
2003   %{
2004     // Emit primary opcode and set sign-extend bit
2005     // Check for 8-bit immediate, and set sign extend bit in opcode
2006     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2007       emit_opcode(cbuf, $primary | 0x02);
2008     } else {
2009       // 32-bit immediate
2010       emit_opcode(cbuf, $primary);
2011     }
2012   %}
2013 
2014   enc_class OpcSErm(rRegI dst, immI imm)
2015   %{
2016     // OpcSEr/m
2017     int dstenc = $dst$$reg;
2018     if (dstenc >= 8) {
2019       emit_opcode(cbuf, Assembler::REX_B);
2020       dstenc -= 8;
2021     }
2022     // Emit primary opcode and set sign-extend bit
2023     // Check for 8-bit immediate, and set sign extend bit in opcode
2024     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2025       emit_opcode(cbuf, $primary | 0x02);
2026     } else {
2027       // 32-bit immediate
2028       emit_opcode(cbuf, $primary);
2029     }
2030     // Emit r/m byte with secondary opcode, after primary opcode.
2031     emit_rm(cbuf, 0x3, $secondary, dstenc);
2032   %}
2033 
2034   enc_class OpcSErm_wide(rRegL dst, immI imm)
2035   %{
2036     // OpcSEr/m
2037     int dstenc = $dst$$reg;
2038     if (dstenc < 8) {
2039       emit_opcode(cbuf, Assembler::REX_W);
2040     } else {
2041       emit_opcode(cbuf, Assembler::REX_WB);
2042       dstenc -= 8;
2043     }
2044     // Emit primary opcode and set sign-extend bit
2045     // Check for 8-bit immediate, and set sign extend bit in opcode
2046     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2047       emit_opcode(cbuf, $primary | 0x02);
2048     } else {
2049       // 32-bit immediate
2050       emit_opcode(cbuf, $primary);
2051     }
2052     // Emit r/m byte with secondary opcode, after primary opcode.
2053     emit_rm(cbuf, 0x3, $secondary, dstenc);
2054   %}
2055 
2056   enc_class Con8or32(immI imm)
2057   %{
2058     // Check for 8-bit immediate, and set sign extend bit in opcode
2059     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2060       $$$emit8$imm$$constant;
2061     } else {
2062       // 32-bit immediate
2063       $$$emit32$imm$$constant;
2064     }
2065   %}
2066 
2067   enc_class opc2_reg(rRegI dst)
2068   %{
2069     // BSWAP
2070     emit_cc(cbuf, $secondary, $dst$$reg);
2071   %}
2072 
2073   enc_class opc3_reg(rRegI dst)
2074   %{
2075     // BSWAP
2076     emit_cc(cbuf, $tertiary, $dst$$reg);
2077   %}
2078 
2079   enc_class reg_opc(rRegI div)
2080   %{
2081     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2082     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2083   %}
2084 
2085   enc_class enc_cmov(cmpOp cop)
2086   %{
2087     // CMOV
2088     $$$emit8$primary;
2089     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2090   %}
2091 
2092   enc_class enc_PartialSubtypeCheck()
2093   %{
2094     Register Rrdi = as_Register(RDI_enc); // result register
2095     Register Rrax = as_Register(RAX_enc); // super class
2096     Register Rrcx = as_Register(RCX_enc); // killed
2097     Register Rrsi = as_Register(RSI_enc); // sub class
2098     Label miss;
2099     const bool set_cond_codes = true;
2100 
2101     MacroAssembler _masm(&cbuf);
2102     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2103                                      NULL, &miss,
2104                                      /*set_cond_codes:*/ true);
2105     if ($primary) {
2106       __ xorptr(Rrdi, Rrdi);
2107     }
2108     __ bind(miss);
2109   %}
2110 
2111   enc_class clear_avx %{
2112     debug_only(int off0 = cbuf.insts_size());
2113     if (generate_vzeroupper(Compile::current())) {
2114       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
2115       // Clear upper bits of YMM registers when current compiled code uses
2116       // wide vectors to avoid AVX <-> SSE transition penalty during call.
2117       MacroAssembler _masm(&cbuf);
2118       __ vzeroupper();
2119     }
2120     debug_only(int off1 = cbuf.insts_size());
2121     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2122   %}
2123 
2124   enc_class Java_To_Runtime(method meth) %{
2125     // No relocation needed
2126     MacroAssembler _masm(&cbuf);
2127     __ mov64(r10, (int64_t) $meth$$method);
2128     __ call(r10);
2129   %}
2130 
2131   enc_class Java_To_Interpreter(method meth)
2132   %{
2133     // CALL Java_To_Interpreter
2134     // This is the instruction starting address for relocation info.
2135     cbuf.set_insts_mark();
2136     $$$emit8$primary;
2137     // CALL directly to the runtime
2138     emit_d32_reloc(cbuf,
2139                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2140                    runtime_call_Relocation::spec(),
2141                    RELOC_DISP32);
2142   %}
2143 
2144   enc_class Java_Static_Call(method meth)
2145   %{
2146     // JAVA STATIC CALL
2147     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2148     // determine who we intended to call.
2149     cbuf.set_insts_mark();
2150     $$$emit8$primary;
2151 
2152     if (!_method) {
2153       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2154                      runtime_call_Relocation::spec(),
2155                      RELOC_DISP32);
2156     } else {
2157       int method_index = resolved_method_index(cbuf);
2158       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
2159                                                   : static_call_Relocation::spec(method_index);
2160       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2161                      rspec, RELOC_DISP32);
2162       // Emit stubs for static call.
2163       address mark = cbuf.insts_mark();
2164       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
2165       if (stub == NULL) {
2166         ciEnv::current()->record_failure("CodeCache is full");
2167         return;
2168       }
2169 #if INCLUDE_AOT
2170       CompiledStaticCall::emit_to_aot_stub(cbuf, mark);
2171 #endif
2172     }
2173   %}
2174 
2175   enc_class Java_Dynamic_Call(method meth) %{
2176     MacroAssembler _masm(&cbuf);
2177     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
2178   %}
2179 
2180   enc_class Java_Compiled_Call(method meth)
2181   %{
2182     // JAVA COMPILED CALL
2183     int disp = in_bytes(Method:: from_compiled_offset());
2184 
2185     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2186     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2187 
2188     // callq *disp(%rax)
2189     cbuf.set_insts_mark();
2190     $$$emit8$primary;
2191     if (disp < 0x80) {
2192       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2193       emit_d8(cbuf, disp); // Displacement
2194     } else {
2195       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2196       emit_d32(cbuf, disp); // Displacement
2197     }
2198   %}
2199 
2200   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2201   %{
2202     // SAL, SAR, SHR
2203     int dstenc = $dst$$reg;
2204     if (dstenc >= 8) {
2205       emit_opcode(cbuf, Assembler::REX_B);
2206       dstenc -= 8;
2207     }
2208     $$$emit8$primary;
2209     emit_rm(cbuf, 0x3, $secondary, dstenc);
2210     $$$emit8$shift$$constant;
2211   %}
2212 
2213   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2214   %{
2215     // SAL, SAR, SHR
2216     int dstenc = $dst$$reg;
2217     if (dstenc < 8) {
2218       emit_opcode(cbuf, Assembler::REX_W);
2219     } else {
2220       emit_opcode(cbuf, Assembler::REX_WB);
2221       dstenc -= 8;
2222     }
2223     $$$emit8$primary;
2224     emit_rm(cbuf, 0x3, $secondary, dstenc);
2225     $$$emit8$shift$$constant;
2226   %}
2227 
2228   enc_class load_immI(rRegI dst, immI src)
2229   %{
2230     int dstenc = $dst$$reg;
2231     if (dstenc >= 8) {
2232       emit_opcode(cbuf, Assembler::REX_B);
2233       dstenc -= 8;
2234     }
2235     emit_opcode(cbuf, 0xB8 | dstenc);
2236     $$$emit32$src$$constant;
2237   %}
2238 
2239   enc_class load_immL(rRegL dst, immL src)
2240   %{
2241     int dstenc = $dst$$reg;
2242     if (dstenc < 8) {
2243       emit_opcode(cbuf, Assembler::REX_W);
2244     } else {
2245       emit_opcode(cbuf, Assembler::REX_WB);
2246       dstenc -= 8;
2247     }
2248     emit_opcode(cbuf, 0xB8 | dstenc);
2249     emit_d64(cbuf, $src$$constant);
2250   %}
2251 
2252   enc_class load_immUL32(rRegL dst, immUL32 src)
2253   %{
2254     // same as load_immI, but this time we care about zeroes in the high word
2255     int dstenc = $dst$$reg;
2256     if (dstenc >= 8) {
2257       emit_opcode(cbuf, Assembler::REX_B);
2258       dstenc -= 8;
2259     }
2260     emit_opcode(cbuf, 0xB8 | dstenc);
2261     $$$emit32$src$$constant;
2262   %}
2263 
2264   enc_class load_immL32(rRegL dst, immL32 src)
2265   %{
2266     int dstenc = $dst$$reg;
2267     if (dstenc < 8) {
2268       emit_opcode(cbuf, Assembler::REX_W);
2269     } else {
2270       emit_opcode(cbuf, Assembler::REX_WB);
2271       dstenc -= 8;
2272     }
2273     emit_opcode(cbuf, 0xC7);
2274     emit_rm(cbuf, 0x03, 0x00, dstenc);
2275     $$$emit32$src$$constant;
2276   %}
2277 
2278   enc_class load_immP31(rRegP dst, immP32 src)
2279   %{
2280     // same as load_immI, but this time we care about zeroes in the high word
2281     int dstenc = $dst$$reg;
2282     if (dstenc >= 8) {
2283       emit_opcode(cbuf, Assembler::REX_B);
2284       dstenc -= 8;
2285     }
2286     emit_opcode(cbuf, 0xB8 | dstenc);
2287     $$$emit32$src$$constant;
2288   %}
2289 
2290   enc_class load_immP(rRegP dst, immP src)
2291   %{
2292     int dstenc = $dst$$reg;
2293     if (dstenc < 8) {
2294       emit_opcode(cbuf, Assembler::REX_W);
2295     } else {
2296       emit_opcode(cbuf, Assembler::REX_WB);
2297       dstenc -= 8;
2298     }
2299     emit_opcode(cbuf, 0xB8 | dstenc);
2300     // This next line should be generated from ADLC
2301     if ($src->constant_reloc() != relocInfo::none) {
2302       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2303     } else {
2304       emit_d64(cbuf, $src$$constant);
2305     }
2306   %}
2307 
2308   enc_class Con32(immI src)
2309   %{
2310     // Output immediate
2311     $$$emit32$src$$constant;
2312   %}
2313 
2314   enc_class Con32F_as_bits(immF src)
2315   %{
2316     // Output Float immediate bits
2317     jfloat jf = $src$$constant;
2318     jint jf_as_bits = jint_cast(jf);
2319     emit_d32(cbuf, jf_as_bits);
2320   %}
2321 
2322   enc_class Con16(immI src)
2323   %{
2324     // Output immediate
2325     $$$emit16$src$$constant;
2326   %}
2327 
2328   // How is this different from Con32??? XXX
2329   enc_class Con_d32(immI src)
2330   %{
2331     emit_d32(cbuf,$src$$constant);
2332   %}
2333 
2334   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2335     // Output immediate memory reference
2336     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2337     emit_d32(cbuf, 0x00);
2338   %}
2339 
2340   enc_class lock_prefix()
2341   %{
2342     emit_opcode(cbuf, 0xF0); // lock
2343   %}
2344 
2345   enc_class REX_mem(memory mem)
2346   %{
2347     if ($mem$$base >= 8) {
2348       if ($mem$$index < 8) {
2349         emit_opcode(cbuf, Assembler::REX_B);
2350       } else {
2351         emit_opcode(cbuf, Assembler::REX_XB);
2352       }
2353     } else {
2354       if ($mem$$index >= 8) {
2355         emit_opcode(cbuf, Assembler::REX_X);
2356       }
2357     }
2358   %}
2359 
2360   enc_class REX_mem_wide(memory mem)
2361   %{
2362     if ($mem$$base >= 8) {
2363       if ($mem$$index < 8) {
2364         emit_opcode(cbuf, Assembler::REX_WB);
2365       } else {
2366         emit_opcode(cbuf, Assembler::REX_WXB);
2367       }
2368     } else {
2369       if ($mem$$index < 8) {
2370         emit_opcode(cbuf, Assembler::REX_W);
2371       } else {
2372         emit_opcode(cbuf, Assembler::REX_WX);
2373       }
2374     }
2375   %}
2376 
2377   // for byte regs
2378   enc_class REX_breg(rRegI reg)
2379   %{
2380     if ($reg$$reg >= 4) {
2381       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2382     }
2383   %}
2384 
2385   // for byte regs
2386   enc_class REX_reg_breg(rRegI dst, rRegI src)
2387   %{
2388     if ($dst$$reg < 8) {
2389       if ($src$$reg >= 4) {
2390         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2391       }
2392     } else {
2393       if ($src$$reg < 8) {
2394         emit_opcode(cbuf, Assembler::REX_R);
2395       } else {
2396         emit_opcode(cbuf, Assembler::REX_RB);
2397       }
2398     }
2399   %}
2400 
2401   // for byte regs
2402   enc_class REX_breg_mem(rRegI reg, memory mem)
2403   %{
2404     if ($reg$$reg < 8) {
2405       if ($mem$$base < 8) {
2406         if ($mem$$index >= 8) {
2407           emit_opcode(cbuf, Assembler::REX_X);
2408         } else if ($reg$$reg >= 4) {
2409           emit_opcode(cbuf, Assembler::REX);
2410         }
2411       } else {
2412         if ($mem$$index < 8) {
2413           emit_opcode(cbuf, Assembler::REX_B);
2414         } else {
2415           emit_opcode(cbuf, Assembler::REX_XB);
2416         }
2417       }
2418     } else {
2419       if ($mem$$base < 8) {
2420         if ($mem$$index < 8) {
2421           emit_opcode(cbuf, Assembler::REX_R);
2422         } else {
2423           emit_opcode(cbuf, Assembler::REX_RX);
2424         }
2425       } else {
2426         if ($mem$$index < 8) {
2427           emit_opcode(cbuf, Assembler::REX_RB);
2428         } else {
2429           emit_opcode(cbuf, Assembler::REX_RXB);
2430         }
2431       }
2432     }
2433   %}
2434 
2435   enc_class REX_reg(rRegI reg)
2436   %{
2437     if ($reg$$reg >= 8) {
2438       emit_opcode(cbuf, Assembler::REX_B);
2439     }
2440   %}
2441 
2442   enc_class REX_reg_wide(rRegI reg)
2443   %{
2444     if ($reg$$reg < 8) {
2445       emit_opcode(cbuf, Assembler::REX_W);
2446     } else {
2447       emit_opcode(cbuf, Assembler::REX_WB);
2448     }
2449   %}
2450 
2451   enc_class REX_reg_reg(rRegI dst, rRegI src)
2452   %{
2453     if ($dst$$reg < 8) {
2454       if ($src$$reg >= 8) {
2455         emit_opcode(cbuf, Assembler::REX_B);
2456       }
2457     } else {
2458       if ($src$$reg < 8) {
2459         emit_opcode(cbuf, Assembler::REX_R);
2460       } else {
2461         emit_opcode(cbuf, Assembler::REX_RB);
2462       }
2463     }
2464   %}
2465 
2466   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2467   %{
2468     if ($dst$$reg < 8) {
2469       if ($src$$reg < 8) {
2470         emit_opcode(cbuf, Assembler::REX_W);
2471       } else {
2472         emit_opcode(cbuf, Assembler::REX_WB);
2473       }
2474     } else {
2475       if ($src$$reg < 8) {
2476         emit_opcode(cbuf, Assembler::REX_WR);
2477       } else {
2478         emit_opcode(cbuf, Assembler::REX_WRB);
2479       }
2480     }
2481   %}
2482 
2483   enc_class REX_reg_mem(rRegI reg, memory mem)
2484   %{
2485     if ($reg$$reg < 8) {
2486       if ($mem$$base < 8) {
2487         if ($mem$$index >= 8) {
2488           emit_opcode(cbuf, Assembler::REX_X);
2489         }
2490       } else {
2491         if ($mem$$index < 8) {
2492           emit_opcode(cbuf, Assembler::REX_B);
2493         } else {
2494           emit_opcode(cbuf, Assembler::REX_XB);
2495         }
2496       }
2497     } else {
2498       if ($mem$$base < 8) {
2499         if ($mem$$index < 8) {
2500           emit_opcode(cbuf, Assembler::REX_R);
2501         } else {
2502           emit_opcode(cbuf, Assembler::REX_RX);
2503         }
2504       } else {
2505         if ($mem$$index < 8) {
2506           emit_opcode(cbuf, Assembler::REX_RB);
2507         } else {
2508           emit_opcode(cbuf, Assembler::REX_RXB);
2509         }
2510       }
2511     }
2512   %}
2513 
2514   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2515   %{
2516     if ($reg$$reg < 8) {
2517       if ($mem$$base < 8) {
2518         if ($mem$$index < 8) {
2519           emit_opcode(cbuf, Assembler::REX_W);
2520         } else {
2521           emit_opcode(cbuf, Assembler::REX_WX);
2522         }
2523       } else {
2524         if ($mem$$index < 8) {
2525           emit_opcode(cbuf, Assembler::REX_WB);
2526         } else {
2527           emit_opcode(cbuf, Assembler::REX_WXB);
2528         }
2529       }
2530     } else {
2531       if ($mem$$base < 8) {
2532         if ($mem$$index < 8) {
2533           emit_opcode(cbuf, Assembler::REX_WR);
2534         } else {
2535           emit_opcode(cbuf, Assembler::REX_WRX);
2536         }
2537       } else {
2538         if ($mem$$index < 8) {
2539           emit_opcode(cbuf, Assembler::REX_WRB);
2540         } else {
2541           emit_opcode(cbuf, Assembler::REX_WRXB);
2542         }
2543       }
2544     }
2545   %}
2546 
2547   enc_class reg_mem(rRegI ereg, memory mem)
2548   %{
2549     // High registers handle in encode_RegMem
2550     int reg = $ereg$$reg;
2551     int base = $mem$$base;
2552     int index = $mem$$index;
2553     int scale = $mem$$scale;
2554     int disp = $mem$$disp;
2555     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2556 
2557     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2558   %}
2559 
2560   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2561   %{
2562     int rm_byte_opcode = $rm_opcode$$constant;
2563 
2564     // High registers handle in encode_RegMem
2565     int base = $mem$$base;
2566     int index = $mem$$index;
2567     int scale = $mem$$scale;
2568     int displace = $mem$$disp;
2569 
2570     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2571                                             // working with static
2572                                             // globals
2573     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2574                   disp_reloc);
2575   %}
2576 
2577   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2578   %{
2579     int reg_encoding = $dst$$reg;
2580     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2581     int index        = 0x04;            // 0x04 indicates no index
2582     int scale        = 0x00;            // 0x00 indicates no scale
2583     int displace     = $src1$$constant; // 0x00 indicates no displacement
2584     relocInfo::relocType disp_reloc = relocInfo::none;
2585     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2586                   disp_reloc);
2587   %}
2588 
2589   enc_class neg_reg(rRegI dst)
2590   %{
2591     int dstenc = $dst$$reg;
2592     if (dstenc >= 8) {
2593       emit_opcode(cbuf, Assembler::REX_B);
2594       dstenc -= 8;
2595     }
2596     // NEG $dst
2597     emit_opcode(cbuf, 0xF7);
2598     emit_rm(cbuf, 0x3, 0x03, dstenc);
2599   %}
2600 
2601   enc_class neg_reg_wide(rRegI dst)
2602   %{
2603     int dstenc = $dst$$reg;
2604     if (dstenc < 8) {
2605       emit_opcode(cbuf, Assembler::REX_W);
2606     } else {
2607       emit_opcode(cbuf, Assembler::REX_WB);
2608       dstenc -= 8;
2609     }
2610     // NEG $dst
2611     emit_opcode(cbuf, 0xF7);
2612     emit_rm(cbuf, 0x3, 0x03, dstenc);
2613   %}
2614 
2615   enc_class setLT_reg(rRegI dst)
2616   %{
2617     int dstenc = $dst$$reg;
2618     if (dstenc >= 8) {
2619       emit_opcode(cbuf, Assembler::REX_B);
2620       dstenc -= 8;
2621     } else if (dstenc >= 4) {
2622       emit_opcode(cbuf, Assembler::REX);
2623     }
2624     // SETLT $dst
2625     emit_opcode(cbuf, 0x0F);
2626     emit_opcode(cbuf, 0x9C);
2627     emit_rm(cbuf, 0x3, 0x0, dstenc);
2628   %}
2629 
2630   enc_class setNZ_reg(rRegI dst)
2631   %{
2632     int dstenc = $dst$$reg;
2633     if (dstenc >= 8) {
2634       emit_opcode(cbuf, Assembler::REX_B);
2635       dstenc -= 8;
2636     } else if (dstenc >= 4) {
2637       emit_opcode(cbuf, Assembler::REX);
2638     }
2639     // SETNZ $dst
2640     emit_opcode(cbuf, 0x0F);
2641     emit_opcode(cbuf, 0x95);
2642     emit_rm(cbuf, 0x3, 0x0, dstenc);
2643   %}
2644 
2645 
  // Compare the longs and set -1, 0, or 1 into dst.
  // Emitted sequence: cmpq src1, src2; movl dst, -1; jl,s done;
  // setne dst; movzbl dst, dst.  The short-branch displacement is hard-coded,
  // so it must stay in sync with the number of bytes emitted after the jl.
  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  %{
    int src1enc = $src1$$reg;
    int src2enc = $src2$$reg;
    int dstenc = $dst$$reg;

    // cmpq $src1, $src2
    // REX.W is always emitted; R is added when src1 (ModRM reg field) is a
    // high register and B when src2 (ModRM r/m field) is a high register.
    if (src1enc < 8) {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
    emit_opcode(cbuf, 0x3B);
    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jl,s done
    // Displacement skips the setne and movzbl below: 3 bytes each, plus one
    // REX byte each when dstenc >= 4, i.e. 6 or 8 bytes in total.
    emit_opcode(cbuf, 0x7C);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Encodings 4-7 get a plain REX prefix, 8 and above get REX.B.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // dst appears in both ModRM fields, so a high register needs REX.R and REX.B.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2697 
2698   enc_class Push_ResultXD(regD dst) %{
2699     MacroAssembler _masm(&cbuf);
2700     __ fstp_d(Address(rsp, 0));
2701     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2702     __ addptr(rsp, 8);
2703   %}
2704 
2705   enc_class Push_SrcXD(regD src) %{
2706     MacroAssembler _masm(&cbuf);
2707     __ subptr(rsp, 8);
2708     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2709     __ fld_d(Address(rsp, 0));
2710   %}
2711 
2712 
2713   enc_class enc_rethrow()
2714   %{
2715     cbuf.set_insts_mark();
2716     emit_opcode(cbuf, 0xE9); // jmp entry
2717     emit_d32_reloc(cbuf,
2718                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
2719                    runtime_call_Relocation::spec(),
2720                    RELOC_DISP32);
2721   %}
2722 
2723 %}
2724 
2725 
2726 
2727 //----------FRAME--------------------------------------------------------------
2728 // Definition of frame structure and management information.
2729 //
2730 //  S T A C K   L A Y O U T    Allocators stack-slot number
2731 //                             |   (to get allocators register number
2732 //  G  Owned by    |        |  v    add OptoReg::stack0())
2733 //  r   CALLER     |        |
2734 //  o     |        +--------+      pad to even-align allocators stack-slot
2735 //  w     V        |  pad0  |        numbers; owned by CALLER
2736 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2737 //  h     ^        |   in   |  5
2738 //        |        |  args  |  4   Holes in incoming args owned by SELF
2739 //  |     |        |        |  3
2740 //  |     |        +--------+
2741 //  V     |        | old out|      Empty on Intel, window on Sparc
2742 //        |    old |preserve|      Must be even aligned.
2743 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2744 //        |        |   in   |  3   area for Intel ret address
2745 //     Owned by    |preserve|      Empty on Sparc.
2746 //       SELF      +--------+
2747 //        |        |  pad2  |  2   pad to align old SP
2748 //        |        +--------+  1
2749 //        |        | locks  |  0
2750 //        |        +--------+----> OptoReg::stack0(), even aligned
2751 //        |        |  pad1  | 11   pad to align new SP
2752 //        |        +--------+
2753 //        |        |        | 10
2754 //        |        | spills |  9   spills
2755 //        V        |        |  8   (pad0 slot for callee)
2756 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2757 //        ^        |  out   |  7
2758 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2759 //     Owned by    +--------+
2760 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2761 //        |    new |preserve|      Must be even-aligned.
2762 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2763 //        |        |        |
2764 //
2765 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2766 //         known from SELF's arguments and the Java calling convention.
2767 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
2775 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2776 //         even aligned with pad0 as needed.
2777 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2778 //         region 6-11 is even aligned; it may be padded out more so that
2779 //         the region from SP to FP meets the minimum stack alignment.
2780 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2781 //         alignment.  Region 11, pad1, may be dynamically extended so that
2782 //         SP meets the minimum alignment.
2783 
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // The value used here is 4 slots, plus 2 more when VerifyStackAtCalls is set.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  // Both tables are indexed by ideal_reg; lo[] gives the first half of the
  // returned register pair and hi[] the second half (OptoReg::Bad when the
  // value has no second half).  Integer and pointer kinds use RAX, float and
  // double use XMM0.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    // Guards the table size against changes to the set of machine leaf types.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
2884 
2885 //----------ATTRIBUTES---------------------------------------------------------
2886 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute; 0 is the default,
                             // operands below state their own op_cost(...)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
2902 
2903 //----------OPERANDS-----------------------------------------------------------
2904 // Operand definitions must precede instruction definitions for correct parsing
2905 // in the ADLC because operands constitute user defined types which are used in
2906 // instruction definitions.
2907 
2908 //----------Simple Operands----------------------------------------------------
2909 // Immediate Operands
// Integer Immediate
// Matches any ConI node, with no predicate on the value.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
2919 
// Constant for test vs zero
// Matches a ConI whose value is exactly 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2930 
// Constant for increment
// Matches a ConI whose value is exactly 1.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2941 
// Constant for decrement
// Matches a ConI whose value is exactly -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2952 
// Valid scale values for addressing modes
// Matches a ConI in the inclusive range [0, 3].
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
2962 
// Integer immediate in the signed 8-bit range [-0x80, 0x7F].
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2972 
// Integer immediate in the unsigned 8-bit range [0, 255].
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2982 
// Integer immediate in the signed 16-bit range [-32768, 32767].
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
2992 
// Int Immediate non-negative
// Matches a ConI whose value is >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3003 
// Constant for long shifts
// Matches a ConI whose value is exactly 32.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3014 
// Constant for long shifts
// Matches a ConI whose value is exactly 64.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3025 
// Pointer Immediate
// Matches any ConP node, with no predicate on the value.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3035 
// NULL Pointer Immediate
// Matches a ConP whose pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3046 
// Narrow Pointer Immediate
// Matches any ConN node, with no predicate on the value.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3055 
// Narrow Klass Immediate
// Matches any ConNKlass node, with no predicate on the value.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3063 
// NULL Narrow Pointer Immediate
// Matches a ConN whose narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3073 
// Pointer immediate that needs no relocation and whose value, shifted right
// by 31, is zero (no bits set at position 31 or above).
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3084 
3085 
// Long Immediate
// Matches any ConL node, with no predicate on the value.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
3095 
// Long Immediate 8-bit
// Matches a ConL in the signed 8-bit range [-0x80, 0x7F].
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3106 
// Long Immediate 32-bit unsigned
// Matches a ConL that is unchanged by a round trip through unsigned int.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3117 
// Long Immediate 32-bit signed
// Matches a ConL that is unchanged by a round trip through int.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3128 
// Long Immediate zero
// Matches a ConL whose value is exactly 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3139 
// Constant for increment
// Matches a ConL whose value is exactly 1.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3149 
// Constant for decrement
// Matches a ConL whose value is exactly -1.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3159 
// Long Immediate: the value 10
// Matches a ConL whose value is exactly 10.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3169 
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
// Matches a ConL in the inclusive range [0, 0x7F].
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3181 
// Long Immediate: low 32-bit mask
// Matches a ConL whose value is exactly 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3192 
// Float Immediate zero
// Matches a ConF for which jint_cast of the value is 0.
// NOTE(review): jint_cast presumably reinterprets the float's bits, which
// would make this match +0.0f only and not -0.0f -- confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3203 
// Float Immediate
// Matches any ConF node, with no predicate on the value.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3213 
// Double Immediate zero
// Matches a ConD for which jlong_cast of the value is 0.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, which
// would make this match +0.0 only and not -0.0 -- confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3224 
// Double Immediate
// Matches any ConD node, with no predicate on the value.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3234 
3235 // Immediates for special shifts (sign extend)
3236 
// Shift-count constant: matches a ConI whose value is exactly 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3246 
// Shift-count constant: matches a ConI whose value is exactly 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3255 
// Constant for byte-wide masking
// Matches a ConI whose value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3265 
// Constant for short-wide masking
// Matches a ConI whose value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3275 
// Constant for byte-wide masking
// Matches a ConL whose value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3285 
// Constant for short-wide masking
// Matches a ConL whose value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3295 
3296 // Register Operands
// Integer Register
// Allocated from register class int_reg.  Besides RegI, the match rules
// accept the single-register integer operands listed below.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3312 
// Special Registers
// Integer operand allocated from register class int_rax_reg; printed as "RAX".
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3323 
// Special Registers
// Integer operand allocated from register class int_rbx_reg; printed as "RBX".
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
3334 
// Integer operand allocated from register class int_rcx_reg; printed as "RCX".
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
3344 
// Integer operand allocated from register class int_rdx_reg; printed as "RDX".
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
3354 
// Integer operand allocated from register class int_rdi_reg; printed as "RDI".
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
3364 
// Integer operand allocated from register class int_no_rcx_reg.
// The match rules accept the RAX, RBX, RDX and RDI operands but not rcx_RegI.
operand no_rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  match(RegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3377 
// Integer operand allocated from register class int_no_rax_rdx_reg.
// The match rules accept the RBX, RCX and RDI operands but not the RAX or
// RDX ones.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3389 
// Pointer Register
// Allocated from register class any_reg.  Matches RegP, rRegP and each of
// the single-register pointer operands listed below.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3406 
// General pointer register operand, allocated from register class ptr_reg.
// Also matches the single-register pointer operands listed below.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}
3421 
// Narrow pointer register operand: matches RegN and is allocated from the
// same register class as rRegI (int_reg).
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
3429 
3430 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3431 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3432 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
3433 // The output of an instruction is controlled by the allocator, which respects
3434 // register class masks, not match rules.  Unless an instruction mentions
3435 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3436 // by the allocator as an input.
3437 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
3438 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
3439 // result, RBP is not included in the output of the instruction either.
3440 
// Pointer operand allocated from register class ptr_no_rax_reg.
// The match rules accept the RBX, RSI and RDI pointer operands.
operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3452 
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
// Allocated from register class ptr_reg_no_rbp; the match rules accept the
// RBX, RSI and RDI pointer operands.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3466 
// Pointer operand allocated from register class ptr_no_rax_rbx_reg.
// The match rules accept the RSI and RDI pointer operands.
operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3477 
// Special Registers
// Return a pointer value
// Pointer operand allocated from register class ptr_rax_reg.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3489 
// Special Registers
// Return a compressed pointer value
// Narrow pointer operand allocated from register class int_rax_reg.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
3501 
// Used in AtomicAdd
// Pointer operand allocated from register class ptr_rbx_reg.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3512 
// Pointer operand allocated from register class ptr_rsi_reg.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3522 
// Used in rep stosq
// Pointer operand allocated from register class ptr_rdi_reg.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3533 
// Pointer operand allocated from register class ptr_r15_reg.
// The Q&A comment above describes r15 as the read-only TLS register.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3543 
// General long register operand, allocated from register class long_reg.
// Also matches the rax_RegL and rdx_RegL operands.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3554 
// Special Registers
// Long operand allocated from register class long_no_rax_rdx_reg.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3565 
// Long operand that additionally matches rdx_RegL.
// NOTE(review): despite the name, the allocation class is
// long_no_rax_rdx_reg, the same one no_rax_rdx_RegL uses -- confirm that
// excluding RDX from allocation here is intended.
operand no_rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3576 
// Long operand allocated from register class long_no_rcx_reg.
operand no_rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3586 
// Long operand allocated from register class long_rax_reg; printed as "RAX".
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3596 
// Long operand allocated from register class long_rcx_reg.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3606 
// Long operand allocated from register class long_rdx_reg.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3616 
// Flags register, used as output of compare instructions
// Matches RegFlags in register class int_flags; printed as "RFLAGS".
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
3626 
// Flags register, used as output of FLOATING POINT compare instructions
// Same register class and match rule as rFlagsReg; printed as "RFLAGS_U".
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
3636 
// Flags register variant printed as "RFLAGS_U_CF".  Its predicate is the
// constant false, so this operand's own match rule can never be selected.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
3645 
// Float register operands
// Matches RegF; allocated from register class float_reg.
operand regF() %{
   constraint(ALLOC_IN_RC(float_reg));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
3654 
// Float register operands
// Matches RegF; allocated from register class float_reg_legacy.
operand legRegF() %{
   constraint(ALLOC_IN_RC(float_reg_legacy));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
3663 
// Float register operands
// Matches RegF; allocated from register class float_reg_vl.
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
3672 
// Double register operands
// Matches RegD; allocated from register class double_reg.
operand regD() %{
   constraint(ALLOC_IN_RC(double_reg));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
3681 
// Double register operands
// Matches RegD; allocated from register class double_reg_legacy.
operand legRegD() %{
   constraint(ALLOC_IN_RC(double_reg_legacy));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
3690 
// Double register operands
// Matches RegD; allocated from register class double_reg_vl.
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
3699 
// Vectors
// Matches VecS; allocated from register class vectors_reg_vlbwdq.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
3708 
// Vectors
// Matches VecS; allocated from register class vectors_reg_legacy.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
3717 
// Matches VecD; allocated from register class vectord_reg_vlbwdq.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
3725 
// Matches VecD; allocated from register class vectord_reg_legacy.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
3733 
// Matches VecX; allocated from register class vectorx_reg_vlbwdq.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
3741 
// Matches VecX; allocated from register class vectorx_reg_legacy.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
3749 
// Matches VecY; allocated from register class vectory_reg_vlbwdq.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
3757 
// Matches VecY; allocated from register class vectory_reg_legacy.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
3765 
3766 //----------Memory Operands----------------------------------------------------
3767 // Direct Memory Operand
3768 // operand direct(immP addr)
3769 // %{
3770 //   match(addr);
3771 
3772 //   format %{ "[$addr]" %}
3773 //   interface(MEMORY_INTER) %{
3774 //     base(0xFFFFFFFF);
3775 //     index(0x4);
3776 //     scale(0x0);
3777 //     disp($addr);
3778 //   %}
3779 // %}
3780 
// Indirect Memory Operand
// Address is [reg]: base register only, index 0x4 (the "no index" value),
// scale 0 and displacement 0.
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}
3795 
// Indirect Memory Plus Short Offset Operand
// Address is [reg + off] where off is an immL8 constant; index 0x4 is the
// "no index" value.
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
3810 
// Indirect Memory Plus Long Offset Operand
// Address is [reg + off] where off is an immL32 constant; index 0x4 is the
// "no index" value.  The frame block names this operand for cisc-spilling.
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
3825 
3826 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $lreg + $off], index unscaled
3827 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3828 %{
3829   constraint(ALLOC_IN_RC(ptr_reg));
3830   match(AddP (AddP reg lreg) off);
3831 
3832   op_cost(10);
3833   format %{"[$reg + $off + $lreg]" %}
3834   interface(MEMORY_INTER) %{
3835     base($reg);
3836     index($lreg);
3837     scale(0x0);  // index is not scaled
3838     disp($off);
3839   %}
3840 %}
3841 
3842 // Indirect Memory Plus Index Register Operand: [$reg + $lreg], no scale and no offset
3843 operand indIndex(any_RegP reg, rRegL lreg)
3844 %{
3845   constraint(ALLOC_IN_RC(ptr_reg));
3846   match(AddP reg lreg);
3847 
3848   op_cost(10);
3849   format %{"[$reg + $lreg]" %}
3850   interface(MEMORY_INTER) %{
3851     base($reg);
3852     index($lreg);
3853     scale(0x0);  // index is not scaled
3854     disp(0x0);   // no displacement
3855   %}
3856 %}
3857 
3858 // Indirect Memory Plus Scaled Index Register Operand: [$reg + ($lreg << $scale)]
3859 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3860 %{
3861   constraint(ALLOC_IN_RC(ptr_reg));
3862   match(AddP reg (LShiftL lreg scale));
3863 
3864   op_cost(10);
3865   format %{"[$reg + $lreg << $scale]" %}
3866   interface(MEMORY_INTER) %{
3867     base($reg);
3868     index($lreg);
3869     scale($scale);
3870     disp(0x0);   // no displacement
3871   %}
3872 %}
3873 
3874 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)  // [$reg + ($idx << $scale)] with an int index that went through ConvI2L
3875 %{
3876   constraint(ALLOC_IN_RC(ptr_reg));
3877   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // n->in(3)->in(1) is the ConvI2L; its long type must have a lower bound >= 0
3878   match(AddP reg (LShiftL (ConvI2L idx) scale));
3879 
3880   op_cost(10);
3881   format %{"[$reg + pos $idx << $scale]" %}
3882   interface(MEMORY_INTER) %{
3883     base($reg);
3884     index($idx);  // the int register itself is used as the index
3885     scale($scale);
3886     disp(0x0);    // no displacement
3887   %}
3888 %}
3889 
3890 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [$reg + ($lreg << $scale) + $off]
3891 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3892 %{
3893   constraint(ALLOC_IN_RC(ptr_reg));
3894   match(AddP (AddP reg (LShiftL lreg scale)) off);
3895 
3896   op_cost(10);
3897   format %{"[$reg + $off + $lreg << $scale]" %}
3898   interface(MEMORY_INTER) %{
3899     base($reg);
3900     index($lreg);
3901     scale($scale);
3902     disp($off);
3903   %}
3904 %}
3905 
3906 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [$reg + $idx + $off], int index via ConvI2L
3907 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
3908 %{
3909   constraint(ALLOC_IN_RC(ptr_reg));
3910   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // n->in(2)->in(3) is the ConvI2L; its long type must have a lower bound >= 0
3911   match(AddP (AddP reg (ConvI2L idx)) off);
3912 
3913   op_cost(10);
3914   format %{"[$reg + $off + $idx]" %}
3915   interface(MEMORY_INTER) %{
3916     base($reg);
3917     index($idx);  // the int register itself is used as the index
3918     scale(0x0);   // index is not scaled
3919     disp($off);
3920   %}
3921 %}
3922 
3923 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: [$reg + ($idx << $scale) + $off]
3924 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3925 %{
3926   constraint(ALLOC_IN_RC(ptr_reg));
3927   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // n->in(2)->in(3)->in(1) is the ConvI2L; its long type must have a lower bound >= 0
3928   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3929 
3930   op_cost(10);
3931   format %{"[$reg + $off + $idx << $scale]" %}
3932   interface(MEMORY_INTER) %{
3933     base($reg);
3934     index($idx);  // the int register itself is used as the index
3935     scale($scale);
3936     disp($off);
3937   %}
3938 %}
3939 
3940 // Indirect Narrow Oop Operand: matches DecodeN directly as the address [R12 + $reg << 3]
3941 operand indCompressedOop(rRegN reg) %{
3942   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only with compressed oops and a times_8 shift
3943   constraint(ALLOC_IN_RC(ptr_reg));
3944   match(DecodeN reg);
3945 
3946   op_cost(10);
3947   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
3948   interface(MEMORY_INTER) %{
3949     base(0xc); // R12
3950     index($reg);
3951     scale(0x3);  // the "<< 3" shown in the format string
3952     disp(0x0);   // no displacement
3953   %}
3954 %}
3955 
3956 // Indirect Narrow Oop Plus Offset Operand: [R12 + $reg << 3 + $off]
3957 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
3958 // so we can't free r12 even with CompressedOops::base() == NULL.
3959 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3960   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only with compressed oops and a times_8 shift
3961   constraint(ALLOC_IN_RC(ptr_reg));
3962   match(AddP (DecodeN reg) off);
3963 
3964   op_cost(10);
3965   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3966   interface(MEMORY_INTER) %{
3967     base(0xc); // R12
3968     index($reg);
3969     scale(0x3);  // the "<< 3" shown in the format string
3970     disp($off);
3971   %}
3972 %}
3973 
3974 // Indirect Memory Operand, narrow variant: the DecodeN input $reg is the base when CompressedOops::shift() == 0
3975 operand indirectNarrow(rRegN reg)
3976 %{
3977   predicate(CompressedOops::shift() == 0);
3978   constraint(ALLOC_IN_RC(ptr_reg));
3979   match(DecodeN reg);
3980 
3981   format %{ "[$reg]" %}
3982   interface(MEMORY_INTER) %{
3983     base($reg);
3984     index(0x4);  // no index register
3985     scale(0x0);  // no scale
3986     disp(0x0);   // no displacement
3987   %}
3988 %}
3989 
3990 // Indirect Memory Plus Short Offset Operand, narrow variant (DecodeN base, CompressedOops::shift() == 0)
3991 operand indOffset8Narrow(rRegN reg, immL8 off)
3992 %{
3993   predicate(CompressedOops::shift() == 0);
3994   constraint(ALLOC_IN_RC(ptr_reg));
3995   match(AddP (DecodeN reg) off);
3996 
3997   format %{ "[$reg + $off (8-bit)]" %}
3998   interface(MEMORY_INTER) %{
3999     base($reg);
4000     index(0x4);  // no index register
4001     scale(0x0);  // no scale
4002     disp($off);
4003   %}
4004 %}
4005 
4006 // Indirect Memory Plus Long Offset Operand, narrow variant (DecodeN base, CompressedOops::shift() == 0)
4007 operand indOffset32Narrow(rRegN reg, immL32 off)
4008 %{
4009   predicate(CompressedOops::shift() == 0);
4010   constraint(ALLOC_IN_RC(ptr_reg));
4011   match(AddP (DecodeN reg) off);
4012 
4013   format %{ "[$reg + $off (32-bit)]" %}
4014   interface(MEMORY_INTER) %{
4015     base($reg);
4016     index(0x4);  // no index register
4017     scale(0x0);  // no scale
4018     disp($off);
4019   %}
4020 %}
4021 
4022 // Indirect Memory Plus Index Register Plus Offset Operand, narrow variant (DecodeN base, CompressedOops::shift() == 0)
4023 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4024 %{
4025   predicate(CompressedOops::shift() == 0);
4026   constraint(ALLOC_IN_RC(ptr_reg));
4027   match(AddP (AddP (DecodeN reg) lreg) off);
4028 
4029   op_cost(10);
4030   format %{"[$reg + $off + $lreg]" %}
4031   interface(MEMORY_INTER) %{
4032     base($reg);
4033     index($lreg);
4034     scale(0x0);  // index is not scaled
4035     disp($off);
4036   %}
4037 %}
4038 
4039 // Indirect Memory Plus Index Register Operand, narrow variant: [$reg + $lreg], no scale and no offset
4040 operand indIndexNarrow(rRegN reg, rRegL lreg)
4041 %{
4042   predicate(CompressedOops::shift() == 0);
4043   constraint(ALLOC_IN_RC(ptr_reg));
4044   match(AddP (DecodeN reg) lreg);
4045 
4046   op_cost(10);
4047   format %{"[$reg + $lreg]" %}
4048   interface(MEMORY_INTER) %{
4049     base($reg);
4050     index($lreg);
4051     scale(0x0);  // index is not scaled
4052     disp(0x0);   // no displacement
4053   %}
4054 %}
4055 
4056 // Indirect Memory Plus Scaled Index Register Operand, narrow variant: [$reg + ($lreg << $scale)]
4057 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4058 %{
4059   predicate(CompressedOops::shift() == 0);
4060   constraint(ALLOC_IN_RC(ptr_reg));
4061   match(AddP (DecodeN reg) (LShiftL lreg scale));
4062 
4063   op_cost(10);
4064   format %{"[$reg + $lreg << $scale]" %}
4065   interface(MEMORY_INTER) %{
4066     base($reg);
4067     index($lreg);
4068     scale($scale);
4069     disp(0x0);   // no displacement
4070   %}
4071 %}
4072 
4073 // Indirect Memory Plus Scaled Index Register Plus Offset Operand, narrow variant: [$reg + ($lreg << $scale) + $off]
4074 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4075 %{
4076   predicate(CompressedOops::shift() == 0);
4077   constraint(ALLOC_IN_RC(ptr_reg));
4078   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4079 
4080   op_cost(10);
4081   format %{"[$reg + $off + $lreg << $scale]" %}
4082   interface(MEMORY_INTER) %{
4083     base($reg);
4084     index($lreg);
4085     scale($scale);
4086     disp($off);
4087   %}
4088 %}
4089 
4090 // Indirect Memory Plus Positive Index Register Plus Offset Operand, narrow variant: [$reg + $idx + $off]
4091 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
4092 %{
4093   constraint(ALLOC_IN_RC(ptr_reg));
4094   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // zero shift, and the ConvI2L's long type has a lower bound >= 0
4095   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
4096 
4097   op_cost(10);
4098   format %{"[$reg + $off + $idx]" %}
4099   interface(MEMORY_INTER) %{
4100     base($reg);
4101     index($idx);  // the int register itself is used as the index
4102     scale(0x0);   // index is not scaled
4103     disp($off);
4104   %}
4105 %}
4106 
4107 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow variant
4108 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4109 %{
4110   constraint(ALLOC_IN_RC(ptr_reg));
4111   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero shift, and the ConvI2L's long type has a lower bound >= 0
4112   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4113 
4114   op_cost(10);
4115   format %{"[$reg + $off + $idx << $scale]" %}
4116   interface(MEMORY_INTER) %{
4117     base($reg);
4118     index($idx);  // the int register itself is used as the index
4119     scale($scale);
4120     disp($off);
4121   %}
4122 %}
4123 
4124 //----------Special Memory Operands--------------------------------------------
4125 // Stack Slot Operand - This operand is used for loading and storing temporary
4126 //                      values on the stack where a match requires a value to
4127 //                      flow through memory.
4128 operand stackSlotP(sRegP reg)  // RSP-relative memory operand whose displacement is the sRegP stack offset
4129 %{
4130   constraint(ALLOC_IN_RC(stack_slots));
4131   // No match rule because this operand is only generated in matching
4132 
4133   format %{ "[$reg]" %}
4134   interface(MEMORY_INTER) %{
4135     base(0x4);   // RSP
4136     index(0x4);  // No Index
4137     scale(0x0);  // No Scale
4138     disp($reg);  // Stack Offset
4139   %}
4140 %}
4141 
4142 operand stackSlotI(sRegI reg)  // RSP-relative memory operand whose displacement is the sRegI stack offset
4143 %{
4144   constraint(ALLOC_IN_RC(stack_slots));
4145   // No match rule because this operand is only generated in matching
4146 
4147   format %{ "[$reg]" %}
4148   interface(MEMORY_INTER) %{
4149     base(0x4);   // RSP
4150     index(0x4);  // No Index
4151     scale(0x0);  // No Scale
4152     disp($reg);  // Stack Offset
4153   %}
4154 %}
4155 
4156 operand stackSlotF(sRegF reg)  // RSP-relative memory operand whose displacement is the sRegF stack offset
4157 %{
4158   constraint(ALLOC_IN_RC(stack_slots));
4159   // No match rule because this operand is only generated in matching
4160 
4161   format %{ "[$reg]" %}
4162   interface(MEMORY_INTER) %{
4163     base(0x4);   // RSP
4164     index(0x4);  // No Index
4165     scale(0x0);  // No Scale
4166     disp($reg);  // Stack Offset
4167   %}
4168 %}
4169 
4170 operand stackSlotD(sRegD reg)  // RSP-relative memory operand whose displacement is the sRegD stack offset
4171 %{
4172   constraint(ALLOC_IN_RC(stack_slots));
4173   // No match rule because this operand is only generated in matching
4174 
4175   format %{ "[$reg]" %}
4176   interface(MEMORY_INTER) %{
4177     base(0x4);   // RSP
4178     index(0x4);  // No Index
4179     scale(0x0);  // No Scale
4180     disp($reg);  // Stack Offset
4181   %}
4182 %}
4183 operand stackSlotL(sRegL reg)  // RSP-relative memory operand whose displacement is the sRegL stack offset
4184 %{
4185   constraint(ALLOC_IN_RC(stack_slots));
4186   // No match rule because this operand is only generated in matching
4187 
4188   format %{ "[$reg]" %}
4189   interface(MEMORY_INTER) %{
4190     base(0x4);   // RSP
4191     index(0x4);  // No Index
4192     scale(0x0);  // No Scale
4193     disp($reg);  // Stack Offset
4194   %}
4195 %}
4196 
4197 //----------Conditional Branch Operands----------------------------------------
4198 // Comparison Op  - This is the operation of the comparison, and is limited to
4199 //                  the following set of codes:
4200 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4201 //
4202 // Other attributes of the comparison, such as unsignedness, are specified
4203 // by the comparison instruction that sets a condition code flags register.
4204 // That result is represented by a flags operand whose subtype is appropriate
4205 // to the unsignedness (etc.) of the comparison.
4206 //
4207 // Later, the instruction which matches both the Comparison Op (a Bool) and
4208 // the flags (produced by the Cmp) specifies the coding of the comparison op
4209 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4210 
4211 // Comparison Code: pairs each Bool test with a condition encoding and a mnemonic suffix
4212 operand cmpOp()
4213 %{
4214   match(Bool);
4215 
4216   format %{ "" %}
4217   interface(COND_INTER) %{
4218     equal(0x4, "e");
4219     not_equal(0x5, "ne");
4220     less(0xC, "l");  // ordering tests use the l/ge/le/g suffixes here (cmpOpU uses b/nb/be/nbe)
4221     greater_equal(0xD, "ge");
4222     less_equal(0xE, "le");
4223     greater(0xF, "g");
4224     overflow(0x0, "o");
4225     no_overflow(0x1, "no");
4226   %}
4227 %}
4228 
4229 // Comparison Code, unsigned compare.  Used by FP also, with
4230 // C2 (unordered) turned into GT or LT already.  The other bits
4231 // C0 and C3 are turned into Carry & Zero flags.
4232 operand cmpOpU()
4233 %{
4234   match(Bool);
4235 
4236   format %{ "" %}
4237   interface(COND_INTER) %{
4238     equal(0x4, "e");
4239     not_equal(0x5, "ne");
4240     less(0x2, "b");  // ordering tests use the b/nb/be/nbe suffixes here (cmpOp uses l/ge/le/g)
4241     greater_equal(0x3, "nb");
4242     less_equal(0x6, "be");
4243     greater(0x7, "nbe");
4244     overflow(0x0, "o");
4245     no_overflow(0x1, "no");
4246   %}
4247 %}
4248 
4249 
4250 // Floating comparisons that don't require any fixup for the unordered case
4251 operand cmpOpUCF() %{
4252   match(Bool);
4253   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4254             n->as_Bool()->_test._test == BoolTest::ge ||
4255             n->as_Bool()->_test._test == BoolTest::le ||
4256             n->as_Bool()->_test._test == BoolTest::gt);  // accepts only the lt, ge, le and gt tests
4257   format %{ "" %}
4258   interface(COND_INTER) %{  // same encodings and suffixes as cmpOpU
4259     equal(0x4, "e");
4260     not_equal(0x5, "ne");
4261     less(0x2, "b");
4262     greater_equal(0x3, "nb");
4263     less_equal(0x6, "be");
4264     greater(0x7, "nbe");
4265     overflow(0x0, "o");
4266     no_overflow(0x1, "no");
4267   %}
4268 %}
4269 
4270 
4271 // Floating comparisons that can be fixed up with extra conditional jumps
4272 operand cmpOpUCF2() %{
4273   match(Bool);
4274   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4275             n->as_Bool()->_test._test == BoolTest::eq);  // accepts only the ne and eq tests
4276   format %{ "" %}
4277   interface(COND_INTER) %{  // same encodings and suffixes as cmpOpU
4278     equal(0x4, "e");
4279     not_equal(0x5, "ne");
4280     less(0x2, "b");
4281     greater_equal(0x3, "nb");
4282     less_equal(0x6, "be");
4283     greater(0x7, "nbe");
4284     overflow(0x0, "o");
4285     no_overflow(0x1, "no");
4286   %}
4287 %}
4288 
4289 // Operands for bound floating point register arguments: each rxmmN matches VecX and is allocated only from class xmmN_reg
4290 operand rxmm0() %{
4291   constraint(ALLOC_IN_RC(xmm0_reg));
4292   match(VecX);
4293   format%{%}
4294   interface(REG_INTER);
4295 %}
4296 operand rxmm1() %{
4297   constraint(ALLOC_IN_RC(xmm1_reg));
4298   match(VecX);
4299   format%{%}
4300   interface(REG_INTER);
4301 %}
4302 operand rxmm2() %{
4303   constraint(ALLOC_IN_RC(xmm2_reg));
4304   match(VecX);
4305   format%{%}
4306   interface(REG_INTER);
4307 %}
4308 operand rxmm3() %{
4309   constraint(ALLOC_IN_RC(xmm3_reg));
4310   match(VecX);
4311   format%{%}
4312   interface(REG_INTER);
4313 %}
4314 operand rxmm4() %{
4315   constraint(ALLOC_IN_RC(xmm4_reg));
4316   match(VecX);
4317   format%{%}
4318   interface(REG_INTER);
4319 %}
4320 operand rxmm5() %{
4321   constraint(ALLOC_IN_RC(xmm5_reg));
4322   match(VecX);
4323   format%{%}
4324   interface(REG_INTER);
4325 %}
4326 operand rxmm6() %{
4327   constraint(ALLOC_IN_RC(xmm6_reg));
4328   match(VecX);
4329   format%{%}
4330   interface(REG_INTER);
4331 %}
4332 operand rxmm7() %{
4333   constraint(ALLOC_IN_RC(xmm7_reg));
4334   match(VecX);
4335   format%{%}
4336   interface(REG_INTER);
4337 %}
4338 operand rxmm8() %{
4339   constraint(ALLOC_IN_RC(xmm8_reg));
4340   match(VecX);
4341   format%{%}
4342   interface(REG_INTER);
4343 %}
4344 operand rxmm9() %{
4345   constraint(ALLOC_IN_RC(xmm9_reg));
4346   match(VecX);
4347   format%{%}
4348   interface(REG_INTER);
4349 %}
4350 operand rxmm10() %{
4351   constraint(ALLOC_IN_RC(xmm10_reg));
4352   match(VecX);
4353   format%{%}
4354   interface(REG_INTER);
4355 %}
4356 operand rxmm11() %{
4357   constraint(ALLOC_IN_RC(xmm11_reg));
4358   match(VecX);
4359   format%{%}
4360   interface(REG_INTER);
4361 %}
4362 operand rxmm12() %{
4363   constraint(ALLOC_IN_RC(xmm12_reg));
4364   match(VecX);
4365   format%{%}
4366   interface(REG_INTER);
4367 %}
4368 operand rxmm13() %{
4369   constraint(ALLOC_IN_RC(xmm13_reg));
4370   match(VecX);
4371   format%{%}
4372   interface(REG_INTER);
4373 %}
4374 operand rxmm14() %{
4375   constraint(ALLOC_IN_RC(xmm14_reg));
4376   match(VecX);
4377   format%{%}
4378   interface(REG_INTER);
4379 %}
4380 operand rxmm15() %{
4381   constraint(ALLOC_IN_RC(xmm15_reg));
4382   match(VecX);
4383   format%{%}
4384   interface(REG_INTER);
4385 %}
4386 operand rxmm16() %{
4387   constraint(ALLOC_IN_RC(xmm16_reg));
4388   match(VecX);
4389   format%{%}
4390   interface(REG_INTER);
4391 %}
4392 operand rxmm17() %{
4393   constraint(ALLOC_IN_RC(xmm17_reg));
4394   match(VecX);
4395   format%{%}
4396   interface(REG_INTER);
4397 %}
4398 operand rxmm18() %{
4399   constraint(ALLOC_IN_RC(xmm18_reg));
4400   match(VecX);
4401   format%{%}
4402   interface(REG_INTER);
4403 %}
4404 operand rxmm19() %{
4405   constraint(ALLOC_IN_RC(xmm19_reg));
4406   match(VecX);
4407   format%{%}
4408   interface(REG_INTER);
4409 %}
4410 operand rxmm20() %{
4411   constraint(ALLOC_IN_RC(xmm20_reg));
4412   match(VecX);
4413   format%{%}
4414   interface(REG_INTER);
4415 %}
4416 operand rxmm21() %{
4417   constraint(ALLOC_IN_RC(xmm21_reg));
4418   match(VecX);
4419   format%{%}
4420   interface(REG_INTER);
4421 %}
4422 operand rxmm22() %{
4423   constraint(ALLOC_IN_RC(xmm22_reg));
4424   match(VecX);
4425   format%{%}
4426   interface(REG_INTER);
4427 %}
4428 operand rxmm23() %{
4429   constraint(ALLOC_IN_RC(xmm23_reg));
4430   match(VecX);
4431   format%{%}
4432   interface(REG_INTER);
4433 %}
4434 operand rxmm24() %{
4435   constraint(ALLOC_IN_RC(xmm24_reg));
4436   match(VecX);
4437   format%{%}
4438   interface(REG_INTER);
4439 %}
4440 operand rxmm25() %{
4441   constraint(ALLOC_IN_RC(xmm25_reg));
4442   match(VecX);
4443   format%{%}
4444   interface(REG_INTER);
4445 %}
4446 operand rxmm26() %{
4447   constraint(ALLOC_IN_RC(xmm26_reg));
4448   match(VecX);
4449   format%{%}
4450   interface(REG_INTER);
4451 %}
4452 operand rxmm27() %{
4453   constraint(ALLOC_IN_RC(xmm27_reg));
4454   match(VecX);
4455   format%{%}
4456   interface(REG_INTER);
4457 %}
4458 operand rxmm28() %{
4459   constraint(ALLOC_IN_RC(xmm28_reg));
4460   match(VecX);
4461   format%{%}
4462   interface(REG_INTER);
4463 %}
4464 operand rxmm29() %{
4465   constraint(ALLOC_IN_RC(xmm29_reg));
4466   match(VecX);
4467   format%{%}
4468   interface(REG_INTER);
4469 %}
4470 operand rxmm30() %{
4471   constraint(ALLOC_IN_RC(xmm30_reg));
4472   match(VecX);
4473   format%{%}
4474   interface(REG_INTER);
4475 %}
4476 operand rxmm31() %{
4477   constraint(ALLOC_IN_RC(xmm31_reg));
4478   match(VecX);
4479   format%{%}
4480   interface(REG_INTER);
4481 %}
4482 
4483 //----------OPERAND CLASSES----------------------------------------------------
4484 // Operand Classes are groups of operands that are used to simplify
4485 // instruction definitions by not requiring the AD writer to specify separate
4486 // instructions for every form of operand when the instruction accepts
4487 // multiple operand types with the same basic encoding and format.  The classic
4488 // case of this is memory operands.
4489 // "memory" groups the indirect operands (plain, compressed-oop and narrow forms); the stackSlot* operands are not in it.
4490 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4491                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4492                indCompressedOop, indCompressedOopOffset,
4493                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4494                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4495                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4496 
4497 //----------PIPELINE-----------------------------------------------------------
4498 // Rules which define the behavior of the target architectures pipeline.
4499 pipeline %{
4500 
4501 //----------ATTRIBUTES---------------------------------------------------------
4502 attributes %{
4503   variable_size_instructions;        // Instructions vary in size
4504   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4505   instruction_unit_size = 1;         // Instruction size unit is 1 byte
4506   instruction_fetch_unit_size = 16;  // The processor fetches one line
4507   instruction_fetch_units = 1;       // of 16 bytes
4508 
4509   // List of nop instructions
4510   nops( MachNop );
4511 %}
4512 
4513 //----------RESOURCES----------------------------------------------------------
4514 // Resources are the functional units available to the machine
4515 
4516 // Generic P2/P3 pipeline
4517 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4518 // 3 instructions decoded per cycle.
4519 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4520 // 3 ALU ops, only ALU0 handles mul instructions.
4521 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4522            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // NOTE(review): three MEM units declared, comment above says 2 load/store ops - confirm
4523            BR, FPU,
4524            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4525 
4526 //----------PIPELINE DESCRIPTION-----------------------------------------------
4527 // Pipeline Description specifies the stages in the machine's pipeline
4528 
4529 // Generic P2/P3 pipeline: six stages, named S0 through S5
4530 pipe_desc(S0, S1, S2, S3, S4, S5);
4531 
4532 //----------PIPELINE CLASSES---------------------------------------------------
4533 // Pipeline Classes describe the stages in which input and output are
4534 // referenced by the hardware pipeline.
4535 
4536 // Naming convention: ialu or fpu
4537 // Then: _reg
4538 // Then: _reg if there is a 2nd register
4539 // Then: _long if it's a pair of instructions implementing a long
4540 // Then: _fat if it requires the big decoder
4541 //   Or: _mem if it requires the big decoder and a memory unit.
4542 
4543 // Integer ALU reg operation: dst is read in S3 and written in S4
4544 pipe_class ialu_reg(rRegI dst)
4545 %{
4546     single_instruction;
4547     dst    : S4(write);
4548     dst    : S3(read);
4549     DECODE : S0;        // any decoder
4550     ALU    : S3;        // any alu
4551 %}
4552 
4553 // Long ALU reg operation: counted as 2 instructions
4554 pipe_class ialu_reg_long(rRegL dst)
4555 %{
4556     instruction_count(2);
4557     dst    : S4(write);
4558     dst    : S3(read);
4559     DECODE : S0(2);     // any 2 decoders
4560     ALU    : S3(2);     // any 2 alus
4561 %}
4562 
4563 // Integer ALU reg operation using big decoder (D0)
4564 pipe_class ialu_reg_fat(rRegI dst)
4565 %{
4566     single_instruction;
4567     dst    : S4(write);
4568     dst    : S3(read);
4569     D0     : S0;        // big decoder only
4570     ALU    : S3;        // any alu
4571 %}
4572 
4573 // Long ALU reg operation using big decoder (D0): counted as 2 instructions
4574 pipe_class ialu_reg_long_fat(rRegL dst)
4575 %{
4576     instruction_count(2);
4577     dst    : S4(write);
4578     dst    : S3(read);
4579     D0     : S0(2);     // big decoder only; twice
4580     ALU    : S3(2);     // any 2 alus
4581 %}
4582 
4583 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
4584 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4585 %{
4586     single_instruction;
4587     dst    : S4(write);
4588     src    : S3(read);
4589     DECODE : S0;        // any decoder
4590     ALU    : S3;        // any alu
4591 %}
4592 
4593 // Long ALU reg-reg operation: counted as 2 instructions
4594 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4595 %{
4596     instruction_count(2);
4597     dst    : S4(write);
4598     src    : S3(read);
4599     DECODE : S0(2);     // any 2 decoders
4600     ALU    : S3(2);     // any 2 alus
4601 %}
4602 
4603 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
4604 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4605 %{
4606     single_instruction;
4607     dst    : S4(write);
4608     src    : S3(read);
4609     D0     : S0;        // big decoder only
4610     ALU    : S3;        // any alu
4611 %}
4612 
4613 // Long ALU reg-reg operation using big decoder: counted as 2 instructions
4614 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4615 %{
4616     instruction_count(2);
4617     dst    : S4(write);
4618     src    : S3(read);
4619     D0     : S0(2);     // big decoder only; twice
4620     ALU    : S3(2);     // any 2 alus
4621 %}
4622 
4623 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
4624 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4625 %{
4626     single_instruction;
4627     dst    : S5(write);
4628     mem    : S3(read);
4629     D0     : S0;        // big decoder only
4630     ALU    : S4;        // any alu
4631     MEM    : S3;        // any mem
4632 %}
4633 
4634 // Integer mem operation (prefetch): uses a memory unit but no ALU
4635 pipe_class ialu_mem(memory mem)
4636 %{
4637     single_instruction;
4638     mem    : S3(read);
4639     D0     : S0;        // big decoder only
4640     MEM    : S3;        // any mem
4641 %}
4642 
4643 // Integer Store to Memory: address operand read in S3, source register read in S5
4644 pipe_class ialu_mem_reg(memory mem, rRegI src)
4645 %{
4646     single_instruction;
4647     mem    : S3(read);
4648     src    : S5(read);
4649     D0     : S0;        // big decoder only
4650     ALU    : S4;        // any alu
4651     MEM    : S3;        // any mem
4652 %}
4653 
4654 // // Long Store to Memory
4655 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4656 // %{
4657 //     instruction_count(2);
4658 //     mem    : S3(read);
4659 //     src    : S5(read);
4660 //     D0     : S0(2);          // big decoder only; twice
4661 //     ALU    : S4(2);     // any 2 alus
4662 //     MEM    : S3(2);  // Both mems
4663 // %}
4664 
4665 // Integer Store to Memory, mem-imm form: only the memory operand is tracked
4666 pipe_class ialu_mem_imm(memory mem)
4667 %{
4668     single_instruction;
4669     mem    : S3(read);
4670     D0     : S0;        // big decoder only
4671     ALU    : S4;        // any alu
4672     MEM    : S3;        // any mem
4673 %}
4674 
4675 // Integer ALU0 reg-reg operation: restricted to the big decoder and to ALU0
4676 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4677 %{
4678     single_instruction;
4679     dst    : S4(write);
4680     src    : S3(read);
4681     D0     : S0;        // Big decoder only
4682     ALU0   : S3;        // only alu0
4683 %}
4684 
4685 // Integer ALU0 reg-mem operation: memory read in S3, ALU0 in S4, dst written in S5
4686 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4687 %{
4688     single_instruction;
4689     dst    : S5(write);
4690     mem    : S3(read);
4691     D0     : S0;        // big decoder only
4692     ALU0   : S4;        // ALU0 only
4693     MEM    : S3;        // any mem
4694 %}
4695 
4696 // Integer ALU reg-reg operation that writes the flags register cr
4697 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4698 %{
4699     single_instruction;
4700     cr     : S4(write);
4701     src1   : S3(read);
4702     src2   : S3(read);
4703     DECODE : S0;        // any decoder
4704     ALU    : S3;        // any alu
4705 %}
4706 
4707 // Integer ALU reg-imm operation that writes the flags register cr
4708 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4709 %{
4710     single_instruction;
4711     cr     : S4(write);
4712     src1   : S3(read);
4713     DECODE : S0;        // any decoder
4714     ALU    : S3;        // any alu
4715 %}
4716 
4717 // Integer ALU reg-mem operation that writes the flags register cr
4718 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4719 %{
4720     single_instruction;
4721     cr     : S4(write);
4722     src1   : S3(read);
4723     src2   : S3(read);
4724     D0     : S0;        // big decoder only
4725     ALU    : S4;        // any alu
4726     MEM    : S3;        // any mem
4727 %}
4728 
4729 // Four-instruction sequence reading p, q and y (presumably the cmplt patterns - TODO confirm against users)
4730 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4731 %{
4732     instruction_count(4);
4733     y      : S4(read);
4734     q      : S3(read);
4735     p      : S3(read);
4736     DECODE : S0(4);     // 4 decoder slots, any decoder
4737 %}
4738 
4739 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4
4740 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4741 %{
4742     single_instruction;
4743     dst    : S4(write);
4744     src    : S3(read);
4745     cr     : S3(read);
4746     DECODE : S0;        // any decoder
4747 %}
4748 
4749 // Conditional move reg-mem: as reg-reg, plus a memory unit in S3 for the source
4750 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4751 %{
4752     single_instruction;
4753     dst    : S4(write);
4754     src    : S3(read);
4755     cr     : S3(read);
4756     DECODE : S0;        // any decoder
4757     MEM    : S3;        // any mem
4758 %}
4759 
4760 // Conditional move reg-reg long: declared single_instruction but reserves 2 decoders
4761 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4762 %{
4763     single_instruction;
4764     dst    : S4(write);
4765     src    : S3(read);
4766     cr     : S3(read);
4767     DECODE : S0(2);     // any 2 decoders
4768 %}
4769 
4770 // XXX
4771 // // Conditional move double reg-reg
4772 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4773 // %{
4774 //     single_instruction;
4775 //     dst    : S4(write);
4776 //     src    : S3(read);
4777 //     cr     : S3(read);
4778 //     DECODE : S0;     // any decoder
4779 // %}
4780 
4781 // Float reg operation: single register operand, read in S3; counted as 2 instructions
4782 pipe_class fpu_reg(regD dst)
4783 %{
4784     instruction_count(2);
4785     dst    : S3(read);
4786     DECODE : S0(2);     // any 2 decoders
4787     FPU    : S3;
4788 %}
4789 
4790 // Float reg-reg operation: counted as 2 instructions
4791 pipe_class fpu_reg_reg(regD dst, regD src)
4792 %{
4793     instruction_count(2);
4794     dst    : S4(write);
4795     src    : S3(read);
4796     DECODE : S0(2);     // any 2 decoders
4797     FPU    : S3;
4798 %}
4799 
4800 // Float reg-reg-reg operation: counted as 3 instructions
4801 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4802 %{
4803     instruction_count(3);
4804     dst    : S4(write);
4805     src1   : S3(read);
4806     src2   : S3(read);
4807     DECODE : S0(3);     // any 3 decoders
4808     FPU    : S3(2);
4809 %}
4810 
4811 // Float reg-reg-reg-reg operation: counted as 4 instructions
4812 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4813 %{
4814     instruction_count(4);
4815     dst    : S4(write);
4816     src1   : S3(read);
4817     src2   : S3(read);
4818     src3   : S3(read);
4819     DECODE : S0(4);     // 4 decoder slots, any decoder
4820     FPU    : S3(2);
4821 %}
4822 
4823 // Float reg-mem-reg-reg operation: counted as 4 instructions, one of them on the big decoder
4824 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4825 %{
4826     instruction_count(4);
4827     dst    : S4(write);
4828     src1   : S3(read);
4829     src2   : S3(read);
4830     src3   : S3(read);
4831     DECODE : S1(3);     // any 3 decoders
4832     D0     : S0;        // Big decoder only
4833     FPU    : S3(2);
4834     MEM    : S3;        // any mem
4835 %}
4836 
4837 // Float reg-mem operation: memory read in S3, FPU in S4, dst written in S5
4838 pipe_class fpu_reg_mem(regD dst, memory mem)
4839 %{
4840     instruction_count(2);
4841     dst    : S5(write);
4842     mem    : S3(read);
4843     D0     : S0;        // big decoder only
4844     DECODE : S1;        // any decoder for FPU POP
4845     FPU    : S4;
4846     MEM    : S3;        // any mem
4847 %}
4848 
4849 // Float reg-reg-mem operation: counted as 3 instructions
4850 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4851 %{
4852     instruction_count(3);
4853     dst    : S5(write);
4854     src1   : S3(read);
4855     mem    : S3(read);
4856     D0     : S0;        // big decoder only
4857     DECODE : S1(2);     // any decoder for FPU POP
4858     FPU    : S4;
4859     MEM    : S3;        // any mem
4860 %}
4861 
4862 // Float mem-reg operation: memory operand read in S3, source register read in S5
4863 pipe_class fpu_mem_reg(memory mem, regD src)
4864 %{
4865     instruction_count(2);
4866     src    : S5(read);
4867     mem    : S3(read);
4868     DECODE : S0;        // any decoder for FPU PUSH
4869     D0     : S1;        // big decoder only
4870     FPU    : S4;
4871     MEM    : S3;        // any mem
4872 %}
4873 
4874 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation: counted as 3 instructions
4875 %{
4876     instruction_count(3);
4877     src1   : S3(read);
4878     src2   : S3(read);
4879     mem    : S3(read);
4880     DECODE : S0(2);     // any decoder for FPU PUSH
4881     D0     : S1;        // big decoder only
4882     FPU    : S4;
4883     MEM    : S3;        // any mem
4884 %}
4885 
4886 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation: counted as 3 instructions, 2 memory uses
4887 %{
4888     instruction_count(3);
4889     src1   : S3(read);
4890     src2   : S3(read);
4891     mem    : S4(read);
4892     DECODE : S0;        // any decoder for FPU PUSH
4893     D0     : S0(2);     // big decoder only
4894     FPU    : S4;
4895     MEM    : S3(2);     // any 2 mems
4896 %}
4897 
4898 pipe_class fpu_mem_mem(memory dst, memory src1)
4899 %{
4900     instruction_count(2); // float class with two memory operands; no FPU resource is declared here
4901     src1   : S3(read);
4902     dst    : S4(read);  // NOTE(review): dst is declared as read, not write -- confirm this is intended
4903     D0     : S0(2);     // big decoder only
4904     MEM    : S3(2);     // any mem
4905 %}
4906 
4907 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4908 %{
4909     instruction_count(3); // float class with three memory operands
4910     src1   : S3(read);
4911     src2   : S3(read);
4912     dst    : S4(read);  // NOTE(review): dst is declared as read, not write -- confirm this is intended
4913     D0     : S0(3);     // big decoder only
4914     FPU    : S4;
4915     MEM    : S3(3);     // any mem
4916 %}
4917 
4918 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4919 %{
4920     instruction_count(3); // float class: memory operand plus one regD source; the name suggests a constant is also involved
4921     src1   : S4(read);
4922     mem    : S4(read);
4923     DECODE : S0;        // any decoder for FPU PUSH
4924     D0     : S0(2);     // big decoder only
4925     FPU    : S4;
4926     MEM    : S3(2);     // any mem
4927 %}
4928 
4929 // Float load constant: regD destination with no register or memory source operands
4930 pipe_class fpu_reg_con(regD dst)
4931 %{
4932     instruction_count(2);
4933     dst    : S5(write); // result operand
4934     D0     : S0;        // big decoder only for the load
4935     DECODE : S1;        // any decoder for FPU POP
4936     FPU    : S4;
4937     MEM    : S3;        // any mem
4938 %}
4939 
4940 // Float load constant combined with one regD source operand
4941 pipe_class fpu_reg_reg_con(regD dst, regD src)
4942 %{
4943     instruction_count(3);
4944     dst    : S5(write); // result operand
4945     src    : S3(read);
4946     D0     : S0;        // big decoder only for the load
4947     DECODE : S1(2);     // any decoder for FPU POP
4948     FPU    : S4;
4949     MEM    : S3;        // any mem
4950 %}
4951 
4952 // Unconditional branch: a single instruction that uses only the BR resource
4953 pipe_class pipe_jmp(label labl)
4954 %{
4955     single_instruction;
4956     BR   : S3;
4957 %}
4958 
4959 // Conditional branch: a single instruction that reads the flags operand before using BR
4960 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4961 %{
4962     single_instruction;
4963     cr    : S1(read);   // flags are the only operand given a stage
4964     BR    : S3;
4965 %}
4966 
4967 // Allocation idiom (cmpxchg): one serialized instruction modelled with a fixed latency
4968 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4969 %{
4970     instruction_count(1); force_serialization;
4971     fixed_latency(6);
4972     heap_ptr : S3(read);
4973     DECODE   : S0(3);
4974     D0       : S2;
4975     MEM      : S3;
4976     ALU      : S3(2);
4977     dst      : S5(write); // result operand
4978     BR       : S5;        // NOTE(review): a branch resource is reserved as well -- presumably for the retry path, confirm
4979 %}
4980 
4981 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serialized, fixed latency 100
4982 pipe_class pipe_slow()
4983 %{
4984     instruction_count(10); multiple_bundles; force_serialization;
4985     fixed_latency(100);
4986     D0  : S0(2);
4987     MEM : S3(2);
4988 %}
4989 
4990 // The real do-nothing guy: a pipeline class with zero instructions and no resources
4991 pipe_class empty()
4992 %{
4993     instruction_count(0);
4994 %}
4995 
4996 // Define the class for the Nop node: MachNop is bound to the 'empty' pipe_class
4997 define
4998 %{
4999    MachNop = empty;
5000 %}
5001 
5002 %}
5003 
5004 //----------INSTRUCTIONS-------------------------------------------------------
5005 //
5006 // match      -- States which machine-independent subtree may be replaced
5007 //               by this instruction.
5008 // ins_cost   -- The estimated cost of this instruction is used by instruction
5009 //               selection to identify a minimum cost tree of machine
5010 //               instructions that matches a tree of machine-independent
5011 //               instructions.
5012 // format     -- A string providing the disassembly for this instruction.
5013 //               The value of an instruction's operand may be inserted
5014 //               by referring to it with a '$' prefix.
5015 // opcode     -- Three instruction opcodes may be provided.  These are referred
5016 //               to within an encode class as $primary, $secondary, and $tertiary
5017 //               respectively.  The primary opcode is commonly used to
5018 //               indicate the type of machine instruction, while secondary
5019 //               and tertiary are often used for prefix options or addressing
5020 //               modes.
5021 // ins_encode -- A list of encode classes with parameters. The encode class
5022 //               name must have been defined in an 'enc_class' specification
5023 //               in the encode section of the architecture description.
5024 
5025 
5026 //----------Load/Store/Move Instructions---------------------------------------
5027 //----------Load Instructions--------------------------------------------------
5028 
5029 // Load Byte (8 bit signed): a LoadB node becomes one movsbl from memory into an int register
5030 instruct loadB(rRegI dst, memory mem)
5031 %{
5032   match(Set dst (LoadB mem));
5033 
5034   ins_cost(125);
5035   format %{ "movsbl  $dst, $mem\t# byte" %}
5036 
5037   ins_encode %{
5038     __ movsbl($dst$$Register, $mem$$Address);
5039   %}
5040 
5041   ins_pipe(ialu_reg_mem);
5042 %}
5043 
5044 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a single movsbq
5045 instruct loadB2L(rRegL dst, memory mem)
5046 %{
5047   match(Set dst (ConvI2L (LoadB mem)));
5048 
5049   ins_cost(125);
5050   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5051 
5052   ins_encode %{
5053     __ movsbq($dst$$Register, $mem$$Address);
5054   %}
5055 
5056   ins_pipe(ialu_reg_mem);
5057 %}
5058 
5059 // Load Unsigned Byte (8 bit UNsigned): a LoadUB node becomes one movzbl from memory
5060 instruct loadUB(rRegI dst, memory mem)
5061 %{
5062   match(Set dst (LoadUB mem));
5063 
5064   ins_cost(125);
5065   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5066 
5067   ins_encode %{
5068     __ movzbl($dst$$Register, $mem$$Address);
5069   %}
5070 
5071   ins_pipe(ialu_reg_mem);
5072 %}
5073 
5074 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into a single movzbq
5075 instruct loadUB2L(rRegL dst, memory mem)
5076 %{
5077   match(Set dst (ConvI2L (LoadUB mem)));
5078 
5079   ins_cost(125);
5080   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5081 
5082   ins_encode %{
5083     __ movzbq($dst$$Register, $mem$$Address);
5084   %}
5085 
5086   ins_pipe(ialu_reg_mem);
5087 %}
5088 
5089 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register (two instructions, flags killed)
5090 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5091   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5092   effect(KILL cr);
5093   // No ins_cost is given here, unlike the plain load rules nearby.
5094   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
5095             "andl    $dst, right_n_bits($mask, 8)" %}
5096   ins_encode %{
5097     Register Rdst = $dst$$Register;
5098     __ movzbq(Rdst, $mem$$Address);
5099     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // mask is trimmed to its low 8 bits before the and
5100   %}
5101   ins_pipe(ialu_reg_mem);
5102 %}
5103 
5104 // Load Short (16 bit signed): a LoadS node becomes one movswl from memory
5105 instruct loadS(rRegI dst, memory mem)
5106 %{
5107   match(Set dst (LoadS mem));
5108 
5109   ins_cost(125);
5110   format %{ "movswl $dst, $mem\t# short" %}
5111 
5112   ins_encode %{
5113     __ movswl($dst$$Register, $mem$$Address);
5114   %}
5115 
5116   ins_pipe(ialu_reg_mem);
5117 %}
5118 
5119 // Load Short (16 bit signed) to Byte (8 bit signed): the left/right shift pair collapses into one movsbl
5120 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5121   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5122   // Both shifts use the same immI_24 operand (presumably the constant 24 -- defined outside this excerpt).
5123   ins_cost(125);
5124   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5125   ins_encode %{
5126     __ movsbl($dst$$Register, $mem$$Address);
5127   %}
5128   ins_pipe(ialu_reg_mem);
5129 %}
5130 
5131 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a single movswq
5132 instruct loadS2L(rRegL dst, memory mem)
5133 %{
5134   match(Set dst (ConvI2L (LoadS mem)));
5135 
5136   ins_cost(125);
5137   format %{ "movswq $dst, $mem\t# short -> long" %}
5138 
5139   ins_encode %{
5140     __ movswq($dst$$Register, $mem$$Address);
5141   %}
5142 
5143   ins_pipe(ialu_reg_mem);
5144 %}
5145 
5146 // Load Unsigned Short/Char (16 bit UNsigned): a LoadUS node becomes one movzwl from memory
5147 instruct loadUS(rRegI dst, memory mem)
5148 %{
5149   match(Set dst (LoadUS mem));
5150 
5151   ins_cost(125);
5152   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5153 
5154   ins_encode %{
5155     __ movzwl($dst$$Register, $mem$$Address);
5156   %}
5157 
5158   ins_pipe(ialu_reg_mem);
5159 %}
5160 
5161 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): shift pair collapses into one movsbl
5162 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5163   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5164   // Both shifts use the same immI_24 operand (presumably the constant 24 -- defined outside this excerpt).
5165   ins_cost(125);
5166   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5167   ins_encode %{
5168     __ movsbl($dst$$Register, $mem$$Address);
5169   %}
5170   ins_pipe(ialu_reg_mem);
5171 %}
5172 
5173 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
5174 instruct loadUS2L(rRegL dst, memory mem)
5175 %{
5176   match(Set dst (ConvI2L (LoadUS mem)));
5177 
5178   ins_cost(125);
5179   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5180 
5181   ins_encode %{
5182     __ movzwq($dst$$Register, $mem$$Address);
5183   %}
5184 
5185   ins_pipe(ialu_reg_mem);
5186 %}
5187 
5188 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: load and mask become one movzbq
5189 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5190   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5191   // The mask operand only takes part in matching; the encoding below never reads it.
5192   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5193   ins_encode %{
5194     __ movzbq($dst$$Register, $mem$$Address);
5195   %}
5196   ins_pipe(ialu_reg_mem);
5197 %}
5198 
5199 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register (two instructions, flags killed)
5200 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5201   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5202   effect(KILL cr);
5203   // Same match tree as loadUS2L_immI_255, but for a general immI mask.
5204   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5205             "andl    $dst, right_n_bits($mask, 16)" %}
5206   ins_encode %{
5207     Register Rdst = $dst$$Register;
5208     __ movzwq(Rdst, $mem$$Address);
5209     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // mask is trimmed to its low 16 bits before the and
5210   %}
5211   ins_pipe(ialu_reg_mem);
5212 %}
5213 
5214 // Load Integer: a LoadI node becomes one movl from memory
5215 instruct loadI(rRegI dst, memory mem)
5216 %{
5217   match(Set dst (LoadI mem));
5218 
5219   ins_cost(125);
5220   format %{ "movl    $dst, $mem\t# int" %}
5221 
5222   ins_encode %{
5223     __ movl($dst$$Register, $mem$$Address);
5224   %}
5225 
5226   ins_pipe(ialu_reg_mem);
5227 %}
5228 
5229 // Load Integer (32 bit signed) to Byte (8 bit signed): shift pair collapses into one movsbl
5230 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5231   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5232   // Both shifts use the same immI_24 operand (presumably the constant 24 -- defined outside this excerpt).
5233   ins_cost(125);
5234   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5235   ins_encode %{
5236     __ movsbl($dst$$Register, $mem$$Address);
5237   %}
5238   ins_pipe(ialu_reg_mem);
5239 %}
5240 
5241 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): AndI with an immI_255 mask becomes one movzbl
5242 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5243   match(Set dst (AndI (LoadI mem) mask));
5244   // The mask operand only takes part in matching; the encoding below never reads it.
5245   ins_cost(125);
5246   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5247   ins_encode %{
5248     __ movzbl($dst$$Register, $mem$$Address);
5249   %}
5250   ins_pipe(ialu_reg_mem);
5251 %}
5252 
5253 // Load Integer (32 bit signed) to Short (16 bit signed): shift pair collapses into one movswl
5254 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5255   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5256   // Both shifts use the same immI_16 operand (presumably the constant 16 -- defined outside this excerpt).
5257   ins_cost(125);
5258   format %{ "movswl  $dst, $mem\t# int -> short" %}
5259   ins_encode %{
5260     __ movswl($dst$$Register, $mem$$Address);
5261   %}
5262   ins_pipe(ialu_reg_mem);
5263 %}
5264 
5265 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): AndI with immI_65535 becomes one movzwl
5266 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5267   match(Set dst (AndI (LoadI mem) mask));
5268   // The mask operand only takes part in matching; the encoding below never reads it.
5269   ins_cost(125);
5270   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5271   ins_encode %{
5272     __ movzwl($dst$$Register, $mem$$Address);
5273   %}
5274   ins_pipe(ialu_reg_mem);
5275 %}
5276 
5277 // Load Integer into Long Register: folds ConvI2L(LoadI) into a single movslq
5278 instruct loadI2L(rRegL dst, memory mem)
5279 %{
5280   match(Set dst (ConvI2L (LoadI mem)));
5281 
5282   ins_cost(125);
5283   format %{ "movslq  $dst, $mem\t# int -> long" %}
5284 
5285   ins_encode %{
5286     __ movslq($dst$$Register, $mem$$Address);
5287   %}
5288 
5289   ins_pipe(ialu_reg_mem);
5290 %}
5291 
5292 // Load Integer with mask 0xFF into Long Register: load, mask and conversion become one movzbq
5293 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5294   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5295   // The mask operand only takes part in matching; the encoding below never reads it.
5296   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5297   ins_encode %{
5298     __ movzbq($dst$$Register, $mem$$Address);
5299   %}
5300   ins_pipe(ialu_reg_mem);
5301 %}
5302 
5303 // Load Integer with mask 0xFFFF into Long Register: load, mask and conversion become one movzwq
5304 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5305   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5306   // The mask operand only takes part in matching; the encoding below never reads it.
5307   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5308   ins_encode %{
5309     __ movzwq($dst$$Register, $mem$$Address);
5310   %}
5311   ins_pipe(ialu_reg_mem);
5312 %}
5313 
5314 // Load Integer with a 31-bit mask into Long Register: movl followed by andl (flags killed)
5315 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5316   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5317   effect(KILL cr);
5318   // NOTE(review): presumably immU31 guarantees a non-negative mask so no sign extension is needed -- confirm
5319   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5320             "andl    $dst, $mask" %}
5321   ins_encode %{
5322     Register Rdst = $dst$$Register;
5323     __ movl(Rdst, $mem$$Address);
5324     __ andl(Rdst, $mask$$constant);
5325   %}
5326   ins_pipe(ialu_reg_mem);
5327 %}
5328 
5329 // Load Unsigned Integer into Long Register: ConvI2L(LoadI) masked by immL_32bits is emitted as a plain movl
5330 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
5331 %{
5332   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5333   // The mask operand only takes part in matching; the encoding below never reads it.
5334   ins_cost(125);
5335   format %{ "movl    $dst, $mem\t# uint -> long" %}
5336 
5337   ins_encode %{
5338     __ movl($dst$$Register, $mem$$Address);
5339   %}
5340 
5341   ins_pipe(ialu_reg_mem);
5342 %}
5343 
5344 // Load Long: a LoadL node becomes one movq from memory
5345 instruct loadL(rRegL dst, memory mem)
5346 %{
5347   match(Set dst (LoadL mem));
5348 
5349   ins_cost(125);
5350   format %{ "movq    $dst, $mem\t# long" %}
5351 
5352   ins_encode %{
5353     __ movq($dst$$Register, $mem$$Address);
5354   %}
5355 
5356   ins_pipe(ialu_reg_mem); // XXX
5357 %}
5358 
5359 // Load Range: a LoadRange node encoded via encode classes (opcode 0x8B), printed as movl
5360 instruct loadRange(rRegI dst, memory mem)
5361 %{
5362   match(Set dst (LoadRange mem));
5363   // Uses the REX_reg_mem/OpcP/reg_mem encode classes rather than an inline assembler call.
5364   ins_cost(125); // XXX
5365   format %{ "movl    $dst, $mem\t# range" %}
5366   opcode(0x8B);
5367   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5368   ins_pipe(ialu_reg_mem);
5369 %}
5370 
5371 // Load Pointer: a LoadP node encoded via encode classes (opcode 0x8B, wide REX variant), printed as movq
5372 instruct loadP(rRegP dst, memory mem)
5373 %{
5374   match(Set dst (LoadP mem));
5375   // Differs from loadRange only in the register class and the REX_reg_mem_wide prefix class.
5376   ins_cost(125); // XXX
5377   format %{ "movq    $dst, $mem\t# ptr" %}
5378   opcode(0x8B);
5379   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5380   ins_pipe(ialu_reg_mem); // XXX
5381 %}
5382 
5383 // Load Compressed Pointer: a LoadN node becomes one movl into a narrow-oop register (rRegN)
5384 instruct loadN(rRegN dst, memory mem)
5385 %{
5386    match(Set dst (LoadN mem));
5387    // The value is loaded as-is; no decoding of the compressed pointer happens in this rule.
5388    ins_cost(125); // XXX
5389    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5390    ins_encode %{
5391      __ movl($dst$$Register, $mem$$Address);
5392    %}
5393    ins_pipe(ialu_reg_mem); // XXX
5394 %}
5395 
5396 
5397 // Load Klass Pointer: a LoadKlass node encoded like loadP (opcode 0x8B, wide REX variant)
5398 instruct loadKlass(rRegP dst, memory mem)
5399 %{
5400   match(Set dst (LoadKlass mem));
5401   // Same encode classes as the plain pointer load; only the matched ideal node differs.
5402   ins_cost(125); // XXX
5403   format %{ "movq    $dst, $mem\t# class" %}
5404   opcode(0x8B);
5405   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5406   ins_pipe(ialu_reg_mem); // XXX
5407 %}
5408 
5409 // Load narrow Klass Pointer: a LoadNKlass node becomes one movl into a narrow register (rRegN)
5410 instruct loadNKlass(rRegN dst, memory mem)
5411 %{
5412   match(Set dst (LoadNKlass mem));
5413   // The value is loaded as-is; no decoding of the compressed klass pointer happens in this rule.
5414   ins_cost(125); // XXX
5415   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5416   ins_encode %{
5417     __ movl($dst$$Register, $mem$$Address);
5418   %}
5419   ins_pipe(ialu_reg_mem); // XXX
5420 %}
5421 
5422 // Load Float: a LoadF node becomes a movflt from memory into an XMM register (printed as movss)
5423 instruct loadF(regF dst, memory mem)
5424 %{
5425   match(Set dst (LoadF mem));
5426   // Costed higher (145) than the integer loads (125) and scheduled with the generic pipe_slow class.
5427   ins_cost(145); // XXX
5428   format %{ "movss   $dst, $mem\t# float" %}
5429   ins_encode %{
5430     __ movflt($dst$$XMMRegister, $mem$$Address);
5431   %}
5432   ins_pipe(pipe_slow); // XXX
5433 %}
5434 
5435 // Move Float register-to-register: regF source into a vlRegF destination (no memory access)
5436 instruct MoveF2VL(vlRegF dst, regF src) %{
5437   match(Set dst src);
5438   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5439   ins_encode %{
5440     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5441   %}
5442   ins_pipe( fpu_reg_reg );
5443 %}
5444 
5445 // Move Float register-to-register: regF source into a legRegF destination (no memory access)
5446 instruct MoveF2LEG(legRegF dst, regF src) %{
5447   match(Set dst src);
5448   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5449   ins_encode %{
5450     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5451   %}
5452   ins_pipe( fpu_reg_reg );
5453 %}
5454 
5455 // Move Float register-to-register: vlRegF source into a regF destination (no memory access)
5456 instruct MoveVL2F(regF dst, vlRegF src) %{
5457   match(Set dst src);
5458   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5459   ins_encode %{
5460     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5461   %}
5462   ins_pipe( fpu_reg_reg );
5463 %}
5464 
5465 // Move Float register-to-register: legRegF source into a regF destination (no memory access)
5466 instruct MoveLEG2F(regF dst, legRegF src) %{
5467   match(Set dst src);
5468   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5469   ins_encode %{
5470     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5471   %}
5472   ins_pipe( fpu_reg_reg );
5473 %}
5474 
5475 // Load Double, variant selected when UseXmmLoadAndClearUpper is false (printed as movlpd)
5476 instruct loadD_partial(regD dst, memory mem)
5477 %{
5478   predicate(!UseXmmLoadAndClearUpper);
5479   match(Set dst (LoadD mem));
5480   // NOTE(review): both LoadD variants call movdbl; presumably it picks movlpd vs movsd itself -- confirm
5481   ins_cost(145); // XXX
5482   format %{ "movlpd  $dst, $mem\t# double" %}
5483   ins_encode %{
5484     __ movdbl($dst$$XMMRegister, $mem$$Address);
5485   %}
5486   ins_pipe(pipe_slow); // XXX
5487 %}
5488 
5489 instruct loadD(regD dst, memory mem)
5490 %{
5491   predicate(UseXmmLoadAndClearUpper);
5492   match(Set dst (LoadD mem));
5493   // Load Double, variant selected when UseXmmLoadAndClearUpper is true; loadD_partial covers the other case.
5494   ins_cost(145); // XXX
5495   format %{ "movsd   $dst, $mem\t# double" %}
5496   ins_encode %{
5497     __ movdbl($dst$$XMMRegister, $mem$$Address);
5498   %}
5499   ins_pipe(pipe_slow); // XXX
5500 %}
5501 
5502 // Move Double register-to-register: regD source into a vlRegD destination (no memory access)
5503 instruct MoveD2VL(vlRegD dst, regD src) %{
5504   match(Set dst src);
5505   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5506   ins_encode %{
5507     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5508   %}
5509   ins_pipe( fpu_reg_reg );
5510 %}
5511 
5512 // Move Double register-to-register: regD source into a legRegD destination (no memory access)
5513 instruct MoveD2LEG(legRegD dst, regD src) %{
5514   match(Set dst src);
5515   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5516   ins_encode %{
5517     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5518   %}
5519   ins_pipe( fpu_reg_reg );
5520 %}
5521 
5522 // Move Double register-to-register: vlRegD source into a regD destination (no memory access)
5523 instruct MoveVL2D(regD dst, vlRegD src) %{
5524   match(Set dst src);
5525   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5526   ins_encode %{
5527     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5528   %}
5529   ins_pipe( fpu_reg_reg );
5530 %}
5531 
5532 // Move Double register-to-register: legRegD source into a regD destination (no memory access)
5533 instruct MoveLEG2D(regD dst, legRegD src) %{
5534   match(Set dst src);
5535   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5536   ins_encode %{
5537     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5538   %}
5539   ins_pipe( fpu_reg_reg );
5540 %}
5541 
5542 // The following pseudo code describes the algorithm for max[FD];
5543 // the min algorithm follows the same lines.
5544 //  btmp = (b < +0.0) ? a : b
5545 //  atmp = (b < +0.0) ? b : a
5546 //  Tmp  = Max_Float(atmp , btmp)
5547 //  Res  = (atmp == NaN) ? atmp : Tmp
5548 
5549 // max = java.lang.Math.max(float a, float b); non-reduction AVX form: blend, blend, vmaxss, compare, blend
5550 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
5551   predicate(UseAVX > 0 && !n->is_reduction());
5552   match(Set dst (MaxF a b));
5553   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
5554   format %{
5555      "blendvps         $btmp,$b,$a,$b           \n\t"
5556      "blendvps         $atmp,$a,$b,$b           \n\t"
5557      "vmaxss           $tmp,$atmp,$btmp         \n\t"
5558      "cmpps.unordered  $btmp,$atmp,$atmp        \n\t"
5559      "blendvps         $dst,$tmp,$atmp,$btmp    \n\t"
5560   %}
5561   ins_encode %{
5562     int vector_len = Assembler::AVX_128bit; // every vector op below uses the 128-bit encoding
5563     __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); // b is the blend mask
5564     __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); // same mask, operands swapped
5565     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
5566     __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // NOTE(review): format prints this as "unordered" -- confirm _false encodes that predicate
5567     __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // btmp now holds the compare result used as mask
5568  %}
5569   ins_pipe( pipe_slow );
5570 %}
5571 
5572 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
5573   predicate(UseAVX > 0 && n->is_reduction());
5574   match(Set dst (MaxF a b));
5575   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
5576   // Reduction form of MaxF: delegates to emit_fp_min_max (min=false, single=true) with an XMM and an int temp.
5577   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
5578   ins_encode %{
5579     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
5580                     false /*min*/, true /*single*/);
5581   %}
5582   ins_pipe( pipe_slow );
5583 %}
5584 
5585 // max = java.lang.Math.max(double a, double b); non-reduction AVX form: blend, blend, vmaxsd, compare, blend
5586 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
5587   predicate(UseAVX > 0 && !n->is_reduction());
5588   match(Set dst (MaxD a b));
5589   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
5590   format %{
5591      "blendvpd         $btmp,$b,$a,$b            \n\t"
5592      "blendvpd         $atmp,$a,$b,$b            \n\t"
5593      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
5594      "cmppd.unordered  $btmp,$atmp,$atmp         \n\t"
5595      "blendvpd         $dst,$tmp,$atmp,$btmp     \n\t"
5596   %}
5597   ins_encode %{
5598     int vector_len = Assembler::AVX_128bit; // every vector op below uses the 128-bit encoding
5599     __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); // b is the blend mask
5600     __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); // same mask, operands swapped
5601     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
5602     __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // NOTE(review): format prints this as "unordered" -- confirm _false encodes that predicate
5603     __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // btmp now holds the compare result used as mask
5604   %}
5605   ins_pipe( pipe_slow );
5606 %}
5607 
5608 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
5609   predicate(UseAVX > 0 && n->is_reduction());
5610   match(Set dst (MaxD a b));
5611   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
5612   // Reduction form of MaxD: delegates to emit_fp_min_max (min=false, single=false) with an XMM and a long temp.
5613   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
5614   ins_encode %{
5615     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
5616                     false /*min*/, false /*single*/);
5617   %}
5618   ins_pipe( pipe_slow );
5619 %}
5620 
5621 // min = java.lang.Math.min(float a, float b); non-reduction AVX form: blend, blend, vminss, compare, blend
5622 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
5623   predicate(UseAVX > 0 && !n->is_reduction());
5624   match(Set dst (MinF a b));
5625   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
5626   format %{
5627      "blendvps         $atmp,$a,$b,$a             \n\t"
5628      "blendvps         $btmp,$b,$a,$a             \n\t"
5629      "vminss           $tmp,$atmp,$btmp           \n\t"
5630      "cmpps.unordered  $btmp,$atmp,$atmp          \n\t"
5631      "blendvps         $dst,$tmp,$atmp,$btmp      \n\t"
5632   %}
5633   ins_encode %{
5634     int vector_len = Assembler::AVX_128bit; // every vector op below uses the 128-bit encoding
5635     __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); // a is the blend mask (max uses b)
5636     __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); // same mask, operands swapped
5637     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
5638     __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // NOTE(review): format prints this as "unordered" -- confirm _false encodes that predicate
5639     __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // btmp now holds the compare result used as mask
5640   %}
5641   ins_pipe( pipe_slow );
5642 %}
5643 
5644 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
5645   predicate(UseAVX > 0 && n->is_reduction());
5646   match(Set dst (MinF a b));
5647   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
5648   // Reduction form of MinF: delegates to emit_fp_min_max (min=true, single=true) with an XMM and an int temp.
5649   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
5650   ins_encode %{
5651     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
5652                     true /*min*/, true /*single*/);
5653   %}
5654   ins_pipe( pipe_slow );
5655 %}
5656 
5657 // min = java.lang.Math.min(double a, double b); non-reduction AVX form: blend, blend, vminsd, compare, blend
5658 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
5659   predicate(UseAVX > 0 && !n->is_reduction());
5660   match(Set dst (MinD a b));
5661   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
5662   format %{
5663      "blendvpd         $atmp,$a,$b,$a           \n\t"
5664      "blendvpd         $btmp,$b,$a,$a           \n\t"
5665      "vminsd           $tmp,$atmp,$btmp         \n\t"
5666      "cmppd.unordered  $btmp,$atmp,$atmp        \n\t"
5667      "blendvpd         $dst,$tmp,$atmp,$btmp    \n\t"
5668   %}
5669   ins_encode %{
5670     int vector_len = Assembler::AVX_128bit; // every vector op below uses the 128-bit encoding
5671     __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); // a is the blend mask (max uses b)
5672     __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); // same mask, operands swapped
5673     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
5674     __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // NOTE(review): format prints this as "unordered" -- confirm _false encodes that predicate
5675     __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // btmp now holds the compare result used as mask
5676   %}
5677   ins_pipe( pipe_slow );
5678 %}
5679 
5680 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
5681   predicate(UseAVX > 0 && n->is_reduction());
5682   match(Set dst (MinD a b));
5683   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
5684   // Reduction form of MinD: delegates to emit_fp_min_max (min=true, single=false) with an XMM and a long temp.
5685   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
5686   ins_encode %{
5687     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
5688                     true /*min*/, false /*single*/);
5689   %}
5690   ins_pipe( pipe_slow );
5691 %}
5692 
5693 // Load Effective Address for an indOffset8 operand: the address expression itself is the matched value
5694 instruct leaP8(rRegP dst, indOffset8 mem)
5695 %{
5696   match(Set dst mem);
5697   // Address computation only: printed as leaq, encoded with opcode 0x8D and the wide REX class.
5698   ins_cost(110); // XXX
5699   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5700   opcode(0x8D);
5701   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5702   ins_pipe(ialu_reg_reg_fat);
5703 %}
5704 
5705 instruct leaP32(rRegP dst, indOffset32 mem)
5706 %{
5707   match(Set dst mem);
5708   // Address computation only: leaq (opcode 0x8D) for an indOffset32 operand.
5709   ins_cost(110);
5710   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5711   opcode(0x8D);
5712   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5713   ins_pipe(ialu_reg_reg_fat);
5714 %}
5715 
5716 // instruct leaPIdx(rRegP dst, indIndex mem)
5717 // %{
5718 //   match(Set dst mem);
5719 
5720 //   ins_cost(110);
5721 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5722 //   opcode(0x8D);
5723 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5724 //   ins_pipe(ialu_reg_reg_fat);
5725 // %}
5726 
5727 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5728 %{
5729   match(Set dst mem);
5730   // Address computation only: leaq (opcode 0x8D) for an indIndexOffset operand.
5731   ins_cost(110);
5732   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5733   opcode(0x8D);
5734   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5735   ins_pipe(ialu_reg_reg_fat);
5736 %}
5737 
5738 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5739 %{
5740   match(Set dst mem);
5741   // Address computation only: leaq (opcode 0x8D) for an indIndexScale operand.
5742   ins_cost(110);
5743   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5744   opcode(0x8D);
5745   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5746   ins_pipe(ialu_reg_reg_fat);
5747 %}
5748 
5749 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
5750 %{
5751   match(Set dst mem);
5752   // Address computation only: leaq (opcode 0x8D) for an indPosIndexScale operand; the format tag names that operand kind.
5753   ins_cost(110);
5754   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
5755   opcode(0x8D);
5756   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5757   ins_pipe(ialu_reg_reg_fat);
5758 %}
5759 
5760 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5761 %{
5762   match(Set dst mem);
5763   // Address computation only: leaq (opcode 0x8D) for an indIndexScaleOffset operand.
5764   ins_cost(110);
5765   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5766   opcode(0x8D);
5767   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5768   ins_pipe(ialu_reg_reg_fat);
5769 %}
5770 
5771 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
5772 %{
5773   match(Set dst mem);
5774   // Address computation only: leaq (opcode 0x8D) for an indPosIndexOffset operand.
5775   ins_cost(110);
5776   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
5777   opcode(0x8D);
5778   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5779   ins_pipe(ialu_reg_reg_fat);
5780 %}
5781 
5782 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5783 %{
5784   match(Set dst mem);
5785   // Address computation only: leaq (opcode 0x8D) for an indPosIndexScaleOffset operand.
5786   ins_cost(110);
5787   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5788   opcode(0x8D);
5789   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5790   ins_pipe(ialu_reg_reg_fat);
5791 %}
5792 
5793 // Load Effective Address which uses Narrow (32-bits) oop; only when compressed oops are on with a non-zero shift
5794 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5795 %{
5796   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
5797   match(Set dst mem);
5798   // Address computation only: leaq (opcode 0x8D) for an indCompressedOopOffset operand.
5799   ins_cost(110);
5800   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5801   opcode(0x8D);
5802   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5803   ins_pipe(ialu_reg_reg_fat);
5804 %}
5805 
5806 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5807 %{
5808   predicate(CompressedOops::shift() == 0);
5809   match(Set dst mem);
5810   // Narrow variant of leaP8: applies only when the compressed-oop shift is zero; leaq (opcode 0x8D).
5811   ins_cost(110); // XXX
5812   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5813   opcode(0x8D);
5814   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5815   ins_pipe(ialu_reg_reg_fat);
5816 %}
5817 
5818 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5819 %{
5820   predicate(CompressedOops::shift() == 0);
5821   match(Set dst mem);
5822   // Narrow variant of leaP32: applies only when the compressed-oop shift is zero; leaq (opcode 0x8D).
5823   ins_cost(110);
5824   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5825   opcode(0x8D);
5826   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5827   ins_pipe(ialu_reg_reg_fat);
5828 %}
5829 
5830 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5831 %{
5832   predicate(CompressedOops::shift() == 0);
5833   match(Set dst mem);
5834   // Narrow variant of leaPIdxOff: applies only when the compressed-oop shift is zero; leaq (opcode 0x8D).
5835   ins_cost(110);
5836   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5837   opcode(0x8D);
5838   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5839   ins_pipe(ialu_reg_reg_fat);
5840 %}
5841 
5842 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5843 %{
5844   predicate(CompressedOops::shift() == 0);
5845   match(Set dst mem);
5846   // Narrow variant of leaPIdxScale: applies only when the compressed-oop shift is zero; leaq (opcode 0x8D).
5847   ins_cost(110);
5848   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5849   opcode(0x8D);
5850   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5851   ins_pipe(ialu_reg_reg_fat);
5852 %}
5853 
// leaq of an indIndexScaleOffsetNarrow memory operand into a pointer register.
// Matched only when CompressedOops::shift() == 0 (see predicate).
5854 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5855 %{
5856   predicate(CompressedOops::shift() == 0);
5857   match(Set dst mem);
5858 
5859   ins_cost(110);
5860   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5861   opcode(0x8D); // primary opcode consumed by OpcP below
5862   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5863   ins_pipe(ialu_reg_reg_fat);
5864 %}
5865 
// leaq of an indPosIndexOffsetNarrow memory operand into a pointer register.
// Matched only when CompressedOops::shift() == 0 (see predicate).
5866 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
5867 %{
5868   predicate(CompressedOops::shift() == 0);
5869   match(Set dst mem);
5870 
5871   ins_cost(110);
5872   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
5873   opcode(0x8D); // primary opcode consumed by OpcP below
5874   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5875   ins_pipe(ialu_reg_reg_fat);
5876 %}
5877 
// leaq of an indPosIndexScaleOffsetNarrow memory operand into a pointer
// register. Matched only when CompressedOops::shift() == 0 (see predicate).
5878 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5879 %{
5880   predicate(CompressedOops::shift() == 0);
5881   match(Set dst mem);
5882 
5883   ins_cost(110);
5884   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5885   opcode(0x8D); // primary opcode consumed by OpcP below
5886   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5887   ins_pipe(ialu_reg_reg_fat);
5888 %}
5889 
// Load an int immediate into an int register (format prints movl).
// No explicit ins_cost is given here; the zero-constant rule loadConI0
// declares cost 50.
5890 instruct loadConI(rRegI dst, immI src)
5891 %{
5892   match(Set dst src);
5893 
5894   format %{ "movl    $dst, $src\t# int" %}
5895   ins_encode(load_immI(dst, src));
5896   ins_pipe(ialu_reg_fat); // XXX
5897 %}
5898 
// Load int constant zero by xor-ing the destination with itself rather than
// moving an immediate. The flags register is declared clobbered.
5899 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5900 %{
5901   match(Set dst src);
5902   effect(KILL cr); // xor writes the condition flags
5903 
5904   ins_cost(50);
5905   format %{ "xorl    $dst, $dst\t# int" %}
5906   opcode(0x33); /* + rd */
5907   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5908   ins_pipe(ialu_reg);
5909 %}
5910 
// Load a general long immediate (format prints movq). At cost 150 this is
// costlier than the specialised long-constant rules in this file:
// loadConL0 (50), loadConUL32 (60) and loadConL32 (70).
5911 instruct loadConL(rRegL dst, immL src)
5912 %{
5913   match(Set dst src);
5914 
5915   ins_cost(150);
5916   format %{ "movq    $dst, $src\t# long" %}
5917   ins_encode(load_immL(dst, src));
5918   ins_pipe(ialu_reg);
5919 %}
5920 
// Load long constant zero by xor-ing the destination with itself.
// The format prints xorl and the encoding uses REX_reg_reg, not a *_wide
// variant. NOTE(review): presumably relies on 32-bit register writes
// zero-extending to 64 bits on x86-64 -- confirm against the enc_class.
5921 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5922 %{
5923   match(Set dst src);
5924   effect(KILL cr); // xor writes the condition flags
5925 
5926   ins_cost(50);
5927   format %{ "xorl    $dst, $dst\t# long" %}
5928   opcode(0x33); /* + rd */
5929   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5930   ins_pipe(ialu_reg); // XXX
5931 %}
5932 
// Load a long constant of operand type immUL32 (described by the format as
// an unsigned 32-bit value) using the load_immUL32 encoding; prints as movl.
5933 instruct loadConUL32(rRegL dst, immUL32 src)
5934 %{
5935   match(Set dst src);
5936 
5937   ins_cost(60);
5938   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5939   ins_encode(load_immUL32(dst, src));
5940   ins_pipe(ialu_reg);
5941 %}
5942 
// Load a long constant of operand type immL32 (described by the format as a
// 32-bit value) using the load_immL32 encoding; prints as movq.
5943 instruct loadConL32(rRegL dst, immL32 src)
5944 %{
5945   match(Set dst src);
5946 
5947   ins_cost(70);
5948   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5949   ins_encode(load_immL32(dst, src));
5950   ins_pipe(ialu_reg);
5951 %}
5952 
// Load a general pointer immediate into a pointer register via load_immP
// (format prints movq). No explicit ins_cost is given; the specialised
// rules loadConP0 (50) and loadConP31 (60) declare theirs.
5953 instruct loadConP(rRegP dst, immP con) %{
5954   match(Set dst con);
5955 
5956   format %{ "movq    $dst, $con\t# ptr" %}
5957   ins_encode(load_immP(dst, con));
5958   ins_pipe(ialu_reg_fat); // XXX
5959 %}
5960 
// Load the immP0 (zero) pointer constant by xor-ing the destination with
// itself. Same xorl/REX_reg_reg encoding as loadConI0 and loadConL0.
5961 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5962 %{
5963   match(Set dst src);
5964   effect(KILL cr); // xor writes the condition flags
5965 
5966   ins_cost(50);
5967   format %{ "xorl    $dst, $dst\t# ptr" %}
5968   opcode(0x33); /* + rd */
5969   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5970   ins_pipe(ialu_reg);
5971 %}
5972 
// Load a pointer constant of operand type immP31 (described by the format as
// a positive 32-bit value) using the load_immP31 encoding; prints as movl.
// NOTE(review): the rule declares KILL cr although a plain movl of an
// immediate does not write flags; whether load_immP31 needs this cannot be
// determined from this section -- confirm before relaxing.
5973 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5974 %{
5975   match(Set dst src);
5976   effect(KILL cr);
5977 
5978   ins_cost(60);
5979   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5980   ins_encode(load_immP31(dst, src));
5981   ins_pipe(ialu_reg);
5982 %}
5983 
// Load a float constant into an XMM register from the constant table:
// movflt reads from the address that $constantaddress($con) yields.
5984 instruct loadConF(regF dst, immF con) %{
5985   match(Set dst con);
5986   ins_cost(125);
5987   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5988   ins_encode %{
5989     __ movflt($dst$$XMMRegister, $constantaddress($con));
5990   %}
5991   ins_pipe(pipe_slow);
5992 %}
5993 
// Load the immN0 (zero) compressed-pointer constant by xor-ing the
// destination with itself. The flags register is declared clobbered.
// The format string names $dst for both operands so the printed disassembly
// matches the xorq(dst, dst) that is actually emitted (it previously printed
// $src as the second operand).
5994 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5995   match(Set dst src);
5996   effect(KILL cr); // xor writes the condition flags
5997   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5998   ins_encode %{
5999     __ xorq($dst$$Register, $dst$$Register);
6000   %}
6001   ins_pipe(ialu_reg);
6002 %}
6003 
// Load a compressed-oop constant into a narrow register via set_narrow_oop.
// A NULL constant is treated as a fatal error here (ShouldNotReachHere);
// the zero constant has its own rule, loadConN0, on operand type immN0.
6004 instruct loadConN(rRegN dst, immN src) %{
6005   match(Set dst src);
6006 
6007   ins_cost(125);
6008   format %{ "movl    $dst, $src\t# compressed ptr" %}
6009   ins_encode %{
6010     address con = (address)$src$$constant;
6011     if (con == NULL) {
6012       ShouldNotReachHere();
6013     } else {
6014       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6015     }
6016   %}
6017   ins_pipe(ialu_reg_fat); // XXX
6018 %}
6019 
// Load a compressed-klass constant into a narrow register via
// set_narrow_klass. A NULL constant is treated as a fatal error
// (ShouldNotReachHere).
6020 instruct loadConNKlass(rRegN dst, immNKlass src) %{
6021   match(Set dst src);
6022 
6023   ins_cost(125);
6024   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
6025   ins_encode %{
6026     address con = (address)$src$$constant;
6027     if (con == NULL) {
6028       ShouldNotReachHere();
6029     } else {
6030       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
6031     }
6032   %}
6033   ins_pipe(ialu_reg_fat); // XXX
6034 %}
6035 
// Load float constant 0.0 (operand type immF0) by xorps-ing the XMM
// destination with itself; cost 100 versus 125 for the constant-table load
// in loadConF.
6036 instruct loadConF0(regF dst, immF0 src)
6037 %{
6038   match(Set dst src);
6039   ins_cost(100);
6040 
6041   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6042   ins_encode %{
6043     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6044   %}
6045   ins_pipe(pipe_slow);
6046 %}
6047 
6048 // Use the same format since predicate() can not be used here.
// Load a double constant into an XMM register from the constant table:
// movdbl reads from the address that $constantaddress($con) yields.
6049 instruct loadConD(regD dst, immD con) %{
6050   match(Set dst con);
6051   ins_cost(125);
6052   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6053   ins_encode %{
6054     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6055   %}
6056   ins_pipe(pipe_slow);
6057 %}
6058 
// Load double constant 0.0 (operand type immD0) by xorpd-ing the XMM
// destination with itself; cost 100 versus 125 for the constant-table load
// in loadConD.
6059 instruct loadConD0(regD dst, immD0 src)
6060 %{
6061   match(Set dst src);
6062   ins_cost(100);
6063 
6064   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6065   ins_encode %{
6066     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6067   %}
6068   ins_pipe(pipe_slow);
6069 %}
6070 
// Load an int from a stack slot operand into an int register
// (opcode 0x8B, non-wide REX prefix encoding; prints as movl).
6071 instruct loadSSI(rRegI dst, stackSlotI src)
6072 %{
6073   match(Set dst src);
6074 
6075   ins_cost(125);
6076   format %{ "movl    $dst, $src\t# int stk" %}
6077   opcode(0x8B);
6078   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6079   ins_pipe(ialu_reg_mem);
6080 %}
6081 
// Load a long from a stack slot operand into a long register
// (opcode 0x8B with the *_wide REX encoding; prints as movq).
6082 instruct loadSSL(rRegL dst, stackSlotL src)
6083 %{
6084   match(Set dst src);
6085 
6086   ins_cost(125);
6087   format %{ "movq    $dst, $src\t# long stk" %}
6088   opcode(0x8B);
6089   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6090   ins_pipe(ialu_reg_mem);
6091 %}
6092 
// Load a pointer from a stack slot operand into a pointer register
// (opcode 0x8B with the *_wide REX encoding; prints as movq).
6093 instruct loadSSP(rRegP dst, stackSlotP src)
6094 %{
6095   match(Set dst src);
6096 
6097   ins_cost(125);
6098   format %{ "movq    $dst, $src\t# ptr stk" %}
6099   opcode(0x8B);
6100   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6101   ins_pipe(ialu_reg_mem);
6102 %}
6103 
// Load a float from a stack slot into an XMM register. The address is built
// explicitly as rsp plus the operand's displacement.
6104 instruct loadSSF(regF dst, stackSlotF src)
6105 %{
6106   match(Set dst src);
6107 
6108   ins_cost(125);
6109   format %{ "movss   $dst, $src\t# float stk" %}
6110   ins_encode %{
6111     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
6112   %}
6113   ins_pipe(pipe_slow); // XXX
6114 %}
6115 
6116 // Use the same format since predicate() can not be used here.
// Load a double from a stack slot into an XMM register. The address is built
// explicitly as rsp plus the operand's displacement.
6117 instruct loadSSD(regD dst, stackSlotD src)
6118 %{
6119   match(Set dst src);
6120 
6121   ins_cost(125);
6122   format %{ "movsd   $dst, $src\t# double stk" %}
6123   ins_encode  %{
6124     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6125   %}
6126   ins_pipe(pipe_slow); // XXX
6127 %}
6128 
6129 // Prefetch instructions for allocation.
6130 // Must be safe to execute with invalid address (cannot fault).
6131 
// PrefetchAllocation variant selected when AllocatePrefetchInstr == 3:
// emits prefetchw on the matched memory operand.
6132 instruct prefetchAlloc( memory mem ) %{
6133   predicate(AllocatePrefetchInstr==3);
6134   match(PrefetchAllocation mem);
6135   ins_cost(125);
6136 
6137   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
6138   ins_encode %{
6139     __ prefetchw($mem$$Address);
6140   %}
6141   ins_pipe(ialu_mem);
6142 %}
6143 
// PrefetchAllocation variant selected when AllocatePrefetchInstr == 0:
// emits prefetchnta on the matched memory operand.
6144 instruct prefetchAllocNTA( memory mem ) %{
6145   predicate(AllocatePrefetchInstr==0);
6146   match(PrefetchAllocation mem);
6147   ins_cost(125);
6148 
6149   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
6150   ins_encode %{
6151     __ prefetchnta($mem$$Address);
6152   %}
6153   ins_pipe(ialu_mem);
6154 %}
6155 
// PrefetchAllocation variant selected when AllocatePrefetchInstr == 1:
// emits prefetcht0 on the matched memory operand.
6156 instruct prefetchAllocT0( memory mem ) %{
6157   predicate(AllocatePrefetchInstr==1);
6158   match(PrefetchAllocation mem);
6159   ins_cost(125);
6160 
6161   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
6162   ins_encode %{
6163     __ prefetcht0($mem$$Address);
6164   %}
6165   ins_pipe(ialu_mem);
6166 %}
6167 
// PrefetchAllocation variant selected when AllocatePrefetchInstr == 2:
// emits prefetcht2 on the matched memory operand.
6168 instruct prefetchAllocT2( memory mem ) %{
6169   predicate(AllocatePrefetchInstr==2);
6170   match(PrefetchAllocation mem);
6171   ins_cost(125);
6172 
6173   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
6174   ins_encode %{
6175     __ prefetcht2($mem$$Address);
6176   %}
6177   ins_pipe(ialu_mem);
6178 %}
6179 
6180 //----------Store Instructions-------------------------------------------------
6181 
6182 // Store Byte
// Stores the low byte of an int register to memory (opcode 0x88; prints as
// movb). Uses the REX_breg_mem prefix encoding rather than REX_reg_mem;
// presumably so that byte-register encodings are selected correctly -- see
// the enc_class definition.
6183 instruct storeB(memory mem, rRegI src)
6184 %{
6185   match(Set mem (StoreB mem src));
6186 
6187   ins_cost(125); // XXX
6188   format %{ "movb    $mem, $src\t# byte" %}
6189   opcode(0x88);
6190   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6191   ins_pipe(ialu_mem_reg);
6192 %}
6193 
6194 // Store Char/Short
// Stores the low 16 bits of an int register to memory. Same opcode (0x89) as
// the int store, preceded by SizePrefix; prints as movw.
6195 instruct storeC(memory mem, rRegI src)
6196 %{
6197   match(Set mem (StoreC mem src));
6198 
6199   ins_cost(125); // XXX
6200   format %{ "movw    $mem, $src\t# char/short" %}
6201   opcode(0x89);
6202   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6203   ins_pipe(ialu_mem_reg);
6204 %}
6205 
6206 // Store Integer
// Stores an int register to memory (opcode 0x89, non-wide REX prefix
// encoding; prints as movl).
6207 instruct storeI(memory mem, rRegI src)
6208 %{
6209   match(Set mem (StoreI mem src));
6210 
6211   ins_cost(125); // XXX
6212   format %{ "movl    $mem, $src\t# int" %}
6213   opcode(0x89);
6214   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6215   ins_pipe(ialu_mem_reg);
6216 %}
6217 
6218 // Store Long
// Stores a long register to memory (opcode 0x89 with the *_wide REX
// encoding; prints as movq).
6219 instruct storeL(memory mem, rRegL src)
6220 %{
6221   match(Set mem (StoreL mem src));
6222 
6223   ins_cost(125); // XXX
6224   format %{ "movq    $mem, $src\t# long" %}
6225   opcode(0x89);
6226   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6227   ins_pipe(ialu_mem_reg); // XXX
6228 %}
6229 
6230 // Store Pointer
// Stores a pointer register to memory (opcode 0x89 with the *_wide REX
// encoding; prints as movq). The source operand class is any_RegP rather
// than the rRegP class used by the pointer loads in this file.
6231 instruct storeP(memory mem, any_RegP src)
6232 %{
6233   match(Set mem (StoreP mem src));
6234 
6235   ins_cost(125); // XXX
6236   format %{ "movq    $mem, $src\t# ptr" %}
6237   opcode(0x89);
6238   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6239   ins_pipe(ialu_mem_reg);
6240 %}
6241 
// Store a zero pointer (immP0) by writing r12 to memory instead of an
// immediate. Only valid under the predicate: compressed oops enabled and both
// CompressedOops::base() and CompressedKlassPointers::base() are NULL, which
// is the condition the format string labels "R12_heapbase==0".
6242 instruct storeImmP0(memory mem, immP0 zero)
6243 %{
6244   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6245   match(Set mem (StoreP mem zero));
6246 
6247   ins_cost(125); // XXX
6248   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6249   ins_encode %{
6250     __ movq($mem$$Address, r12);
6251   %}
6252   ins_pipe(ialu_mem_reg);
6253 %}
6254 
6255 // Store NULL Pointer, mark word, or other simple pointer constant.
// Accepts only immP31 constants and emits them as a 32-bit immediate
// (Con32) with opcode C7 /0 under a *_wide REX prefix; prints as movq.
// Cost 150, i.e. above the register-source forms storeP/storeImmP0 (125).
6256 instruct storeImmP(memory mem, immP31 src)
6257 %{
6258   match(Set mem (StoreP mem src));
6259 
6260   ins_cost(150); // XXX
6261   format %{ "movq    $mem, $src\t# ptr" %}
6262   opcode(0xC7); /* C7 /0 */
6263   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6264   ins_pipe(ialu_mem_imm);
6265 %}
6266 
6267 // Store Compressed Pointer
// Stores a narrow-oop register to memory with a 32-bit movl.
6268 instruct storeN(memory mem, rRegN src)
6269 %{
6270   match(Set mem (StoreN mem src));
6271 
6272   ins_cost(125); // XXX
6273   format %{ "movl    $mem, $src\t# compressed ptr" %}
6274   ins_encode %{
6275     __ movl($mem$$Address, $src$$Register);
6276   %}
6277   ins_pipe(ialu_mem_reg);
6278 %}
6279 
// Store a compressed klass pointer held in a narrow register to memory with
// a 32-bit movl (same encoding as storeN, different ideal node).
6280 instruct storeNKlass(memory mem, rRegN src)
6281 %{
6282   match(Set mem (StoreNKlass mem src));
6283 
6284   ins_cost(125); // XXX
6285   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6286   ins_encode %{
6287     __ movl($mem$$Address, $src$$Register);
6288   %}
6289   ins_pipe(ialu_mem_reg);
6290 %}
6291 
// Store a zero compressed pointer (immN0) by writing the low 32 bits of r12.
// Predicate: both CompressedOops::base() and CompressedKlassPointers::base()
// are NULL (the format labels this "R12_heapbase==0"). Unlike the sibling
// R12-based store rules, the predicate here does not test UseCompressedOops.
6292 instruct storeImmN0(memory mem, immN0 zero)
6293 %{
6294   predicate(CompressedOops::base() == NULL && CompressedKlassPointers::base() == NULL);
6295   match(Set mem (StoreN mem zero));
6296 
6297   ins_cost(125); // XXX
6298   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6299   ins_encode %{
6300     __ movl($mem$$Address, r12);
6301   %}
6302   ins_pipe(ialu_mem_reg);
6303 %}
6304 
// Store a compressed-oop constant directly to memory. A NULL constant is
// written as a 32-bit zero immediate; any other constant goes through
// set_narrow_oop on the destination address.
6305 instruct storeImmN(memory mem, immN src)
6306 %{
6307   match(Set mem (StoreN mem src));
6308 
6309   ins_cost(150); // XXX
6310   format %{ "movl    $mem, $src\t# compressed ptr" %}
6311   ins_encode %{
6312     address con = (address)$src$$constant;
6313     if (con == NULL) {
6314       __ movl($mem$$Address, (int32_t)0);
6315     } else {
6316       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6317     }
6318   %}
6319   ins_pipe(ialu_mem_imm);
6320 %}
6321 
// Store a compressed-klass constant directly to memory via set_narrow_klass.
// Unlike storeImmN there is no special case for a NULL constant.
6322 instruct storeImmNKlass(memory mem, immNKlass src)
6323 %{
6324   match(Set mem (StoreNKlass mem src));
6325 
6326   ins_cost(150); // XXX
6327   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6328   ins_encode %{
6329     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6330   %}
6331   ins_pipe(ialu_mem_imm);
6332 %}
6333 
6334 // Store Integer Immediate
// Zero variant: stores int 0 by writing the low 32 bits of r12. Only valid
// under the predicate (compressed oops with NULL oop and klass bases), the
// condition the format labels "R12_heapbase==0". Cost 125 versus 150 for the
// immediate form storeImmI.
6335 instruct storeImmI0(memory mem, immI0 zero)
6336 %{
6337   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6338   match(Set mem (StoreI mem zero));
6339 
6340   ins_cost(125); // XXX
6341   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6342   ins_encode %{
6343     __ movl($mem$$Address, r12);
6344   %}
6345   ins_pipe(ialu_mem_reg);
6346 %}
6347 
// Store an int immediate to memory: opcode C7 /0 followed by a 32-bit
// immediate (Con32), non-wide REX prefix; prints as movl.
6348 instruct storeImmI(memory mem, immI src)
6349 %{
6350   match(Set mem (StoreI mem src));
6351 
6352   ins_cost(150);
6353   format %{ "movl    $mem, $src\t# int" %}
6354   opcode(0xC7); /* C7 /0 */
6355   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6356   ins_pipe(ialu_mem_imm);
6357 %}
6358 
6359 // Store Long Immediate
// Zero variant: stores long 0 by writing r12 with movq. Only valid under the
// predicate (compressed oops with NULL oop and klass bases), the condition
// the format labels "R12_heapbase==0".
6360 instruct storeImmL0(memory mem, immL0 zero)
6361 %{
6362   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6363   match(Set mem (StoreL mem zero));
6364 
6365   ins_cost(125); // XXX
6366   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6367   ins_encode %{
6368     __ movq($mem$$Address, r12);
6369   %}
6370   ins_pipe(ialu_mem_reg);
6371 %}
6372 
// Store a long immediate to memory. Only immL32 constants are accepted
// because the encoding carries a 32-bit immediate (Con32) after C7 /0 under
// a *_wide REX prefix; prints as movq.
6373 instruct storeImmL(memory mem, immL32 src)
6374 %{
6375   match(Set mem (StoreL mem src));
6376 
6377   ins_cost(150);
6378   format %{ "movq    $mem, $src\t# long" %}
6379   opcode(0xC7); /* C7 /0 */
6380   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6381   ins_pipe(ialu_mem_imm);
6382 %}
6383 
6384 // Store Short/Char Immediate
// Zero variant: stores a 16-bit 0 by writing the low word of r12 with movw.
// Only valid under the predicate (compressed oops with NULL oop and klass
// bases), the condition the format labels "R12_heapbase==0".
6385 instruct storeImmC0(memory mem, immI0 zero)
6386 %{
6387   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6388   match(Set mem (StoreC mem zero));
6389 
6390   ins_cost(125); // XXX
6391   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6392   ins_encode %{
6393     __ movw($mem$$Address, r12);
6394   %}
6395   ins_pipe(ialu_mem_reg);
6396 %}
6397 
// Store a 16-bit immediate (immI16) for a StoreC node. Enabled only when
// UseStoreImmI16 is set. Encoded as the 32-bit store-immediate opcode C7 /0
// with SizePrefix and a 16-bit constant (Con16); prints as movw.
6398 instruct storeImmI16(memory mem, immI16 src)
6399 %{
6400   predicate(UseStoreImmI16);
6401   match(Set mem (StoreC mem src));
6402 
6403   ins_cost(150);
6404   format %{ "movw    $mem, $src\t# short/char" %}
6405   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6406   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6407   ins_pipe(ialu_mem_imm);
6408 %}
6409 
6410 // Store Byte Immediate
// Zero variant: stores a byte 0 by writing the low byte of r12 with movb.
// Only valid under the predicate (compressed oops with NULL oop and klass
// bases), the condition the format labels "R12_heapbase==0".
// The format comment says "byte" (it previously said "short/char", copied
// from storeImmC0) so the printed disassembly matches the StoreB/movb rule.
6411 instruct storeImmB0(memory mem, immI0 zero)
6412 %{
6413   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6414   match(Set mem (StoreB mem zero));
6415 
6416   ins_cost(125); // XXX
6417   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6418   ins_encode %{
6419     __ movb($mem$$Address, r12);
6420   %}
6421   ins_pipe(ialu_mem_reg);
6422 %}
6423 
// Store a byte immediate (immI8) to memory: opcode C6 /0 followed by the
// constant emitted through Con8or32; prints as movb.
6424 instruct storeImmB(memory mem, immI8 src)
6425 %{
6426   match(Set mem (StoreB mem src));
6427 
6428   ins_cost(150); // XXX
6429   format %{ "movb    $mem, $src\t# byte" %}
6430   opcode(0xC6); /* C6 /0 */
6431   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6432   ins_pipe(ialu_mem_imm);
6433 %}
6434 
6435 // Store CMS card-mark Immediate
// Register variant: writes card-mark byte 0 using the low byte of r12.
// Only valid under the predicate (compressed oops with NULL oop and klass
// bases), the condition the format labels "R12_heapbase==0". Cost 125 versus
// 150 for the immediate form storeImmCM0.
6436 instruct storeImmCM0_reg(memory mem, immI0 zero)
6437 %{
6438   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6439   match(Set mem (StoreCM mem zero));
6440 
6441   ins_cost(125); // XXX
6442   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6443   ins_encode %{
6444     __ movb($mem$$Address, r12);
6445   %}
6446   ins_pipe(ialu_mem_reg);
6447 %}
6448 
// Immediate variant of the card-mark store: writes byte 0 with opcode C6 /0
// and the constant emitted through Con8or32. Has no predicate, so it is
// available regardless of the compressed-oops configuration.
6449 instruct storeImmCM0(memory mem, immI0 src)
6450 %{
6451   match(Set mem (StoreCM mem src));
6452 
6453   ins_cost(150); // XXX
6454   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6455   opcode(0xC6); /* C6 /0 */
6456   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6457   ins_pipe(ialu_mem_imm);
6458 %}
6459 
6460 // Store Float
// Stores a float held in an XMM register to memory via movflt.
6461 instruct storeF(memory mem, regF src)
6462 %{
6463   match(Set mem (StoreF mem src));
6464 
6465   ins_cost(95); // XXX
6466   format %{ "movss   $mem, $src\t# float" %}
6467   ins_encode %{
6468     __ movflt($mem$$Address, $src$$XMMRegister);
6469   %}
6470   ins_pipe(pipe_slow); // XXX
6471 %}
6472 
6473 // Store immediate Float value (it is faster than store from XMM register)
// Zero variant: stores float 0.0 by writing the low 32 bits of r12. Only
// valid under the predicate (compressed oops with NULL oop and klass bases),
// the condition the format labels "R12_heapbase==0". Cost 25, below both
// storeF_imm (50) and storeF (95).
6474 instruct storeF0(memory mem, immF0 zero)
6475 %{
6476   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6477   match(Set mem (StoreF mem zero));
6478 
6479   ins_cost(25); // XXX
6480   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6481   ins_encode %{
6482     __ movl($mem$$Address, r12);
6483   %}
6484   ins_pipe(ialu_mem_reg);
6485 %}
6486 
// Store a float constant to memory as an integer immediate: opcode C7 /0
// with the constant emitted through Con32F_as_bits; prints as movl.
// Cost 50 versus 95 for the XMM-register store storeF.
6487 instruct storeF_imm(memory mem, immF src)
6488 %{
6489   match(Set mem (StoreF mem src));
6490 
6491   ins_cost(50);
6492   format %{ "movl    $mem, $src\t# float" %}
6493   opcode(0xC7); /* C7 /0 */
6494   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6495   ins_pipe(ialu_mem_imm);
6496 %}
6497 
6498 // Store Double
// Stores a double held in an XMM register to memory via movdbl.
6499 instruct storeD(memory mem, regD src)
6500 %{
6501   match(Set mem (StoreD mem src));
6502 
6503   ins_cost(95); // XXX
6504   format %{ "movsd   $mem, $src\t# double" %}
6505   ins_encode %{
6506     __ movdbl($mem$$Address, $src$$XMMRegister);
6507   %}
6508   ins_pipe(pipe_slow); // XXX
6509 %}
6510 
6511 // Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate variant: opcode C7 /0 under a *_wide REX prefix with the constant
// emitted through Con32F_as_bits; prints as movq.
// Predicate: compressed oops disabled, or the oop base is non-NULL.
// NOTE(review): the R12-based rule storeD0 additionally requires
// CompressedKlassPointers::base() == NULL, so with a NULL oop base and a
// non-NULL klass base neither zero-store rule's predicate holds -- confirm
// that this gap is intended.
6512 instruct storeD0_imm(memory mem, immD0 src)
6513 %{
6514   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
6515   match(Set mem (StoreD mem src));
6516 
6517   ins_cost(50);
6518   format %{ "movq    $mem, $src\t# double 0." %}
6519   opcode(0xC7); /* C7 /0 */
6520   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6521   ins_pipe(ialu_mem_imm);
6522 %}
6523 
// Store double 0.0 by writing r12 with movq. Only valid under the predicate
// (compressed oops with NULL oop and klass bases), the condition the format
// labels "R12_heapbase==0". Cost 25, below storeD0_imm (50) and storeD (95).
6524 instruct storeD0(memory mem, immD0 zero)
6525 %{
6526   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
6527   match(Set mem (StoreD mem zero));
6528 
6529   ins_cost(25); // XXX
6530   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6531   ins_encode %{
6532     __ movq($mem$$Address, r12);
6533   %}
6534   ins_pipe(ialu_mem_reg);
6535 %}
6536 
// Store an int register to a stack slot operand (opcode 0x89, non-wide REX
// prefix encoding; prints as movl).
6537 instruct storeSSI(stackSlotI dst, rRegI src)
6538 %{
6539   match(Set dst src);
6540 
6541   ins_cost(100);
6542   format %{ "movl    $dst, $src\t# int stk" %}
6543   opcode(0x89);
6544   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6545   ins_pipe( ialu_mem_reg );
6546 %}
6547 
// Store a long register to a stack slot operand (opcode 0x89 with the *_wide
// REX encoding; prints as movq).
6548 instruct storeSSL(stackSlotL dst, rRegL src)
6549 %{
6550   match(Set dst src);
6551 
6552   ins_cost(100);
6553   format %{ "movq    $dst, $src\t# long stk" %}
6554   opcode(0x89);
6555   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6556   ins_pipe(ialu_mem_reg);
6557 %}
6558 
// Store a pointer register to a stack slot operand (opcode 0x89 with the
// *_wide REX encoding; prints as movq).
6559 instruct storeSSP(stackSlotP dst, rRegP src)
6560 %{
6561   match(Set dst src);
6562 
6563   ins_cost(100);
6564   format %{ "movq    $dst, $src\t# ptr stk" %}
6565   opcode(0x89);
6566   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6567   ins_pipe(ialu_mem_reg);
6568 %}
6569 
// Store a float from an XMM register to a stack slot. The address is built
// explicitly as rsp plus the operand's displacement.
6570 instruct storeSSF(stackSlotF dst, regF src)
6571 %{
6572   match(Set dst src);
6573 
6574   ins_cost(95); // XXX
6575   format %{ "movss   $dst, $src\t# float stk" %}
6576   ins_encode %{
6577     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6578   %}
6579   ins_pipe(pipe_slow); // XXX
6580 %}
6581 
// Store a double from an XMM register to a stack slot. The address is built
// explicitly as rsp plus the operand's displacement.
6582 instruct storeSSD(stackSlotD dst, regD src)
6583 %{
6584   match(Set dst src);
6585 
6586   ins_cost(95); // XXX
6587   format %{ "movsd   $dst, $src\t# double stk" %}
6588   ins_encode %{
6589     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6590   %}
6591   ins_pipe(pipe_slow); // XXX
6592 %}
6593 
// Cache line write-back for a CacheWB node. Available only when
// VM_Version::supports_data_cache_line_flush() is true. The operand must be
// a plain base-register address: the asserts require no index register and a
// zero displacement, and the emitted address uses the base register only.
6594 instruct cacheWB(indirect addr)
6595 %{
6596   predicate(VM_Version::supports_data_cache_line_flush());
6597   match(CacheWB addr);
6598 
6599   ins_cost(100);
6600   format %{"cache wb $addr" %}
6601   ins_encode %{
6602     assert($addr->index_position() < 0, "should be");
6603     assert($addr$$disp == 0, "should be");
6604     __ cache_wb(Address($addr$$base$$Register, 0));
6605   %}
6606   ins_pipe(pipe_slow); // XXX
6607 %}
6608 
// Synchronization emitted for a CacheWBPreSync node: calls cache_wbsync with
// argument true (the post-sync rule passes false). Available only when
// VM_Version::supports_data_cache_line_flush() is true.
6609 instruct cacheWBPreSync()
6610 %{
6611   predicate(VM_Version::supports_data_cache_line_flush());
6612   match(CacheWBPreSync);
6613 
6614   ins_cost(100);
6615   format %{"cache wb presync" %}
6616   ins_encode %{
6617     __ cache_wbsync(true);
6618   %}
6619   ins_pipe(pipe_slow); // XXX
6620 %}
6621 
// Synchronization emitted for a CacheWBPostSync node: calls cache_wbsync with
// argument false (the pre-sync rule passes true). Available only when
// VM_Version::supports_data_cache_line_flush() is true.
6622 instruct cacheWBPostSync()
6623 %{
6624   predicate(VM_Version::supports_data_cache_line_flush());
6625   match(CacheWBPostSync);
6626 
6627   ins_cost(100);
6628   format %{"cache wb postsync" %}
6629   ins_encode %{
6630     __ cache_wbsync(false);
6631   %}
6632   ins_pipe(pipe_slow); // XXX
6633 %}
6634 
6635 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int in place (dst is both input and output
// of the match rule). Two-byte opcode 0F C8 with a non-wide REX prefix;
// prints as bswapl.
6636 instruct bytes_reverse_int(rRegI dst) %{
6637   match(Set dst (ReverseBytesI dst));
6638 
6639   format %{ "bswapl  $dst" %}
6640   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6641   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6642   ins_pipe( ialu_reg );
6643 %}
6644 
// Reverse the byte order of a long in place (dst is both input and output
// of the match rule). Two-byte opcode 0F C8 with the *_wide REX prefix;
// prints as bswapq.
6645 instruct bytes_reverse_long(rRegL dst) %{
6646   match(Set dst (ReverseBytesL dst));
6647 
6648   format %{ "bswapq  $dst" %}
6649   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6650   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6651   ins_pipe( ialu_reg);
6652 %}
6653 
// Reverse the two bytes of an unsigned 16-bit value in place: a 32-bit bswap
// moves the swapped bytes into the upper half, then a logical right shift by
// 16 brings them down with zero fill. Flags are declared clobbered.
6654 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
6655   match(Set dst (ReverseBytesUS dst));
6656   effect(KILL cr); // the shift writes the condition flags
6657 
6658   format %{ "bswapl  $dst\n\t"
6659             "shrl    $dst,16\n\t" %}
6660   ins_encode %{
6661     __ bswapl($dst$$Register);
6662     __ shrl($dst$$Register, 16);
6663   %}
6664   ins_pipe( ialu_reg );
6665 %}
6666 
// Reverse the two bytes of a signed 16-bit value in place: a 32-bit bswap
// moves the swapped bytes into the upper half, then an arithmetic right
// shift by 16 brings them down with sign fill. Flags are declared clobbered.
// The format prints "sarl" to match the sarl instruction actually emitted
// (it previously printed "sar", unlike the "shrl" shown by the unsigned
// sibling rule).
6667 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
6668   match(Set dst (ReverseBytesS dst));
6669   effect(KILL cr); // the shift writes the condition flags
6670 
6671   format %{ "bswapl  $dst\n\t"
6672             "sarl    $dst,16\n\t" %}
6673   ins_encode %{
6674     __ bswapl($dst$$Register);
6675     __ sarl($dst$$Register, 16);
6676   %}
6677   ins_pipe( ialu_reg );
6678 %}
6679 
6680 //---------- Zeros Count Instructions ------------------------------------------
6681 
// Count leading zeros of an int with a single lzcntl. Selected when
// UseCountLeadingZerosInstruction is set; the bsr-based rule covers the
// opposite case. Flags are declared clobbered.
6682 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6683   predicate(UseCountLeadingZerosInstruction);
6684   match(Set dst (CountLeadingZerosI src));
6685   effect(KILL cr);
6686 
6687   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6688   ins_encode %{
6689     __ lzcntl($dst$$Register, $src$$Register);
6690   %}
6691   ins_pipe(ialu_reg);
6692 %}
6693 
// Count leading zeros of an int without lzcnt (selected when
// UseCountLeadingZerosInstruction is not set). bsrl yields the index of the
// highest set bit; the result is then computed as 31 - index via negl/addl.
// When the notZero branch is not taken (bsr found no set bit), dst is forced
// to -1 so the same negl/addl sequence produces 32.
6694 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
6695   predicate(!UseCountLeadingZerosInstruction);
6696   match(Set dst (CountLeadingZerosI src));
6697   effect(KILL cr);
6698 
6699   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6700             "jnz     skip\n\t"
6701             "movl    $dst, -1\n"
6702       "skip:\n\t"
6703             "negl    $dst\n\t"
6704             "addl    $dst, 31" %}
6705   ins_encode %{
6706     Register Rdst = $dst$$Register;
6707     Register Rsrc = $src$$Register;
6708     Label skip;
6709     __ bsrl(Rdst, Rsrc);
6710     __ jccb(Assembler::notZero, skip);
6711     __ movl(Rdst, -1); // zero input: -(-1) + 31 == 32
6712     __ bind(skip);
6713     __ negl(Rdst);
6714     __ addl(Rdst, BitsPerInt - 1);
6715   %}
6716   ins_pipe(ialu_reg);
6717 %}
6718 
// Count leading zeros of a long with a single lzcntq; the result goes to an
// int register. Selected when UseCountLeadingZerosInstruction is set.
// Flags are declared clobbered.
6719 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6720   predicate(UseCountLeadingZerosInstruction);
6721   match(Set dst (CountLeadingZerosL src));
6722   effect(KILL cr);
6723 
6724   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6725   ins_encode %{
6726     __ lzcntq($dst$$Register, $src$$Register);
6727   %}
6728   ins_pipe(ialu_reg);
6729 %}
6730 
// Count leading zeros of a long without lzcnt (selected when
// UseCountLeadingZerosInstruction is not set). bsrq yields the index of the
// highest set bit; the result is then computed as 63 - index via negl/addl.
// When the notZero branch is not taken (bsr found no set bit), dst is forced
// to -1 so the same negl/addl sequence produces 64.
6731 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
6732   predicate(!UseCountLeadingZerosInstruction);
6733   match(Set dst (CountLeadingZerosL src));
6734   effect(KILL cr);
6735 
6736   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6737             "jnz     skip\n\t"
6738             "movl    $dst, -1\n"
6739       "skip:\n\t"
6740             "negl    $dst\n\t"
6741             "addl    $dst, 63" %}
6742   ins_encode %{
6743     Register Rdst = $dst$$Register;
6744     Register Rsrc = $src$$Register;
6745     Label skip;
6746     __ bsrq(Rdst, Rsrc);
6747     __ jccb(Assembler::notZero, skip);
6748     __ movl(Rdst, -1); // zero input: -(-1) + 63 == 64
6749     __ bind(skip);
6750     __ negl(Rdst);
6751     __ addl(Rdst, BitsPerLong - 1);
6752   %}
6753   ins_pipe(ialu_reg);
6754 %}
6755 
// Count trailing zeros of an int with a single tzcntl. Selected when
// UseCountTrailingZerosInstruction is set; the bsf-based rule covers the
// opposite case. Flags are declared clobbered.
6756 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6757   predicate(UseCountTrailingZerosInstruction);
6758   match(Set dst (CountTrailingZerosI src));
6759   effect(KILL cr);
6760 
6761   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
6762   ins_encode %{
6763     __ tzcntl($dst$$Register, $src$$Register);
6764   %}
6765   ins_pipe(ialu_reg);
6766 %}
6767 
// Count trailing zeros of an int without tzcnt (selected when
// UseCountTrailingZerosInstruction is not set). bsfl yields the index of the
// lowest set bit, which is the answer directly; when the notZero branch is
// not taken (no set bit found), dst is loaded with BitsPerInt instead.
6768 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
6769   predicate(!UseCountTrailingZerosInstruction);
6770   match(Set dst (CountTrailingZerosI src));
6771   effect(KILL cr);
6772 
6773   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6774             "jnz     done\n\t"
6775             "movl    $dst, 32\n"
6776       "done:" %}
6777   ins_encode %{
6778     Register Rdst = $dst$$Register;
6779     Label done;
6780     __ bsfl(Rdst, $src$$Register);
6781     __ jccb(Assembler::notZero, done);
6782     __ movl(Rdst, BitsPerInt); // zero input: result is the full bit width
6783     __ bind(done);
6784   %}
6785   ins_pipe(ialu_reg);
6786 %}
6787 
// Count trailing zeros of a long with a single tzcntq; the result goes to an
// int register. Selected when UseCountTrailingZerosInstruction is set.
// Flags are declared clobbered.
6788 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6789   predicate(UseCountTrailingZerosInstruction);
6790   match(Set dst (CountTrailingZerosL src));
6791   effect(KILL cr);
6792 
6793   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
6794   ins_encode %{
6795     __ tzcntq($dst$$Register, $src$$Register);
6796   %}
6797   ins_pipe(ialu_reg);
6798 %}
6799 
// Count trailing zeros of a long without tzcnt (selected when
// UseCountTrailingZerosInstruction is not set). bsfq yields the index of the
// lowest set bit, which is the answer directly; when the notZero branch is
// not taken (no set bit found), dst is loaded with BitsPerLong instead.
6800 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
6801   predicate(!UseCountTrailingZerosInstruction);
6802   match(Set dst (CountTrailingZerosL src));
6803   effect(KILL cr);
6804 
6805   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6806             "jnz     done\n\t"
6807             "movl    $dst, 64\n"
6808       "done:" %}
6809   ins_encode %{
6810     Register Rdst = $dst$$Register;
6811     Label done;
6812     __ bsfq(Rdst, $src$$Register);
6813     __ jccb(Assembler::notZero, done);
6814     __ movl(Rdst, BitsPerLong); // zero input: result is the full bit width
6815     __ bind(done);
6816   %}
6817   ins_pipe(ialu_reg);
6818 %}
6819 
6820 
6821 //---------- Population Count Instructions -------------------------------------
6822 
// Population count of an int register via popcntl. Available only when
// UsePopCountInstruction is set. Flags are declared clobbered.
6823 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
6824   predicate(UsePopCountInstruction);
6825   match(Set dst (PopCountI src));
6826   effect(KILL cr);
6827 
6828   format %{ "popcnt  $dst, $src" %}
6829   ins_encode %{
6830     __ popcntl($dst$$Register, $src$$Register);
6831   %}
6832   ins_pipe(ialu_reg);
6833 %}
6834 
// Population count of an int loaded from memory: folds the LoadI into the
// popcntl memory-operand form. Available only when UsePopCountInstruction is
// set. Flags are declared clobbered.
6835 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6836   predicate(UsePopCountInstruction);
6837   match(Set dst (PopCountI (LoadI mem)));
6838   effect(KILL cr);
6839 
6840   format %{ "popcnt  $dst, $mem" %}
6841   ins_encode %{
6842     __ popcntl($dst$$Register, $mem$$Address);
6843   %}
6844   ins_pipe(ialu_reg);
6845 %}
6846 
6847 // Note: Long.bitCount(long) returns an int.
// Population count of a long register via popcntq, with the result placed in
// an int register. Available only when UsePopCountInstruction is set.
// Flags are declared clobbered.
6848 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
6849   predicate(UsePopCountInstruction);
6850   match(Set dst (PopCountL src));
6851   effect(KILL cr);
6852 
6853   format %{ "popcnt  $dst, $src" %}
6854   ins_encode %{
6855     __ popcntq($dst$$Register, $src$$Register);
6856   %}
6857   ins_pipe(ialu_reg);
6858 %}
6859 
6860 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: folds the LoadL into the
// popcntq memory-operand form, with the result placed in an int register.
// Available only when UsePopCountInstruction is set.
6861 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6862   predicate(UsePopCountInstruction);
6863   match(Set dst (PopCountL (LoadL mem)));
6864   effect(KILL cr);
6865 
6866   format %{ "popcnt  $dst, $mem" %}
6867   ins_encode %{
6868     __ popcntq($dst$$Register, $mem$$Address);
6869   %}
6870   ins_pipe(ialu_reg);
6871 %}
6872 
6873 
6874 //----------MemBar Instructions-----------------------------------------------
6875 // Memory barrier flavors
6876 
// Acquire barrier: matches both MemBarAcquire and LoadFence and emits no
// code (size 0, empty encoding, cost 0).
6877 instruct membar_acquire()
6878 %{
6879   match(MemBarAcquire);
6880   match(LoadFence);
6881   ins_cost(0);
6882 
6883   size(0);
6884   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6885   ins_encode();
6886   ins_pipe(empty);
6887 %}
6888 
// Acquire barrier associated with lock acquisition: emits no code (size 0).
// Per the format text, the preceding CMPXCHG in FastLock makes an explicit
// barrier unnecessary.
6889 instruct membar_acquire_lock()
6890 %{
6891   match(MemBarAcquireLock);
6892   ins_cost(0);
6893 
6894   size(0);
6895   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6896   ins_encode();
6897   ins_pipe(empty);
6898 %}
6899 
// Release barrier: matches both MemBarRelease and StoreFence and emits no
// code (size 0, empty encoding, cost 0).
6900 instruct membar_release()
6901 %{
6902   match(MemBarRelease);
6903   match(StoreFence);
6904   ins_cost(0);
6905 
6906   size(0);
6907   format %{ "MEMBAR-release ! (empty encoding)" %}
6908   ins_encode();
6909   ins_pipe(empty);
6910 %}
6911 
// Release barrier associated with lock release: emits no code (size 0).
// Per the format text, the FastUnlock that follows makes an explicit barrier
// unnecessary.
6912 instruct membar_release_lock()
6913 %{
6914   match(MemBarReleaseLock);
6915   ins_cost(0);
6916 
6917   size(0);
6918   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6919   ins_encode();
6920   ins_pipe(empty);
6921 %}
6922 
// Volatile (StoreLoad) barrier: the only barrier rule in this group that
// emits code. It calls membar(Assembler::StoreLoad); the format shows the
// resulting instruction as "lock addl [rsp + #0], 0". Flags are declared
// clobbered. Cost 400, so the zero-cost unnecessary_membar_volatile rule is
// cheaper whenever its predicate holds.
6923 instruct membar_volatile(rFlagsReg cr) %{
6924   match(MemBarVolatile);
6925   effect(KILL cr);
6926   ins_cost(400);
6927 
6928   format %{
6929     $$template
6930     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6931   %}
6932   ins_encode %{
6933     __ membar(Assembler::StoreLoad);
6934   %}
6935   ins_pipe(pipe_slow);
6936 %}
6937 
// Zero-size, zero-cost form of MemBarVolatile, selected only when
// Matcher::post_store_load_barrier(n) returns true for the node.
// NOTE(review): presumably that means a following instruction already
// provides the StoreLoad ordering - confirm against the Matcher.
instruct unnecessary_membar_volatile() %{
  predicate(Matcher::post_store_load_barrier(n));
  match(MemBarVolatile);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
6949 
// StoreStore barrier.  Nothing is emitted: the encoding is empty and the
// declared size is zero.
instruct membar_storestore()
%{
  match(MemBarStoreStore);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
6959 
6960 //----------Move Instructions--------------------------------------------------
6961 
// Reinterpret a long as a pointer.  A register-to-register move is emitted
// only when source and destination are different registers.
instruct castX2P(rRegP dst, rRegL src) %{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6974 
// CastP2X whose input lives in a narrow-pointer register (rRegN).  A
// pointer-width move is emitted only when the two registers differ.
instruct castN2X(rRegL dst, rRegN src) %{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6987 
// Reinterpret a pointer as a long.  A register-to-register move is emitted
// only when source and destination are different registers.
instruct castP2X(rRegL dst, rRegP src) %{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
7000 
// Reinterpret a compressed pointer as an int.  A 32-bit move is emitted
// only when source and destination are different registers.
instruct castN2I(rRegI dst, rRegN src) %{
  match(Set dst (CastN2I src));

  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to != from) {
      __ movl(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
7013 
// Reinterpret an int as a compressed pointer.  A 32-bit move is emitted
// only when source and destination are different registers.
instruct castI2N(rRegN dst, rRegI src) %{
  match(Set dst (CastI2N src));

  format %{ "movl    $dst, $src\t# int -> compressed ptr" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to != from) {
      __ movl(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
7026 
7027 
// Convert oop into int for vectors alignment masking.
// Folds ConvL2I(CastP2X ptr) into one 32-bit move, which is emitted
// unconditionally (even when both operands share a register).
instruct convP2I(rRegI dst, rRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl    $dst, $src\t# ptr -> int" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
7039 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// Only applies while CompressedOops::shift() is zero; the whole
// ConvL2I(CastP2X(DecodeN n)) chain then becomes one 32-bit move.
instruct convN2I(rRegI dst, rRegN src) %{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(CompressedOops::shift() == 0);

  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
7053 
// Convert oop pointer into compressed form.
// Used when the node type is not known to be NotNull.  The value is first
// copied into dst (unless already there) and then encoded in place.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register oop    = $src$$Register;
    Register narrow = $dst$$Register;
    if (narrow != oop) {
      __ movq(narrow, oop);
    }
    __ encode_heap_oop(narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
7070 
// Compress an oop whose node type is known to be NotNull, using the
// two-register not-null encode helper of the macro assembler.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register oop    = $src$$Register;
    __ encode_heap_oop_not_null(narrow, oop);
  %}
  ins_pipe(ialu_reg_long);
%}
7081 
// Decompress a narrow oop whose node type is neither NotNull nor Constant.
// The narrow value is first copied into dst (unless already there) and
// then decoded in place.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Register oop    = $dst$$Register;
    if (oop != narrow) {
      __ movq(oop, narrow);
    }
    __ decode_heap_oop(oop);
  %}
  ins_pipe(ialu_reg_long);
%}
7098 
// Decompress a narrow oop whose node type is NotNull or Constant.  Uses the
// in-place helper when both operands share a register and the two-register
// helper otherwise.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Register oop    = $dst$$Register;
    if (narrow == oop) {
      __ decode_heap_oop_not_null(oop);
    } else {
      __ decode_heap_oop_not_null(oop, narrow);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
7116 
// Compress a klass pointer (EncodePKlass) with the two-register not-null
// helper of the macro assembler.  The flags are declared killed.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodePKlass src));
  effect(KILL cr);

  format %{ "encode_klass_not_null $dst,$src" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register klass  = $src$$Register;
    __ encode_klass_not_null(narrow, klass);
  %}
  ins_pipe(ialu_reg_long);
%}
7126 
// Decompress a narrow klass pointer (DecodeNKlass).  Uses the in-place
// helper when both operands share a register and the two-register helper
// otherwise.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeNKlass src));
  effect(KILL cr);

  format %{ "decode_klass_not_null $dst,$src" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Register klass  = $dst$$Register;
    if (narrow == klass) {
      __ decode_klass_not_null(klass);
    } else {
      __ decode_klass_not_null(klass, narrow);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
7142 
7143 
7144 //----------Conditional Move---------------------------------------------------
7145 // Jump
// Dummy instruction for generating temp registers: predicate(false) means
// the matcher never selects it.  Shape: table jump indexed by a shifted
// switch value, with dest as the temporary holding the table address.
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest)
%{
  predicate(false);
  match(Jump (LShiftL switch_val shift));
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    Register table = $dest$$Register;
    Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
    Address dispatch(table, $switch_val$$Register, scale);
    __ lea(table, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
7167 
// Table jump whose index is (switch_val << shift) + offset.  The table
// address is loaded into the temporary dest and the indirect jump goes
// through [dest + switch_val * scale + offset].
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest)
%{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    Register table = $dest$$Register;
    Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
    int displacement = (int) $offset$$constant;
    Address dispatch(table, $switch_val$$Register, scale, displacement);
    __ lea(table, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
7187 
// Plain table jump: the switch value is used unscaled as a byte offset into
// the table whose address is loaded into the temporary dest.
instruct jumpXtnd(rRegL switch_val, rRegI dest)
%{
  match(Jump switch_val);
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    Register table = $dest$$Register;
    Address dispatch(table, $switch_val$$Register, Address::times_1);
    __ lea(table, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
7207 
// Conditional move of an int, register source, signed condition (cmpOp).
// Encoded as 0F 40+cc /r; dst is both an input and the result.
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7219 
// Conditional move of an int, register source, unsigned condition
// (cmpOpU on rFlagsRegU).  Same encoding as the signed form.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7229 
// Int conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
// it simply expands to cmovI_regU.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
7237 
// Conditional move of an int loaded from memory, signed condition.
// The LoadI is folded into the cmov memory operand.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
7248 
// Conditional move of an int loaded from memory, unsigned condition.
// The LoadI is folded into the cmov memory operand.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
7260 
// Int conditional move from memory for the cmpOpUCF / rFlagsRegUCF operand
// pair; it simply expands to cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7268 
// Conditional move of a compressed pointer, signed condition.  Uses the
// 32-bit cmov form (no wide REX prefix).
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7280 
// Conditional move of a compressed pointer, unsigned condition.  Uses the
// 32-bit cmov form (no wide REX prefix).
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7292 
// Compressed-pointer conditional move for the cmpOpUCF / rFlagsRegUCF
// operand pair; it simply expands to cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
7300 
// Conditional move of a pointer, signed condition.  Uses the 64-bit cmov
// form (wide REX prefix).
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7312 
// Conditional move of a pointer, unsigned condition.  Uses the 64-bit cmov
// form (wide REX prefix).
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7324 
// Pointer conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
// it simply expands to cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7332 
7333 // DISABLED: Requires the ADLC to emit a bottom_type call that
7334 // correctly meets the two pointer arguments; one is an incoming
7335 // register but the other is a memory operand.  ALSO appears to
7336 // be buggy with implicit null checks.
7337 //
7338 //// Conditional move
7339 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7340 //%{
7341 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7342 //  ins_cost(250);
7343 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7344 //  opcode(0x0F,0x40);
7345 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7346 //  ins_pipe( pipe_cmov_mem );
7347 //%}
7348 //
7349 //// Conditional move
7350 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7351 //%{
7352 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7353 //  ins_cost(250);
7354 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7355 //  opcode(0x0F,0x40);
7356 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7357 //  ins_pipe( pipe_cmov_mem );
7358 //%}
7359 
// Conditional move of a long, register source, signed condition.  Uses the
// 64-bit cmov form (wide REX prefix).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7370 
// Conditional move of a long loaded from memory, signed condition.  The
// LoadL is folded into the cmov memory operand.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);  // XXX
%}
7381 
// Conditional move of a long, register source, unsigned condition.  Uses
// the 64-bit cmov form (wide REX prefix).
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7392 
// Long conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
// it simply expands to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7400 
// Conditional move of a long loaded from memory, unsigned condition.  The
// LoadL is folded into the cmov memory operand.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem); // XXX
%}
7411 
// Long conditional move from memory for the cmpOpUCF / rFlagsRegUCF operand
// pair; it simply expands to cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
7419 
// Conditional move of a float, signed condition.  Implemented as a short
// branch on the inverted condition that skips over a movss.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // Flipping the low bit of the condition code inverts the sense of the
    // branch relative to the CMOV condition.
    Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7437 
7438 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7439 // %{
7440 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7441 
7442 //   ins_cost(200); // XXX
7443 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7444 //             "movss     $dst, $src\n"
7445 //     "skip:" %}
7446 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7447 //   ins_pipe(pipe_slow);
7448 // %}
7449 
// Conditional move of a float, unsigned condition.  Implemented as a short
// branch on the inverted condition that skips over a movss.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // Flipping the low bit of the condition code inverts the sense of the
    // branch relative to the CMOV condition.
    Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7467 
// Float conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
// it simply expands to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
7475 
// Conditional move of a double, signed condition.  Implemented as a short
// branch on the inverted condition that skips over a movsd.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // Flipping the low bit of the condition code inverts the sense of the
    // branch relative to the CMOV condition.
    Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7493 
// Conditional move of a double, unsigned condition.  Implemented as a short
// branch on the inverted condition that skips over a movsd.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // Flipping the low bit of the condition code inverts the sense of the
    // branch relative to the CMOV condition.
    Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7511 
// Double conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
// it simply expands to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
7519 
7520 //----------Arithmetic Instructions--------------------------------------------
7521 //----------Addition Instructions----------------------------------------------
7522 
// int add, register += register (opcode 03 /r).  Flags are killed.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7533 
// int add, register += immediate (opcode 81 /0).  The OpcSErm / Con8or32
// encoding classes emit the opcode and the constant.  Flags are killed.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7544 
// int add, register += memory (opcode 03 /r).  The LoadI is folded into the
// memory operand.  Flags are killed.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
7556 
// int add, memory += register: a load/add/store of the same address is
// folded into one read-modify-write instruction.  Flags are killed.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7568 
// int add, memory += immediate: a load/add/store of the same address is
// folded into one read-modify-write instruction.  Flags are killed.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7580 
// int increment of a register (opcode FF /0), used for an AddI with an
// immI1 operand when UseIncDec is set.  Flags are killed.
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7592 
// int increment directly in memory (opcode FF /0), used for a
// load/add/store with an immI1 operand when UseIncDec is set.  Flags are
// killed.
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
7605 
// int decrement of a register (opcode FF /1), matched as an AddI whose
// immediate operand is immI_M1, when UseIncDec is set.  Flags are killed.
// XXX why does that use AddI
// NOTE(review): presumably because subtracting a constant reaches the
// matcher as an add of its negation - confirm.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7618 
// int decrement directly in memory (opcode FF /1), matched as a
// load/AddI/store whose immediate operand is immI_M1, when UseIncDec is
// set.  Flags are killed.
// XXX why does that use AddI
// NOTE(review): presumably because subtracting a constant reaches the
// matcher as an add of its negation - confirm.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
7632 
// Three-operand int add of register and immediate via lea with an
// address-size (0x67) prefix.  No flags effect is declared, and dst need
// not equal src0.
instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(Opcode(0x67),
             REX_reg_reg(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
7643 
// long add, register += register (REX.W 03 /r).  Flags are killed.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7654 
// long add, register += 32-bit immediate (opcode 81 /0, wide form).  The
// OpcSErm_wide / Con8or32 encoding classes emit the opcode and the
// constant.  Flags are killed.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7665 
// long add, register += memory (REX.W 03 /r).  The LoadL is folded into the
// memory operand.  Flags are killed.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
7677 
// long add, memory += register: a load/add/store of the same address is
// folded into one read-modify-write instruction.  Flags are killed.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7689 
// long add, memory += 32-bit immediate: a load/add/store of the same
// address is folded into one read-modify-write instruction.  Flags are
// killed.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7702 
// long increment of a register (REX.W FF /0), used for an AddL with an
// immL1 operand when UseIncDec is set.  Flags are killed.
//
// The destination is a long register (rRegL): the rule matches AddL and
// emits the 64-bit incq, in line with addL_rReg and decL_rReg.  It was
// previously declared rRegI, which does not agree with the long-typed
// result of AddL.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7714 
// long increment directly in memory (REX.W FF /0), used for a
// load/add/store with an immL1 operand when UseIncDec is set.  Flags are
// killed.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
7727 
// long decrement of a register (REX.W FF /1), matched as an AddL whose
// immediate operand is immL_M1, when UseIncDec is set.  Flags are killed.
// XXX why does that use AddL
// NOTE(review): presumably because subtracting a constant reaches the
// matcher as an add of its negation - confirm.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7740 
// long decrement directly in memory (REX.W FF /1), matched as a
// load/AddL/store whose immediate operand is immL_M1, when UseIncDec is
// set.  Flags are killed.
// XXX why does that use AddL
// NOTE(review): presumably because subtracting a constant reaches the
// matcher as an add of its negation - confirm.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
7754 
// Three-operand long add of register and 32-bit immediate via leaq.  No
// flags effect is declared, and dst need not equal src0.
instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) %{
  match(Set dst (AddL src0 src1));
  ins_cost(110);

  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
7765 
// Pointer add, pointer register += long register (REX.W 03 /r).  Flags are
// killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7776 
// Pointer add, pointer register += 32-bit immediate (opcode 81 /0, wide
// form).  Flags are killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7787 
7788 // XXX addP mem ops ????
7789 
// Three-operand pointer add of register and 32-bit immediate via leaq.  No
// flags effect is declared, and dst need not equal src0.
instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
7800 
// CheckCastPP on a pointer register.  Emits no code: the encoding is empty
// and the declared size is zero; the value stays in dst.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7810 
// CastPP on a pointer register.  Emits no code: the encoding is empty and
// the declared size is zero; the value stays in dst.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7820 
// CastII on an int register.  Emits no code: the encoding is empty, the
// declared size is zero and the cost is zero; the value stays in dst.
instruct castII(rRegI dst) %{
  match(Set dst (CastII dst));

  size(0);
  ins_cost(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7831 
// CastLL on a long register.  Emits no code: the encoding is empty, the
// declared size is zero and the cost is zero; the value stays in dst.
instruct castLL(rRegL dst) %{
  match(Set dst (CastLL dst));

  size(0);
  ins_cost(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7842 
// LoadP-locked same as a regular LoadP when used with compare-swap.
// Encoded as an ordinary 64-bit register load (REX.W 8B /r).
instruct loadPLocked(rRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));
  ins_cost(125); // XXX

  format %{ "movq    $dst, $mem\t# ptr locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem),
             OpcP,
             reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
7854 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
//
// The expected old value is pinned to rax (rax_RegP) and the result of the
// instruction is the flags register itself.
// NOTE(review): unlike storeIConditional and storeLConditional, no
// KILL oldval effect is declared here - confirm that this is intentional.
instruct storePConditional(memory heap_top_ptr,
                           rax_RegP oldval,
                           rRegP newval,
                           rFlagsReg cr) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, heap_top_ptr),
             OpcP,
             OpcS,
             reg_mem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}
7874 
// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The expected old value is pinned to rax (rax_RegI) and is declared
// killed; the result of the instruction is the flags register.
instruct storeIConditional(memory mem,
                           rax_RegI oldval,
                           rRegI newval,
                           rFlagsReg cr) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem),
             OpcP,
             OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
7890 
// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The expected old value is pinned to rax (rax_RegL) and is declared
// killed; the result of the instruction is the flags register.
instruct storeLConditional(memory mem,
                           rax_RegL oldval,
                           rRegL newval,
                           rFlagsReg cr) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem),
             OpcP,
             OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
7906 
7907 
7908 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7909 instruct compareAndSwapP(rRegI res,
7910                          memory mem_ptr,
7911                          rax_RegP oldval, rRegP newval,
7912                          rFlagsReg cr)
7913 %{
       // Pointer compare-and-swap producing an int result in res.
       // Serves both CompareAndSwapP and WeakCompareAndSwapP. Only selectable
       // when VM_Version::supports_cx8() holds. Flags and oldval are declared
       // killed.
7914   predicate(VM_Version::supports_cx8());
7915   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7916   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7917   effect(KILL cr, KILL oldval);
7918 
7919   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7920             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7921             "sete    $res\n\t"
7922             "movzbl  $res, $res" %}
7923   opcode(0x0F, 0xB1);
       // Three instructions are emitted back to back: the locked cmpxchg,
       // then sete on res, then movzbl res,res to widen the byte to an int.
7924   ins_encode(lock_prefix,
7925              REX_reg_mem_wide(newval, mem_ptr),
7926              OpcP, OpcS,
7927              reg_mem(newval, mem_ptr),
7928              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7929              REX_reg_breg(res, res), // movzbl
7930              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7931   ins_pipe( pipe_cmpxchg );
7932 %}
7933 
7934 instruct compareAndSwapL(rRegI res,
7935                          memory mem_ptr,
7936                          rax_RegL oldval, rRegL newval,
7937                          rFlagsReg cr)
7938 %{
       // Long compare-and-swap producing an int result in res.
       // Serves both CompareAndSwapL and WeakCompareAndSwapL. Only selectable
       // when VM_Version::supports_cx8() holds. Flags and oldval are declared
       // killed.
7939   predicate(VM_Version::supports_cx8());
7940   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7941   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7942   effect(KILL cr, KILL oldval);
7943 
7944   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7945             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7946             "sete    $res\n\t"
7947             "movzbl  $res, $res" %}
7948   opcode(0x0F, 0xB1);
       // Locked cmpxchg (wide REX), followed by sete/movzbl on res.
7949   ins_encode(lock_prefix,
7950              REX_reg_mem_wide(newval, mem_ptr),
7951              OpcP, OpcS,
7952              reg_mem(newval, mem_ptr),
7953              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7954              REX_reg_breg(res, res), // movzbl
7955              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7956   ins_pipe( pipe_cmpxchg );
7957 %}
7958 
7959 instruct compareAndSwapI(rRegI res,
7960                          memory mem_ptr,
7961                          rax_RegI oldval, rRegI newval,
7962                          rFlagsReg cr)
7963 %{
       // Int compare-and-swap producing an int result in res.
       // Serves both CompareAndSwapI and WeakCompareAndSwapI; no predicate.
       // Flags and oldval are declared killed.
7964   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7965   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7966   effect(KILL cr, KILL oldval);
7967 
7968   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7969             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7970             "sete    $res\n\t"
7971             "movzbl  $res, $res" %}
7972   opcode(0x0F, 0xB1);
       // Locked cmpxchg (non-wide REX), followed by sete/movzbl on res.
7973   ins_encode(lock_prefix,
7974              REX_reg_mem(newval, mem_ptr),
7975              OpcP, OpcS,
7976              reg_mem(newval, mem_ptr),
7977              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7978              REX_reg_breg(res, res), // movzbl
7979              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7980   ins_pipe( pipe_cmpxchg );
7981 %}
7982 
7983 instruct compareAndSwapB(rRegI res,
7984                          memory mem_ptr,
7985                          rax_RegI oldval, rRegI newval,
7986                          rFlagsReg cr)
7987 %{
       // Byte compare-and-swap producing an int result in res.
       // Serves both CompareAndSwapB and WeakCompareAndSwapB.
       // Flags and oldval are declared killed.
7988   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7989   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7990   effect(KILL cr, KILL oldval);
7991 
7992   format %{ "cmpxchgb $mem_ptr,$newval\t# "
7993             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7994             "sete    $res\n\t"
7995             "movzbl  $res, $res" %}
       // Differs from the wider variants: secondary opcode 0xB0 and the
       // byte-register REX helper REX_breg_mem.
7996   opcode(0x0F, 0xB0);
7997   ins_encode(lock_prefix,
7998              REX_breg_mem(newval, mem_ptr),
7999              OpcP, OpcS,
8000              reg_mem(newval, mem_ptr),
8001              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8002              REX_reg_breg(res, res), // movzbl
8003              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8004   ins_pipe( pipe_cmpxchg );
8005 %}
8006 
8007 instruct compareAndSwapS(rRegI res,
8008                          memory mem_ptr,
8009                          rax_RegI oldval, rRegI newval,
8010                          rFlagsReg cr)
8011 %{
       // Short (16-bit) compare-and-swap producing an int result in res.
       // Serves both CompareAndSwapS and WeakCompareAndSwapS.
       // Flags and oldval are declared killed.
8012   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
8013   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
8014   effect(KILL cr, KILL oldval);
8015 
8016   format %{ "cmpxchgw $mem_ptr,$newval\t# "
8017             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8018             "sete    $res\n\t"
8019             "movzbl  $res, $res" %}
8020   opcode(0x0F, 0xB1);
       // Same opcode bytes as the int variant; SizePrefix is emitted between
       // the lock prefix and REX to select the 16-bit form shown in the format.
8021   ins_encode(lock_prefix,
8022              SizePrefix,
8023              REX_reg_mem(newval, mem_ptr),
8024              OpcP, OpcS,
8025              reg_mem(newval, mem_ptr),
8026              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8027              REX_reg_breg(res, res), // movzbl
8028              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8029   ins_pipe( pipe_cmpxchg );
8030 %}
8031 
8032 instruct compareAndSwapN(rRegI res,
8033                           memory mem_ptr,
8034                           rax_RegN oldval, rRegN newval,
8035                           rFlagsReg cr) %{
       // Compare-and-swap on rRegN/rax_RegN operands producing an int result
       // in res. Serves both CompareAndSwapN and WeakCompareAndSwapN.
       // Encoded like the int variant (cmpxchgl, non-wide REX).
8036   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8037   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
8038   effect(KILL cr, KILL oldval);
8039 
8040   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8041             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8042             "sete    $res\n\t"
8043             "movzbl  $res, $res" %}
8044   opcode(0x0F, 0xB1);
8045   ins_encode(lock_prefix,
8046              REX_reg_mem(newval, mem_ptr),
8047              OpcP, OpcS,
8048              reg_mem(newval, mem_ptr),
8049              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8050              REX_reg_breg(res, res), // movzbl
8051              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8052   ins_pipe( pipe_cmpxchg );
8053 %}
8054 
8055 instruct compareAndExchangeB(
8056                          memory mem_ptr,
8057                          rax_RegI oldval, rRegI newval,
8058                          rFlagsReg cr)
8059 %{
       // Byte compare-and-exchange. The result is delivered in oldval itself
       // (rax_RegI class), so there is no separate result operand and no
       // sete/movzbl tail. Only the flags are declared killed.
8060   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
8061   effect(KILL cr);
8062 
8063   format %{ "cmpxchgb $mem_ptr,$newval\t# "
8064             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
       // Byte form: secondary opcode 0xB0 with REX_breg_mem.
8065   opcode(0x0F, 0xB0);
8066   ins_encode(lock_prefix,
8067              REX_breg_mem(newval, mem_ptr),
8068              OpcP, OpcS,
8069              reg_mem(newval, mem_ptr) // lock cmpxchg
8070              );
8071   ins_pipe( pipe_cmpxchg );
8072 %}
8073 
8074 instruct compareAndExchangeS(
8075                          memory mem_ptr,
8076                          rax_RegI oldval, rRegI newval,
8077                          rFlagsReg cr)
8078 %{
       // Short (16-bit) compare-and-exchange. The result is delivered in
       // oldval (rax_RegI class); only the flags are declared killed.
8079   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
8080   effect(KILL cr);
8081 
8082   format %{ "cmpxchgw $mem_ptr,$newval\t# "
8083             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
8084   opcode(0x0F, 0xB1);
       // SizePrefix is emitted after the lock prefix and before REX.
8085   ins_encode(lock_prefix,
8086              SizePrefix,
8087              REX_reg_mem(newval, mem_ptr),
8088              OpcP, OpcS,
8089              reg_mem(newval, mem_ptr) // lock cmpxchg
8090              );
8091   ins_pipe( pipe_cmpxchg );
8092 %}
8093 
8094 instruct compareAndExchangeI(
8095                          memory mem_ptr,
8096                          rax_RegI oldval, rRegI newval,
8097                          rFlagsReg cr)
8098 %{
       // Int compare-and-exchange. The result is delivered in oldval
       // (rax_RegI class); only the flags are declared killed.
8099   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
8100   effect(KILL cr);
8101 
8102   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8103             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
8104   opcode(0x0F, 0xB1);
8105   ins_encode(lock_prefix,
8106              REX_reg_mem(newval, mem_ptr),
8107              OpcP, OpcS,
8108              reg_mem(newval, mem_ptr) // lock cmpxchg
8109              );
8110   ins_pipe( pipe_cmpxchg );
8111 %}
8112 
8113 instruct compareAndExchangeL(
8114                          memory mem_ptr,
8115                          rax_RegL oldval, rRegL newval,
8116                          rFlagsReg cr)
8117 %{
       // Long compare-and-exchange, selectable only when
       // VM_Version::supports_cx8() holds. The result is delivered in oldval
       // (rax_RegL class); only the flags are declared killed.
8118   predicate(VM_Version::supports_cx8());
8119   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
8120   effect(KILL cr);
8121 
8122   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8123             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
8124   opcode(0x0F, 0xB1);
8125   ins_encode(lock_prefix,
8126              REX_reg_mem_wide(newval, mem_ptr),
8127              OpcP, OpcS,
8128              reg_mem(newval, mem_ptr)  // lock cmpxchg
8129             );
8130   ins_pipe( pipe_cmpxchg );
8131 %}
8132 
8133 instruct compareAndExchangeN(
8134                           memory mem_ptr,
8135                           rax_RegN oldval, rRegN newval,
8136                           rFlagsReg cr) %{
       // Compare-and-exchange on rRegN/rax_RegN operands. The result is
       // delivered in oldval; only the flags are declared killed.
       // Encoded like the int variant (cmpxchgl, non-wide REX).
8137   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
8138   effect(KILL cr);
8139 
8140   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8141             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
8142   opcode(0x0F, 0xB1);
8143   ins_encode(lock_prefix,
8144              REX_reg_mem(newval, mem_ptr),
8145              OpcP, OpcS,
8146              reg_mem(newval, mem_ptr)  // lock cmpxchg
8147           );
8148   ins_pipe( pipe_cmpxchg );
8149 %}
8150 
8151 instruct compareAndExchangeP(
8152                          memory mem_ptr,
8153                          rax_RegP oldval, rRegP newval,
8154                          rFlagsReg cr)
8155 %{
       // Pointer compare-and-exchange, selectable only when
       // VM_Version::supports_cx8() holds. The result is delivered in oldval
       // (rax_RegP class); only the flags are declared killed.
8156   predicate(VM_Version::supports_cx8());
8157   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
8158   effect(KILL cr);
8159 
8160   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8161             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
8162   opcode(0x0F, 0xB1);
8163   ins_encode(lock_prefix,
8164              REX_reg_mem_wide(newval, mem_ptr),
8165              OpcP, OpcS,
8166              reg_mem(newval, mem_ptr)  // lock cmpxchg
8167           );
8168   ins_pipe( pipe_cmpxchg );
8169 %}
8170 
8171 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
       // GetAndAddB whose fetched value is unused (see predicate): emitted as
       // a locked addb of an immediate instead of an xadd. 'dummy' only
       // serves as the Set destination of the match rule.
8172   predicate(n->as_LoadStore()->result_not_used());
8173   match(Set dummy (GetAndAddB mem add));
8174   effect(KILL cr);
8175   format %{ "ADDB  [$mem],$add" %}
8176   ins_encode %{
8177     __ lock();
8178     __ addb($mem$$Address, $add$$constant);
8179   %}
8180   ins_pipe( pipe_cmpxchg );
8181 %}
8182 
8183 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
       // GetAndAddB with a used result: locked xaddb. newval is both the
       // addend input and the Set destination; flags are declared killed.
8184   match(Set newval (GetAndAddB mem newval));
8185   effect(KILL cr);
8186   format %{ "XADDB  [$mem],$newval" %}
8187   ins_encode %{
8188     __ lock();
8189     __ xaddb($mem$$Address, $newval$$Register);
8190   %}
8191   ins_pipe( pipe_cmpxchg );
8192 %}
8193 
8194 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
       // GetAndAddS whose fetched value is unused (see predicate): emitted as
       // a locked addw of an immediate. 'dummy' only serves as the Set
       // destination of the match rule.
8195   predicate(n->as_LoadStore()->result_not_used());
8196   match(Set dummy (GetAndAddS mem add));
8197   effect(KILL cr);
8198   format %{ "ADDW  [$mem],$add" %}
8199   ins_encode %{
8200     __ lock();
8201     __ addw($mem$$Address, $add$$constant);
8202   %}
8203   ins_pipe( pipe_cmpxchg );
8204 %}
8205 
8206 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
       // GetAndAddS with a used result: locked xaddw. newval is both the
       // addend input and the Set destination; flags are declared killed.
8207   match(Set newval (GetAndAddS mem newval));
8208   effect(KILL cr);
8209   format %{ "XADDW  [$mem],$newval" %}
8210   ins_encode %{
8211     __ lock();
8212     __ xaddw($mem$$Address, $newval$$Register);
8213   %}
8214   ins_pipe( pipe_cmpxchg );
8215 %}
8216 
8217 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
       // GetAndAddI whose fetched value is unused (see predicate): emitted as
       // a locked addl of an immediate. 'dummy' only serves as the Set
       // destination of the match rule.
8218   predicate(n->as_LoadStore()->result_not_used());
8219   match(Set dummy (GetAndAddI mem add));
8220   effect(KILL cr);
8221   format %{ "ADDL  [$mem],$add" %}
8222   ins_encode %{
8223     __ lock();
8224     __ addl($mem$$Address, $add$$constant);
8225   %}
8226   ins_pipe( pipe_cmpxchg );
8227 %}
8228 
8229 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
       // GetAndAddI with a used result: locked xaddl. newval is both the
       // addend input and the Set destination; flags are declared killed.
8230   match(Set newval (GetAndAddI mem newval));
8231   effect(KILL cr);
8232   format %{ "XADDL  [$mem],$newval" %}
8233   ins_encode %{
8234     __ lock();
8235     __ xaddl($mem$$Address, $newval$$Register);
8236   %}
8237   ins_pipe( pipe_cmpxchg );
8238 %}
8239 
8240 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
       // GetAndAddL whose fetched value is unused (see predicate): emitted as
       // a locked addq. The addend operand class is immL32, not a full
       // 64-bit immediate. 'dummy' only serves as the Set destination.
8241   predicate(n->as_LoadStore()->result_not_used());
8242   match(Set dummy (GetAndAddL mem add));
8243   effect(KILL cr);
8244   format %{ "ADDQ  [$mem],$add" %}
8245   ins_encode %{
8246     __ lock();
8247     __ addq($mem$$Address, $add$$constant);
8248   %}
8249   ins_pipe( pipe_cmpxchg );
8250 %}
8251 
8252 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
       // GetAndAddL with a used result: locked xaddq. newval is both the
       // addend input and the Set destination; flags are declared killed.
8253   match(Set newval (GetAndAddL mem newval));
8254   effect(KILL cr);
8255   format %{ "XADDQ  [$mem],$newval" %}
8256   ins_encode %{
8257     __ lock();
8258     __ xaddq($mem$$Address, $newval$$Register);
8259   %}
8260   ins_pipe( pipe_cmpxchg );
8261 %}
8262 
8263 instruct xchgB( memory mem, rRegI newval) %{
       // GetAndSetB: byte exchange between newval and memory; newval is both
       // the input and the Set destination. No flags operand and no explicit
       // lock() call, unlike the xadd rules (per the Intel SDM, XCHG with a
       // memory operand is locked implicitly).
8264   match(Set newval (GetAndSetB mem newval));
8265   format %{ "XCHGB  $newval,[$mem]" %}
8266   ins_encode %{
8267     __ xchgb($newval$$Register, $mem$$Address);
8268   %}
8269   ins_pipe( pipe_cmpxchg );
8270 %}
8271 
8272 instruct xchgS( memory mem, rRegI newval) %{
       // GetAndSetS: 16-bit exchange between newval and memory; newval is
       // both the input and the Set destination. No flags operand and no
       // explicit lock() call (per the Intel SDM, XCHG with a memory operand
       // is locked implicitly).
8273   match(Set newval (GetAndSetS mem newval));
8274   format %{ "XCHGW  $newval,[$mem]" %}
8275   ins_encode %{
8276     __ xchgw($newval$$Register, $mem$$Address);
8277   %}
8278   ins_pipe( pipe_cmpxchg );
8279 %}
8280 
8281 instruct xchgI( memory mem, rRegI newval) %{
       // GetAndSetI: 32-bit exchange between newval and memory; newval is
       // both the input and the Set destination. No flags operand and no
       // explicit lock() call (per the Intel SDM, XCHG with a memory operand
       // is locked implicitly).
8282   match(Set newval (GetAndSetI mem newval));
8283   format %{ "XCHGL  $newval,[$mem]" %}
8284   ins_encode %{
8285     __ xchgl($newval$$Register, $mem$$Address);
8286   %}
8287   ins_pipe( pipe_cmpxchg );
8288 %}
8289 
8290 instruct xchgL( memory mem, rRegL newval) %{
       // GetAndSetL: 64-bit exchange between newval and memory; newval is
       // both the input and the Set destination. No flags operand and no
       // explicit lock() call (per the Intel SDM, XCHG with a memory operand
       // is locked implicitly).
8291   match(Set newval (GetAndSetL mem newval));
       // The mnemonic shown must agree with the xchgq emitted below.
8292   format %{ "XCHGQ  $newval,[$mem]" %}
8293   ins_encode %{
8294     __ xchgq($newval$$Register, $mem$$Address);
8295   %}
8296   ins_pipe( pipe_cmpxchg );
8297 %}
8298 
8299 instruct xchgP( memory mem, rRegP newval) %{
       // GetAndSetP: 64-bit exchange between a pointer register and memory;
       // newval is both the input and the Set destination. No flags operand
       // and no explicit lock() call (per the Intel SDM, XCHG with a memory
       // operand is locked implicitly).
8300   match(Set newval (GetAndSetP mem newval));
8301   format %{ "XCHGQ  $newval,[$mem]" %}
8302   ins_encode %{
8303     __ xchgq($newval$$Register, $mem$$Address);
8304   %}
8305   ins_pipe( pipe_cmpxchg );
8306 %}
8307 
8308 instruct xchgN( memory mem, rRegN newval) %{
       // GetAndSetN: 32-bit exchange between an rRegN register and memory;
       // newval is both the input and the Set destination. No flags operand
       // and no explicit lock() call (per the Intel SDM, XCHG with a memory
       // operand is locked implicitly).
8309   match(Set newval (GetAndSetN mem newval));
       // Memory operand is bracketed like in the other xchg* formats.
8310   format %{ "XCHGL  $newval,[$mem]" %}
8311   ins_encode %{
8312     __ xchgl($newval$$Register, $mem$$Address);
8313   %}
8314   ins_pipe( pipe_cmpxchg );
8315 %}
8316 
8317 //----------Abs Instructions-------------------------------------------
8318 
8319 // Integer Absolute Instructions
8320 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
8321 %{
       // Branch-free AbsI: dst = (src ^ m) - m, where m = src >> 31
       // (arithmetic shift). dst and tmp are TEMPs; the flags are killed.
8322   match(Set dst (AbsI src));
8323   effect(TEMP dst, TEMP tmp, KILL cr);
8324   format %{ "movl $tmp, $src\n\t"
8325             "sarl $tmp, 31\n\t"
8326             "movl $dst, $src\n\t"
8327             "xorl $dst, $tmp\n\t"
8328             "subl $dst, $tmp\n"
8329           %}
8330   ins_encode %{
8331     __ movl($tmp$$Register, $src$$Register);
         // tmp becomes the sign mask of src
8332     __ sarl($tmp$$Register, 31);
8333     __ movl($dst$$Register, $src$$Register);
8334     __ xorl($dst$$Register, $tmp$$Register);
8335     __ subl($dst$$Register, $tmp$$Register);
8336   %}
8337 
8338   ins_pipe(ialu_reg_reg);
8339 %}
8340 
8341 // Long Absolute Instructions
8342 instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
8343 %{
       // Branch-free AbsL: dst = (src ^ m) - m, where m = src >> 63
       // (arithmetic shift). dst and tmp are TEMPs; the flags are killed.
8344   match(Set dst (AbsL src));
8345   effect(TEMP dst, TEMP tmp, KILL cr);
8346   format %{ "movq $tmp, $src\n\t"
8347             "sarq $tmp, 63\n\t"
8348             "movq $dst, $src\n\t"
8349             "xorq $dst, $tmp\n\t"
8350             "subq $dst, $tmp\n"
8351           %}
8352   ins_encode %{
8353     __ movq($tmp$$Register, $src$$Register);
         // tmp becomes the sign mask of src
8354     __ sarq($tmp$$Register, 63);
8355     __ movq($dst$$Register, $src$$Register);
8356     __ xorq($dst$$Register, $tmp$$Register);
8357     __ subq($dst$$Register, $tmp$$Register);
8358   %}
8359 
8360   ins_pipe(ialu_reg_reg);
8361 %}
8362 
8363 //----------Subtraction Instructions-------------------------------------------
8364 
8365 // Integer Subtraction Instructions
8366 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8367 %{
       // Two-address int subtract, register from register: dst = dst - src.
       // Flags are declared killed.
8368   match(Set dst (SubI dst src));
8369   effect(KILL cr);
8370 
8371   format %{ "subl    $dst, $src\t# int" %}
8372   opcode(0x2B);
8373   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8374   ins_pipe(ialu_reg_reg);
8375 %}
8376 
8377 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8378 %{
       // Two-address int subtract of an immediate: dst = dst - src.
       // Flags are declared killed.
       // NOTE(review): OpcSErm/Con8or32 are defined elsewhere; the names
       // suggest an 8- or 32-bit immediate form is chosen -- confirm there.
8379   match(Set dst (SubI dst src));
8380   effect(KILL cr);
8381 
8382   format %{ "subl    $dst, $src\t# int" %}
8383   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8384   ins_encode(OpcSErm(dst, src), Con8or32(src));
8385   ins_pipe(ialu_reg);
8386 %}
8387 
8388 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8389 %{
       // Int subtract with the LoadI folded into the instruction:
       // dst = dst - [src]. Costed at 125; flags are declared killed.
8390   match(Set dst (SubI dst (LoadI src)));
8391   effect(KILL cr);
8392 
8393   ins_cost(125);
8394   format %{ "subl    $dst, $src\t# int" %}
8395   opcode(0x2B);
8396   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8397   ins_pipe(ialu_reg_mem);
8398 %}
8399 
8400 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8401 %{
       // Read-modify-write int subtract on memory: [dst] = [dst] - src.
       // Folds LoadI, SubI and StoreI on the same address into one
       // instruction. Flags are declared killed.
8402   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8403   effect(KILL cr);
8404 
8405   ins_cost(150);
8406   format %{ "subl    $dst, $src\t# int" %}
8407   opcode(0x29); /* Opcode 29 /r */
       // The register operand comes first in the REX/ModRM helpers here.
8408   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8409   ins_pipe(ialu_mem_reg);
8410 %}
8411 
8412 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8413 %{
       // Read-modify-write int subtract of an immediate on memory:
       // [dst] = [dst] - src. Flags are declared killed.
8414   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8415   effect(KILL cr);
8416 
8417   ins_cost(125); // XXX
8418   format %{ "subl    $dst, $src\t# int" %}
8419   opcode(0x81); /* Opcode 81 /5 id */
       // The /5 opcode extension is passed literally to RM_opc_mem.
8420   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8421   ins_pipe(ialu_mem_imm);
8422 %}
8423 
8424 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8425 %{
       // Two-address long subtract, register from register: dst = dst - src.
       // Same opcode as subI_rReg but with the wide REX helper.
8426   match(Set dst (SubL dst src));
8427   effect(KILL cr);
8428 
8429   format %{ "subq    $dst, $src\t# long" %}
8430   opcode(0x2B);
8431   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8432   ins_pipe(ialu_reg_reg);
8433 %}
8434 
8435 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8436 %{
       // Two-address long subtract of a 32-bit-range immediate:
       // dst = dst - src. dst is a long register (rRegL) to agree with the
       // SubL match rule and the wide encoding, like the other subL_* rules.
       // Flags are declared killed.
8437   match(Set dst (SubL dst src));
8438   effect(KILL cr);
8439 
8440   format %{ "subq    $dst, $src\t# long" %}
8441   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8442   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8443   ins_pipe(ialu_reg);
8444 %}
8445 
8446 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8447 %{
       // Long subtract with the LoadL folded into the instruction:
       // dst = dst - [src]. Costed at 125; flags are declared killed.
8448   match(Set dst (SubL dst (LoadL src)));
8449   effect(KILL cr);
8450 
8451   ins_cost(125);
8452   format %{ "subq    $dst, $src\t# long" %}
8453   opcode(0x2B);
8454   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8455   ins_pipe(ialu_reg_mem);
8456 %}
8457 
8458 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8459 %{
       // Read-modify-write long subtract on memory: [dst] = [dst] - src.
       // Folds LoadL, SubL and StoreL on the same address into one
       // instruction. Flags are declared killed.
8460   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8461   effect(KILL cr);
8462 
8463   ins_cost(150);
8464   format %{ "subq    $dst, $src\t# long" %}
8465   opcode(0x29); /* Opcode 29 /r */
8466   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8467   ins_pipe(ialu_mem_reg);
8468 %}
8469 
8470 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8471 %{
       // Read-modify-write long subtract of an immL32 immediate on memory:
       // [dst] = [dst] - src. Flags are declared killed.
8472   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8473   effect(KILL cr);
8474 
8475   ins_cost(125); // XXX
8476   format %{ "subq    $dst, $src\t# long" %}
8477   opcode(0x81); /* Opcode 81 /5 id */
8478   ins_encode(REX_mem_wide(dst),
8479              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8480   ins_pipe(ialu_mem_imm);
8481 %}
8482 
8483 // Subtract from a pointer
8484 // XXX hmpf???
8485 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8486 %{
       // Matches a pointer plus a negated int, AddP dst (SubI 0 src), and
       // emits it as a single subtract. 'zero' only anchors the constant 0
       // in the match tree and is not encoded.
       // NOTE(review): the encoding is a wide (64-bit) subtract applied to an
       // rRegI operand; how the upper half of $src is defined is not visible
       // here -- confirm.
8487   match(Set dst (AddP dst (SubI zero src)));
8488   effect(KILL cr);
8489 
8490   format %{ "subq    $dst, $src\t# ptr - int" %}
8491   opcode(0x2B);
8492   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8493   ins_pipe(ialu_reg_reg);
8494 %}
8495 
8496 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8497 %{
       // Int negate in place: matches (0 - dst) and emits negl. 'zero' only
       // anchors the constant 0 in the match tree. Flags are declared killed.
8498   match(Set dst (SubI zero dst));
8499   effect(KILL cr);
8500 
8501   format %{ "negl    $dst\t# int" %}
8502   opcode(0xF7, 0x03);  // Opcode F7 /3
8503   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8504   ins_pipe(ialu_reg);
8505 %}
8506 
8507 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8508 %{
       // Int negate directly in memory: [dst] = 0 - [dst], folding the
       // LoadI/SubI/StoreI on the same address. Flags are declared killed.
       // NOTE(review): uses pipe class ialu_reg although the operand is
       // memory -- confirm this is intended.
8509   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8510   effect(KILL cr);
8511 
8512   format %{ "negl    $dst\t# int" %}
8513   opcode(0xF7, 0x03);  // Opcode F7 /3
8514   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8515   ins_pipe(ialu_reg);
8516 %}
8517 
8518 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8519 %{
       // Long negate in place: matches (0 - dst) and emits negq. 'zero' only
       // anchors the constant 0 in the match tree. Flags are declared killed.
8520   match(Set dst (SubL zero dst));
8521   effect(KILL cr);
8522 
8523   format %{ "negq    $dst\t# long" %}
8524   opcode(0xF7, 0x03);  // Opcode F7 /3
8525   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8526   ins_pipe(ialu_reg);
8527 %}
8528 
8529 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8530 %{
       // Long negate directly in memory: [dst] = 0 - [dst], folding the
       // LoadL/SubL/StoreL on the same address. Flags are declared killed.
       // NOTE(review): uses pipe class ialu_reg although the operand is
       // memory -- confirm this is intended.
8531   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8532   effect(KILL cr);
8533 
8534   format %{ "negq    $dst\t# long" %}
8535   opcode(0xF7, 0x03);  // Opcode F7 /3
8536   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8537   ins_pipe(ialu_reg);
8538 %}
8539 
8540 //----------Multiplication/Division Instructions-------------------------------
8541 // Integer Multiplication Instructions
8542 // Multiply Register
8543 
8544 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8545 %{
       // Two-address int multiply, register by register: dst = dst * src.
       // Costed at 300; flags are declared killed.
8546   match(Set dst (MulI dst src));
8547   effect(KILL cr);
8548 
8549   ins_cost(300);
8550   format %{ "imull   $dst, $src\t# int" %}
8551   opcode(0x0F, 0xAF);
8552   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8553   ins_pipe(ialu_reg_reg_alu0);
8554 %}
8555 
8556 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8557 %{
       // Three-operand int multiply by an immediate: dst = src * imm.
       // dst need not equal src. Flags are declared killed.
8558   match(Set dst (MulI src imm));
8559   effect(KILL cr);
8560 
8561   ins_cost(300);
8562   format %{ "imull   $dst, $src, $imm\t# int" %}
8563   opcode(0x69); /* 69 /r id */
8564   ins_encode(REX_reg_reg(dst, src),
8565              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8566   ins_pipe(ialu_reg_reg_alu0);
8567 %}
8568 
8569 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8570 %{
       // Int multiply with the LoadI folded in: dst = dst * [src].
       // Costed at 350; flags are declared killed.
8571   match(Set dst (MulI dst (LoadI src)));
8572   effect(KILL cr);
8573 
8574   ins_cost(350);
8575   format %{ "imull   $dst, $src\t# int" %}
8576   opcode(0x0F, 0xAF);
8577   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8578   ins_pipe(ialu_reg_mem_alu0);
8579 %}
8580 
8581 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8582 %{
       // Three-operand int multiply of a loaded value by an immediate:
       // dst = [src] * imm. Flags are declared killed.
8583   match(Set dst (MulI (LoadI src) imm));
8584   effect(KILL cr);
8585 
8586   ins_cost(300);
8587   format %{ "imull   $dst, $src, $imm\t# int" %}
8588   opcode(0x69); /* 69 /r id */
8589   ins_encode(REX_reg_mem(dst, src),
8590              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8591   ins_pipe(ialu_reg_mem_alu0);
8592 %}
8593 
8594 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
8595 %{
       // MulAddS2I expanded into existing rules:
       // dst = dst * src1 + src2 * src3. The second product is formed in
       // src2 itself, which is why src2 is declared killed. addI_rReg is
       // defined elsewhere in this file.
8596   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
8597   effect(KILL cr, KILL src2);
8598 
8599   expand %{ mulI_rReg(dst, src1, cr);
8600            mulI_rReg(src2, src3, cr);
8601            addI_rReg(dst, src2, cr); %}
8602 %}
8603 
8604 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8605 %{
       // Two-address long multiply, register by register: dst = dst * src.
       // Same opcode bytes as mulI_rReg but with the wide REX helper.
8606   match(Set dst (MulL dst src));
8607   effect(KILL cr);
8608 
8609   ins_cost(300);
8610   format %{ "imulq   $dst, $src\t# long" %}
8611   opcode(0x0F, 0xAF);
8612   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8613   ins_pipe(ialu_reg_reg_alu0);
8614 %}
8615 
8616 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8617 %{
       // Three-operand long multiply by an immL32 immediate:
       // dst = src * imm. Flags are declared killed.
8618   match(Set dst (MulL src imm));
8619   effect(KILL cr);
8620 
8621   ins_cost(300);
8622   format %{ "imulq   $dst, $src, $imm\t# long" %}
8623   opcode(0x69); /* 69 /r id */
8624   ins_encode(REX_reg_reg_wide(dst, src),
8625              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8626   ins_pipe(ialu_reg_reg_alu0);
8627 %}
8628 
8629 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8630 %{
       // Long multiply with the LoadL folded in: dst = dst * [src].
       // Costed at 350; flags are declared killed.
8631   match(Set dst (MulL dst (LoadL src)));
8632   effect(KILL cr);
8633 
8634   ins_cost(350);
8635   format %{ "imulq   $dst, $src\t# long" %}
8636   opcode(0x0F, 0xAF);
8637   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8638   ins_pipe(ialu_reg_mem_alu0);
8639 %}
8640 
8641 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8642 %{
       // Three-operand long multiply of a loaded value by an immL32
       // immediate: dst = [src] * imm. Flags are declared killed.
8643   match(Set dst (MulL (LoadL src) imm));
8644   effect(KILL cr);
8645 
8646   ins_cost(300);
8647   format %{ "imulq   $dst, $src, $imm\t# long" %}
8648   opcode(0x69); /* 69 /r id */
8649   ins_encode(REX_reg_mem_wide(dst, src),
8650              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8651   ins_pipe(ialu_reg_mem_alu0);
8652 %}
8653 
8654 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8655 %{
       // MulHiL via the one-operand imulq (F7 /5): per the format the product
       // goes to RDX:RAX, and the Set destination is pinned to the rdx class.
       // rax is an input that is also destroyed (USE_KILL); src is restricted
       // to the no_rax class. Only src is encoded.
8656   match(Set dst (MulHiL src rax));
8657   effect(USE_KILL rax, KILL cr);
8658 
8659   ins_cost(300);
8660   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8661   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8662   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8663   ins_pipe(ialu_reg_reg_alu0);
8664 %}
8665 
8666 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8667                    rFlagsReg cr)
8668 %{
       // Int division: dividend and quotient both live in rax; rdx and the
       // flags are declared killed. The divisor must be outside rax/rdx.
       // The format shows a guard that skips idivl when rax == 0x80000000
       // and the divisor is -1.
       // NOTE(review): cdql_enc is defined elsewhere; presumably it emits
       // that guard plus cdql -- confirm against the encode block.
8669   match(Set rax (DivI rax div));
8670   effect(KILL rdx, KILL cr);
8671 
8672   ins_cost(30*100+10*100); // XXX
8673   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8674             "jne,s   normal\n\t"
8675             "xorl    rdx, rdx\n\t"
8676             "cmpl    $div, -1\n\t"
8677             "je,s    done\n"
8678     "normal: cdql\n\t"
8679             "idivl   $div\n"
8680     "done:"        %}
8681   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8682   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8683   ins_pipe(ialu_reg_reg_alu0);
8684 %}
8685 
8686 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8687                    rFlagsReg cr)
8688 %{
       // Long division: dividend and quotient both live in rax; rdx and the
       // flags are declared killed. The divisor must be outside rax/rdx.
       // The format shows a guard that skips idivq when rax equals
       // 0x8000000000000000 and the divisor is -1.
       // NOTE(review): cdqq_enc is defined elsewhere; presumably it emits
       // that guard plus cdqq -- confirm against the encode block.
8689   match(Set rax (DivL rax div));
8690   effect(KILL rdx, KILL cr);
8691 
8692   ins_cost(30*100+10*100); // XXX
8693   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8694             "cmpq    rax, rdx\n\t"
8695             "jne,s   normal\n\t"
8696             "xorl    rdx, rdx\n\t"
8697             "cmpq    $div, -1\n\t"
8698             "je,s    done\n"
8699     "normal: cdqq\n\t"
8700             "idivq   $div\n"
8701     "done:"        %}
8702   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8703   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8704   ins_pipe(ialu_reg_reg_alu0);
8705 %}
8706 
8707 // Integer DIVMOD with Register, both quotient and mod results
8708 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8709                              rFlagsReg cr)
8710 %{
       // Combined int divide/modulo. The match has no Set destination and,
       // unlike divI_rReg/modI_rReg, neither rax nor rdx is declared killed;
       // only the flags are. Same encoding and guard sequence as divI_rReg,
       // scheduled with pipe_slow.
8711   match(DivModI rax div);
8712   effect(KILL cr);
8713 
8714   ins_cost(30*100+10*100); // XXX
8715   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8716             "jne,s   normal\n\t"
8717             "xorl    rdx, rdx\n\t"
8718             "cmpl    $div, -1\n\t"
8719             "je,s    done\n"
8720     "normal: cdql\n\t"
8721             "idivl   $div\n"
8722     "done:"        %}
8723   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8724   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8725   ins_pipe(pipe_slow);
8726 %}
8727 
8728 // Long DIVMOD with Register, both quotient and mod results
8729 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8730                              rFlagsReg cr)
8731 %{
       // Combined long divide/modulo. The match has no Set destination and,
       // unlike divL_rReg/modL_rReg, neither rax nor rdx is declared killed;
       // only the flags are. Same encoding and guard sequence as divL_rReg,
       // scheduled with pipe_slow.
8732   match(DivModL rax div);
8733   effect(KILL cr);
8734 
8735   ins_cost(30*100+10*100); // XXX
8736   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8737             "cmpq    rax, rdx\n\t"
8738             "jne,s   normal\n\t"
8739             "xorl    rdx, rdx\n\t"
8740             "cmpq    $div, -1\n\t"
8741             "je,s    done\n"
8742     "normal: cdqq\n\t"
8743             "idivq   $div\n"
8744     "done:"        %}
8745   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8746   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8747   ins_pipe(pipe_slow);
8748 %}
8749 
8750 //----------- DivL-By-Constant-Expansions--------------------------------------
8751 // DivI cases are handled by the compiler
8752 
8753 // Magic constant, reciprocal of 10
8754 instruct loadConL_0x6666666666666667(rRegL dst)
8755 %{
       // Loads the fixed 64-bit constant 0x6666666666666667 into dst.
       // There is no match rule, only a DEF effect, so this is reachable
       // only from expand rules (divL_10 uses it).
8756   effect(DEF dst);
8757 
       // The constant printed here must be the 16-digit value encoded below.
8758   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8759   ins_encode(load_immL(dst, 0x6666666666666667));
8760   ins_pipe(ialu_reg);
8761 %}
8762 
8763 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8764 %{
       // Helper without a match rule, used by the divL_10 expansion: the
       // one-operand imulq (F7 /5) of rax by src, with dst pinned to the rdx
       // class. rax is consumed and destroyed (USE_KILL); flags are killed.
8765   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8766 
8767   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8768   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8769   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8770   ins_pipe(ialu_reg_reg_alu0);
8771 %}
8772 
8773 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8774 %{
       // Helper without a match rule, used by the divL_10 expansion:
       // arithmetic right shift of dst by 63 (0x3F), in place (USE_DEF).
       // Flags are declared killed.
8775   effect(USE_DEF dst, KILL cr);
8776 
8777   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8778   opcode(0xC1, 0x7); /* C1 /7 ib */
8779   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8780   ins_pipe(ialu_reg);
8781 %}
8782 
8783 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8784 %{
       // Helper without a match rule, used by the divL_10 expansion:
       // arithmetic right shift of dst by 2, in place (USE_DEF).
       // Flags are declared killed.
8785   effect(USE_DEF dst, KILL cr);
8786 
8787   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8788   opcode(0xC1, 0x7); /* C1 /7 ib */
8789   ins_encode(reg_opc_imm_wide(dst, 0x2));
8790   ins_pipe(ialu_reg);
8791 %}
8792 
8793 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8794 %{
       // Long division by the constant matched by immL10, expanded into a
       // multiply-high by 0x6666666666666667, a shift by 2, and a correction
       // that subtracts (src >> 63). The result lands in dst (rdx class).
       // NOTE(review): sarL_rReg_63 is applied to src itself, so the input
       // register is overwritten by the expansion -- confirm this is safe.
8795   match(Set dst (DivL src div));
8796 
8797   ins_cost((5+8)*100);
8798   expand %{
8799     rax_RegL rax;                     // Killed temp
8800     rFlagsReg cr;                     // Killed
8801     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8802     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8803     sarL_rReg_63(src, cr);            // sarq  src, 63
8804     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8805     subL_rReg(dst, src, cr);          // subq  rdx, src
8806   %}
8807 %}
8808 
8809 //-----------------------------------------------------------------------------
8810 
8811 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8812                    rFlagsReg cr)
8813 %{
       // Int remainder: dividend in rax, result in rdx; rax and the flags
       // are declared killed. Same encoding and guard sequence as divI_rReg
       // (the guarded path zeroes rdx, per the format).
8814   match(Set rdx (ModI rax div));
8815   effect(KILL rax, KILL cr);
8816 
8817   ins_cost(300); // XXX
8818   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8819             "jne,s   normal\n\t"
8820             "xorl    rdx, rdx\n\t"
8821             "cmpl    $div, -1\n\t"
8822             "je,s    done\n"
8823     "normal: cdql\n\t"
8824             "idivl   $div\n"
8825     "done:"        %}
8826   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8827   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8828   ins_pipe(ialu_reg_reg_alu0);
8829 %}
8830 
// Signed 64-bit remainder.  The dividend is taken from the RAX-class
// operand and the result is set in the RDX-class operand; RAX and the flags
// are killed.  Per the format string, a dividend of 0x8000000000000000 with
// a divisor of -1 yields a zero remainder without executing idivq; every
// other input goes through the sign-extend + idivq path.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
            "normal: cdqq\n\t"
            "idivq   $div\n"
            "done:" %}
  opcode(0xF7, 0x7);                  // F7 /7
  ins_encode(cdqq_enc(div),
             REX_reg_wide(div),
             OpcP,
             reg_opc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
8851 
8852 // Integer Shift Instructions
// Int shift left by one, register form (D1 /4, no immediate byte).
// $dst is shifted in place; the flags are killed.
instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  opcode(0xD1, 0x4);                  // D1 /4
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8864 
// Int shift left by one, memory form: matches a load / shift / store back
// to the same address $dst and emits one D1 /4 on the memory operand.
instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift\t" %}
  opcode(0xD1, 0x4);                  // D1 /4
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8876 
// Int shift left by an 8-bit immediate count, register form (C1 /4 ib).
// $dst is shifted in place; the flags are killed.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  opcode(0xC1, 0x4);                  // C1 /4 ib
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
8888 
// Int shift left by an 8-bit immediate count, memory form: matches a
// load / shift / store back to the same address $dst (C1 /4 ib).
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  opcode(0xC1, 0x4);                  // C1 /4 ib
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8900 
// Int shift left by a variable count, register form (D3 /4).  The count
// operand is constrained to the rcx register class.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  opcode(0xD3, 0x4);                  // D3 /4
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8912 
// Int shift left by a variable count, memory form: matches a load / shift /
// store back to the same address $dst (D3 /4).  The count operand is
// constrained to the rcx register class.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  opcode(0xD3, 0x4);                  // D3 /4
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8924 
// Int arithmetic shift right by one, register form (D1 /7).
// $dst is shifted in place; the flags are killed.
instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xD1, 0x7);                  // D1 /7
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8936 
// Int arithmetic shift right by one, memory form: matches a load / shift /
// store back to the same address $dst (D1 /7).
instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xD1, 0x7);                  // D1 /7
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8948 
// Arithmetic Shift Right by 8-bit immediate
// Register form (C1 /7 ib): $dst is shifted in place and the flags are
// killed.  There is no memory operand, so the pipeline class is ialu_reg,
// the same class used by the sibling register/immediate shifts
// salI_rReg_imm and shrI_rReg_imm.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
8960 
// Int arithmetic shift right by an 8-bit immediate count, memory form:
// matches a load / shift / store back to the same address $dst (C1 /7 ib).
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xC1, 0x7);                  // C1 /7 ib
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8972 
// Int arithmetic shift right by a variable count, register form (D3 /7).
// The count operand is constrained to the rcx register class.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xD3, 0x7);                  // D3 /7
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8984 
// Int arithmetic shift right by a variable count, memory form: matches a
// load / shift / store back to the same address $dst (D3 /7).  The count
// operand is constrained to the rcx register class.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  opcode(0xD3, 0x7);                  // D3 /7
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8996 
// Int logical (unsigned) shift right by one, register form (D1 /5).
// $dst is shifted in place; the flags are killed.
instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD1, 0x5);                  // D1 /5
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9008 
// Int logical shift right by one, memory form: matches a load / shift /
// store back to the same address $dst (D1 /5).
instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD1, 0x5);                  // D1 /5
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
9020 
// Int logical shift right by an 8-bit immediate count, register form
// (C1 /5 ib).  $dst is shifted in place; the flags are killed.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xC1, 0x5);                  // C1 /5 ib
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
9032 
// Int logical shift right by an 8-bit immediate count, memory form:
// matches a load / shift / store back to the same address $dst (C1 /5 ib).
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xC1, 0x5);                  // C1 /5 ib
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
9044 
// Int logical shift right by a variable count, register form (D3 /5).
// The count operand is constrained to the rcx register class.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD3, 0x5);                  // D3 /5
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9056 
// Int logical shift right by a variable count, memory form: matches a
// load / shift / store back to the same address $dst (D3 /5).  The count
// operand is constrained to the rcx register class.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD3, 0x5);                  // D3 /5
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
9068 
9069 // Long Shift Instructions
// Long shift left by one, register form (D1 /4 with the wide REX prefix
// encoding).  $dst is shifted in place; the flags are killed.
instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD1, 0x4);                  // D1 /4
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9081 
// Long shift left by one, memory form: matches a load / shift / store back
// to the same address $dst (D1 /4, wide encoding).
instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD1, 0x4);                  // D1 /4
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
9093 
// Long shift left by an 8-bit immediate count, register form (C1 /4 ib,
// wide encoding).  $dst is shifted in place; the flags are killed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4);                  // C1 /4 ib
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9105 
// Long shift left by an 8-bit immediate count, memory form: matches a
// load / shift / store back to the same address $dst (C1 /4 ib, wide
// encoding).
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4);                  // C1 /4 ib
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
9118 
// Long shift left by a variable count, register form (D3 /4, wide
// encoding).  The count operand is an int constrained to the rcx class.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4);                  // D3 /4
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9130 
// Long shift left by a variable count, memory form: matches a load /
// shift / store back to the same address $dst (D3 /4, wide encoding).  The
// count operand is an int constrained to the rcx class.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4);                  // D3 /4
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
9142 
// Long arithmetic shift right by one, register form (D1 /7, wide
// encoding).  $dst is shifted in place; the flags are killed.
instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7);                  // D1 /7
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9154 
// Long arithmetic shift right by one, memory form: matches a load /
// shift / store back to the same address $dst (D1 /7, wide encoding).
instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7);                  // D1 /7
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
9166 
// Arithmetic Shift Right by 8-bit immediate
// Register form (C1 /7 ib, wide encoding): $dst is shifted in place and the
// flags are killed.  There is no memory operand, so the pipeline class is
// ialu_reg, the same class used by the sibling register/immediate shifts
// salL_rReg_imm and shrL_rReg_imm.
instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9178 
// Long arithmetic shift right by an 8-bit immediate count, memory form:
// matches a load / shift / store back to the same address $dst (C1 /7 ib,
// wide encoding).
instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7);                  // C1 /7 ib
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
9191 
// Long arithmetic shift right by a variable count, register form (D3 /7,
// wide encoding).  The count operand is an int constrained to the rcx class.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7);                  // D3 /7
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9203 
// Long arithmetic shift right by a variable count, memory form: matches a
// load / shift / store back to the same address $dst (D3 /7, wide
// encoding).  The count operand is an int constrained to the rcx class.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7);                  // D3 /7
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
9215 
// Long logical (unsigned) shift right by one, register form (D1 /5, wide
// encoding).  $dst is shifted in place; the flags are killed.
instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5);                  // D1 /5
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9227 
// Long logical shift right by one, memory form: matches a load / shift /
// store back to the same address $dst (D1 /5, wide encoding).
instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5);                  // D1 /5
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
9239 
// Long logical shift right by an 8-bit immediate count, register form
// (C1 /5 ib, wide encoding).  $dst is shifted in place; flags are killed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5);                  // C1 /5 ib
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9251 
9252 
// Long logical shift right by an 8-bit immediate count, memory form:
// matches a load / shift / store back to the same address $dst (C1 /5 ib,
// wide encoding).
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5);                  // C1 /5 ib
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst),
             Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
9265 
// Long logical shift right by a variable count, register form (D3 /5, wide
// encoding).  The count operand is an int constrained to the rcx class.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5);                  // D3 /5
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9277 
// Long logical shift right by a variable count, memory form: matches a
// load / shift / store back to the same address $dst (D3 /5, wide
// encoding).  The count operand is an int constrained to the rcx class.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5);                  // D3 /5
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
9289 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The matched pair of shifts is replaced by one movsbl (0F BE) from $src
// into $dst; no flags operand is declared for this instruction.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  opcode(0x0F, 0xBE);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9301 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// The matched pair of shifts is replaced by one movswl (0F BF) from $src
// into $dst; no flags operand is declared for this instruction.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  opcode(0x0F, 0xBF);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9313 
9314 // ROL/ROR instructions
9315 
9316 // ROL expand
// Expand target: int rotate left by one (D1 /0).  No match rule; it is
// instantiated from the expand of rolI_rReg_i1.  $dst is rotated in place
// and the flags are killed.
instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr)
%{
  effect(KILL cr, USE_DEF dst);

  format %{ "roll    $dst" %}
  opcode(0xD1, 0x0);                  // D1 /0
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9325 
// Expand target: int rotate left by an 8-bit immediate count (C1 /0 ib).
// No match rule; it is instantiated from the expand of rolI_rReg_i8.
instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xC1, 0x0);                  // C1 /0 ib
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
9334 
// Expand target: int rotate left by a variable count (D3 /0).  The count
// is an rcx-class operand and $dst is drawn from the no-rcx class.  No
// match rule; it is instantiated from the rolI_rReg_Var_* expands.
instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xD3, 0x0);                  // D3 /0
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9344 // end of ROL expand
9345 
// Int rotate left by one: recognizes (dst << lshift) | (dst >>> rshift)
// with lshift in the immI1 class and rshift in the immI_M1 class, and
// expands to rolI_rReg_imm1.
instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_rReg_imm1(dst, cr);
  %}
%}
9355 
// Int rotate left by a constant: recognizes
// (dst << lshift) | (dst >>> rshift) when the two constant counts add up
// to a multiple of 32 (checked by the predicate), and expands to
// rolI_rReg_imm8 using the left-shift count.
instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  expand %{
    rolI_rReg_imm8(dst, lshift, cr);
  %}
%}
9366 
// Int rotate left by a variable count, written as
// (dst << shift) | (dst >>> (0 - shift)); expands to rolI_rReg_CL.
instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
9376 
// Int rotate left by a variable count, written as
// (dst << shift) | (dst >>> (32 - shift)); expands to rolI_rReg_CL.
instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
9386 
9387 // ROR expand
// Expand target: int rotate right by one (D1 /1).  No match rule; it is
// instantiated from the expand of rorI_rReg_i1.  $dst is rotated in place
// and the flags are killed.
instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorl    $dst" %}
  opcode(0xD1, 0x1);                  // D1 /1
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9397 
// Expand target: int rotate right by an 8-bit immediate count (C1 /1 ib).
// No match rule; it is instantiated from the expand of rorI_rReg_i8.
instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xC1, 0x1);                  // C1 /1 ib
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
9407 
// Expand target: int rotate right by a variable count (D3 /1).  The count
// is an rcx-class operand and $dst is drawn from the no-rcx class.  No
// match rule; it is instantiated from the rorI_rReg_Var_* expands.
instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xD3, 0x1);                  // D3 /1
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9417 // end of ROR expand
9418 
// Int rotate right by one: recognizes (dst >>> rshift) | (dst << lshift)
// with rshift in the immI1 class and lshift in the immI_M1 class, and
// expands to rorI_rReg_imm1.
instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_rReg_imm1(dst, cr);
  %}
%}
9428 
// Int rotate right by a constant: recognizes
// (dst >>> rshift) | (dst << lshift) when the two constant counts add up
// to a multiple of 32 (checked by the predicate), and expands to
// rorI_rReg_imm8 using the right-shift count.
instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  expand %{
    rorI_rReg_imm8(dst, rshift, cr);
  %}
%}
9439 
// Int rotate right by a variable count, written as
// (dst >>> shift) | (dst << (0 - shift)); expands to rorI_rReg_CL.
instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
9449 
// Int rotate right by a variable count, written as
// (dst >>> shift) | (dst << (32 - shift)); expands to rorI_rReg_CL.
instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
9459 
9460 // for long rotate
9461 // ROL expand
// Expand target: long rotate left by one (D1 /0, wide encoding).  No match
// rule; it is instantiated from the expand of rolL_rReg_i1.
instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rolq    $dst" %}
  opcode(0xD1, 0x0);                  // D1 /0
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9470 
// Expand target: long rotate left by an 8-bit immediate count (C1 /0 ib,
// wide encoding).  No match rule; it is instantiated from the expand of
// rolL_rReg_i8.
instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xC1, 0x0);                  // C1 /0 ib
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9479 
// Expand target: long rotate left by a variable count (D3 /0, wide
// encoding).  The count is an rcx-class int operand and $dst is drawn from
// the no-rcx class.  No match rule; it is instantiated from the
// rolL_rReg_Var_* expands.
instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xD3, 0x0);                  // D3 /0
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9489 // end of ROL expand
9490 
// Long rotate left by one: recognizes (dst << lshift) | (dst >>> rshift)
// with lshift in the immI1 class and rshift in the immI_M1 class, and
// expands to rolL_rReg_imm1.
instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));

  expand %{
    rolL_rReg_imm1(dst, cr);
  %}
%}
9500 
// Long rotate left by a constant: recognizes
// (dst << lshift) | (dst >>> rshift) when the two constant counts add up
// to a multiple of 64 (checked by the predicate), and expands to
// rolL_rReg_imm8 using the left-shift count.
instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));

  expand %{
    rolL_rReg_imm8(dst, lshift, cr);
  %}
%}
9511 
// Long rotate left by a variable count, written as
// (dst << shift) | (dst >>> (0 - shift)); expands to rolL_rReg_CL.
instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9521 
// Long rotate left by a variable count, written as
// (dst << shift) | (dst >>> (64 - shift)); expands to rolL_rReg_CL.
instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9531 
9532 // ROR expand
// Expand target: long rotate right by one (D1 /1, wide encoding).  No
// match rule; it is instantiated from the expand of rorL_rReg_i1.
instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorq    $dst" %}
  opcode(0xD1, 0x1);                  // D1 /1
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9542 
// Expand target: long rotate right by an 8-bit immediate count (C1 /1 ib,
// wide encoding).  No match rule; it is instantiated from the expand of
// rorL_rReg_i8.
instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xC1, 0x1);                  // C1 /1 ib
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9552 
// Expand target: long rotate right by a variable count (D3 /1, wide
// encoding).  The count is an rcx-class int operand and $dst is drawn from
// the no-rcx class.  No match rule; it is instantiated from the
// rorL_rReg_Var_* expands.
instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xD3, 0x1);                  // D3 /1
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9562 // end of ROR expand
9563 
// Long rotate right by one: recognizes (dst >>> rshift) | (dst << lshift)
// with rshift in the immI1 class and lshift in the immI_M1 class, and
// expands to rorL_rReg_imm1.
instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm1(dst, cr);
  %}
%}
9573 
// Long rotate right by a constant: recognizes
// (dst >>> rshift) | (dst << lshift) when the two constant counts add up
// to a multiple of 64 (checked by the predicate), and expands to
// rorL_rReg_imm8 using the right-shift count.
instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));

  expand %{
    rorL_rReg_imm8(dst, rshift, cr);
  %}
%}
9584 
// Long rotate right by a variable count, written as
// (dst >>> shift) | (dst << (0 - shift)); expands to rorL_rReg_CL.
instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9594 
// Long rotate right by a variable count, written as
// (dst >>> shift) | (dst << (64 - shift)); expands to rorL_rReg_CL.
instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9604 
9605 // Logical Instructions
9606 
9607 // Integer Logical Instructions
9608 
9609 // And Instructions
// Int bitwise AND, register with register (opcode 23).  The result
// replaces $dst and the flags are killed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9621 
// Int AND with the constant 255, emitted as a movzbl of $dst onto itself
// (0F B6).  No flags operand is declared for this instruction.
instruct andI_rReg_imm255(rRegI dst, immI_255 src) %{
  match(Set dst (AndI dst src));

  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, dst),
             OpcP,
             OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9632 
// Int AND with the constant 255 followed by int-to-long conversion,
// emitted as one movzbl from $src into the long register $dst (0F B6).
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, src),
             OpcP,
             OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9643 
// Int AND with the constant 65535, emitted as a movzwl of $dst onto itself
// (0F B7).  No flags operand is declared for this instruction.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src) %{
  match(Set dst (AndI dst src));

  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, dst),
             OpcP,
             OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9654 
// Int AND with the constant 65535 followed by int-to-long conversion,
// emitted as one movzwl from $src into the long register $dst (0F B7).
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9665 
// Int bitwise AND of a register with an immediate (81 /4).  The result
// replaces $dst and the flags are killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04);                 // 81 /4
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9677 
// Int bitwise AND of a register with an int loaded from memory (opcode
// 23); the load is folded into the instruction.  Flags are killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9690 
// Byte AND of memory with a register: matches a byte load, AND with $src,
// and byte store back to the same address $dst (opcode 20).
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  opcode(0x20);
  ins_encode(REX_breg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9703 
// Int AND of memory with a register: matches an int load, AND with $src,
// and int store back to the same address $dst (21 /r).
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x21);                       // 21 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9715 
// Int AND of memory with an immediate: matches an int load, AND with the
// constant $src, and int store back to the same address $dst (81 /4 id).
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x4);                  // 81 /4 id
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9729 
9730 // BMI1 instructions
// BMI1 and-not, memory form: matches (src1 ^ -1) & load(src2) and emits
// andnl through the macro assembler.  Only selected when
// UseBMI1Instructions is set; the flags are killed.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9744 
// BMI1 and-not, register form: matches (src1 ^ -1) & src2 and emits andnl
// through the macro assembler.  Only selected when UseBMI1Instructions is
// set; the flags are killed.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9757 
// BMI1 blsi, register form: matches (0 - src) & src and emits blsil
// through the macro assembler.  Only selected when UseBMI1Instructions is
// set; the flags are killed.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9770 
// BMI1 blsi, memory form: matches (0 - load(src)) & load(src) and emits
// blsil with a memory source through the macro assembler.  Only selected
// when UseBMI1Instructions is set; the flags are killed.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9784 
// BMI1 blsmsk, memory form: matches (load(src) + -1) ^ load(src) and emits
// blsmskl with a memory source through the macro assembler.  Only selected
// when UseBMI1Instructions is set; the flags are killed.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9799 
// BMI1 blsmsk, register form: matches (src + -1) ^ src and emits blsmskl
// through the macro assembler.  Only selected when UseBMI1Instructions is
// set; the flags are killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9814 
// BMI1 blsr, register form: matches (src + -1) & src and emits blsrl
// through the macro assembler.  Only selected when UseBMI1Instructions is
// set; the flags are killed.
// Both operands are registers, so the pipeline class is ialu_reg, the same
// class used by the other register-only BMI1 forms (blsiI_rReg_rReg,
// blsmskI_rReg_rReg).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9829 
// BMI1 blsr, memory form: matches (load(src) + -1) & load(src) and emits
// blsrl with a memory source through the macro assembler.  Only selected
// when UseBMI1Instructions is set; the flags are killed.
// The source is a memory operand, so the pipeline class is ialu_reg_mem,
// the same class used by the other memory-source BMI1 forms
// (blsiI_rReg_mem, blsmskI_rReg_mem).
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
9845 
9846 // Or Instructions
9847 // Or Register with Register
// 32-bit OR, register |= register. Two-address form: $dst is both an input
// and the result of the match. Encoded with the REX_reg_reg prefix class,
// primary opcode 0x0B and a reg/reg operand byte. Flags are killed.
9848 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9849 %{
9850   match(Set dst (OrI dst src));
9851   effect(KILL cr);
9852
9853   format %{ "orl     $dst, $src\t# int" %}
9854   opcode(0x0B);
9855   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9856   ins_pipe(ialu_reg_reg);
9857 %}
9858 
9859 // Or Register with Immediate
// 32-bit OR, register |= int immediate. Uses opcode 0x81 with /1 as the
// secondary opcode; the immediate is emitted by Con8or32 (presumably as an
// 8- or 32-bit constant depending on its value). Flags are killed.
9860 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9861 %{
9862   match(Set dst (OrI dst src));
9863   effect(KILL cr);
9864
9865   format %{ "orl     $dst, $src\t# int" %}
9866   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9867   ins_encode(OpcSErm(dst, src), Con8or32(src));
9868   ins_pipe(ialu_reg);
9869 %}
9870 
9871 // Or Register with Memory
// 32-bit OR, register |= int loaded from memory. The LoadI is folded into
// the instruction (opcode 0x0B with a reg/mem operand). Cost 125; flags are
// killed.
9872 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9873 %{
9874   match(Set dst (OrI dst (LoadI src)));
9875   effect(KILL cr);
9876
9877   ins_cost(125);
9878   format %{ "orl     $dst, $src\t# int" %}
9879   opcode(0x0B);
9880   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9881   ins_pipe(ialu_reg_mem);
9882 %}
9883 
9884 // Or Memory with Register
// Byte read-modify-write OR in memory: matches a StoreB to $dst of
// (LoadB $dst | $src), i.e. load and store use the same memory operand.
// Encoded with the byte-register prefix class REX_breg_mem and opcode 0x08.
// Cost 150; flags are killed.
9885 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9886 %{
9887   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
9888   effect(KILL cr);
9889
9890   ins_cost(150);
9891   format %{ "orb    $dst, $src\t# byte" %}
9892   opcode(0x08);
9893   ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
9894   ins_pipe(ialu_mem_reg);
9895 %}
9896 
// Int read-modify-write OR in memory: matches a StoreI to $dst of
// (LoadI $dst | $src), with load and store on the same memory operand.
// Opcode 0x09 with the register as the reg field. Cost 150; flags are killed.
9897 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9898 %{
9899   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9900   effect(KILL cr);
9901
9902   ins_cost(150);
9903   format %{ "orl     $dst, $src\t# int" %}
9904   opcode(0x09); /* Opcode 09 /r */
9905   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9906   ins_pipe(ialu_mem_reg);
9907 %}
9908 
9909 // Or Memory with Immediate
// Int read-modify-write OR of memory with an immediate: matches a StoreI to
// $dst of (LoadI $dst | $src). Opcode 0x81 with /1 placed in the ModRM
// opcode-extension field via RM_opc_mem(secondary, dst). Cost 125; flags are
// killed.
9910 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9911 %{
9912   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9913   effect(KILL cr);
9914
9915   ins_cost(125);
9916   format %{ "orl     $dst, $src\t# int" %}
9917   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9918   ins_encode(REX_mem(dst), OpcSE(src),
9919              RM_opc_mem(secondary, dst), Con8or32(src));
9920   ins_pipe(ialu_mem_imm);
9921 %}
9922 
9923 // Xor Instructions
9924 // Xor Register with Register
// 32-bit XOR, register ^= register. Two-address form with $dst as input and
// result. Encoded with REX_reg_reg, primary opcode 0x33 and a reg/reg
// operand byte. Flags are killed.
9925 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9926 %{
9927   match(Set dst (XorI dst src));
9928   effect(KILL cr);
9929
9930   format %{ "xorl    $dst, $src\t# int" %}
9931   opcode(0x33);
9932   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9933   ins_pipe(ialu_reg_reg);
9934 %}
9935 
9936 // Xor Register with Immediate -1
// 32-bit XOR with the immI_M1 constant (-1), i.e. bitwise complement.
// Emitted as notl on $dst. No flags effect is declared for this rule.
// The format mnemonic is "notl" so the printed disassembly names the
// instruction actually emitted (the long variant prints "notq").
9937 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9938   match(Set dst (XorI dst imm));
9939
9940   format %{ "notl   $dst" %}
9941   ins_encode %{
9942      __ notl($dst$$Register);
9943   %}
9944   ins_pipe(ialu_reg);
9945 %}
9946 
9947 // Xor Register with Immediate
// 32-bit XOR, register ^= int immediate. Uses opcode 0x81 with /6 as the
// secondary opcode; the immediate is emitted by Con8or32 (presumably as an
// 8- or 32-bit constant depending on its value). Flags are killed.
9948 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9949 %{
9950   match(Set dst (XorI dst src));
9951   effect(KILL cr);
9952
9953   format %{ "xorl    $dst, $src\t# int" %}
9954   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9955   ins_encode(OpcSErm(dst, src), Con8or32(src));
9956   ins_pipe(ialu_reg);
9957 %}
9958 
9959 // Xor Register with Memory
// 32-bit XOR, register ^= int loaded from memory. The LoadI is folded into
// the instruction (opcode 0x33 with a reg/mem operand). Cost 125; flags are
// killed.
9960 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9961 %{
9962   match(Set dst (XorI dst (LoadI src)));
9963   effect(KILL cr);
9964
9965   ins_cost(125);
9966   format %{ "xorl    $dst, $src\t# int" %}
9967   opcode(0x33);
9968   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9969   ins_pipe(ialu_reg_mem);
9970 %}
9971 
9972 // Xor Memory with Register
// Byte read-modify-write XOR in memory: matches a StoreB to $dst of
// (LoadB $dst ^ $src), with load and store on the same memory operand.
// Encoded with the byte-register prefix class REX_breg_mem and opcode 0x30.
// Cost 150; flags are killed.
9973 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9974 %{
9975   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
9976   effect(KILL cr);
9977
9978   ins_cost(150);
9979   format %{ "xorb    $dst, $src\t# byte" %}
9980   opcode(0x30);
9981   ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
9982   ins_pipe(ialu_mem_reg);
9983 %}
9984 
// Int read-modify-write XOR in memory: matches a StoreI to $dst of
// (LoadI $dst ^ $src), with load and store on the same memory operand.
// Opcode 0x31 with the register as the reg field. Cost 150; flags are killed.
9985 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9986 %{
9987   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9988   effect(KILL cr);
9989
9990   ins_cost(150);
9991   format %{ "xorl    $dst, $src\t# int" %}
9992   opcode(0x31); /* Opcode 31 /r */
9993   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9994   ins_pipe(ialu_mem_reg);
9995 %}
9996 
9997 // Xor Memory with Immediate
// Int read-modify-write XOR of memory with an immediate: matches a StoreI to
// $dst of (LoadI $dst ^ $src). Opcode 0x81 with /6 placed in the ModRM
// opcode-extension field via RM_opc_mem(secondary, dst). Cost 125; flags are
// killed.
9998 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9999 %{
10000   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10001   effect(KILL cr);
10002
10003   ins_cost(125);
10004   format %{ "xorl    $dst, $src\t# int" %}
10005   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10006   ins_encode(REX_mem(dst), OpcSE(src),
10007              RM_opc_mem(secondary, dst), Con8or32(src));
10008   ins_pipe(ialu_mem_imm);
10009 %}
10010 
10011 
10012 // Long Logical Instructions
10013 
10014 // And Instructions
10015 // And Register with Register
// 64-bit AND, register &= register. Two-address form with $dst as input and
// result. Encoded with the wide prefix class REX_reg_reg_wide, primary
// opcode 0x23 and a reg/reg operand byte. Flags are killed.
10016 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10017 %{
10018   match(Set dst (AndL dst src));
10019   effect(KILL cr);
10020
10021   format %{ "andq    $dst, $src\t# long" %}
10022   opcode(0x23);
10023   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10024   ins_pipe(ialu_reg_reg);
10025 %}
10026 
10027 // And Register with Immediate 255
// 64-bit AND with the immL_255 constant, implemented as a zero-extending
// byte move of $dst onto itself (movzbq, opcode bytes 0x0F 0xB6 with a wide
// REX prefix) instead of an and. No flags effect is declared for this rule.
10028 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10029 %{
10030   match(Set dst (AndL dst src));
10031
10032   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10033   opcode(0x0F, 0xB6);
10034   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10035   ins_pipe(ialu_reg);
10036 %}
10037 
10038 // And Register with Immediate 65535
// 64-bit AND with the immL_65535 constant, implemented as a zero-extending
// word move of $dst onto itself (movzwq, opcode bytes 0x0F 0xB7 with a wide
// REX prefix) instead of an and. No flags effect is declared for this rule.
10039 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10040 %{
10041   match(Set dst (AndL dst src));
10042
10043   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10044   opcode(0x0F, 0xB7);
10045   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10046   ins_pipe(ialu_reg);
10047 %}
10048 
10049 // And Register with Immediate
// 64-bit AND, register &= immediate restricted to the immL32 operand class.
// Uses opcode 0x81 with /4 as the secondary opcode via the wide encoding
// class OpcSErm_wide; the immediate is emitted by Con8or32. Flags are killed.
10050 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10051 %{
10052   match(Set dst (AndL dst src));
10053   effect(KILL cr);
10054
10055   format %{ "andq    $dst, $src\t# long" %}
10056   opcode(0x81, 0x04); /* Opcode 81 /4 */
10057   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10058   ins_pipe(ialu_reg);
10059 %}
10060 
10061 // And Register with Memory
// 64-bit AND, register &= long loaded from memory. The LoadL is folded into
// the instruction (opcode 0x23 with a wide REX prefix and a reg/mem
// operand). Cost 125; flags are killed.
10062 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10063 %{
10064   match(Set dst (AndL dst (LoadL src)));
10065   effect(KILL cr);
10066
10067   ins_cost(125);
10068   format %{ "andq    $dst, $src\t# long" %}
10069   opcode(0x23);
10070   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10071   ins_pipe(ialu_reg_mem);
10072 %}
10073 
10074 // And Memory with Register
// Long read-modify-write AND in memory: matches a StoreL to $dst of
// (LoadL $dst & $src), with load and store on the same memory operand.
// Opcode 0x21 with a wide REX prefix. Cost 150; flags are killed.
10075 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10076 %{
10077   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10078   effect(KILL cr);
10079
10080   ins_cost(150);
10081   format %{ "andq    $dst, $src\t# long" %}
10082   opcode(0x21); /* Opcode 21 /r */
10083   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10084   ins_pipe(ialu_mem_reg);
10085 %}
10086 
10087 // And Memory with Immediate
// Long read-modify-write AND of memory with an immL32 immediate: matches a
// StoreL to $dst of (LoadL $dst & $src). Opcode 0x81 with /4 placed in the
// ModRM opcode-extension field via RM_opc_mem(secondary, dst), preceded by
// a wide REX prefix. Cost 125; flags are killed.
10088 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10089 %{
10090   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10091   effect(KILL cr);
10092
10093   ins_cost(125);
10094   format %{ "andq    $dst, $src\t# long" %}
10095   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10096   ins_encode(REX_mem_wide(dst), OpcSE(src),
10097              RM_opc_mem(secondary, dst), Con8or32(src));
10098   ins_pipe(ialu_mem_imm);
10099 %}
10100 
10101 // BMI1 instructions
// BMI1 ANDN, 64-bit, second operand in memory. Matches
// (src1 ^ minus_1) & LoadL(src2), where minus_1 is an immL_M1, and emits a
// single three-operand andnq. Guarded by UseBMI1Instructions; flags are
// killed.
10102 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
10103   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
10104   predicate(UseBMI1Instructions);
10105   effect(KILL cr);
10106
10107   ins_cost(125);
10108   format %{ "andnq  $dst, $src1, $src2" %}
10109
10110   ins_encode %{
10111     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
10112   %}
10113   ins_pipe(ialu_reg_mem);
10114 %}
10115 
// BMI1 ANDN, 64-bit, all-register form. Matches (src1 ^ minus_1) & src2,
// where minus_1 is an immL_M1, and emits a single three-operand andnq.
// Guarded by UseBMI1Instructions; flags are killed.
// The pipeline class is ialu_reg: this form has no memory operand, unlike
// andnL_rReg_rReg_mem which uses ialu_reg_mem.
10116 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
10117   match(Set dst (AndL (XorL src1 minus_1) src2));
10118   predicate(UseBMI1Instructions);
10119   effect(KILL cr);
10120
10121   format %{ "andnq  $dst, $src1, $src2" %}
10122
10123   ins_encode %{
10124     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
10125   %}
10126   ins_pipe(ialu_reg);
10127 %}
10128 
// BMI1 BLSI, 64-bit, register source. Matches (imm_zero - src) & src
// (imm_zero is an immL0, presumably constant 0) and emits a single blsiq.
// Guarded by UseBMI1Instructions; flags are killed.
10129 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
10130   match(Set dst (AndL (SubL imm_zero src) src));
10131   predicate(UseBMI1Instructions);
10132   effect(KILL cr);
10133
10134   format %{ "blsiq  $dst, $src" %}
10135
10136   ins_encode %{
10137     __ blsiq($dst$$Register, $src$$Register);
10138   %}
10139   ins_pipe(ialu_reg);
10140 %}
10141 
// BMI1 BLSI, 64-bit, memory source. Matches (imm_zero - x) & x where both x
// are the same LoadL of $src (imm_zero is an immL0, presumably constant 0)
// and emits a single blsiq. Guarded by UseBMI1Instructions; flags are killed.
10142 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
10143   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
10144   predicate(UseBMI1Instructions);
10145   effect(KILL cr);
10146
10147   ins_cost(125);
10148   format %{ "blsiq  $dst, $src" %}
10149
10150   ins_encode %{
10151     __ blsiq($dst$$Register, $src$$Address);
10152   %}
10153   ins_pipe(ialu_reg_mem);
10154 %}
10155 
// BMI1 BLSMSK, 64-bit, memory source. Matches (x + minus_1) ^ x where both x
// are the same LoadL of $src (minus_1 is an immL_M1) and emits a single
// blsmskq. Guarded by UseBMI1Instructions; flags are killed.
10156 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10157 %{
10158   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
10159   predicate(UseBMI1Instructions);
10160   effect(KILL cr);
10161
10162   ins_cost(125);
10163   format %{ "blsmskq $dst, $src" %}
10164
10165   ins_encode %{
10166     __ blsmskq($dst$$Register, $src$$Address);
10167   %}
10168   ins_pipe(ialu_reg_mem);
10169 %}
10170 
// BMI1 BLSMSK, 64-bit, register source. Matches (src + minus_1) ^ src
// (minus_1 is an immL_M1) and emits a single blsmskq into a separate $dst.
// Guarded by UseBMI1Instructions; flags are killed.
10171 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10172 %{
10173   match(Set dst (XorL (AddL src minus_1) src));
10174   predicate(UseBMI1Instructions);
10175   effect(KILL cr);
10176
10177   format %{ "blsmskq $dst, $src" %}
10178
10179   ins_encode %{
10180     __ blsmskq($dst$$Register, $src$$Register);
10181   %}
10182
10183   ins_pipe(ialu_reg);
10184 %}
10185 
// BMI1 BLSR, 64-bit, register source. Matches (src + minus_1) & src
// (minus_1 is an immL_M1) and emits a single blsrq into a separate $dst.
// Guarded by UseBMI1Instructions; flags are killed.
10186 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10187 %{
10188   match(Set dst (AndL (AddL src minus_1) src) );
10189   predicate(UseBMI1Instructions);
10190   effect(KILL cr);
10191
10192   format %{ "blsrq  $dst, $src" %}
10193
10194   ins_encode %{
10195     __ blsrq($dst$$Register, $src$$Register);
10196   %}
10197
10198   ins_pipe(ialu_reg);
10199 %}
10200 
// BMI1 BLSR, 64-bit, memory source. Matches (x + minus_1) & x where both x
// are the same LoadL of $src (minus_1 is an immL_M1) and emits a single
// blsrq. Guarded by UseBMI1Instructions; flags are killed.
// The pipeline class is ialu_reg_mem because the instruction reads memory,
// matching the other memory-source BMI1 rules (e.g. blsmskL_rReg_mem).
10201 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10202 %{
10203   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
10204   predicate(UseBMI1Instructions);
10205   effect(KILL cr);
10206
10207   ins_cost(125);
10208   format %{ "blsrq  $dst, $src" %}
10209
10210   ins_encode %{
10211     __ blsrq($dst$$Register, $src$$Address);
10212   %}
10213
10214   ins_pipe(ialu_reg_mem);
10215 %}
10216 
10217 // Or Instructions
10218 // Or Register with Register
// 64-bit OR, register |= register. Two-address form with $dst as input and
// result. Encoded with REX_reg_reg_wide, primary opcode 0x0B and a reg/reg
// operand byte. Flags are killed.
10219 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10220 %{
10221   match(Set dst (OrL dst src));
10222   effect(KILL cr);
10223
10224   format %{ "orq     $dst, $src\t# long" %}
10225   opcode(0x0B);
10226   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10227   ins_pipe(ialu_reg_reg);
10228 %}
10229 
10230 // Use any_RegP to match R15 (TLS register) without spilling.
// 64-bit OR of a long register with a pointer reinterpreted as an integer
// (CastP2X). The pointer operand uses the any_RegP class; the encoding is
// the same as the plain register/register orq (opcode 0x0B, wide REX).
// Flags are killed.
10231 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10232   match(Set dst (OrL dst (CastP2X src)));
10233   effect(KILL cr);
10234
10235   format %{ "orq     $dst, $src\t# long" %}
10236   opcode(0x0B);
10237   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10238   ins_pipe(ialu_reg_reg);
10239 %}
10240 
10241 
10242 // Or Register with Immediate
// 64-bit OR, register |= immediate restricted to the immL32 operand class.
// Uses opcode 0x81 with /1 as the secondary opcode via OpcSErm_wide; the
// immediate is emitted by Con8or32. Flags are killed.
10243 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10244 %{
10245   match(Set dst (OrL dst src));
10246   effect(KILL cr);
10247
10248   format %{ "orq     $dst, $src\t# long" %}
10249   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10250   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10251   ins_pipe(ialu_reg);
10252 %}
10253 
10254 // Or Register with Memory
// 64-bit OR, register |= long loaded from memory. The LoadL is folded into
// the instruction (opcode 0x0B with a wide REX prefix and a reg/mem
// operand). Cost 125; flags are killed.
10255 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10256 %{
10257   match(Set dst (OrL dst (LoadL src)));
10258   effect(KILL cr);
10259
10260   ins_cost(125);
10261   format %{ "orq     $dst, $src\t# long" %}
10262   opcode(0x0B);
10263   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10264   ins_pipe(ialu_reg_mem);
10265 %}
10266 
10267 // Or Memory with Register
// Long read-modify-write OR in memory: matches a StoreL to $dst of
// (LoadL $dst | $src), with load and store on the same memory operand.
// Opcode 0x09 with a wide REX prefix. Cost 150; flags are killed.
10268 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10269 %{
10270   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10271   effect(KILL cr);
10272
10273   ins_cost(150);
10274   format %{ "orq     $dst, $src\t# long" %}
10275   opcode(0x09); /* Opcode 09 /r */
10276   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10277   ins_pipe(ialu_mem_reg);
10278 %}
10279 
10280 // Or Memory with Immediate
// Long read-modify-write OR of memory with an immL32 immediate: matches a
// StoreL to $dst of (LoadL $dst | $src). Opcode 0x81 with /1 placed in the
// ModRM opcode-extension field via RM_opc_mem(secondary, dst), preceded by
// a wide REX prefix. Cost 125; flags are killed.
10281 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10282 %{
10283   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10284   effect(KILL cr);
10285
10286   ins_cost(125);
10287   format %{ "orq     $dst, $src\t# long" %}
10288   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10289   ins_encode(REX_mem_wide(dst), OpcSE(src),
10290              RM_opc_mem(secondary, dst), Con8or32(src));
10291   ins_pipe(ialu_mem_imm);
10292 %}
10293 
10294 // Xor Instructions
10295 // Xor Register with Register
// 64-bit XOR, register ^= register. Two-address form with $dst as input and
// result. Encoded with REX_reg_reg_wide, primary opcode 0x33 and a reg/reg
// operand byte. Flags are killed.
10296 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10297 %{
10298   match(Set dst (XorL dst src));
10299   effect(KILL cr);
10300
10301   format %{ "xorq    $dst, $src\t# long" %}
10302   opcode(0x33);
10303   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10304   ins_pipe(ialu_reg_reg);
10305 %}
10306 
10307 // Xor Register with Immediate -1
// 64-bit XOR with the immL_M1 constant (-1), i.e. bitwise complement.
// Emitted as notq on $dst. No flags effect is declared for this rule.
10308 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10309   match(Set dst (XorL dst imm));
10310
10311   format %{ "notq   $dst" %}
10312   ins_encode %{
10313      __ notq($dst$$Register);
10314   %}
10315   ins_pipe(ialu_reg);
10316 %}
10317 
10318 // Xor Register with Immediate
// 64-bit XOR, register ^= immediate restricted to the immL32 operand class.
// Uses opcode 0x81 with /6 as the secondary opcode via OpcSErm_wide; the
// immediate is emitted by Con8or32. Flags are killed.
10319 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10320 %{
10321   match(Set dst (XorL dst src));
10322   effect(KILL cr);
10323
10324   format %{ "xorq    $dst, $src\t# long" %}
10325   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10326   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10327   ins_pipe(ialu_reg);
10328 %}
10329 
10330 // Xor Register with Memory
// 64-bit XOR, register ^= long loaded from memory. The LoadL is folded into
// the instruction (opcode 0x33 with a wide REX prefix and a reg/mem
// operand). Cost 125; flags are killed.
10331 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10332 %{
10333   match(Set dst (XorL dst (LoadL src)));
10334   effect(KILL cr);
10335
10336   ins_cost(125);
10337   format %{ "xorq    $dst, $src\t# long" %}
10338   opcode(0x33);
10339   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10340   ins_pipe(ialu_reg_mem);
10341 %}
10342 
10343 // Xor Memory with Register
// Long read-modify-write XOR in memory: matches a StoreL to $dst of
// (LoadL $dst ^ $src), with load and store on the same memory operand.
// Opcode 0x31 with a wide REX prefix. Cost 150; flags are killed.
10344 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10345 %{
10346   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10347   effect(KILL cr);
10348
10349   ins_cost(150);
10350   format %{ "xorq    $dst, $src\t# long" %}
10351   opcode(0x31); /* Opcode 31 /r */
10352   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10353   ins_pipe(ialu_mem_reg);
10354 %}
10355 
10356 // Xor Memory with Immediate
// Long read-modify-write XOR of memory with an immL32 immediate: matches a
// StoreL to $dst of (LoadL $dst ^ $src). Opcode 0x81 with /6 placed in the
// ModRM opcode-extension field via RM_opc_mem(secondary, dst), preceded by
// a wide REX prefix. Cost 125; flags are killed.
10357 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10358 %{
10359   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10360   effect(KILL cr);
10361
10362   ins_cost(125);
10363   format %{ "xorq    $dst, $src\t# long" %}
10364   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10365   ins_encode(REX_mem_wide(dst), OpcSE(src),
10366              RM_opc_mem(secondary, dst), Con8or32(src));
10367   ins_pipe(ialu_mem_imm);
10368 %}
10369 
10370 // Convert Int to Boolean
// Conv2B of an int register: produces a 0/1 value in $dst. Three
// instructions are emitted in order: testl src,src (opcode 0x85), setnz on
// the low byte of dst, then movzbl dst,dst (0x0F 0xB6) to zero-extend it.
// Flags are killed; scheduled as pipe_slow.
10371 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10372 %{
10373   match(Set dst (Conv2B src));
10374   effect(KILL cr);
10375
10376   format %{ "testl   $src, $src\t# ci2b\n\t"
10377             "setnz   $dst\n\t"
10378             "movzbl  $dst, $dst" %}
10379   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10380              setNZ_reg(dst),
10381              REX_reg_breg(dst, dst), // movzbl
10382              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10383   ins_pipe(pipe_slow); // XXX
10384 %}
10385 
10386 // Convert Pointer to Boolean
// Conv2B of a pointer register: same sequence as the int form but the test
// uses the wide REX prefix (testq) so all 64 bits of the pointer are
// examined; setnz and movzbl then build the 0/1 int result in $dst.
// Flags are killed; scheduled as pipe_slow.
10387 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10388 %{
10389   match(Set dst (Conv2B src));
10390   effect(KILL cr);
10391
10392   format %{ "testq   $src, $src\t# cp2b\n\t"
10393             "setnz   $dst\n\t"
10394             "movzbl  $dst, $dst" %}
10395   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10396              setNZ_reg(dst),
10397              REX_reg_breg(dst, dst), // movzbl
10398              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10399   ins_pipe(pipe_slow); // XXX
10400 %}
10401 
// General CmpLTMask of two int registers into a separate $dst. Emits, in
// order: cmpl p,q (opcode 0x3B); setlt on the low byte of dst; movzbl
// dst,dst to zero-extend; and a negate of dst, turning the 0/1 flag value
// into 0/-1. Cost 400; flags are killed.
10402 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10403 %{
10404   match(Set dst (CmpLTMask p q));
10405   effect(KILL cr);
10406
10407   ins_cost(400);
10408   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10409             "setlt   $dst\n\t"
10410             "movzbl  $dst, $dst\n\t"
10411             "negl    $dst" %}
10412   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10413              setLT_reg(dst),
10414              REX_reg_breg(dst, dst), // movzbl
10415              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10416              neg_reg(dst));
10417   ins_pipe(pipe_slow);
10418 %}
10419 
// CmpLTMask against the immI0 operand (named zero), done in place on $dst
// with a single arithmetic right shift by 31, which fills the register with
// copies of its sign bit. Cost 100, far cheaper than the general cmpLTMask
// rule (400). Flags are killed.
10420 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10421 %{
10422   match(Set dst (CmpLTMask dst zero));
10423   effect(KILL cr);
10424
10425   ins_cost(100);
10426   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10427   ins_encode %{
10428   __ sarl($dst$$Register, 31);
10429   %}
10430   ins_pipe(ialu_reg);
10431 %}
10432 
10433 /* Better to save a register than avoid a branch */
// Fused conditional add: matches p = ((CmpLTMask p q) & y) + (p - q) and
// implements it as a subtract followed by a short forward branch that skips
// the add when p >= q, so no mask register is materialized.
// Cost 300; flags are killed.
10434 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10435 %{
10436   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10437   effect(KILL cr);
10438   ins_cost(300);
10439   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
10440             "jge     done\n\t"
10441             "addl    $p,$y\n"
10442             "done:   " %}
10443   ins_encode %{
10444     Label skip_add;
10445     Register accum  = $p$$Register;  // holds p on entry, the result on exit
10446     Register subtr  = $q$$Register;
10447     Register addend = $y$$Register;
10448     __ subl(accum, subtr);
10449     __ jccb(Assembler::greaterEqual, skip_add);  // p >= q: nothing to add
10450     __ addl(accum, addend);
10451     __ bind(skip_add);
10452   %}
10453   ins_pipe(pipe_cmplt);
10454 %}
10455 
10456 /* Better to save a register than avoid a branch */
// Fused mask-and: matches y = (CmpLTMask p q) & y and implements it as a
// compare plus a short forward branch: y is left untouched when p < q and
// cleared with xorl otherwise. Cost 300; flags are killed.
10457 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10458 %{
10459   match(Set y (AndI (CmpLTMask p q) y));
10460   effect(KILL cr);
10461
10462   ins_cost(300);
10463
10464   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
10465             "jlt     done\n\t"
10466             "xorl    $y, $y\n"
10467             "done:   " %}
10468   ins_encode %{
10469     Label keep_value;
10470     Register lhs    = $p$$Register;
10471     Register rhs    = $q$$Register;
10472     Register masked = $y$$Register;  // input y and result
10473     __ cmpl(lhs, rhs);
10474     __ jccb(Assembler::less, keep_value);  // p < q: y passes through
10475     __ xorl(masked, masked);
10476     __ bind(keep_value);
10477   %}
10478   ins_pipe(pipe_cmplt);
10479 %}
10480 
10481 
10482 //---------- FP Instructions------------------------------------------------
10483 
// Float compare of two XMM registers producing an rFlagsRegU result.
// Emits ucomiss followed by emit_cmpfp_fixup (defined outside this chunk);
// the format text shows the intended fixup: when parity is set ("saw NaN")
// the flags are rewritten through pushfq / andq / popfq. Cost 145.
10484 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10485 %{
10486   match(Set cr (CmpF src1 src2));
10487
10488   ins_cost(145);
10489   format %{ "ucomiss $src1, $src2\n\t"
10490             "jnp,s   exit\n\t"
10491             "pushfq\t# saw NaN, set CF\n\t"
10492             "andq    [rsp], #0xffffff2b\n\t"
10493             "popfq\n"
10494     "exit:" %}
10495   ins_encode %{
10496     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10497     emit_cmpfp_fixup(_masm);
10498   %}
10499   ins_pipe(pipe_slow);
10500 %}
10501 
// Float compare of two XMM registers producing an rFlagsRegUCF result:
// a bare ucomiss with no NaN fixup, at cost 100 (the rFlagsRegU form with
// the fixup costs 145).
10502 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10503   match(Set cr (CmpF src1 src2));
10504
10505   ins_cost(100);
10506   format %{ "ucomiss $src1, $src2" %}
10507   ins_encode %{
10508     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10509   %}
10510   ins_pipe(pipe_slow);
10511 %}
10512 
// Float compare of an XMM register with a float loaded from memory (the
// LoadF is folded into ucomiss), producing an rFlagsRegU result. Followed by
// emit_cmpfp_fixup (defined outside this chunk); the format text shows the
// intended pushfq / andq / popfq fixup on the NaN path. Cost 145.
10513 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10514 %{
10515   match(Set cr (CmpF src1 (LoadF src2)));
10516
10517   ins_cost(145);
10518   format %{ "ucomiss $src1, $src2\n\t"
10519             "jnp,s   exit\n\t"
10520             "pushfq\t# saw NaN, set CF\n\t"
10521             "andq    [rsp], #0xffffff2b\n\t"
10522             "popfq\n"
10523     "exit:" %}
10524   ins_encode %{
10525     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10526     emit_cmpfp_fixup(_masm);
10527   %}
10528   ins_pipe(pipe_slow);
10529 %}
10530 
// Float compare of an XMM register with a float loaded from memory,
// producing an rFlagsRegUCF result: a bare ucomiss with a memory operand
// and no NaN fixup. Cost 100.
10531 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10532   match(Set cr (CmpF src1 (LoadF src2)));
10533
10534   ins_cost(100);
10535   format %{ "ucomiss $src1, $src2" %}
10536   ins_encode %{
10537     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10538   %}
10539   ins_pipe(pipe_slow);
10540 %}
10541 
// Float compare of an XMM register with a float constant, producing an
// rFlagsRegU result. The constant is read through $constantaddress($con)
// (per the format text, a constant-table load). Followed by
// emit_cmpfp_fixup (defined outside this chunk). Cost 145.
10542 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
10543   match(Set cr (CmpF src con));
10544
10545   ins_cost(145);
10546   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10547             "jnp,s   exit\n\t"
10548             "pushfq\t# saw NaN, set CF\n\t"
10549             "andq    [rsp], #0xffffff2b\n\t"
10550             "popfq\n"
10551     "exit:" %}
10552   ins_encode %{
10553     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10554     emit_cmpfp_fixup(_masm);
10555   %}
10556   ins_pipe(pipe_slow);
10557 %}
10558 
// Float compare of an XMM register with a float constant, producing an
// rFlagsRegUCF result: a bare ucomiss against $constantaddress($con) with
// no NaN fixup. Cost 100.
10559 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10560   match(Set cr (CmpF src con));
10561   ins_cost(100);
10562   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10563   ins_encode %{
10564     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10565   %}
10566   ins_pipe(pipe_slow);
10567 %}
10568 
// Double compare of two XMM registers producing an rFlagsRegU result.
// Emits ucomisd followed by emit_cmpfp_fixup (defined outside this chunk);
// the format text shows the intended fixup: when parity is set ("saw NaN")
// the flags are rewritten through pushfq / andq / popfq. Cost 145.
10569 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10570 %{
10571   match(Set cr (CmpD src1 src2));
10572
10573   ins_cost(145);
10574   format %{ "ucomisd $src1, $src2\n\t"
10575             "jnp,s   exit\n\t"
10576             "pushfq\t# saw NaN, set CF\n\t"
10577             "andq    [rsp], #0xffffff2b\n\t"
10578             "popfq\n"
10579     "exit:" %}
10580   ins_encode %{
10581     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10582     emit_cmpfp_fixup(_masm);
10583   %}
10584   ins_pipe(pipe_slow);
10585 %}
10586 
// Double compare of two XMM registers producing an rFlagsRegUCF result:
// a bare ucomisd with no NaN fixup, at cost 100 (the rFlagsRegU form with
// the fixup costs 145).
// The format string is exactly the emitted instruction; a stray trailing
// word was removed so the printed disassembly matches the float variant.
10587 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10588   match(Set cr (CmpD src1 src2));
10589
10590   ins_cost(100);
10591   format %{ "ucomisd $src1, $src2" %}
10592   ins_encode %{
10593     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10594   %}
10595   ins_pipe(pipe_slow);
10596 %}
10597 
// Double compare of an XMM register with a double loaded from memory (the
// LoadD is folded into ucomisd), producing an rFlagsRegU result. Followed by
// emit_cmpfp_fixup (defined outside this chunk); the format text shows the
// intended pushfq / andq / popfq fixup on the NaN path. Cost 145.
10598 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10599 %{
10600   match(Set cr (CmpD src1 (LoadD src2)));
10601
10602   ins_cost(145);
10603   format %{ "ucomisd $src1, $src2\n\t"
10604             "jnp,s   exit\n\t"
10605             "pushfq\t# saw NaN, set CF\n\t"
10606             "andq    [rsp], #0xffffff2b\n\t"
10607             "popfq\n"
10608     "exit:" %}
10609   ins_encode %{
10610     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10611     emit_cmpfp_fixup(_masm);
10612   %}
10613   ins_pipe(pipe_slow);
10614 %}
10615 
// Double compare of an XMM register with a double loaded from memory,
// producing an rFlagsRegUCF result: a bare ucomisd with a memory operand
// and no NaN fixup. Cost 100.
10616 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10617   match(Set cr (CmpD src1 (LoadD src2)));
10618
10619   ins_cost(100);
10620   format %{ "ucomisd $src1, $src2" %}
10621   ins_encode %{
10622     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10623   %}
10624   ins_pipe(pipe_slow);
10625 %}
10626 
// Double compare of an XMM register with a double constant, producing an
// rFlagsRegU result. The constant is read through $constantaddress($con)
// (per the format text, a constant-table load). Followed by
// emit_cmpfp_fixup (defined outside this chunk). Cost 145.
10627 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10628   match(Set cr (CmpD src con));
10629
10630   ins_cost(145);
10631   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10632             "jnp,s   exit\n\t"
10633             "pushfq\t# saw NaN, set CF\n\t"
10634             "andq    [rsp], #0xffffff2b\n\t"
10635             "popfq\n"
10636     "exit:" %}
10637   ins_encode %{
10638     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10639     emit_cmpfp_fixup(_masm);
10640   %}
10641   ins_pipe(pipe_slow);
10642 %}
10643 
// Double compare of an XMM register with a double constant, producing an
// rFlagsRegUCF result: a bare ucomisd against $constantaddress($con) with
// no NaN fixup. Cost 100.
10644 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10645   match(Set cr (CmpD src con));
10646   ins_cost(100);
10647   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10648   ins_encode %{
10649     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10650   %}
10651   ins_pipe(pipe_slow);
10652 %}
10653 
10654 // Compare into -1,0,1
// Three-way float compare (CmpF3) of two XMM registers into an int $dst.
// Emits ucomiss and then emit_cmpfp3 (defined outside this chunk); the
// format text shows the intended tail: preload -1, keep it when parity or
// below is set, otherwise setne + movzbl to produce 0 or 1.
// Cost 275; flags are killed.
10655 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10656 %{
10657   match(Set dst (CmpF3 src1 src2));
10658   effect(KILL cr);
10659
10660   ins_cost(275);
10661   format %{ "ucomiss $src1, $src2\n\t"
10662             "movl    $dst, #-1\n\t"
10663             "jp,s    done\n\t"
10664             "jb,s    done\n\t"
10665             "setne   $dst\n\t"
10666             "movzbl  $dst, $dst\n"
10667     "done:" %}
10668   ins_encode %{
10669     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10670     emit_cmpfp3(_masm, $dst$$Register);
10671   %}
10672   ins_pipe(pipe_slow);
10673 %}
10674 
10675 // Compare into -1,0,1
// Three-way float compare (CmpF3) of an XMM register with a float loaded
// from memory; the LoadF is folded into ucomiss. The -1/0/1 result is
// built by emit_cmpfp3 (defined outside this chunk), as sketched in the
// format text. Cost 275; flags are killed.
10676 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10677 %{
10678   match(Set dst (CmpF3 src1 (LoadF src2)));
10679   effect(KILL cr);
10680
10681   ins_cost(275);
10682   format %{ "ucomiss $src1, $src2\n\t"
10683             "movl    $dst, #-1\n\t"
10684             "jp,s    done\n\t"
10685             "jb,s    done\n\t"
10686             "setne   $dst\n\t"
10687             "movzbl  $dst, $dst\n"
10688     "done:" %}
10689   ins_encode %{
10690     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10691     emit_cmpfp3(_masm, $dst$$Register);
10692   %}
10693   ins_pipe(pipe_slow);
10694 %}
10695 
10696 // Compare into -1,0,1
// Three-way float compare (CmpF3) of an XMM register with a float constant
// read through $constantaddress($con). The -1/0/1 result is built by
// emit_cmpfp3 (defined outside this chunk), as sketched in the format text.
// Cost 275; flags are killed.
10697 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10698   match(Set dst (CmpF3 src con));
10699   effect(KILL cr);
10700
10701   ins_cost(275);
10702   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10703             "movl    $dst, #-1\n\t"
10704             "jp,s    done\n\t"
10705             "jb,s    done\n\t"
10706             "setne   $dst\n\t"
10707             "movzbl  $dst, $dst\n"
10708     "done:" %}
10709   ins_encode %{
10710     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10711     emit_cmpfp3(_masm, $dst$$Register);
10712   %}
10713   ins_pipe(pipe_slow);
10714 %}
10715 
10716 // Compare into -1,0,1
// Three-way double compare (CmpD3) of two XMM registers into an int $dst.
// Emits ucomisd and then emit_cmpfp3 (defined outside this chunk); the
// format text shows the intended tail: preload -1, keep it when parity or
// below is set, otherwise setne + movzbl to produce 0 or 1.
// Cost 275; flags are killed.
10717 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10718 %{
10719   match(Set dst (CmpD3 src1 src2));
10720   effect(KILL cr);
10721
10722   ins_cost(275);
10723   format %{ "ucomisd $src1, $src2\n\t"
10724             "movl    $dst, #-1\n\t"
10725             "jp,s    done\n\t"
10726             "jb,s    done\n\t"
10727             "setne   $dst\n\t"
10728             "movzbl  $dst, $dst\n"
10729     "done:" %}
10730   ins_encode %{
10731     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10732     emit_cmpfp3(_masm, $dst$$Register);
10733   %}
10734   ins_pipe(pipe_slow);
10735 %}
10736 
10737 // Compare into -1,0,1
// Three-way double compare (CmpD3) of an XMM register with a double loaded
// from memory; the LoadD is folded into ucomisd. The -1/0/1 result is
// built by emit_cmpfp3 (defined outside this chunk), as sketched in the
// format text. Cost 275; flags are killed.
10738 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10739 %{
10740   match(Set dst (CmpD3 src1 (LoadD src2)));
10741   effect(KILL cr);
10742
10743   ins_cost(275);
10744   format %{ "ucomisd $src1, $src2\n\t"
10745             "movl    $dst, #-1\n\t"
10746             "jp,s    done\n\t"
10747             "jb,s    done\n\t"
10748             "setne   $dst\n\t"
10749             "movzbl  $dst, $dst\n"
10750     "done:" %}
10751   ins_encode %{
10752     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10753     emit_cmpfp3(_masm, $dst$$Register);
10754   %}
10755   ins_pipe(pipe_slow);
10756 %}
10757 
10758 // Compare into -1,0,1
// Three-way double compare (CmpD3) of an XMM register with a double
// constant read through $constantaddress($con). The -1/0/1 result is built
// by emit_cmpfp3 (defined outside this chunk), as sketched in the format
// text. Cost 275; flags are killed.
10759 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10760   match(Set dst (CmpD3 src con));
10761   effect(KILL cr);
10762
10763   ins_cost(275);
10764   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10765             "movl    $dst, #-1\n\t"
10766             "jp,s    done\n\t"
10767             "jb,s    done\n\t"
10768             "setne   $dst\n\t"
10769             "movzbl  $dst, $dst\n"
10770     "done:" %}
10771   ins_encode %{
10772     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10773     emit_cmpfp3(_masm, $dst$$Register);
10774   %}
10775   ins_pipe(pipe_slow);
10776 %}
10777 
10778 //----------Arithmetic Conversion Instructions---------------------------------
10779 
// RoundFloat on a regF is matched in place (dst -> dst) with zero cost, an
// empty encoding and the empty pipeline class, so no machine code is
// emitted for it.
10780 instruct roundFloat_nop(regF dst)
10781 %{
10782   match(Set dst (RoundFloat dst));
10783
10784   ins_cost(0);
10785   ins_encode();
10786   ins_pipe(empty);
10787 %}
10788 
// RoundDouble on a regD is matched in place (dst -> dst) with zero cost, an
// empty encoding and the empty pipeline class, so no machine code is
// emitted for it.
10789 instruct roundDouble_nop(regD dst)
10790 %{
10791   match(Set dst (RoundDouble dst));
10792
10793   ins_cost(0);
10794   ins_encode();
10795   ins_pipe(empty);
10796 %}
10797 
// Float-to-double conversion between XMM registers: a single cvtss2sd.
// No flags effect is declared; scheduled as pipe_slow.
10798 instruct convF2D_reg_reg(regD dst, regF src)
10799 %{
10800   match(Set dst (ConvF2D src));
10801
10802   format %{ "cvtss2sd $dst, $src" %}
10803   ins_encode %{
10804     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10805   %}
10806   ins_pipe(pipe_slow); // XXX
10807 %}
10808 
// Float-to-double conversion with the source float loaded from memory; the
// LoadF is folded into a single cvtss2sd with a memory operand.
// No flags effect is declared; scheduled as pipe_slow.
10809 instruct convF2D_reg_mem(regD dst, memory src)
10810 %{
10811   match(Set dst (ConvF2D (LoadF src)));
10812
10813   format %{ "cvtss2sd $dst, $src" %}
10814   ins_encode %{
10815     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
10816   %}
10817   ins_pipe(pipe_slow); // XXX
10818 %}
10819 
// Double-to-float conversion between XMM registers: a single cvtsd2ss.
// No flags effect is declared; scheduled as pipe_slow.
10820 instruct convD2F_reg_reg(regF dst, regD src)
10821 %{
10822   match(Set dst (ConvD2F src));
10823
10824   format %{ "cvtsd2ss $dst, $src" %}
10825   ins_encode %{
10826     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10827   %}
10828   ins_pipe(pipe_slow); // XXX
10829 %}
10830 
// Double-to-float conversion with the source double loaded from memory; the
// LoadD is folded into a single cvtsd2ss with a memory operand.
// No flags effect is declared; scheduled as pipe_slow.
10831 instruct convD2F_reg_mem(regF dst, memory src)
10832 %{
10833   match(Set dst (ConvD2F (LoadD src)));
10834
10835   format %{ "cvtsd2ss $dst, $src" %}
10836   ins_encode %{
10837     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
10838   %}
10839   ins_pipe(pipe_slow); // XXX
10840 %}
10841 
10842 // XXX do mem variants
// Float-to-int conversion. Fast path: cvttss2sil into $dst. If the result
// equals 0x80000000 the slow path runs instead: 8 bytes are reserved on the
// stack, the source float is stored there, the f2i_fixup stub is called,
// and the final value is popped from the stack into $dst.
// Flags are killed; scheduled as pipe_slow.
10843 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10844 %{
10845   match(Set dst (ConvF2I src));
10846   effect(KILL cr);
10847
10848   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10849             "cmpl    $dst, #0x80000000\n\t"
10850             "jne,s   done\n\t"
10851             "subq    rsp, #8\n\t"
10852             "movss   [rsp], $src\n\t"
10853             "call    f2i_fixup\n\t"
10854             "popq    $dst\n"
10855     "done:   "%}
10856   ins_encode %{
10857     Label done;
10858     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10859     __ cmpl($dst$$Register, 0x80000000);
10860     __ jccb(Assembler::notEqual, done);
10861     __ subptr(rsp, 8);
10862     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10863     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
10864     __ pop($dst$$Register);
10865     __ bind(done);
10866   %}
10867   ins_pipe(pipe_slow);
10868 %}
10869 
// ConvF2L: truncating float -> long conversion.
// Fast path is a single cvttss2siq.  When the result equals the 64-bit
// constant at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the listing), the source float is spilled to a fresh
// 8-byte stack slot, the f2l_fixup stub is called, and the slot is popped
// into dst.  The compare clobbers the flags, hence KILL cr.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    address sign_flip  = (address) StubRoutines::x86::double_sign_flip();
    address fixup_stub = CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup());
    Label L_done;

    // Fast path.
    __ cvttss2siq(Rdst, Xsrc);
    __ cmp64(Rdst, ExternalAddress(sign_flip));
    __ jccb(Assembler::notEqual, L_done);

    // Slow path: hand the original float to the stub through the stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), Xsrc);
    __ call(RuntimeAddress(fixup_stub));
    __ pop(Rdst);

    __ bind(L_done);
  %}
  ins_pipe(pipe_slow);
%}
10897 
// ConvD2I: truncating double -> int conversion.
// Fast path is a single cvttsd2sil.  When the result equals 0x80000000 the
// source double is spilled to a fresh 8-byte stack slot, the d2i_fixup stub
// is called, and the slot is popped into dst.  The compare clobbers the
// flags, hence KILL cr.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    address fixup_stub = CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup());
    Label L_done;

    // Fast path.
    __ cvttsd2sil(Rdst, Xsrc);
    __ cmpl(Rdst, 0x80000000);
    __ jccb(Assembler::notEqual, L_done);

    // Slow path: hand the original double to the stub through the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), Xsrc);
    __ call(RuntimeAddress(fixup_stub));
    __ pop(Rdst);

    __ bind(L_done);
  %}
  ins_pipe(pipe_slow);
%}
10924 
// ConvD2L: truncating double -> long conversion.
// Fast path is a single cvttsd2siq.  When the result equals the 64-bit
// constant at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the listing), the source double is spilled to a
// fresh 8-byte stack slot, the d2l_fixup stub is called, and the slot is
// popped into dst.  The compare clobbers the flags, hence KILL cr.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    address sign_flip  = (address) StubRoutines::x86::double_sign_flip();
    address fixup_stub = CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup());
    Label L_done;

    // Fast path.
    __ cvttsd2siq(Rdst, Xsrc);
    __ cmp64(Rdst, ExternalAddress(sign_flip));
    __ jccb(Assembler::notEqual, L_done);

    // Slow path: hand the original double to the stub through the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), Xsrc);
    __ call(RuntimeAddress(fixup_stub));
    __ pop(Rdst);

    __ bind(L_done);
  %}
  ins_pipe(pipe_slow);
%}
10952 
// ConvI2F from a general register via cvtsi2ssl.  Only selected when
// UseXmmI2F is off; convXI2F_reg handles the flag-on case.
instruct convI2F_reg_reg(regF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ cvtsi2ssl(Xdst, Rsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10964 
// ConvI2F with the LoadI folded in: cvtsi2ssl reads the int directly from
// the memory operand.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10975 
// ConvI2D from a general register via cvtsi2sdl.  Only selected when
// UseXmmI2D is off; convXI2D_reg handles the flag-on case.
instruct convI2D_reg_reg(regD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ cvtsi2sdl(Xdst, Rsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10987 
// ConvI2D with the LoadI folded in: cvtsi2sdl reads the int directly from
// the memory operand.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10998 
// ConvI2F variant selected when UseXmmI2F is on: the int is first moved into
// the destination XMM register (movdl) and then converted in place with
// cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdl(Xdst, Rsrc);
    __ cvtdq2ps(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11012 
// ConvI2D variant selected when UseXmmI2D is on: the int is first moved into
// the destination XMM register (movdl) and then converted in place with
// cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdl(Xdst, Rsrc);
    __ cvtdq2pd(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11026 
// ConvL2F from a general register via cvtsi2ssq.
instruct convL2F_reg_reg(regF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ cvtsi2ssq(Xdst, Rsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11037 
// ConvL2F with the LoadL folded in: cvtsi2ssq reads the long directly from
// the memory operand.
instruct convL2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11048 
// ConvL2D from a general register via cvtsi2sdq.
instruct convL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ cvtsi2sdq(Xdst, Rsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11059 
// ConvL2D with the LoadL folded in: cvtsi2sdq reads the long directly from
// the memory operand.
instruct convL2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11070 
// ConvI2L: sign-extending int -> long move (movslq).  Costed at 125.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movslq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
11082 
11083 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11084 // %{
11085 //   match(Set dst (ConvI2L src));
11086 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11087 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11088 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11089 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11090 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11091 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11092 
11093 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11094 //   ins_encode(enc_copy(dst, src));
11095 // //   opcode(0x63); // needs REX.W
11096 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11097 //   ins_pipe(ialu_reg_reg);
11098 // %}
11099 
// Zero-extend convert int to long.
//
// Matches (AndL (ConvI2L src) mask) where mask is an immL_32bits operand and
// replaces the sign-extend + mask pair with one 32-bit register move.
// When the allocator assigns dst and src the same register no instruction is
// emitted at all.
// NOTE(review): eliding the move presumably relies on the upper half of an
// int-holding register already being zero; confirm this invariant.
//
// The format string no longer ends in "\n\t": this rule prints a single
// line, and the trailing separator left a dangling empty continuation line
// in the listing.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // Same register: nothing to emit.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
11113 
// Zero-extend convert int to long, memory source.
//
// Matches (AndL (ConvI2L (LoadI src)) mask) where mask is an immL_32bits
// operand and replaces load + sign-extend + mask with one 32-bit load.
//
// The format string no longer ends in "\n\t": this rule prints a single
// line, and the trailing separator left a dangling empty continuation line
// in the listing.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11125 
// (AndL src mask) with an immL_32bits mask, implemented as a 32-bit register
// move.  Unlike convI2L_reg_reg_zex the move is always emitted, even when
// dst and src are the same register.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
11136 
// ConvL2I: long -> int narrowing, done with a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
11147 
11148 
// MoveF2I, stack -> register: the float stack slot at rsp + disp is loaded
// into an int register with a 32-bit move.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11160 
// MoveI2F, stack -> register: the int stack slot at rsp + disp is loaded
// into a float register with movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11172 
// MoveD2L, stack -> register: the double stack slot at rsp + disp is loaded
// into a long register with a 64-bit move.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11184 
// MoveL2D, stack -> register, selected when UseXmmLoadAndClearUpper is off.
// Shares its movdbl encoding with MoveL2D_stack_reg; only the predicate and
// the printed mnemonic (movlpd) differ.
// NOTE(review): presumably movdbl itself picks the instruction from the
// flag; confirm in the macro assembler.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11197 
// MoveL2D, stack -> register, selected when UseXmmLoadAndClearUpper is on.
// Shares its movdbl encoding with MoveL2D_stack_reg_partial; only the
// predicate and the printed mnemonic (movsd) differ.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11210 
11211 
// MoveF2I, register -> stack: the float register is stored with movflt into
// the int stack slot at rsp + disp.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11223 
// MoveI2F, register -> stack: the int register is stored with a 32-bit move
// into the float stack slot at rsp + disp.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11235 
// MoveD2L, register -> stack: the double register is stored with movdbl
// into the long stack slot at rsp + disp.
//
// The format annotation previously read "MoveL2D_reg_stack" (copied from the
// sibling rule), so listings attributed this store to the wrong
// instruction.  It now carries this rule's own name, like every other Move*
// rule in this group.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11247 
// MoveL2D, register -> stack: the long register is stored with a 64-bit move
// into the double stack slot at rsp + disp.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11259 
// MoveF2I, register -> register: direct XMM to general-register transfer
// (movdl).  Costed at 85, below the stack-based variants.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movdl(Rdst, Xsrc);
  %}
  ins_pipe( pipe_slow );
%}
11270 
// MoveD2L, register -> register: direct XMM to general-register transfer
// (movdq).  Costed at 85, below the stack-based variants.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movdq(Rdst, Xsrc);
  %}
  ins_pipe( pipe_slow );
%}
11281 
// MoveI2F, register -> register: direct general-register to XMM transfer
// (movdl).  Costed at 100.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdl(Xdst, Rsrc);
  %}
  ins_pipe( pipe_slow );
%}
11292 
// MoveL2D, register -> register: direct general-register to XMM transfer
// (movdq).  Costed at 100.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdq(Xdst, Rsrc);
  %}
  ins_pipe( pipe_slow );
%}
11303 
11304 
11305 // =======================================================================
11306 // fast clearing of an array
// ClearArray for nodes that are neither is_large() nor word_copy_only().
// cnt (rcx) and base (rdi) are consumed and destroyed, val (rax) is the fill
// value, tmp is an XMM scratch register, and the flags are clobbered.
// All code is produced by clear_mem(); the format template below only
// describes the expected sequence for the listing.
// NOTE(review): the XMM part of the listing stores through (rax) and names
// xmm0 in the short loop although the operands are base (rdi) and tmp; the
// text may be stale -- confirm against clear_mem.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two flags mirror the predicate of this rule.
    const bool is_large       = false;
    const bool word_copy_only = false;
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, is_large, word_copy_only);
  %}
  ins_pipe(pipe_slow);
%}
11365 
// ClearArray for nodes that are not is_large() but are word_copy_only().
// Same operands and effects as rep_stos; the listing has no stosb branch.
// All code is produced by clear_mem(); the format template only describes
// the expected sequence for the listing.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two flags mirror the predicate of this rule.
    const bool is_large       = false;
    const bool word_copy_only = true;
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, is_large, word_copy_only);
  %}
  ins_pipe(pipe_slow);
%}
11421 
// ClearArray for nodes that are is_large() but not word_copy_only().
// Same operands and effects as rep_stos; the listing has no short-array
// prologue.  All code is produced by clear_mem(); the format template only
// describes the expected sequence for the listing.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two flags mirror the predicate of this rule.
    const bool is_large       = true;
    const bool word_copy_only = false;
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, is_large, word_copy_only);
  %}
  ins_pipe(pipe_slow);
%}
11470 
// ClearArray for nodes that are both is_large() and word_copy_only().
// Same operands and effects as rep_stos; the listing has neither the
// short-array prologue nor a stosb branch.  All code is produced by
// clear_mem(); the format template only describes the expected sequence for
// the listing.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two flags mirror the predicate of this rule.
    const bool is_large       = true;
    const bool word_copy_only = true;
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, is_large, word_copy_only);
  %}
  ins_pipe(pipe_slow);
%}
11516 
// StrComp for the LL encoding.  Operands are pinned to fixed registers
// (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax); all four inputs are
// destroyed, tmp1 is a vector scratch register and the flags are clobbered.
// The code itself comes from the string_compare macro.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legVecS tmp1, rFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    XMMRegister Xtmp    = $tmp1$$XMMRegister;
    __ string_compare(Rstr1, Rstr2, Rcnt1, Rcnt2, Rresult, Xtmp,
                      StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11532 
// StrComp for the UU encoding.  Operands are pinned to fixed registers
// (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax); all four inputs are
// destroyed, tmp1 is a vector scratch register and the flags are clobbered.
// The code itself comes from the string_compare macro.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legVecS tmp1, rFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    XMMRegister Xtmp    = $tmp1$$XMMRegister;
    __ string_compare(Rstr1, Rstr2, Rcnt1, Rcnt2, Rresult, Xtmp,
                      StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11548 
// StrComp for the LU encoding.  Operands are pinned to fixed registers
// (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax); all four inputs are
// destroyed, tmp1 is a vector scratch register and the flags are clobbered.
// The code itself comes from the string_compare macro.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legVecS tmp1, rFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    XMMRegister Xtmp    = $tmp1$$XMMRegister;
    __ string_compare(Rstr1, Rstr2, Rcnt1, Rcnt2, Rresult, Xtmp,
                      StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}
11564 
// StrComp for the UL encoding.  Compared with the other StrComp rules the
// register pinning is mirrored (str1=rsi, cnt1=rdx, str2=rdi, cnt2=rcx) and
// the operands are handed to string_compare in swapped order (str2/cnt2
// first).  All four inputs are destroyed, tmp1 is a vector scratch register
// and the flags are clobbered.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legVecS tmp1, rFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    XMMRegister Xtmp    = $tmp1$$XMMRegister;
    // Note the swapped argument order: second string first.
    __ string_compare(Rstr2, Rstr1, Rcnt2, Rcnt1, Rresult, Xtmp,
                      StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11580 
// Fast search of a substring with known (constant) size, LL encoding.
// Requires UseSSE42Intrinsics.  str1, str2 and cnt1 are destroyed; cnt2
// (rax) and tmp (rcx) are pure scratch registers; vec is a vector temp and
// the flags are clobbered.  The constant length picks the helper: 16 or
// more elements use string_indexofC8, shorter ones use string_indexof.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    Register    Rtmp    = $tmp$$Register;
    XMMRegister Xvec    = $vec$$XMMRegister;
    int icnt2 = (int)$int_cnt2$$constant;

    if (icnt2 < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                        Xvec, Rtmp, StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                          Xvec, Rtmp, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11609 
// Fast search of a substring with known (constant) size, UU encoding.
// Requires UseSSE42Intrinsics.  str1, str2 and cnt1 are destroyed; cnt2
// (rax) and tmp (rcx) are pure scratch registers; vec is a vector temp and
// the flags are clobbered.  The constant length picks the helper: 8 or more
// elements use string_indexofC8, shorter ones use string_indexof.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    Register    Rtmp    = $tmp$$Register;
    XMMRegister Xvec    = $vec$$XMMRegister;
    int icnt2 = (int)$int_cnt2$$constant;

    if (icnt2 < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                        Xvec, Rtmp, StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                          Xvec, Rtmp, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11638 
// Fast search of a substring with known (constant) size, UL encoding.
// Requires UseSSE42Intrinsics.  str1, str2 and cnt1 are destroyed; cnt2
// (rax) and tmp (rcx) are pure scratch registers; vec is a vector temp and
// the flags are clobbered.  The constant length picks the helper: 8 or more
// elements use string_indexofC8, shorter ones use string_indexof.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    Register    Rtmp    = $tmp$$Register;
    XMMRegister Xvec    = $vec$$XMMRegister;
    int icnt2 = (int)$int_cnt2$$constant;

    if (icnt2 < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                        Xvec, Rtmp, StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(Rstr1, Rstr2, Rcnt1, Rcnt2, icnt2, Rresult,
                          Xvec, Rtmp, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11667 
// StrIndexOf with a run-time substring length, LL encoding.
// Requires UseSSE42Intrinsics.  All four inputs (str1, cnt1, str2, cnt2) are
// destroyed, tmp (rcx) is scratch, vec is a vector temp and the flags are
// clobbered.  -1 is passed to string_indexof where the constant-length rules
// pass the known substring length.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    Rstr1   = $str1$$Register;
    Register    Rstr2   = $str2$$Register;
    Register    Rcnt1   = $cnt1$$Register;
    Register    Rcnt2   = $cnt2$$Register;
    Register    Rresult = $result$$Register;
    Register    Rtmp    = $tmp$$Register;
    XMMRegister Xvec    = $vec$$XMMRegister;
    __ string_indexof(Rstr1, Rstr2, Rcnt1, Rcnt2, (-1), Rresult,
                      Xvec, Rtmp, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11684 
11685 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11686                          rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
11687 %{
        // StrIndexOf where the substring length (cnt2) is a register operand.
        // Selected only when UseSSE42Intrinsics is set and the node's
        // encoding() is StrIntrinsicNode::UU.
11688   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11689   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
        // All four inputs are declared USE_KILL; vec is a TEMP; tmp and the
        // flags are declared KILL.
11690   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11691 
11692   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11693   ins_encode %{
          // NOTE(review): (-1) presumably marks the substring count as
          // non-constant -- confirm against MacroAssembler::string_indexof.
11694     __ string_indexof($str1$$Register, $str2$$Register,
11695                       $cnt1$$Register, $cnt2$$Register,
11696                       (-1), $result$$Register,
11697                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11698   %}
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11703                          rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
11704 %{
        // StrIndexOf where the substring length (cnt2) is a register operand.
        // Selected only when UseSSE42Intrinsics is set and the node's
        // encoding() is StrIntrinsicNode::UL.
11705   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11706   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
        // All four inputs are declared USE_KILL; vec is a TEMP; tmp and the
        // flags are declared KILL.
11707   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11708 
11709   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11710   ins_encode %{
          // NOTE(review): (-1) presumably marks the substring count as
          // non-constant -- confirm against MacroAssembler::string_indexof.
11711     __ string_indexof($str1$$Register, $str2$$Register,
11712                       $cnt1$$Register, $cnt2$$Register,
11713                       (-1), $result$$Register,
11714                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11715   %}
11716   ins_pipe( pipe_slow );
11717 %}
11718 
11719 instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11720                               rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
11721 %{
        // StrIndexOfChar: search (str1, cnt1) for the single character ch.
        // Selected only when UseSSE42Intrinsics is set.
11722   predicate(UseSSE42Intrinsics);
11723   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
        // Three vector temps plus tmp are TEMPs here (tmp is TEMP, not KILL as
        // in the StrIndexOf rules); str1, cnt1 and ch are USE_KILL.
11724   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11725   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11726   ins_encode %{
11727     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11728                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 // fast string equals
11734 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11735                        legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
11736 %{
        // StrEquals over (str1, str2) with an explicit count operand.
        // No predicate: this rule applies to every StrEquals node.
11737   match(Set result (StrEquals (Binary str1 str2) cnt));
11738   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11739 
11740   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11741   ins_encode %{
          // NOTE(review): the leading 'false' differs from the AryEq rules, which
          // pass 'true'; presumably it selects the "not an array object" path of
          // MacroAssembler::arrays_equals -- confirm there.
11742     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11743                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11744                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11745   %}
11746   ins_pipe( pipe_slow );
11747 %}
11748 
11749 // fast array equals
11750 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11751                        legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11752 %{
        // AryEq for nodes whose encoding() is StrIntrinsicNode::LL
        // (the format string describes this as byte[]).
11753   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11754   match(Set result (AryEq ary1 ary2));
11755   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11756 
11757   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11758   ins_encode %{
          // There is no count operand: tmp3 is passed in the position where
          // string_equals passes its cnt register.
11759     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11760                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11761                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11762   %}
11763   ins_pipe( pipe_slow );
11764 %}
11765 
11766 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11767                       legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11768 %{
        // AryEq for nodes whose encoding() is StrIntrinsicNode::UU
        // (the format string describes this as char[]).
11769   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11770   match(Set result (AryEq ary1 ary2));
11771   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11772 
11773   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11774   ins_encode %{
          // Same call as array_equalsB except for the final 'true /* char */'.
11775     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11776                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11777                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11778   %}
11779   ins_pipe( pipe_slow );
11780 %}
11781 
11782 instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
11783                       legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
11784 %{
        // HasNegatives over (ary1, len); the work is delegated to
        // MacroAssembler::has_negatives. No predicate is attached.
11785   match(Set result (HasNegatives ary1 len));
        // Both inputs are USE_KILL; the two vector operands are TEMPs.
11786   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11787 
11788   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11789   ins_encode %{
11790     __ has_negatives($ary1$$Register, $len$$Register,
11791                      $result$$Register, $tmp3$$Register,
11792                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11793   %}
11794   ins_pipe( pipe_slow );
11795 %}
11796 
11797 // fast char[] to byte[] compression
11798 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
11799                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy from src to dst for len elements; the value of the
        // node is produced in 'result' by MacroAssembler::char_array_compress.
11800   match(Set result (StrCompressedCopy src (Binary dst len)));
        // src, dst and len are USE_KILL; four vector TEMPs; tmp5 is KILL.
11801   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11802 
11803   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11804   ins_encode %{
11805     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11806                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11807                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11808   %}
11809   ins_pipe( pipe_slow );
11810 %}
11811 
11812 // fast byte[] to char[] inflation
11813 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11814                         legVecS tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy from src to dst for len elements.
        // NOTE(review): the match sets a 'Universe dummy' operand that the
        // encoding never references; presumably the node yields no register
        // value -- confirm against the StrInflatedCopy node definition.
11815   match(Set dummy (StrInflatedCopy src (Binary dst len)));
        // Unlike the sibling rules, the GPR temp (tmp2) is TEMP rather than KILL.
11816   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11817 
11818   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11819   ins_encode %{
11820     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11821                           $tmp1$$XMMRegister, $tmp2$$Register);
11822   %}
11823   ins_pipe( pipe_slow );
11824 %}
11825 
11826 // encode char[] to byte[] in ISO_8859_1
11827 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11828                           legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
11829                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray from src to dst for len elements; the value of the
        // node is produced in 'result' by MacroAssembler::encode_iso_array.
        // Operand layout and effects mirror string_compress.
11830   match(Set result (EncodeISOArray src (Binary dst len)));
11831   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11832 
11833   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
11834   ins_encode %{
11835     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11836                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11837                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11838   %}
11839   ins_pipe( pipe_slow );
11840 %}
11841 
11842 //----------Overflow Math Instructions-----------------------------------------
11843 
11844 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11845 %{
        // OverflowAddI, register + register. Only the flags (cr) are the
        // result of the node; the sum itself is discarded.
11846   match(Set cr (OverflowAddI op1 op2));
        // addl overwrites op1 with the sum, hence USE_KILL on op1.
11847   effect(DEF cr, USE_KILL op1, USE op2);
11848 
11849   format %{ "addl    $op1, $op2\t# overflow check int" %}
11850 
11851   ins_encode %{
11852     __ addl($op1$$Register, $op2$$Register);
11853   %}
11854   ins_pipe(ialu_reg_reg);
11855 %}
11856 
11857 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
11858 %{
        // OverflowAddI, register + int immediate. Only the flags (cr) are the
        // result of the node.
11859   match(Set cr (OverflowAddI op1 op2));
        // addl overwrites op1 with the sum, hence USE_KILL on op1.
11860   effect(DEF cr, USE_KILL op1, USE op2);
11861 
11862   format %{ "addl    $op1, $op2\t# overflow check int" %}
11863 
11864   ins_encode %{
11865     __ addl($op1$$Register, $op2$$constant);
11866   %}
11867   ins_pipe(ialu_reg_reg);
11868 %}
11869 
11870 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11871 %{
        // OverflowAddL, register + register. Only the flags (cr) are the
        // result of the node; the sum itself is discarded.
11872   match(Set cr (OverflowAddL op1 op2));
        // addq overwrites op1 with the sum, hence USE_KILL on op1.
11873   effect(DEF cr, USE_KILL op1, USE op2);
11874 
11875   format %{ "addq    $op1, $op2\t# overflow check long" %}
11876   ins_encode %{
11877     __ addq($op1$$Register, $op2$$Register);
11878   %}
11879   ins_pipe(ialu_reg_reg);
11880 %}
11881 
11882 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
11883 %{
        // OverflowAddL, register + immediate restricted to the immL32 operand
        // class. Only the flags (cr) are the result of the node.
11884   match(Set cr (OverflowAddL op1 op2));
        // addq overwrites op1 with the sum, hence USE_KILL on op1.
11885   effect(DEF cr, USE_KILL op1, USE op2);
11886 
11887   format %{ "addq    $op1, $op2\t# overflow check long" %}
11888   ins_encode %{
11889     __ addq($op1$$Register, $op2$$constant);
11890   %}
11891   ins_pipe(ialu_reg_reg);
11892 %}
11893 
11894 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11895 %{
        // OverflowSubI, register - register. Emitted as cmpl, which sets the
        // same flags as a subtraction without writing op1, so no effect clause
        // (and no fixed register) is needed.
11896   match(Set cr (OverflowSubI op1 op2));
11897 
11898   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11899   ins_encode %{
11900     __ cmpl($op1$$Register, $op2$$Register);
11901   %}
11902   ins_pipe(ialu_reg_reg);
11903 %}
11904 
11905 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11906 %{
        // OverflowSubI, register - int immediate. Emitted as cmpl so op1 is
        // left unmodified.
11907   match(Set cr (OverflowSubI op1 op2));
11908 
11909   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11910   ins_encode %{
11911     __ cmpl($op1$$Register, $op2$$constant);
11912   %}
11913   ins_pipe(ialu_reg_reg);
11914 %}
11915 
11916 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11917 %{
        // OverflowSubL, register - register. Emitted as cmpq so op1 is left
        // unmodified.
11918   match(Set cr (OverflowSubL op1 op2));
11919 
11920   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11921   ins_encode %{
11922     __ cmpq($op1$$Register, $op2$$Register);
11923   %}
11924   ins_pipe(ialu_reg_reg);
11925 %}
11926 
11927 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11928 %{
        // OverflowSubL, register - immediate restricted to the immL32 operand
        // class. Emitted as cmpq so op1 is left unmodified.
11929   match(Set cr (OverflowSubL op1 op2));
11930 
11931   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11932   ins_encode %{
11933     __ cmpq($op1$$Register, $op2$$constant);
11934   %}
11935   ins_pipe(ialu_reg_reg);
11936 %}
11937 
11938 instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
11939 %{
        // Special case of OverflowSubI whose first operand is the constant
        // zero (immI0): "0 - op2" is emitted as a single negl.
11940   match(Set cr (OverflowSubI zero op2));
        // negl overwrites op2, hence USE_KILL.
11941   effect(DEF cr, USE_KILL op2);
11942 
11943   format %{ "negl    $op2\t# overflow check int" %}
11944   ins_encode %{
11945     __ negl($op2$$Register);
11946   %}
11947   ins_pipe(ialu_reg_reg);
11948 %}
11949 
11950 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
11951 %{
        // Special case of OverflowSubL whose first operand is the constant
        // zero (immL0): "0 - op2" is emitted as a single negq.
11952   match(Set cr (OverflowSubL zero op2));
        // negq overwrites op2, hence USE_KILL.
11953   effect(DEF cr, USE_KILL op2);
11954 
11955   format %{ "negq    $op2\t# overflow check long" %}
11956   ins_encode %{
11957     __ negq($op2$$Register);
11958   %}
11959   ins_pipe(ialu_reg_reg);
11960 %}
11961 
11962 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11963 %{
        // OverflowMulI, register * register. Only the flags (cr) are the
        // result of the node.
11964   match(Set cr (OverflowMulI op1 op2));
        // The two-operand imull overwrites op1 with the product, hence USE_KILL.
11965   effect(DEF cr, USE_KILL op1, USE op2);
11966 
11967   format %{ "imull    $op1, $op2\t# overflow check int" %}
11968   ins_encode %{
11969     __ imull($op1$$Register, $op2$$Register);
11970   %}
11971   ins_pipe(ialu_reg_reg_alu0);
11972 %}
11973 
11974 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
11975 %{
        // OverflowMulI, register * int immediate. Only the flags (cr) are the
        // result of the node.
11976   match(Set cr (OverflowMulI op1 op2));
        // The three-operand imull writes the product to the TEMP register, so
        // op1 is only a USE and needs no fixed register.
11977   effect(DEF cr, TEMP tmp, USE op1, USE op2);
11978 
11979   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
11980   ins_encode %{
11981     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
11982   %}
11983   ins_pipe(ialu_reg_reg_alu0);
11984 %}
11985 
11986 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11987 %{
        // OverflowMulL, register * register. Only the flags (cr) are the
        // result of the node.
11988   match(Set cr (OverflowMulL op1 op2));
        // The two-operand imulq overwrites op1 with the product, hence USE_KILL.
11989   effect(DEF cr, USE_KILL op1, USE op2);
11990 
11991   format %{ "imulq    $op1, $op2\t# overflow check long" %}
11992   ins_encode %{
11993     __ imulq($op1$$Register, $op2$$Register);
11994   %}
11995   ins_pipe(ialu_reg_reg_alu0);
11996 %}
11997 
11998 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
11999 %{
        // OverflowMulL, register * immediate restricted to the immL32 operand
        // class. Only the flags (cr) are the result of the node.
12000   match(Set cr (OverflowMulL op1 op2));
        // The three-operand imulq writes the product to the TEMP register, so
        // op1 is only a USE and needs no fixed register.
12001   effect(DEF cr, TEMP tmp, USE op1, USE op2);
12002 
12003   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
12004   ins_encode %{
12005     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
12006   %}
12007   ins_pipe(ialu_reg_reg_alu0);
12008 %}
12009 
12010 
12011 //----------Control Flow Instructions------------------------------------------
12012 // Signed compare Instructions
12013 
12014 // XXX more variants!!
12015 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12016 %{
        // CmpI of two int registers, producing signed flags in cr.
        // Also instantiated by the expand blocks of minI_rReg and maxI_rReg.
12017   match(Set cr (CmpI op1 op2));
12018   effect(DEF cr, USE op1, USE op2);
12019 
12020   format %{ "cmpl    $op1, $op2" %}
12021   opcode(0x3B);  /* Opcode 3B /r */
        // Encoding is assembled from enc_classes defined elsewhere in the file:
        // REX_reg_reg, then the primary opcode (OpcP), then reg_reg.
12022   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12023   ins_pipe(ialu_cr_reg_reg);
12024 %}
12025 
12026 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12027 %{
        // CmpI of an int register against an int immediate.
12028   match(Set cr (CmpI op1 op2));
12029 
12030   format %{ "cmpl    $op1, $op2" %}
12031   opcode(0x81, 0x07); /* Opcode 81 /7 */
        // NOTE(review): OpcSErm/Con8or32 presumably choose between the
        // sign-extended imm8 and the imm32 form depending on the constant --
        // confirm in the enc_class definitions.
12032   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12033   ins_pipe(ialu_cr_reg_imm);
12034 %}
12035 
12036 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12037 %{
        // CmpI of an int register against a LoadI folded in as a memory operand.
12038   match(Set cr (CmpI op1 (LoadI op2)));
12039 
        // Explicit cost of 500; the original author flagged the value as
        // provisional ("XXX").
12040   ins_cost(500); // XXX
12041   format %{ "cmpl    $op1, $op2" %}
12042   opcode(0x3B); /* Opcode 3B /r */
12043   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12044   ins_pipe(ialu_cr_reg_mem);
12045 %}
12046 
12047 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
12048 %{
        // CmpI of an int register against constant zero (immI0), emitted as
        // "testl src, src" (opcode 0x85) instead of a compare with immediate.
12049   match(Set cr (CmpI src zero));
12050 
12051   format %{ "testl   $src, $src" %}
12052   opcode(0x85);
12053   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12054   ins_pipe(ialu_cr_reg_imm);
12055 %}
12056 
12057 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
12058 %{
        // Folds "(src & con) compared with 0" into one testl with an immediate,
        // so the AndI result never needs a register.
12059   match(Set cr (CmpI (AndI src con) zero));
12060 
12061   format %{ "testl   $src, $con" %}
12062   opcode(0xF7, 0x00);
        // Always emits a full 32-bit immediate (Con32).
12063   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
12064   ins_pipe(ialu_cr_reg_imm);
12065 %}
12066 
12067 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
12068 %{
        // Folds "(src & LoadI(mem)) compared with 0" into one testl with a
        // memory operand.
12069   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12070 
12071   format %{ "testl   $src, $mem" %}
12072   opcode(0x85);
12073   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
12074   ins_pipe(ialu_cr_reg_mem);
12075 %}
12076 
12077 // Fold array properties check
12078 instruct testI_mem_imm(rFlagsReg cr, memory mem, immI con, immI0 zero)
12079 %{
        // Tests bits of a narrow klass word directly in memory: matches
        // "(CastN2I(LoadNKlass mem) & con) compared with 0" and emits a single
        // testl with memory operand and 32-bit immediate.
12080   match(Set cr (CmpI (AndI (CastN2I (LoadNKlass mem)) con) zero));
12081 
12082   format %{ "testl   $mem, $con" %}
12083   opcode(0xF7, 0x00);
12084   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(con));
12085   ins_pipe(ialu_mem_imm);
12086 %}
12087 
12088 // Clear array property bits
12089 instruct clear_property_bits(rRegN dst, memory mem, immU31 mask, rFlagsReg cr)
12090 %{
        // Loads a narrow klass word from mem and masks it with an immU31
        // constant, yielding a narrow value in dst
        // (CastI2N(CastN2I(LoadNKlass mem) & mask)).
12091   match(Set dst (CastI2N (AndI (CastN2I (LoadNKlass mem)) mask)));
        // andl writes the flags.
12092   effect(KILL cr);
12093 
12094   format %{ "movl    $dst, $mem\t# clear property bits\n\t"
12095             "andl    $dst, $mask" %}
12096   ins_encode %{
12097     __ movl($dst$$Register, $mem$$Address);
12098     __ andl($dst$$Register, $mask$$constant);
12099   %}
12100   ins_pipe(ialu_reg_mem);
12101 %}
12102 
12103 // Unsigned compare Instructions; really, same as signed except they
12104 // produce an rFlagsRegU instead of rFlagsReg.
12105 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12106 %{
        // CmpU of two int registers. Same opcode and encoding as compI_rReg;
        // only the flags operand class (rFlagsRegU) differs.
12107   match(Set cr (CmpU op1 op2));
12108 
12109   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12110   opcode(0x3B); /* Opcode 3B /r */
12111   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12112   ins_pipe(ialu_cr_reg_reg);
12113 %}
12114 
12115 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12116 %{
        // CmpU of an int register against an int immediate. Same encoding as
        // compI_rReg_imm; only the flags operand class (rFlagsRegU) differs.
12117   match(Set cr (CmpU op1 op2));
12118 
12119   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12120   opcode(0x81,0x07); /* Opcode 81 /7 */
12121   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12122   ins_pipe(ialu_cr_reg_imm);
12123 %}
12124 
12125 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12126 %{
        // CmpU of an int register against a LoadI folded in as a memory
        // operand. Same encoding as compI_rReg_mem, with rFlagsRegU flags.
12127   match(Set cr (CmpU op1 (LoadI op2)));
12128 
        // Explicit cost of 500; flagged as provisional ("XXX") by the author.
12129   ins_cost(500); // XXX
12130   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12131   opcode(0x3B); /* Opcode 3B /r */
12132   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12133   ins_pipe(ialu_cr_reg_mem);
12134 %}
12135 
12136 // // // Cisc-spilled version of compU_rReg
12137 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12138 // //%{
12139 // //  match(Set cr (CmpU (LoadI op1) op2));
12140 // //
12141 // //  format %{ "CMPu   $op1,$op2" %}
12142 // //  ins_cost(500);
12143 // //  opcode(0x39);  /* Opcode 39 /r */
12144 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12145 // //%}
12146 
12147 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
12148 %{
        // CmpU of an int register against constant zero (immI0), emitted as
        // "testl src, src". Same encoding as testI_reg, with rFlagsRegU flags.
12149   match(Set cr (CmpU src zero));
12150 
12151   format %{ "testl   $src, $src\t# unsigned" %}
12152   opcode(0x85);
12153   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12154   ins_pipe(ialu_cr_reg_imm);
12155 %}
12156 
12157 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12158 %{
        // CmpP of two pointer registers; the flags operand is the unsigned
        // class (rFlagsRegU).
12159   match(Set cr (CmpP op1 op2));
12160 
12161   format %{ "cmpq    $op1, $op2\t# ptr" %}
12162   opcode(0x3B); /* Opcode 3B /r */
        // Uses the *_wide REX enc_class, matching the 64-bit cmpq in the format.
12163   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12164   ins_pipe(ialu_cr_reg_reg);
12165 %}
12166 
12167 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12168 %{
        // CmpP of a pointer register against a LoadP folded in as a memory
        // operand. No predicate: applies to any LoadP.
12169   match(Set cr (CmpP op1 (LoadP op2)));
12170 
        // Explicit cost of 500; flagged as provisional ("XXX") by the author.
        // compP_mem_rReg matches the same tree without an explicit cost.
12171   ins_cost(500); // XXX
12172   format %{ "cmpq    $op1, $op2\t# ptr" %}
12173   opcode(0x3B); /* Opcode 3B /r */
12174   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12175   ins_pipe(ialu_cr_reg_mem);
12176 %}
12177 
12178 // // // Cisc-spilled version of compP_rReg
12179 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12180 // //%{
12181 // //  match(Set cr (CmpP (LoadP op1) op2));
12182 // //
12183 // //  format %{ "CMPu   $op1,$op2" %}
12184 // //  ins_cost(500);
12185 // //  opcode(0x39);  /* Opcode 39 /r */
12186 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12187 // //%}
12188 
12189 // XXX this is generalized by compP_rReg_mem???
12190 // Compare raw pointer (used in out-of-heap check).
12191 // Only works because non-oop pointers must be raw pointers
12192 // and raw pointers have no anti-dependencies.
12193 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12194 %{
        // Same match tree and encoding as compP_rReg_mem, but restricted by
        // the predicate and carrying no explicit ins_cost.
        // The predicate inspects n->in(2)->in(2) and requires its bottom_type()
        // to have reloc() == relocInfo::none.
        // NOTE(review): n->in(2) is presumably the LoadP and its in(2) the
        // address input -- confirm against the node input layout.
12195   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
12196   match(Set cr (CmpP op1 (LoadP op2)));
12197 
12198   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12199   opcode(0x3B); /* Opcode 3B /r */
12200   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12201   ins_pipe(ialu_cr_reg_mem);
12202 %}
12203 
12204 // This will generate a signed flags result. This should be OK since
12205 // any compare to a zero should be eq/neq.
12206 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12207 %{
        // CmpP of a pointer register against the null constant (immP0),
        // emitted as "testq src, src". Note the signed flags class (rFlagsReg),
        // unlike the other CmpP rules which use rFlagsRegU.
12208   match(Set cr (CmpP src zero));
12209 
12210   format %{ "testq   $src, $src\t# ptr" %}
12211   opcode(0x85);
12212   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12213   ins_pipe(ialu_cr_reg_imm);
12214 %}
12215 
12216 // This will generate a signed flags result. This should be OK since
12217 // any compare to a zero should be eq/neq.
12218 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12219 %{
        // Null check of a pointer in memory without loading it into a
        // register. Enabled when compressed oops are off or the compressed-oop
        // base is non-NULL; testP_mem_reg0 handles the zero-base case.
12220   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
12221   match(Set cr (CmpP (LoadP op) zero));
12222 
12223   ins_cost(500); // XXX
12224   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12225   opcode(0xF7); /* Opcode F7 /0 */
        // Only a 32-bit immediate (0xFFFFFFFF) is emitted; the format shows it
        // as an all-ones 64-bit mask.
        // NOTE(review): presumably relies on the 64-bit TEST form
        // sign-extending imm32 -- confirm.
12226   ins_encode(REX_mem_wide(op),
12227              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
12228   ins_pipe(ialu_cr_reg_imm);
12229 %}
12230 
12231 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12232 %{
        // Null check of a pointer in memory by comparing it with R12.
        // Enabled only with compressed oops when both the compressed-oop base
        // and the compressed-klass base are NULL.
        // NOTE(review): per the format string, R12 (heap base) is expected to
        // hold zero in this configuration -- confirm.
12233   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
12234   match(Set cr (CmpP (LoadP mem) zero));
12235 
12236   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12237   ins_encode %{
12238     __ cmpq(r12, $mem$$Address);
12239   %}
12240   ins_pipe(ialu_cr_reg_mem);
12241 %}
12242 
12243 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12244 %{
        // CmpN of two narrow (compressed) pointer registers, emitted as a
        // 32-bit cmpl.
12245   match(Set cr (CmpN op1 op2));
12246 
12247   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12248   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12249   ins_pipe(ialu_cr_reg_reg);
12250 %}
12251 
12252 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12253 %{
        // CmpN of a narrow pointer register against a LoadN folded in as a
        // memory operand.
12254   match(Set cr (CmpN src (LoadN mem)));
12255 
12256   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12257   ins_encode %{
12258     __ cmpl($src$$Register, $mem$$Address);
12259   %}
12260   ins_pipe(ialu_cr_reg_mem);
12261 %}
12262 
12263 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // CmpN of a narrow pointer register against a narrow-oop constant
        // (immN). The constant is handed to cmp_narrow_oop as a jobject.
12264   match(Set cr (CmpN op1 op2));
12265 
12266   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12267   ins_encode %{
12268     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12269   %}
12270   ins_pipe(ialu_cr_reg_imm);
12271 %}
12272 
12273 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12274 %{
        // CmpN of a narrow-oop constant against a LoadN folded in as a memory
        // operand. In the match tree the constant is the FIRST CmpN input,
        // while the emitted compare takes the memory operand first.
        // NOTE(review): the operand order is swapped relative to the match
        // tree; presumably safe only for eq/ne users -- confirm.
12275   match(Set cr (CmpN src (LoadN mem)));
12276 
12277   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12278   ins_encode %{
12279     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12280   %}
12281   ins_pipe(ialu_cr_reg_mem);
12282 %}
12283 
12284 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
        // CmpN of a narrow register against a narrow-klass constant
        // (immNKlass). The constant is handed to cmp_narrow_klass as a Klass*.
12285   match(Set cr (CmpN op1 op2));
12286 
12287   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
12288   ins_encode %{
12289     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
12290   %}
12291   ins_pipe(ialu_cr_reg_imm);
12292 %}
12293 
12294 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
12295 %{
        // CmpN of a narrow-klass constant against a LoadNKlass folded in as a
        // memory operand. As in compN_mem_imm, the constant is the first CmpN
        // input but the emitted compare takes the memory operand first.
        // NOTE(review): presumably safe only for eq/ne users -- confirm.
12296   match(Set cr (CmpN src (LoadNKlass mem)));
12297 
12298   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12299   ins_encode %{
12300     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12301   %}
12302   ins_pipe(ialu_cr_reg_mem);
12303 %}
12304 
12305 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // CmpN of a narrow pointer register against the narrow null constant
        // (immN0), emitted as "testl src, src".
12306   match(Set cr (CmpN src zero));
12307 
12308   format %{ "testl   $src, $src\t# compressed ptr" %}
12309   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12310   ins_pipe(ialu_cr_reg_imm);
12311 %}
12312 
12313 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12314 %{
        // Null check of a narrow (compressed) pointer in memory without
        // loading it into a register. Enabled when the compressed-oop base is
        // non-NULL; testN_mem_reg0 handles the zero-base case.
12315   predicate(CompressedOops::base() != NULL);
12316   match(Set cr (CmpN (LoadN mem) zero));
12317 
12318   ins_cost(500); // XXX
12319   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
        // Emit TEST r/m32, imm32 (F7 /0 id) as the format states, using the
        // same enc_classes as testI_mem_imm and testP_mem. ZF is then set
        // exactly when the narrow oop in memory is zero. The previous
        // encoding emitted "cmpl mem, -1", which sets ZF only when the value
        // is 0xFFFFFFFF and so does not implement a compare against zero.
12320   opcode(0xF7, 0x00); /* Opcode F7 /0 */
12321   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),
12322              Con_d32(0xFFFFFFFF));
12323   ins_pipe(ialu_cr_reg_mem);
12324 %}
12325 
12326 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12327 %{
        // Null check of a narrow pointer in memory by comparing it with R12.
        // Enabled only when both the compressed-oop base and the
        // compressed-klass base are NULL.
        // NOTE(review): per the format string, R12 (heap base) is expected to
        // hold zero in this configuration -- confirm.
12328   predicate(CompressedOops::base() == NULL && (CompressedKlassPointers::base() == NULL));
12329   match(Set cr (CmpN (LoadN mem) zero));
12330 
12331   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12332   ins_encode %{
12333     __ cmpl(r12, $mem$$Address);
12334   %}
12335   ins_pipe(ialu_cr_reg_mem);
12336 %}
12337 
12338 // Yanked all unsigned pointer compare operations.
12339 // Pointer compares are done with CmpP which is already unsigned.
12340 
12341 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12342 %{
        // CmpL of two long registers, producing signed flags in cr.
12343   match(Set cr (CmpL op1 op2));
12344 
12345   format %{ "cmpq    $op1, $op2" %}
12346   opcode(0x3B);  /* Opcode 3B /r */
        // Uses the *_wide REX enc_class, matching the 64-bit cmpq in the format.
12347   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12348   ins_pipe(ialu_cr_reg_reg);
12349 %}
12350 
12351 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12352 %{
        // CmpL of a long register against an immediate restricted to the
        // immL32 operand class.
12353   match(Set cr (CmpL op1 op2));
12354 
12355   format %{ "cmpq    $op1, $op2" %}
12356   opcode(0x81, 0x07); /* Opcode 81 /7 */
12357   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12358   ins_pipe(ialu_cr_reg_imm);
12359 %}
12360 
12361 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12362 %{
        // CmpL of a long register against a LoadL folded in as a memory
        // operand. Unlike the int/ptr memory compares, no ins_cost is given.
12363   match(Set cr (CmpL op1 (LoadL op2)));
12364 
12365   format %{ "cmpq    $op1, $op2" %}
12366   opcode(0x3B); /* Opcode 3B /r */
12367   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12368   ins_pipe(ialu_cr_reg_mem);
12369 %}
12370 
12371 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12372 %{
        // CmpL of a long register against constant zero (immL0), emitted as
        // "testq src, src".
12373   match(Set cr (CmpL src zero));
12374 
12375   format %{ "testq   $src, $src" %}
12376   opcode(0x85);
12377   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12378   ins_pipe(ialu_cr_reg_imm);
12379 %}
12380 
12381 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12382 %{
        // Folds "(src & con) compared with 0" for longs into one testq with an
        // immediate; con is restricted to the immL32 operand class and is
        // emitted as a 32-bit immediate (Con32).
12383   match(Set cr (CmpL (AndL src con) zero));
12384 
12385   format %{ "testq   $src, $con\t# long" %}
12386   opcode(0xF7, 0x00);
12387   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12388   ins_pipe(ialu_cr_reg_imm);
12389 %}
12390 
12391 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12392 %{
        // Folds "(src & LoadL(mem)) compared with 0" into one testq with a
        // memory operand.
12393   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12394 
12395   format %{ "testq   $src, $mem" %}
12396   opcode(0x85);
12397   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12398   ins_pipe(ialu_cr_reg_mem);
12399 %}
12400 
12401 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
12402 %{
        // Variant of testL_reg_mem whose register operand is a pointer viewed
        // as an integer (CastP2X src); the encoding is identical.
12403   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
12404 
12405   format %{ "testq   $src, $mem" %}
12406   opcode(0x85);
12407   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12408   ins_pipe(ialu_cr_reg_mem);
12409 %}
12410 
12411 // Fold array properties check
12412 instruct testL_reg_mem3(rFlagsReg cr, memory mem, rRegL src, immL0 zero)
12413 %{
        // Tests bits of an uncompressed klass pointer directly in memory:
        // matches "(CastP2X(LoadKlass mem) & src) compared with 0" and emits a
        // single testq of src against the memory operand.
12414   match(Set cr (CmpL (AndL (CastP2X (LoadKlass mem)) src) zero));
12415 
12416   format %{ "testq   $src, $mem\t# test array properties" %}
12417   opcode(0x85);
12418   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12419   ins_pipe(ialu_cr_reg_mem);
12420 %}
12421 
12422 // Manifest a CmpL result in an integer register.  Very painful.
12423 // This is the test to avoid.
12424 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12425 %{
        // CmpL3: three-way compare of two longs with the result materialized
        // in an int register. Per the format below: dst is preset to -1 and
        // kept if src1 < src2; otherwise setne/movzbl leave 0 or 1 in dst.
12426   match(Set dst (CmpL3 src1 src2));
        // The flags are clobbered by the sequence, not produced as a result.
12427   effect(KILL flags);
12428 
12429   ins_cost(275); // XXX
12430   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12431             "movl    $dst, -1\n\t"
12432             "jl,s    done\n\t"
12433             "setne   $dst\n\t"
12434             "movzbl  $dst, $dst\n\t"
12435     "done:" %}
        // The byte sequence comes from the cmpl3_flag enc_class, defined
        // elsewhere in the file.
12436   ins_encode(cmpl3_flag(src1, src2, dst));
12437   ins_pipe(pipe_slow);
12438 %}
12439 
12440 // Unsigned long compare Instructions; really, same as signed long except they
12441 // produce an rFlagsRegU instead of rFlagsReg.
12442 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
12443 %{
        // CmpUL of two long registers. Same encoding as compL_rReg; only the
        // flags operand class (rFlagsRegU) differs.
12444   match(Set cr (CmpUL op1 op2));
12445 
12446   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12447   opcode(0x3B);  /* Opcode 3B /r */
12448   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12449   ins_pipe(ialu_cr_reg_reg);
12450 %}
12451 
12452 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
12453 %{
        // CmpUL of a long register against an immediate restricted to the
        // immL32 operand class. Same encoding as compL_rReg_imm, with
        // rFlagsRegU flags.
12454   match(Set cr (CmpUL op1 op2));
12455 
12456   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12457   opcode(0x81, 0x07); /* Opcode 81 /7 */
12458   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12459   ins_pipe(ialu_cr_reg_imm);
12460 %}
12461 
12462 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
12463 %{
        // CmpUL of a long register against a LoadL folded in as a memory
        // operand. Same encoding as compL_rReg_mem, with rFlagsRegU flags.
12464   match(Set cr (CmpUL op1 (LoadL op2)));
12465 
12466   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12467   opcode(0x3B); /* Opcode 3B /r */
12468   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12469   ins_pipe(ialu_cr_reg_mem);
12470 %}
12471 
12472 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
12473 %{
        // CmpUL of a long register against constant zero (immL0), emitted as
        // "testq src, src". Same encoding as testL_reg, with rFlagsRegU flags.
12474   match(Set cr (CmpUL src zero));
12475 
12476   format %{ "testq   $src, $src\t# unsigned" %}
12477   opcode(0x85);
12478   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12479   ins_pipe(ialu_cr_reg_imm);
12480 %}
12481 
12482 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
12483 %{
        // CmpI of a signed byte load (LoadB) against an immI8 constant, done
        // directly on memory with a byte-sized cmpb.
12484   match(Set cr (CmpI (LoadB mem) imm));
12485 
12486   ins_cost(125);
12487   format %{ "cmpb    $mem, $imm" %}
12488   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
12489   ins_pipe(ialu_cr_reg_mem);
12490 %}
12491 
12492 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU8 imm, immI0 zero)
12493 %{
        // Folds "(LoadUB(mem) & imm) compared with 0" into one byte-sized
        // testb on memory; imm is restricted to the immU8 operand class.
12494   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
12495 
12496   ins_cost(125);
12497   format %{ "testb   $mem, $imm\t# ubyte" %}
12498   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12499   ins_pipe(ialu_cr_reg_mem);
12500 %}
12501 
12502 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero)
12503 %{
        // Folds "(LoadB(mem) & imm) compared with 0" into one byte-sized testb
        // on memory; imm is restricted to the immI8 operand class.
12504   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
12505 
12506   ins_cost(125);
12507   format %{ "testb   $mem, $imm\t# byte" %}
12508   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12509   ins_pipe(ialu_cr_reg_mem);
12510 %}
12511 
12512 //----------Max and Min--------------------------------------------------------
12513 // Min Instructions
12514 
12515 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12516 %{
        // Helper with no match rule: it is instantiated by the expand block of
        // minI_rReg. Conditionally moves src into dst when the preceding
        // compare found dst greater than src (opcode 0F 4F).
12517   effect(USE_DEF dst, USE src, USE cr);
12518 
12519   format %{ "cmovlgt $dst, $src\t# min" %}
12520   opcode(0x0F, 0x4F);
12521   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12522   ins_pipe(pipe_cmov_reg);
12523 %}
12524 
12525 
12526 instruct minI_rReg(rRegI dst, rRegI src)
12527 %{
        // MinI with dst as both input and result. Expanded into a compare of
        // dst with src followed by a conditional move of src into dst; the
        // flags register is a fresh operand local to the expansion.
12528   match(Set dst (MinI dst src));
12529 
12530   ins_cost(200);
12531   expand %{
12532     rFlagsReg cr;
12533     compI_rReg(cr, dst, src);
12534     cmovI_reg_g(dst, src, cr);
12535   %}
12536 %}
12537 
12538 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12539 %{
        // Helper with no match rule: it is instantiated by the expand block of
        // maxI_rReg. Conditionally moves src into dst when the preceding
        // compare found dst less than src (opcode 0F 4C).
12540   effect(USE_DEF dst, USE src, USE cr);
12541 
12542   format %{ "cmovllt $dst, $src\t# max" %}
12543   opcode(0x0F, 0x4C);
12544   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12545   ins_pipe(pipe_cmov_reg);
12546 %}
12547 
12548 
12549 instruct maxI_rReg(rRegI dst, rRegI src)
12550 %{
        // Integer maximum computed in place in dst.
12551   match(Set dst (MaxI dst src));
12552 
12553   ins_cost(200);
        // No encoding of its own: the node expands into a compare of dst with src
        // (compI_rReg) into a locally declared flags operand, followed by the
        // conditional move cmovI_reg_l that consumes those flags.
12554   expand %{
12555     rFlagsReg cr;
12556     compI_rReg(cr, dst, src);
12557     cmovI_reg_l(dst, src, cr);
12558   %}
12559 %}
12560 
12561 // ============================================================================
12562 // Branch Instructions
12563 
12564 // Jump Direct - Label defines a relative address from JMP+1
12565 instruct jmpDir(label labl)
12566 %{
        // Unconditional branch for Goto. The label is an input only (USE).
12567   match(Goto);
12568   effect(USE labl);
12569 
12570   ins_cost(300);
12571   format %{ "jmp     $labl" %}
        // Fixed 5-byte size; the encode block requests the long form explicitly.
12572   size(5);
12573   ins_encode %{
12574     Label* L = $labl$$label;
12575     __ jmp(*L, false); // Always long jump
12576   %}
12577   ins_pipe(pipe_jmp);
12578 %}
12579 
12580 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12581 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12582 %{
        // Conditional branch for an If node whose condition is a cmpOp over
        // rFlagsReg. The condition code comes from the cop operand (cmpcode).
12583   match(If cop cr);
12584   effect(USE labl);
12585 
12586   ins_cost(300);
12587   format %{ "j$cop     $labl" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12588   size(6);
12589   ins_encode %{
12590     Label* L = $labl$$label;
12591     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12592   %}
12593   ins_pipe(pipe_jcc);
12594 %}
12595 
12596 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12597 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12598 %{
        // Back-branch for CountedLoopEnd with a cmpOp condition over rFlagsReg.
        // Applies only when the node does not have its vector mask set; the
        // jmpLoopEnd_and_restoreMask instruct carries the opposite predicate.
12599   predicate(!n->has_vector_mask_set());
12600   match(CountedLoopEnd cop cr);
12601   effect(USE labl);
12602 
12603   ins_cost(300);
12604   format %{ "j$cop     $labl\t# loop end" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12605   size(6);
12606   ins_encode %{
12607     Label* L = $labl$$label;
12608     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12609   %}
12610   ins_pipe(pipe_jcc);
12611 %}
12612 
12613 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12614 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // CountedLoopEnd back-branch for the cmpOpU / rFlagsRegU operand pair
        // (printed with a ",u" suffix). Applies only when the node does not
        // have its vector mask set.
12615   predicate(!n->has_vector_mask_set());
12616   match(CountedLoopEnd cop cmp);
12617   effect(USE labl);
12618 
12619   ins_cost(300);
12620   format %{ "j$cop,u   $labl\t# loop end" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12621   size(6);
12622   ins_encode %{
12623     Label* L = $labl$$label;
12624     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12625   %}
12626   ins_pipe(pipe_jcc);
12627 %}
12628 
12629 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd back-branch for the cmpOpUCF / rFlagsRegUCF operand pair.
        // Applies only when the node does not have its vector mask set.
12630   predicate(!n->has_vector_mask_set());
12631   match(CountedLoopEnd cop cmp);
12632   effect(USE labl);
12633 
        // Cost is 200 here, lower than the 300 used by jmpLoopEnd and jmpLoopEndU.
12634   ins_cost(200);
12635   format %{ "j$cop,u   $labl\t# loop end" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12636   size(6);
12637   ins_encode %{
12638     Label* L = $labl$$label;
12639     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12640   %}
12641   ins_pipe(pipe_jcc);
12642 %}
12643 
12644 // mask version
12645 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12646 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
12647 %{
        // CountedLoopEnd back-branch used when the node has its vector mask set.
        // After the conditional jump it also emits restorevectmask, so that code
        // sits on the fall-through path of the branch.
12648   predicate(n->has_vector_mask_set());
12649   match(CountedLoopEnd cop cr);
12650   effect(USE labl);
12651 
12652   ins_cost(400);
12653   format %{ "j$cop     $labl\t# loop end\n\t"
12654             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size is 10 bytes for the long jcc followed by restorevectmask.
12655   size(10);
12656   ins_encode %{
12657     Label* L = $labl$$label;
12658     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12659     __ restorevectmask();
12660   %}
12661   ins_pipe(pipe_jcc);
12662 %}
12663 
12664 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12665 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // cmpOpU / rFlagsRegU flavour of the mask-restoring CountedLoopEnd branch.
        // Used when the node has its vector mask set; restorevectmask is emitted
        // right after the conditional jump.
12666   predicate(n->has_vector_mask_set());
12667   match(CountedLoopEnd cop cmp);
12668   effect(USE labl);
12669 
12670   ins_cost(400);
12671   format %{ "j$cop,u   $labl\t# loop end\n\t"
12672             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size is 10 bytes for the long jcc followed by restorevectmask.
12673   size(10);
12674   ins_encode %{
12675     Label* L = $labl$$label;
12676     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12677     __ restorevectmask();
12678   %}
12679   ins_pipe(pipe_jcc);
12680 %}
12681 
12682 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // cmpOpUCF / rFlagsRegUCF flavour of the mask-restoring CountedLoopEnd
        // branch. Used when the node has its vector mask set; restorevectmask is
        // emitted right after the conditional jump.
12683   predicate(n->has_vector_mask_set());
12684   match(CountedLoopEnd cop cmp);
12685   effect(USE labl);
12686 
        // Cost is 300 here, lower than the 400 of the other two restoreMask variants.
12687   ins_cost(300);
12688   format %{ "j$cop,u   $labl\t# loop end\n\t"
12689             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size is 10 bytes for the long jcc followed by restorevectmask.
12690   size(10);
12691   ins_encode %{
12692     Label* L = $labl$$label;
12693     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12694     __ restorevectmask();
12695   %}
12696   ins_pipe(pipe_jcc);
12697 %}
12698 
12699 // Jump Direct Conditional - using unsigned comparison
12700 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Conditional branch for an If node whose condition uses the
        // cmpOpU / rFlagsRegU operand pair (printed with a ",u" suffix).
12701   match(If cop cmp);
12702   effect(USE labl);
12703 
12704   ins_cost(300);
12705   format %{ "j$cop,u   $labl" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12706   size(6);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710   %}
12711   ins_pipe(pipe_jcc);
12712 %}
12713 
12714 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for an If node whose condition uses the
        // cmpOpUCF / rFlagsRegUCF operand pair; a single jcc is emitted.
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
        // Cost is 200 here, lower than the 300 used by jmpCon and jmpConU.
12718   ins_cost(200);
12719   format %{ "j$cop,u   $labl" %}
        // Fixed 6-byte size; the encode block requests the long form explicitly.
12720   size(6);
12721   ins_encode %{
12722     Label* L = $labl$$label;
12723     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12724   %}
12725   ins_pipe(pipe_jcc);
12726 %}
12727 
12728 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for an If node with a cmpOpUCF2 condition. The encode
        // block accepts only notEqual and equal, and emits two jumps because the
        // parity flag has to be consulted as well:
        //   notEqual: branch to the label on parity, then branch on notEqual.
        //   equal:    hop over the branch on parity, otherwise branch on equal.
        // No size() attribute is declared; the two paths emit different sequences
        // (two long jcc versus one short jccb plus one long jcc).
        // NOTE(review): the equal path prints the parity jump as "jp,u" without
        // the ",s" marker although it is emitted with jccb - confirm if intended.
12729   match(If cop cmp);
12730   effect(USE labl);
12731 
12732   ins_cost(200);
12733   format %{ $$template
12734     if ($cop$$cmpcode == Assembler::notEqual) {
12735       $$emit$$"jp,u    $labl\n\t"
12736       $$emit$$"j$cop,u   $labl"
12737     } else {
12738       $$emit$$"jp,u    done\n\t"
12739       $$emit$$"j$cop,u   $labl\n\t"
12740       $$emit$$"done:"
12741     }
12742   %}
12743   ins_encode %{
12744     Label* l = $labl$$label;
12745     if ($cop$$cmpcode == Assembler::notEqual) {
            // Either flag condition takes the branch; both jumps use the long form.
12746       __ jcc(Assembler::parity, *l, false);
12747       __ jcc(Assembler::notEqual, *l, false);
12748     } else if ($cop$$cmpcode == Assembler::equal) {
            // Parity set skips the branch via a short jump to the local label.
12749       Label done;
12750       __ jccb(Assembler::parity, done);
12751       __ jcc(Assembler::equal, *l, false);
12752       __ bind(done);
12753     } else {
            // Any other condition code is treated as a fatal error.
12754        ShouldNotReachHere();
12755     }
12756   %}
12757   ins_pipe(pipe_jcc);
12758 %}
12759 
12760 // ============================================================================
12761 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12762 // superklass array for an instance of the superklass.  Set a hidden
12763 // internal cache on a hit (cache is checked with exposed code in
12764 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12765 // encoding ALSO sets flags.
12766 
12767 instruct partialSubtypeCheck(rdi_RegP result,
12768                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12769                              rFlagsReg cr)
12770 %{
        // Value-producing form of the slow-path subtype scan: the result operand
        // receives the outcome of PartialSubtypeCheck(sub, super). The operand
        // classes are the register-specific ones named in the signature; rcx and
        // the flags are declared as killed.
12771   match(Set result (PartialSubtypeCheck sub super));
12772   effect(KILL rcx, KILL cr);
12773 
12774   ins_cost(1100);  // slightly larger than the next version
        // The format text is a descriptive listing of the sequence produced by
        // enc_PartialSubtypeCheck; every annotation is introduced by '#'.
12775   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12776             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12777             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12778             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12779             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12780             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12781             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12782     "miss:\t" %}
12783 
        // The primary opcode doubles as a flag for the shared encoding class;
        // the _vs_Zero variant passes 0x0 instead.
12784   opcode(0x1); // Force a XOR of RDI
12785   ins_encode(enc_PartialSubtypeCheck());
12786   ins_pipe(pipe_slow);
12787 %}
12788 
12789 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12790                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12791                                      immP0 zero,
12792                                      rdi_RegP result)
12793 %{
        // Flags-producing form: matches the PartialSubtypeCheck result compared
        // with the null pointer constant, so only the flags are an output.
        // rcx and the result register are declared as killed.
12794   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12795   effect(KILL rcx, KILL result);
12796 
        // Cheaper than partialSubtypeCheck (1100), whose cost comment refers to
        // this instruct as "the next version".
12797   ins_cost(1000);
12798   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12799             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12800             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12801             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12802             "jne,s   miss\t\t# Missed: flags nz\n\t"
12803             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12804     "miss:\t" %}
12805 
        // Same encoding class as partialSubtypeCheck, selected with opcode 0x0.
12806   opcode(0x0); // No need to XOR RDI
12807   ins_encode(enc_PartialSubtypeCheck());
12808   ins_pipe(pipe_slow);
12809 %}
12810 
12811 // ============================================================================
12812 // Branch Instructions -- short offset versions
12813 //
12814 // These instructions are used to replace jumps of a long offset (the default
12815 // match) with jumps of a shorter offset.  These instructions are all tagged
12816 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12817 // match rules in general matching.  Instead, the ADLC generates a conversion
12818 // method in the MachNode which can be used to do in-place replacement of the
12819 // long variant with the shorter variant.  The compiler will determine if a
12820 // branch can be taken by the is_short_branch_offset() predicate in the machine
12821 // specific code section of the file.
12822 
12823 // Jump Direct - Label defines a relative address from JMP+1
12824 instruct jmpDir_short(label labl) %{
        // Short-offset counterpart of jmpDir: same Goto match rule, but a 2-byte
        // jmpb encoding. Tagged with ins_short_branch, so it is used as an in-place
        // replacement rather than in general matching (see the section comment).
12825   match(Goto);
12826   effect(USE labl);
12827 
12828   ins_cost(300);
12829   format %{ "jmp,s   $labl" %}
12830   size(2);
12831   ins_encode %{
12832     Label* L = $labl$$label;
12833     __ jmpb(*L);
12834   %}
12835   ins_pipe(pipe_jmp);
12836   ins_short_branch(1);
12837 %}
12838 
12839 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12840 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset counterpart of jmpCon: same If match rule, 2-byte jccb
        // encoding, tagged with ins_short_branch.
12841   match(If cop cr);
12842   effect(USE labl);
12843 
12844   ins_cost(300);
12845   format %{ "j$cop,s   $labl" %}
12846   size(2);
12847   ins_encode %{
12848     Label* L = $labl$$label;
12849     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12850   %}
12851   ins_pipe(pipe_jcc);
12852   ins_short_branch(1);
12853 %}
12854 
12855 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12856 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset CountedLoopEnd branch for cmpOp / rFlagsReg: 2-byte jccb,
        // tagged with ins_short_branch. Unlike the long loop-end instructs it
        // declares no predicate on the vector mask state.
12857   match(CountedLoopEnd cop cr);
12858   effect(USE labl);
12859 
12860   ins_cost(300);
12861   format %{ "j$cop,s   $labl\t# loop end" %}
12862   size(2);
12863   ins_encode %{
12864     Label* L = $labl$$label;
12865     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12866   %}
12867   ins_pipe(pipe_jcc);
12868   ins_short_branch(1);
12869 %}
12870 
12871 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12872 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset CountedLoopEnd branch for cmpOpU / rFlagsRegU: 2-byte jccb,
        // tagged with ins_short_branch. No vector mask predicate is declared.
12873   match(CountedLoopEnd cop cmp);
12874   effect(USE labl);
12875 
12876   ins_cost(300);
12877   format %{ "j$cop,us  $labl\t# loop end" %}
12878   size(2);
12879   ins_encode %{
12880     Label* L = $labl$$label;
12881     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12882   %}
12883   ins_pipe(pipe_jcc);
12884   ins_short_branch(1);
12885 %}
12886 
12887 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset CountedLoopEnd branch for cmpOpUCF / rFlagsRegUCF: 2-byte
        // jccb, tagged with ins_short_branch. No vector mask predicate is declared.
12888   match(CountedLoopEnd cop cmp);
12889   effect(USE labl);
12890 
        // Cost is 300 here, whereas the long jmpLoopEndUCF declares 200.
12891   ins_cost(300);
12892   format %{ "j$cop,us  $labl\t# loop end" %}
12893   size(2);
12894   ins_encode %{
12895     Label* L = $labl$$label;
12896     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12897   %}
12898   ins_pipe(pipe_jcc);
12899   ins_short_branch(1);
12900 %}
12901 
12902 // Jump Direct Conditional - using unsigned comparison
12903 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset counterpart of jmpConU: same If match rule, 2-byte jccb
        // encoding, tagged with ins_short_branch.
12904   match(If cop cmp);
12905   effect(USE labl);
12906 
12907   ins_cost(300);
12908   format %{ "j$cop,us  $labl" %}
12909   size(2);
12910   ins_encode %{
12911     Label* L = $labl$$label;
12912     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12913   %}
12914   ins_pipe(pipe_jcc);
12915   ins_short_branch(1);
12916 %}
12917 
12918 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset counterpart of jmpConUCF: same If match rule, 2-byte jccb
        // encoding, tagged with ins_short_branch.
12919   match(If cop cmp);
12920   effect(USE labl);
12921 
        // Cost is 300 here, whereas the long jmpConUCF declares 200.
12922   ins_cost(300);
12923   format %{ "j$cop,us  $labl" %}
12924   size(2);
12925   ins_encode %{
12926     Label* L = $labl$$label;
12927     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12928   %}
12929   ins_pipe(pipe_jcc);
12930   ins_short_branch(1);
12931 %}
12932 
12933 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset counterpart of jmpConUCF2, tagged with ins_short_branch.
        // Same two-jump scheme (parity first, then the condition), but every jump
        // is a jccb, so both paths emit two short jumps and size(4) is declared.
12934   match(If cop cmp);
12935   effect(USE labl);
12936 
12937   ins_cost(300);
12938   format %{ $$template
12939     if ($cop$$cmpcode == Assembler::notEqual) {
12940       $$emit$$"jp,u,s  $labl\n\t"
12941       $$emit$$"j$cop,u,s  $labl"
12942     } else {
12943       $$emit$$"jp,u,s  done\n\t"
12944       $$emit$$"j$cop,u,s  $labl\n\t"
12945       $$emit$$"done:"
12946     }
12947   %}
12948   size(4);
12949   ins_encode %{
12950     Label* l = $labl$$label;
12951     if ($cop$$cmpcode == Assembler::notEqual) {
            // Either flag condition takes the branch.
12952       __ jccb(Assembler::parity, *l);
12953       __ jccb(Assembler::notEqual, *l);
12954     } else if ($cop$$cmpcode == Assembler::equal) {
            // Parity set skips the branch via the local label.
12955       Label done;
12956       __ jccb(Assembler::parity, done);
12957       __ jccb(Assembler::equal, *l);
12958       __ bind(done);
12959     } else {
            // Any other condition code is treated as a fatal error.
12960        ShouldNotReachHere();
12961     }
12962   %}
12963   ins_pipe(pipe_jcc);
12964   ins_short_branch(1);
12965 %}
12966 
12967 // ============================================================================
12968 // inlined locking and unlocking
12969 
12970 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
        // FastLock variant selected when the current compilation uses RTM.
        // The outcome is delivered in the flags. tmp, scr, cx1 and cx2 are
        // scratch registers (TEMP) and box is consumed (USE_KILL).
12971   predicate(Compile::current()->use_rtm());
12972   match(Set cr (FastLock object box));
12973   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12974   ins_cost(300);
12975   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12976   ins_encode %{
          // Passes all six registers, the three counter fields and the method data
          // of the method being compiled. The trailing arguments are the constant
          // true and the profile_rtm() setting.
          // NOTE(review): true presumably is the use-rtm switch; confirm against
          // the fast_lock signature.
12977     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12978                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12979                  _counters, _rtm_counters, _stack_rtm_counters,
12980                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12981                  true, ra_->C->profile_rtm());
12982   %}
12983   ins_pipe(pipe_slow);
12984 %}
12985 
12986 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
        // FastLock variant selected when the current compilation does not use RTM.
        // The outcome is delivered in the flags. tmp and scr are scratch
        // registers (TEMP) and box is consumed (USE_KILL).
12987   predicate(!Compile::current()->use_rtm());
12988   match(Set cr (FastLock object box));
12989   effect(TEMP tmp, TEMP scr, USE_KILL box);
12990   ins_cost(300);
12991   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
12992   ins_encode %{
          // Same helper as the RTM variant, called with noreg for the two extra
          // registers, NULL for the RTM counters and method data, and false for
          // both trailing flags.
12993     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12994                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12995   %}
12996   ins_pipe(pipe_slow);
12997 %}
12998 
12999 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
        // FastUnlock with the outcome delivered in the flags. tmp is a scratch
        // register (TEMP) and box is consumed (USE_KILL). There is one instruct
        // for both modes: the RTM setting is passed to fast_unlock at emit time.
13000   match(Set cr (FastUnlock object box));
13001   effect(TEMP tmp, USE_KILL box);
13002   ins_cost(300);
13003   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13004   ins_encode %{
13005     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13006   %}
13007   ins_pipe(pipe_slow);
13008 %}
13009 
13010 
13011 // ============================================================================
13012 // Safepoint Instructions
13013 instruct safePoint_poll(rFlagsReg cr)
13014 %{
        // Safepoint poll used with the global polling page when that page is not
        // "far": the SafePoint node is matched without an address input and the
        // page is addressed through an AddressLiteral. Flags are killed.
13015   predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
13016   match(SafePoint);
13017   effect(KILL cr);
13018 
13019   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
13020             "# Safepoint: poll for GC" %}
13021   ins_cost(125);
13022   ins_encode %{
          // The address literal carries the poll_type relocation.
13023     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
13024     __ testl(rax, addr);
13025   %}
13026   ins_pipe(ialu_reg_mem);
13027 %}
13028 
13029 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
13030 %{
        // Safepoint poll used with the global polling page when that page is
        // "far": the SafePoint node supplies the poll address in a register,
        // which is only read (USE). Flags are killed.
13031   predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
13032   match(SafePoint poll);
13033   effect(KILL cr, USE poll);
13034 
13035   format %{ "testl   rax, [$poll]\t"
13036             "# Safepoint: poll for GC" %}
13037   ins_cost(125);
13038   ins_encode %{
          // Record the poll_type relocation, then test through the register
          // at displacement zero.
13039     __ relocate(relocInfo::poll_type);
13040     __ testl(rax, Address($poll$$Register, 0));
13041   %}
13042   ins_pipe(ialu_reg_mem);
13043 %}
13044 
13045 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13046 %{
        // Safepoint poll used when the thread-local poll mechanism is active.
        // The poll address arrives in a register that is only read (USE);
        // flags are killed.
13047   predicate(SafepointMechanism::uses_thread_local_poll());
13048   match(SafePoint poll);
13049   effect(KILL cr, USE poll);
13050 
13051   format %{ "testl   rax, [$poll]\t"
13052             "# Safepoint: poll for GC" %}
13053   ins_cost(125);
13054   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13055   ins_encode %{
13056     __ relocate(relocInfo::poll_type);
          // Remember where the instruction starts so the assert below can check
          // that the emitted bytes are recognised as a safepoint poll.
13057     address pre_pc = __ pc();
13058     __ testl(rax, Address($poll$$Register, 0));
13059     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13060   %}
13061   ins_pipe(ialu_reg_mem);
13062 %}
13063 
13064 // ============================================================================
13065 // Procedure Call/Return Instructions
13066 // Call Java Static Instruction
13067 // Note: If this code changes, the corresponding ret_addr_offset() and
13068 //       compute_padding() functions will have to be adjusted.
13069 instruct CallStaticJavaDirect(method meth) %{
        // Direct call for CallStaticJava. The encoding is a chain of three
        // encoding classes: clear_avx, Java_Static_Call on the method operand,
        // then call_epilog.
13070   match(CallStaticJava);
13071   effect(USE meth);
13072 
13073   ins_cost(300);
13074   format %{ "call,static " %}
13075   opcode(0xE8); /* E8 cd */
13076   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13077   ins_pipe(pipe_slow);
        // Requests an instruction alignment of 4.
13078   ins_alignment(4);
13079 %}
13080 
13081 // Call Java Dynamic Instruction
13082 // Note: If this code changes, the corresponding ret_addr_offset() and
13083 //       compute_padding() functions will have to be adjusted.
13084 instruct CallDynamicJavaDirect(method meth)
13085 %{
        // Call for CallDynamicJava. Per the format text, rax is loaded with
        // Universe::non_oop_word() ahead of the call. The encoding chains
        // clear_avx, Java_Dynamic_Call on the method operand and call_epilog.
13086   match(CallDynamicJava);
13087   effect(USE meth);
13088 
13089   ins_cost(300);
13090   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13091             "call,dynamic " %}
13092   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13093   ins_pipe(pipe_slow);
        // Requests an instruction alignment of 4.
13094   ins_alignment(4);
13095 %}
13096 
13097 // Call Runtime Instruction
13098 instruct CallRuntimeDirect(method meth)
13099 %{
        // Call for CallRuntime nodes: clear_avx followed by the Java_To_Runtime
        // encoding class on the method operand. No call_epilog or alignment
        // attribute is declared here.
13100   match(CallRuntime);
13101   effect(USE meth);
13102 
13103   ins_cost(300);
13104   format %{ "call,runtime " %}
13105   ins_encode(clear_avx, Java_To_Runtime(meth));
13106   ins_pipe(pipe_slow);
13107 %}
13108 
13109 // Call runtime without safepoint
13110 instruct CallLeafDirect(method meth)
13111 %{
        // Call for CallLeaf nodes. It uses the same encoding chain as
        // CallRuntimeDirect (clear_avx, then Java_To_Runtime); only the match
        // rule and the format text differ.
13112   match(CallLeaf);
13113   effect(USE meth);
13114 
13115   ins_cost(300);
13116   format %{ "call_leaf,runtime " %}
13117   ins_encode(clear_avx, Java_To_Runtime(meth));
13118   ins_pipe(pipe_slow);
13119 %}
13120 
13121 // Call runtime without safepoint
13122 // entry point is null, target holds the address to call
13123 instruct CallLeafNoFPInDirect(rRegP target)
13124 %{
        // Indirect form of CallLeafNoFP, chosen when the call node has no entry
        // point: the callee address is an input of the node and the call goes
        // through that register.
13125   predicate(n->as_Call()->entry_point() == NULL);
13126   match(CallLeafNoFP target);
13127 
13128   ins_cost(300);
13129   format %{ "call_leaf_nofp,runtime indirect " %}
13130   ins_encode %{
           // Only the register-indirect call is emitted; unlike the direct form,
           // no clear_avx encoding is part of this instruct.
13131      __ call($target$$Register);
13132   %}
13133 
13134   ins_pipe(pipe_slow);
13135 %}
13136 
13137 instruct CallLeafNoFPDirect(method meth)
13138 %{
        // Direct form of CallLeafNoFP, chosen when the call node has an entry
        // point. Encoded as clear_avx followed by Java_To_Runtime on the
        // method operand.
13139   predicate(n->as_Call()->entry_point() != NULL);
13140   match(CallLeafNoFP);
13141   effect(USE meth);
13142 
13143   ins_cost(300);
13144   format %{ "call_leaf_nofp,runtime " %}
13145   ins_encode(clear_avx, Java_To_Runtime(meth));
13146   ins_pipe(pipe_slow);
13147 %}
13148 
13149 // Return Instruction
13150 // Remove the return address & jump to it.
13151 // Notice: We always emit a nop after a ret to make sure there is room
13152 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with primary opcode 0xC3,
      // and no nop is visible in this instruct. Confirm whether the notice above
      // is stale or whether the nop is produced elsewhere.
13153 instruct Ret()
13154 %{
13155   match(Return);
13156 
13157   format %{ "ret" %}
13158   opcode(0xC3);
13159   ins_encode(OpcP);
13160   ins_pipe(pipe_jmp);
13161 %}
13162 
13163 // Tail Call; Jump from runtime stub to Java code.
13164 // Also known as an 'interprocedural jump'.
13165 // Target of jump will eventually return to caller.
13166 // TailJump below removes the return address.
13167 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
13168 %{
        // Register-indirect jump for TailCall. The jump target comes from the
        // no_rbp_RegP operand class and the method oop operand uses rbx_RegP.
        // Only the jump itself is encoded.
13169   match(TailCall jump_target method_oop);
13170 
13171   ins_cost(300);
13172   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
13173   opcode(0xFF, 0x4); /* Opcode FF /4 */
13174   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
13175   ins_pipe(pipe_jmp);
13176 %}
13177 
13178 // Tail Jump; remove the return address; jump to target.
13179 // TailCall above leaves the return address around.
13180 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13181 %{
        // Register-indirect jump for TailJump. A one-byte pop into rdx removes the
        // return address before the FF /4 jump through the target register. The
        // exception oop operand uses rax_RegP.
13182   match(TailJump jump_target ex_oop);
13183 
13184   ins_cost(300);
13185   format %{ "popq    rdx\t# pop return address\n\t"
13186             "jmp     $jump_target" %}
13187   opcode(0xFF, 0x4); /* Opcode FF /4 */
13188   ins_encode(Opcode(0x5a), // popq rdx
13189              REX_reg(jump_target), OpcP, reg_opc(jump_target));
13190   ins_pipe(pipe_jmp);
13191 %}
13192 
13193 // Create exception oop: created by stack-crawling runtime code.
13194 // Created exception is now available to this handler, and is setup
13195 // just prior to jumping to this handler.  No code emitted.
13196 instruct CreateException(rax_RegP ex_oop)
13197 %{
        // Placeholder for CreateEx: the result operand uses rax_RegP, the declared
        // size is zero and the encoding is empty, so the node produces no code.
13198   match(Set ex_oop (CreateEx));
13199 
13200   size(0);
13201   // use the following format syntax
13202   format %{ "# exception oop is in rax; no code emitted" %}
13203   ins_encode();
13204   ins_pipe(empty);
13205 %}
13206 
13207 // Rethrow exception:
13208 // The exception oop will come in the first argument position.
13209 // Then JUMP (not call) to the rethrow stub code.
13210 instruct RethrowException()
13211 %{
        // Rethrow has no operands here; all code comes from the enc_rethrow
        // encoding class, which the format text describes as a jump to the
        // rethrow stub.
13212   match(Rethrow);
13213 
13214   // use the following format syntax
13215   format %{ "jmp     rethrow_stub" %}
13216   ins_encode(enc_rethrow);
13217   ins_pipe(pipe_jmp);
13218 %}
13219 
13220 // ============================================================================
13221 // This name is KNOWN by the ADLC and cannot be changed.
13222 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13223 // for this guy.
13224 instruct tlsLoadP(r15_RegP dst) %{
        // ThreadLocal is satisfied without emitting code: the result operand uses
        // r15_RegP, the declared size is zero and the encoding is empty. The
        // format text states that the thread-local pointer lives in R15.
13225   match(Set dst (ThreadLocal));
13226   effect(DEF dst);
13227 
13228   size(0);
13229   format %{ "# TLS is in R15" %}
13230   ins_encode( /*empty encoding*/ );
13231   ins_pipe(ialu_reg_reg);
13232 %}
13233 
13234 
13235 //----------PEEPHOLE RULES-----------------------------------------------------
13236 // These must follow all instruction definitions as they use the names
13237 // defined in the instructions definitions.
13238 //
13239 // peepmatch ( root_instr_name [preceding_instruction]* );
13240 //
13241 // peepconstraint %{
13242 // (instruction_number.operand_name relational_op instruction_number.operand_name
13243 //  [, ...] );
13244 // // instruction numbers are zero-based using left to right order in peepmatch
13245 //
13246 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13247 // // provide an instruction_number.operand_name for each operand that appears
13248 // // in the replacement instruction's match rule
13249 //
13250 // ---------VM FLAGS---------------------------------------------------------
13251 //
13252 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13253 //
13254 // Each peephole rule is given an identifying number starting with zero and
13255 // increasing by one in the order seen by the parser.  An individual peephole
13256 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13257 // on the command-line.
13258 //
13259 // ---------CURRENT LIMITATIONS----------------------------------------------
13260 //
13261 // Only match adjacent instructions in same basic block
13262 // Only equality constraints
13263 // Only constraints between operands, not (0.dest_reg == RAX_enc)
13264 // Only one replacement instruction
13265 //
13266 // ---------EXAMPLE----------------------------------------------------------
13267 //
13268 // // pertinent parts of existing instructions in architecture description
13269 // instruct movI(rRegI dst, rRegI src)
13270 // %{
13271 //   match(Set dst (CopyI src));
13272 // %}
13273 //
13274 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
13275 // %{
13276 //   match(Set dst (AddI dst src));
13277 //   effect(KILL cr);
13278 // %}
13279 //
13280 // // Change (inc mov) to lea
13281 // peephole %{
13282 //   // increment preceded by register-register move
13283 //   peepmatch ( incI_rReg movI );
13284 //   // require that the destination register of the increment
13285 //   // match the destination register of the move
13286 //   peepconstraint ( 0.dst == 1.dst );
13287 //   // construct a replacement instruction that sets
13288 //   // the destination to ( move's source register + one )
13289 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13290 // %}
13291 //
13292 
13293 // Implementation no longer uses movX instructions since
13294 // machine-independent system no longer uses CopyX nodes.
13295 //
13296 // peephole
13297 // %{
13298 //   peepmatch (incI_rReg movI);
13299 //   peepconstraint (0.dst == 1.dst);
13300 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13301 // %}
13302 
13303 // peephole
13304 // %{
13305 //   peepmatch (decI_rReg movI);
13306 //   peepconstraint (0.dst == 1.dst);
13307 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13308 // %}
13309 
13310 // peephole
13311 // %{
13312 //   peepmatch (addI_rReg_imm movI);
13313 //   peepconstraint (0.dst == 1.dst);
13314 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13315 // %}
13316 
13317 // peephole
13318 // %{
13319 //   peepmatch (incL_rReg movL);
13320 //   peepconstraint (0.dst == 1.dst);
13321 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13322 // %}
13323 
13324 // peephole
13325 // %{
13326 //   peepmatch (decL_rReg movL);
13327 //   peepconstraint (0.dst == 1.dst);
13328 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13329 // %}
13330 
13331 // peephole
13332 // %{
13333 //   peepmatch (addL_rReg_imm movL);
13334 //   peepconstraint (0.dst == 1.dst);
13335 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13336 // %}
13337 
13338 // peephole
13339 // %{
13340 //   peepmatch (addP_rReg_imm movP);
13341 //   peepconstraint (0.dst == 1.dst);
13342 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13343 // %}
13344 
13345 // // Change load of spilled value to only a spill
13346 // instruct storeI(memory mem, rRegI src)
13347 // %{
13348 //   match(Set mem (StoreI mem src));
13349 // %}
13350 //
13351 // instruct loadI(rRegI dst, memory mem)
13352 // %{
13353 //   match(Set dst (LoadI mem));
13354 // %}
13355 //
13356 
13357 peephole
13358 %{
        // Instruction 0 is the root (loadI) and instruction 1 the one preceding
        // it (storeI). When the load reads back the register just stored, from
        // the same memory operand, the pair is replaced by the store alone.
13359   peepmatch (loadI storeI);
13360   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13361   peepreplace (storeI(1.mem 1.mem 1.src));
13362 %}
13363 
13364 peephole
13365 %{
        // Long flavour of the load-after-store rule. Instruction 0 is the root
        // (loadL) and instruction 1 the preceding storeL; with matching register
        // and memory operands the pair is replaced by the store alone.
13366   peepmatch (loadL storeL);
13367   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13368   peepreplace (storeL(1.mem 1.mem 1.src));
13369 %}
13370 
13371 //----------SMARTSPILL RULES---------------------------------------------------
13372 // These must follow all instruction definitions as they use the names
13373 // defined in the instructions definitions.