1 //
   2 // Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
     //
     // Every register is declared as a pair that shares one encoding:
     // NAME is bound to <reg>->as_VMReg() and NAME_H to the VMReg slot
     // that follows it (as_VMReg()->next()).
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
     // RSI and RDI differ only in the C convention column: save-on-entry
     // when _WIN64 is defined, save-on-call otherwise.
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
     // R8-R11 are save-on-call in both columns.
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
     // R12-R15 are save-on-call for compiled code but save-on-entry in the
     // C convention column.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
     //
     // chunk0 lists all sixteen general register pairs defined above, each
     // NAME immediately followed by its NAME_H; RSP is placed last.
 141 
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Class for all pointer registers (including RSP)
     // Contains both halves of every general register defined above,
     // R12 and R15 included.
 170 reg_class any_reg(RAX, RAX_H,
 171                   RDX, RDX_H,
 172                   RBP, RBP_H,
 173                   RDI, RDI_H,
 174                   RSI, RSI_H,
 175                   RCX, RCX_H,
 176                   RBX, RBX_H,
 177                   RSP, RSP_H,
 178                   R8,  R8_H,
 179                   R9,  R9_H,
 180                   R10, R10_H,
 181                   R11, R11_H,
 182                   R12, R12_H,
 183                   R13, R13_H,
 184                   R14, R14_H,
 185                   R15, R15_H);
 186 
 187 // Class for all pointer registers except RSP, R12 and R15
     // (R15 is the TLS pointer, see ptr_r15_reg; the reason R12 is left out
     // is not visible in this section -- presumably reserved, TODO confirm)
 188 reg_class ptr_reg(RAX, RAX_H,
 189                   RDX, RDX_H,
 190                   RBP, RBP_H,
 191                   RDI, RDI_H,
 192                   RSI, RSI_H,
 193                   RCX, RCX_H,
 194                   RBX, RBX_H,
 195                   R8,  R8_H,
 196                   R9,  R9_H,
 197                   R10, R10_H,
 198                   R11, R11_H,
 199                   R13, R13_H,
 200                   R14, R14_H);
 201 
 202 // Class for all pointer registers except RAX and RSP
     // (R12 and R15 are not listed either)
 203 reg_class ptr_no_rax_reg(RDX, RDX_H,
 204                          RBP, RBP_H,
 205                          RDI, RDI_H,
 206                          RSI, RSI_H,
 207                          RCX, RCX_H,
 208                          RBX, RBX_H,
 209                          R8,  R8_H,
 210                          R9,  R9_H,
 211                          R10, R10_H,
 212                          R11, R11_H,
 213                          R13, R13_H,
 214                          R14, R14_H);
 215 
     // Class for all pointer registers except RBP and RSP
     // (R12 and R15 are not listed either)
 216 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 217                          RAX, RAX_H,
 218                          RDI, RDI_H,
 219                          RSI, RSI_H,
 220                          RCX, RCX_H,
 221                          RBX, RBX_H,
 222                          R8,  R8_H,
 223                          R9,  R9_H,
 224                          R10, R10_H,
 225                          R11, R11_H,
 226                          R13, R13_H,
 227                          R14, R14_H);
 228 
 229 // Class for all pointer registers except RAX, RBX and RSP
     // (R12 and R15 are not listed either)
 230 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 231                              RBP, RBP_H,
 232                              RDI, RDI_H,
 233                              RSI, RSI_H,
 234                              RCX, RCX_H,
 235                              R8,  R8_H,
 236                              R9,  R9_H,
 237                              R10, R10_H,
 238                              R11, R11_H,
 239                              R13, R13_H,
 240                              R14, R14_H);
 241 
 242 // Singleton class for RAX pointer register
     // (each pointer singleton below lists both halves, NAME and NAME_H,
     // of exactly one register)
 243 reg_class ptr_rax_reg(RAX, RAX_H);
 244 
 245 // Singleton class for RBX pointer register
 246 reg_class ptr_rbx_reg(RBX, RBX_H);
 247 
 248 // Singleton class for RSI pointer register
 249 reg_class ptr_rsi_reg(RSI, RSI_H);
 250 
 251 // Singleton class for RDI pointer register
 252 reg_class ptr_rdi_reg(RDI, RDI_H);
 253 
 254 // Singleton class for RBP pointer register
 255 reg_class ptr_rbp_reg(RBP, RBP_H);
 256 
 257 // Singleton class for stack pointer
 258 reg_class ptr_rsp_reg(RSP, RSP_H);
 259 
 260 // Singleton class for TLS pointer
 261 reg_class ptr_r15_reg(R15, R15_H);
 262 
 263 // Class for all long registers (except RSP, R12 and R15)
     // Same register set as ptr_reg.
 264 reg_class long_reg(RAX, RAX_H,
 265                    RDX, RDX_H,
 266                    RBP, RBP_H,
 267                    RDI, RDI_H,
 268                    RSI, RSI_H,
 269                    RCX, RCX_H,
 270                    RBX, RBX_H,
 271                    R8,  R8_H,
 272                    R9,  R9_H,
 273                    R10, R10_H,
 274                    R11, R11_H,
 275                    R13, R13_H,
 276                    R14, R14_H);
 277 
 278 // Class for all long registers except RAX, RDX (and RSP, R12, R15)
 279 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 280                               RDI, RDI_H,
 281                               RSI, RSI_H,
 282                               RCX, RCX_H,
 283                               RBX, RBX_H,
 284                               R8,  R8_H,
 285                               R9,  R9_H,
 286                               R10, R10_H,
 287                               R11, R11_H,
 288                               R13, R13_H,
 289                               R14, R14_H);
 290 
 291 // Class for all long registers except RCX (and RSP, R12, R15)
 292 reg_class long_no_rcx_reg(RBP, RBP_H,
 293                           RDI, RDI_H,
 294                           RSI, RSI_H,
 295                           RAX, RAX_H,
 296                           RDX, RDX_H,
 297                           RBX, RBX_H,
 298                           R8,  R8_H,
 299                           R9,  R9_H,
 300                           R10, R10_H,
 301                           R11, R11_H,
 302                           R13, R13_H,
 303                           R14, R14_H);
 304 
 305 // Class for all long registers except RAX (and RSP, R12, R15)
 306 reg_class long_no_rax_reg(RBP, RBP_H,
 307                           RDX, RDX_H,
 308                           RDI, RDI_H,
 309                           RSI, RSI_H,
 310                           RCX, RCX_H,
 311                           RBX, RBX_H,
 312                           R8,  R8_H,
 313                           R9,  R9_H,
 314                           R10, R10_H,
 315                           R11, R11_H,
 316                           R13, R13_H,
 317                           R14, R14_H);
 318 
 319 // Singleton class for RAX long register
     // (long singletons list both halves of the register, like the pointer
     // singletons)
 320 reg_class long_rax_reg(RAX, RAX_H);
 321 
 322 // Singleton class for RCX long register
 323 reg_class long_rcx_reg(RCX, RCX_H);
 324 
 325 // Singleton class for RDX long register
 326 reg_class long_rdx_reg(RDX, RDX_H);
 327 
 328 // Class for all int registers (except RSP, R12 and R15)
     // Int classes list only NAME, never the NAME_H half.
 329 reg_class int_reg(RAX,
 330                   RDX,
 331                   RBP,
 332                   RDI,
 333                   RSI,
 334                   RCX,
 335                   RBX,
 336                   R8,
 337                   R9,
 338                   R10,
 339                   R11,
 340                   R13,
 341                   R14);
 342 
 343 // Class for all int registers except RCX (and RSP, R12, R15)
 344 reg_class int_no_rcx_reg(RAX,
 345                          RDX,
 346                          RBP,
 347                          RDI,
 348                          RSI,
 349                          RBX,
 350                          R8,
 351                          R9,
 352                          R10,
 353                          R11,
 354                          R13,
 355                          R14);
 356 
 357 // Class for all int registers except RAX, RDX (and RSP, R12, R15)
 358 reg_class int_no_rax_rdx_reg(RBP,
 359                              RDI,
 360                              RSI,
 361                              RCX,
 362                              RBX,
 363                              R8,
 364                              R9,
 365                              R10,
 366                              R11,
 367                              R13,
 368                              R14);
 369 
 370 // Singleton class for RAX int register
 371 reg_class int_rax_reg(RAX);
 372 
 373 // Singleton class for RBX int register
 374 reg_class int_rbx_reg(RBX);
 375 
 376 // Singleton class for RCX int register
 377 reg_class int_rcx_reg(RCX);
 378 
 379 // Singleton class for RDX int register
 380 reg_class int_rdx_reg(RDX);
 381 
 382 // Singleton class for RDI int register
 383 reg_class int_rdi_reg(RDI);
 384 
 385 // Singleton class for instruction pointer
 386 // reg_class ip_reg(RIP);
 387 
 388 %}
 389 
 390 //----------SOURCE BLOCK-------------------------------------------------------
 391 // This is a block of C++ code which provides values, functions, and
 392 // definitions necessary in the rest of the architecture description
 393 source %{
 394 #define   RELOC_IMM64    Assembler::imm_operand
 395 #define   RELOC_DISP32   Assembler::disp32_operand
     // Shorthand for the Assembler operand-format constants; RELOC_DISP32 is
     // what the emit_*_reloc helpers below receive as their `format` argument.
 396 
 397 #define __ _masm.
     // "__ foo(...)" expands to "_masm.foo(...)", so every function using it
     // needs a local or parameter named _masm.
 398 
 // Size in bytes of the SP-preserving register move placed in front of
 // method handle calls.
 static int preserve_SP_size() {
   // One byte each: REX.W prefix, opcode, ModRM (reg/reg).
   enum { rex_w_bytes = 1, opcode_bytes = 1, modrm_bytes = 1 };
   return rex_w_bytes + opcode_bytes + modrm_bytes;
 }
 402 static int clear_avx_size() {
 403   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 404 }
 405 
 406 // !!!!! Special hack to get all types of calls to specify the byte offset
 407 //       from the start of the call to the point where the return address
 408 //       will point.
 409 int MachCallStaticJavaNode::ret_addr_offset()
 410 {
 411   int offset = 5; // 5 bytes from start of call to where return address points
 412   offset += clear_avx_size();
 413   if (_method_handle_invoke)
 414     offset += preserve_SP_size();
 415   return offset;
 416 }
 417 
 418 int MachCallDynamicJavaNode::ret_addr_offset()
 419 {
 420   int offset = 15; // 15 bytes from start of call to where return address points
 421   offset += clear_avx_size();
 422   return offset;
 423 }
 424 
 425 int MachCallRuntimeNode::ret_addr_offset() {
 426   int offset = 13; // movq r10,#addr; callq (r10)
 427   offset += clear_avx_size();
 428   return offset;
 429 }
 430 
 431 // Indicate if the safepoint node needs the polling page as an input,
 432 // it does if the polling page is more than disp32 away.
 433 bool SafePointNode::needs_polling_address_input()
 434 {
 435   return Assembler::is_polling_page_far();
 436 }
 437 
 438 //
 439 // Compute padding required for nodes which need alignment
 440 //
 441 
 442 // The address of the call instruction needs to be 4-byte aligned to
 443 // ensure that it does not span a cache line so that it can be patched.
 444 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 445 {
 446   current_offset += clear_avx_size(); // skip vzeroupper
 447   current_offset += 1; // skip call opcode byte
 448   return round_to(current_offset, alignment_required()) - current_offset;
 449 }
 450 
 451 // The address of the call instruction needs to be 4-byte aligned to
 452 // ensure that it does not span a cache line so that it can be patched.
 453 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 454 {
 455   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 456   current_offset += clear_avx_size(); // skip vzeroupper
 457   current_offset += 1; // skip call opcode byte
 458   return round_to(current_offset, alignment_required()) - current_offset;
 459 }
 460 
 461 // The address of the call instruction needs to be 4-byte aligned to
 462 // ensure that it does not span a cache line so that it can be patched.
 463 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 464 {
 465   current_offset += clear_avx_size(); // skip vzeroupper
 466   current_offset += 11; // skip movq instruction + call opcode byte
 467   return round_to(current_offset, alignment_required()) - current_offset;
 468 }
 469 
 470 // EMIT_RM()
 471 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 472   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 473   cbuf.insts()->emit_int8(c);
 474 }
 475 
 476 // EMIT_CC()
 477 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 478   unsigned char c = (unsigned char) (f1 | f2);
 479   cbuf.insts()->emit_int8(c);
 480 }
 481 
 482 // EMIT_OPCODE()
 483 void emit_opcode(CodeBuffer &cbuf, int code) {
 484   cbuf.insts()->emit_int8((unsigned char) code);
 485 }
 486 
 487 // EMIT_OPCODE() w/ relocation information
 488 void emit_opcode(CodeBuffer &cbuf,
 489                  int code, relocInfo::relocType reloc, int offset, int format)
 490 {
 491   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 492   emit_opcode(cbuf, code);
 493 }
 494 
 495 // EMIT_D8()
 496 void emit_d8(CodeBuffer &cbuf, int d8) {
 497   cbuf.insts()->emit_int8((unsigned char) d8);
 498 }
 499 
 500 // EMIT_D16()
 501 void emit_d16(CodeBuffer &cbuf, int d16) {
 502   cbuf.insts()->emit_int16(d16);
 503 }
 504 
 505 // EMIT_D32()
 506 void emit_d32(CodeBuffer &cbuf, int d32) {
 507   cbuf.insts()->emit_int32(d32);
 508 }
 509 
 510 // EMIT_D64()
 511 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 512   cbuf.insts()->emit_int64(d64);
 513 }
 514 
 515 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 516 void emit_d32_reloc(CodeBuffer& cbuf,
 517                     int d32,
 518                     relocInfo::relocType reloc,
 519                     int format)
 520 {
 521   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 522   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 523   cbuf.insts()->emit_int32(d32);
 524 }
 525 
 526 // emit 32 bit value and construct relocation entry from RelocationHolder
 527 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 528 #ifdef ASSERT
 529   if (rspec.reloc()->type() == relocInfo::oop_type &&
 530       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 531     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 532     assert(cast_to_oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 533   }
 534 #endif
 535   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 536   cbuf.insts()->emit_int32(d32);
 537 }
 538 
 539 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 540   address next_ip = cbuf.insts_end() + 4;
 541   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 542                  external_word_Relocation::spec(addr),
 543                  RELOC_DISP32);
 544 }
 545 
 546 
 547 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 548 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 549   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 550   cbuf.insts()->emit_int64(d64);
 551 }
 552 
 553 // emit 64 bit value and construct relocation entry from RelocationHolder
 554 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 555 #ifdef ASSERT
 556   if (rspec.reloc()->type() == relocInfo::oop_type &&
 557       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 558     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 559     assert(cast_to_oop(d64)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d64)->is_scavengable()),
 560            "cannot embed scavengable oops in code");
 561   }
 562 #endif
 563   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 564   cbuf.insts()->emit_int64(d64);
 565 }
 566 
 567 // Access stack slot for load or store
 568 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 569 {
 570   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 571   if (-0x80 <= disp && disp < 0x80) {
 572     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 573     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 574     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 575   } else {
 576     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 577     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 578     emit_d32(cbuf, disp);     // Displacement // R/M byte
 579   }
 580 }
 581 
 582    // rRegI ereg, memory mem) %{    // emit_reg_mem
 583 void encode_RegMem(CodeBuffer &cbuf,
 584                    int reg,
 585                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 586 {
 587   assert(disp_reloc == relocInfo::none, "cannot have disp");
 588   int regenc = reg & 7;
 589   int baseenc = base & 7;
 590   int indexenc = index & 7;
 591 
 592   // There is no index & no scale, use form without SIB byte
 593   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 594     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 595     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 596       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 597     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 598       // If 8-bit displacement, mode 0x1
 599       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 600       emit_d8(cbuf, disp);
 601     } else {
 602       // If 32-bit displacement
 603       if (base == -1) { // Special flag for absolute address
 604         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 605         if (disp_reloc != relocInfo::none) {
 606           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 607         } else {
 608           emit_d32(cbuf, disp);
 609         }
 610       } else {
 611         // Normal base + offset
 612         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 613         if (disp_reloc != relocInfo::none) {
 614           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 615         } else {
 616           emit_d32(cbuf, disp);
 617         }
 618       }
 619     }
 620   } else {
 621     // Else, encode with the SIB byte
 622     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 623     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 624       // If no displacement
 625       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 626       emit_rm(cbuf, scale, indexenc, baseenc);
 627     } else {
 628       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 629         // If 8-bit displacement, mode 0x1
 630         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 631         emit_rm(cbuf, scale, indexenc, baseenc);
 632         emit_d8(cbuf, disp);
 633       } else {
 634         // If 32-bit displacement
 635         if (base == 0x04 ) {
 636           emit_rm(cbuf, 0x2, regenc, 0x4);
 637           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 638         } else {
 639           emit_rm(cbuf, 0x2, regenc, 0x4);
 640           emit_rm(cbuf, scale, indexenc, baseenc); // *
 641         }
 642         if (disp_reloc != relocInfo::none) {
 643           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 644         } else {
 645           emit_d32(cbuf, disp);
 646         }
 647       }
 648     }
 649   }
 650 }
 651 
 652 // This could be in MacroAssembler but it's fairly C2 specific
 653 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 654   Label exit;
 655   __ jccb(Assembler::noParity, exit);
 656   __ pushf();
 657   //
 658   // comiss/ucomiss instructions set ZF,PF,CF flags and
 659   // zero OF,AF,SF for NaN values.
 660   // Fixup flags by zeroing ZF,PF so that compare of NaN
 661   // values returns 'less than' result (CF is set).
 662   // Leave the rest of flags unchanged.
 663   //
 664   //    7 6 5 4 3 2 1 0
 665   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 666   //    0 0 1 0 1 0 1 1   (0x2B)
 667   //
 668   __ andq(Address(rsp, 0), 0xffffff2b);
 669   __ popf();
 670   __ bind(exit);
 671 }
 672 
 673 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 674   Label done;
 675   __ movl(dst, -1);
 676   __ jcc(Assembler::parity, done);
 677   __ jcc(Assembler::below, done);
 678   __ setb(Assembler::notEqual, dst);
 679   __ movzbl(dst, dst);
 680   __ bind(done);
 681 }
 682 
 683 
 684 //=============================================================================
 685 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
     // The output mask is the empty mask; emit() and size() for this node
     // are empty / zero as well.
 686 
 687 int Compile::ConstantTable::calculate_table_base_offset() const {
 688   return 0;  // absolute addressing, no offset
 689 }
 690 
 691 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 692   // Empty encoding
 693 }
 694 
 695 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 696   return 0;
 697 }
 698 
 699 #ifndef PRODUCT
 700 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 701   st->print("# MachConstantBaseNode (empty encoding)");
 702 }
 703 #endif
 704 
 705 
 706 //=============================================================================
 707 #ifndef PRODUCT
 708 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 709   Compile* C = ra_->C;
 710 
 711   int framesize = C->frame_slots() << LogBytesPerInt;
 712   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 713   // Remove wordSize for return addr which is already pushed.
 714   framesize -= wordSize;
 715 
 716   if (C->need_stack_bang(framesize)) {
 717     framesize -= wordSize;
 718     st->print("# stack bang");
 719     st->print("\n\t");
 720     st->print("pushq   rbp\t# Save rbp");
 721     if (framesize) {
 722       st->print("\n\t");
 723       st->print("subq    rsp, #%d\t# Create frame",framesize);
 724     }
 725   } else {
 726     st->print("subq    rsp, #%d\t# Create frame",framesize);
 727     st->print("\n\t");
 728     framesize -= wordSize;
 729     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 730   }
 731 
 732   if (VerifyStackAtCalls) {
 733     st->print("\n\t");
 734     framesize -= wordSize;
 735     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 736 #ifdef ASSERT
 737     st->print("\n\t");
 738     st->print("# stack alignment check");
 739 #endif
 740   }
 741   st->cr();
 742 }
 743 #endif
 744 
 745 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 746   Compile* C = ra_->C;
 747   MacroAssembler _masm(&cbuf);
 748 
 749   int framesize = C->frame_slots() << LogBytesPerInt;
 750 
 751   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 752 
 753   C->set_frame_complete(cbuf.insts_size());
 754 
 755   if (C->has_mach_constant_base_node()) {
 756     // NOTE: We set the table base offset here because users might be
 757     // emitted before MachConstantBaseNode.
 758     Compile::ConstantTable& constant_table = C->constant_table();
 759     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 760   }
 761 }
 762 
 763 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 764 {
 765   return MachNode::size(ra_); // too many variables; just compute it
 766                               // the hard way
 767 }
 768 
 769 int MachPrologNode::reloc() const
 770 {
 771   return 0; // a large enough number
 772 }
 773 
 774 //=============================================================================
 775 #ifndef PRODUCT
 776 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 777 {
 778   Compile* C = ra_->C;
 779   if (C->max_vector_size() > 16) {
 780     st->print("vzeroupper");
 781     st->cr(); st->print("\t");
 782   }
 783 
 784   int framesize = C->frame_slots() << LogBytesPerInt;
 785   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 786   // Remove word for return adr already pushed
 787   // and RBP
 788   framesize -= 2*wordSize;
 789 
 790   if (framesize) {
 791     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 792     st->print("\t");
 793   }
 794 
 795   st->print_cr("popq   rbp");
 796   if (do_polling() && C->is_method_compilation()) {
 797     st->print("\t");
 798     if (Assembler::is_polling_page_far()) {
 799       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 800                    "testl  rax, [rscratch1]\t"
 801                    "# Safepoint: poll for GC");
 802     } else {
 803       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 804                    "# Safepoint: poll for GC");
 805     }
 806   }
 807 }
 808 #endif
 809 
 810 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 811 {
       // Emits the method epilog into cbuf: an optional vzeroupper, release of
       // the frame (addq rsp, #framesize), popq rbp, and -- when do_polling()
       // is true for a method compilation -- the return safepoint poll.
 812   Compile* C = ra_->C;
 813   if (C->max_vector_size() > 16) {
 814     // Clear upper bits of YMM registers when current compiled code uses
 815     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 816     MacroAssembler _masm(&cbuf);
 817     __ vzeroupper();
 818   }
 819 
       // frame_slots() is scaled by LogBytesPerInt to obtain a byte count.
 820   int framesize = C->frame_slots() << LogBytesPerInt;
 821   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 822   // Remove word for return adr already pushed
 823   // and RBP
 824   framesize -= 2*wordSize;
 825 
 826   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 827 
 828   if (framesize) {
 829     emit_opcode(cbuf, Assembler::REX_W);
         // Immediates below 0x80 use opcode 0x83 with an 8-bit immediate;
         // anything larger uses opcode 0x81 with a 32-bit immediate.
 830     if (framesize < 0x80) {
 831       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 832       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 833       emit_d8(cbuf, framesize);
 834     } else {
 835       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 836       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 837       emit_d32(cbuf, framesize);
 838     }
 839   }
 840 
 841   // popq rbp
 842   emit_opcode(cbuf, 0x58 | RBP_enc);
 843 
 844   if (do_polling() && C->is_method_compilation()) {
 845     MacroAssembler _masm(&cbuf);
 846     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
         // Far polling page: load its address into rscratch1, record the
         // poll_return relocation, then read through the register.
         // Otherwise test against the polling_page AddressLiteral directly.
 847     if (Assembler::is_polling_page_far()) {
 848       __ lea(rscratch1, polling_page);
 849       __ relocate(relocInfo::poll_return_type);
 850       __ testl(rax, Address(rscratch1, 0));
 851     } else {
 852       __ testl(rax, polling_page);
 853     }
 854   }
 855 }
 856 
 857 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 858 {
       // The epilog's size is not derived analytically here; the request is
       // forwarded to the generic MachNode::size(ra_).
 859   return MachNode::size(ra_); // too many variables; just compute it
 860                               // the hard way
 861 }
 862 
 863 int MachEpilogNode::reloc() const
 864 {
       // NOTE(review): presumably an upper bound on the relocation entries the
       // epilog needs (emit() attaches poll_return_type relocation to the
       // safepoint poll) -- confirm against callers.
 865   return 2; // a large enough number
 866 }
 867 
 868 const Pipeline* MachEpilogNode::pipeline() const
 869 {
       // No epilog-specific pipeline description: returns whatever
       // MachNode::pipeline_class() provides.
 870   return MachNode::pipeline_class();
 871 }
 872 
 873 int MachEpilogNode::safepoint_offset() const
 874 {
       // Always reports 0.
       // NOTE(review): the unit/base of this offset is not visible here --
       // confirm against the callers of safepoint_offset().
 875   return 0;
 876 }
 877 
 878 //=============================================================================
 879 
 880 enum RC {           // Location class of an allocated register, see rc_class()
 881   rc_bad,           // OptoReg is not valid
 882   rc_int,           // general register (VMReg::is_Register())
 883   rc_float,         // XMM register (VMReg::is_XMMRegister())
 884   rc_stack          // stack slot (OptoReg::is_stack())
 885 };
 886 
 887 static enum RC rc_class(OptoReg::Name reg)
 888 {
       // Classifies reg as rc_bad (invalid), rc_stack, rc_int or rc_float.
       // The checks are ordered: validity first, then stack, then the VMReg
       // kind; anything that is not a general register must be an XMM register.
 889   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 890 
 891   if (OptoReg::is_stack(reg)) return rc_stack;
 892 
 893   VMReg r = OptoReg::as_VMReg(reg);
 894 
 895   if (r->is_Register()) return rc_int;
 896 
 897   assert(r->is_XMMRegister(), "must be");
 898   return rc_float;
 899 }
 900 
 901 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
     // vec_mov_helper is called in this file for XMM -> XMM vector copies.
 902 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 903                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 904 
     // vec_spill_helper is called in this file for vector moves between an XMM
     // register and a stack slot; is_load is passed as true for stack -> XMM
     // and as false for XMM -> stack.
 905 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 906                             int stack_offset, int reg, uint ireg, outputStream* st);
 907 
 908 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 909                                       int dst_offset, uint ireg, outputStream* st) {
       // Copies a vector value from [rsp + src_offset] to [rsp + dst_offset].
       //   cbuf != NULL : the instructions are emitted into cbuf.
       //   cbuf == NULL : in non-PRODUCT builds the same sequence is printed to
       //                  st; in PRODUCT builds nothing is done.
       // ireg selects the width: Op_VecS (32-bit), Op_VecD (64-bit),
       // Op_VecX (128-bit) or Op_VecY (256-bit); any other value hits
       // ShouldNotReachHere().
 910   if (cbuf) {
 911     MacroAssembler _masm(cbuf);
 912     switch (ireg) {
 913     case Op_VecS:
           // rax is borrowed as a temporary: its value is parked at [rsp - 8]
           // and restored after the 32-bit copy.
 914       __ movq(Address(rsp, -8), rax);
 915       __ movl(rax, Address(rsp, src_offset));
 916       __ movl(Address(rsp, dst_offset), rax);
 917       __ movq(rax, Address(rsp, -8));
 918       break;
 919     case Op_VecD:
 920       __ pushq(Address(rsp, src_offset));
 921       __ popq (Address(rsp, dst_offset));
 922       break;
 923     case Op_VecX:
           // Two 64-bit push/pop pairs: low half, then the half at offset +8.
 924       __ pushq(Address(rsp, src_offset));
 925       __ popq (Address(rsp, dst_offset));
 926       __ pushq(Address(rsp, src_offset+8));
 927       __ popq (Address(rsp, dst_offset+8));
 928       break;
 929     case Op_VecY:
           // xmm0 is borrowed as a temporary: its value is parked at
           // [rsp - 32] and restored after the copy.
 930       __ vmovdqu(Address(rsp, -32), xmm0);
 931       __ vmovdqu(xmm0, Address(rsp, src_offset));
 932       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 933       __ vmovdqu(xmm0, Address(rsp, -32));
 934       break;
 935     default:
 936       ShouldNotReachHere();
 937     }
 938 #ifndef PRODUCT
 939   } else {
         // Textual counterpart of the sequences emitted above.
 940     switch (ireg) {
 941     case Op_VecS:
 942       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 943                 "movl    rax, [rsp + #%d]\n\t"
 944                 "movl    [rsp + #%d], rax\n\t"
 945                 "movq    rax, [rsp - #8]",
 946                 src_offset, dst_offset);
 947       break;
 948     case Op_VecD:
 949       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 950                 "popq    [rsp + #%d]",
 951                 src_offset, dst_offset);
 952       break;
 953      case Op_VecX:
 954       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 955                 "popq    [rsp + #%d]\n\t"
 956                 "pushq   [rsp + #%d]\n\t"
 957                 "popq    [rsp + #%d]",
 958                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 959       break;
 960     case Op_VecY:
 961       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 962                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 963                 "vmovdqu [rsp + #%d], xmm0\n\t"
 964                 "vmovdqu xmm0, [rsp - #32]",
 965                 src_offset, dst_offset);
 966       break;
 967     default:
 968       ShouldNotReachHere();
 969     }
 970 #endif
 971   }
 972 }
 973 
 974 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 975                                        PhaseRegAlloc* ra_,
 976                                        bool do_size,
 977                                        outputStream* st) const {
       // Emits (cbuf != NULL) or, in non-PRODUCT builds, prints to st
       // (cbuf == NULL) the move that copies the location allocated to in(1)
       // into the location allocated to this node.
       //
       // Each location is classified with rc_class() as stack slot, general
       // register or XMM register, and every source/destination pairing has
       // its own instruction selection below.  A move is treated as 64-bit
       // when, for both source and destination, the first half is
       // even-numbered and immediately followed by the second half; every
       // other case is moved as 32 bits.
       //
       // do_size is not read in this body, and every path returns 0.
 978   assert(cbuf != NULL || st  != NULL, "sanity");
 979   // Get registers to move
 980   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 981   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 982   OptoReg::Name dst_second = ra_->get_reg_second(this);
 983   OptoReg::Name dst_first = ra_->get_reg_first(this);
 984 
 985   enum RC src_second_rc = rc_class(src_second);
 986   enum RC src_first_rc = rc_class(src_first);
 987   enum RC dst_second_rc = rc_class(dst_second);
 988   enum RC dst_first_rc = rc_class(dst_first);
 989 
 990   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 991          "must move at least 1 register" );
 992 
 993   if (src_first == dst_first && src_second == dst_second) {
 994     // Self copy, no move
 995     return 0;
 996   }
       // Vector values are handled entirely by the vector helpers; general
       // registers are not expected on either side.
 997   if (bottom_type()->isa_vect() != NULL) {
 998     uint ireg = ideal_reg();
 999     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1000     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
1001     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1002       // mem -> mem
1003       int src_offset = ra_->reg2offset(src_first);
1004       int dst_offset = ra_->reg2offset(dst_first);
1005       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1006     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1007       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1008     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1009       int stack_offset = ra_->reg2offset(dst_first);
1010       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1011     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1012       int stack_offset = ra_->reg2offset(src_first);
1013       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1014     } else {
1015       ShouldNotReachHere();
1016     }
1017     return 0;
1018   }
1019   if (src_first_rc == rc_stack) {
1020     // mem ->
1021     if (dst_first_rc == rc_stack) {
1022       // mem -> mem
1023       assert(src_second != dst_first, "overlap");
1024       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1025           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1026         // 64-bit
1027         int src_offset = ra_->reg2offset(src_first);
1028         int dst_offset = ra_->reg2offset(dst_first);
1029         if (cbuf) {
1030           MacroAssembler _masm(cbuf);
1031           __ pushq(Address(rsp, src_offset));
1032           __ popq (Address(rsp, dst_offset));
1033 #ifndef PRODUCT
1034         } else {
1035           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                     "popq    [rsp + #%d]",
1037                      src_offset, dst_offset);
1038 #endif
1039         }
1040       } else {
1041         // 32-bit
1042         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1043         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1044         // No pushl/popl, so:
             // rax is borrowed as a temporary: its value is parked at
             // [rsp - 8] and restored after the copy.
1045         int src_offset = ra_->reg2offset(src_first);
1046         int dst_offset = ra_->reg2offset(dst_first);
1047         if (cbuf) {
1048           MacroAssembler _masm(cbuf);
1049           __ movq(Address(rsp, -8), rax);
1050           __ movl(rax, Address(rsp, src_offset));
1051           __ movl(Address(rsp, dst_offset), rax);
1052           __ movq(rax, Address(rsp, -8));
1053 #ifndef PRODUCT
1054         } else {
1055           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1056                     "movl    rax, [rsp + #%d]\n\t"
1057                     "movl    [rsp + #%d], rax\n\t"
1058                     "movq    rax, [rsp - #8]",
1059                      src_offset, dst_offset);
1060 #endif
1061         }
1062       }
1063       return 0;
1064     } else if (dst_first_rc == rc_int) {
1065       // mem -> gpr
1066       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1067           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1068         // 64-bit
1069         int offset = ra_->reg2offset(src_first);
1070         if (cbuf) {
1071           MacroAssembler _masm(cbuf);
1072           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1073 #ifndef PRODUCT
1074         } else {
1075           st->print("movq    %s, [rsp + #%d]\t# spill",
1076                      Matcher::regName[dst_first],
1077                      offset);
1078 #endif
1079         }
1080       } else {
1081         // 32-bit
1082         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1083         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1084         int offset = ra_->reg2offset(src_first);
1085         if (cbuf) {
1086           MacroAssembler _masm(cbuf);
1087           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1088 #ifndef PRODUCT
1089         } else {
1090           st->print("movl    %s, [rsp + #%d]\t# spill",
1091                      Matcher::regName[dst_first],
1092                      offset);
1093 #endif
1094         }
1095       }
1096       return 0;
1097     } else if (dst_first_rc == rc_float) {
1098       // mem-> xmm
1099       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1100           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1101         // 64-bit
1102         int offset = ra_->reg2offset(src_first);
1103         if (cbuf) {
1104           MacroAssembler _masm(cbuf);
1105           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1106 #ifndef PRODUCT
1107         } else {
1108           st->print("%s  %s, [rsp + #%d]\t# spill",
1109                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1110                      Matcher::regName[dst_first],
1111                      offset);
1112 #endif
1113         }
1114       } else {
1115         // 32-bit
1116         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1117         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1118         int offset = ra_->reg2offset(src_first);
1119         if (cbuf) {
1120           MacroAssembler _masm(cbuf);
1121           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1122 #ifndef PRODUCT
1123         } else {
1124           st->print("movss   %s, [rsp + #%d]\t# spill",
1125                      Matcher::regName[dst_first],
1126                      offset);
1127 #endif
1128         }
1129       }
1130       return 0;
1131     }
1132   } else if (src_first_rc == rc_int) {
1133     // gpr ->
1134     if (dst_first_rc == rc_stack) {
1135       // gpr -> mem
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int offset = ra_->reg2offset(dst_first);
1140         if (cbuf) {
1141           MacroAssembler _masm(cbuf);
1142           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1143 #ifndef PRODUCT
1144         } else {
1145           st->print("movq    [rsp + #%d], %s\t# spill",
1146                      offset,
1147                      Matcher::regName[src_first]);
1148 #endif
1149         }
1150       } else {
1151         // 32-bit
1152         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1153         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1154         int offset = ra_->reg2offset(dst_first);
1155         if (cbuf) {
1156           MacroAssembler _masm(cbuf);
1157           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1158 #ifndef PRODUCT
1159         } else {
1160           st->print("movl    [rsp + #%d], %s\t# spill",
1161                      offset,
1162                      Matcher::regName[src_first]);
1163 #endif
1164         }
1165       }
1166       return 0;
1167     } else if (dst_first_rc == rc_int) {
1168       // gpr -> gpr
1169       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1170           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1171         // 64-bit
1172         if (cbuf) {
1173           MacroAssembler _masm(cbuf);
1174           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1175                   as_Register(Matcher::_regEncode[src_first]));
1176 #ifndef PRODUCT
1177         } else {
1178           st->print("movq    %s, %s\t# spill",
1179                      Matcher::regName[dst_first],
1180                      Matcher::regName[src_first]);
1181 #endif
1182         }
1183         return 0;
1184       } else {
1185         // 32-bit
1186         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1187         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1188         if (cbuf) {
1189           MacroAssembler _masm(cbuf);
1190           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1191                   as_Register(Matcher::_regEncode[src_first]));
1192 #ifndef PRODUCT
1193         } else {
1194           st->print("movl    %s, %s\t# spill",
1195                      Matcher::regName[dst_first],
1196                      Matcher::regName[src_first]);
1197 #endif
1198         }
1199         return 0;
1200       }
1201     } else if (dst_first_rc == rc_float) {
1202       // gpr -> xmm
1203       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1204           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1205         // 64-bit
1206         if (cbuf) {
1207           MacroAssembler _masm(cbuf);
1208           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1209 #ifndef PRODUCT
1210         } else {
1211           st->print("movdq   %s, %s\t# spill",
1212                      Matcher::regName[dst_first],
1213                      Matcher::regName[src_first]);
1214 #endif
1215         }
1216       } else {
1217         // 32-bit
1218         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1219         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1220         if (cbuf) {
1221           MacroAssembler _masm(cbuf);
1222           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1223 #ifndef PRODUCT
1224         } else {
1225           st->print("movdl   %s, %s\t# spill",
1226                      Matcher::regName[dst_first],
1227                      Matcher::regName[src_first]);
1228 #endif
1229         }
1230       }
1231       return 0;
1232     }
1233   } else if (src_first_rc == rc_float) {
1234     // xmm ->
1235     if (dst_first_rc == rc_stack) {
1236       // xmm -> mem
1237       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1238           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1239         // 64-bit
1240         int offset = ra_->reg2offset(dst_first);
1241         if (cbuf) {
1242           MacroAssembler _masm(cbuf);
1243           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1244 #ifndef PRODUCT
1245         } else {
1246           st->print("movsd   [rsp + #%d], %s\t# spill",
1247                      offset,
1248                      Matcher::regName[src_first]);
1249 #endif
1250         }
1251       } else {
1252         // 32-bit
1253         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1254         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1255         int offset = ra_->reg2offset(dst_first);
1256         if (cbuf) {
1257           MacroAssembler _masm(cbuf);
1258           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1259 #ifndef PRODUCT
1260         } else {
1261           st->print("movss   [rsp + #%d], %s\t# spill",
1262                      offset,
1263                      Matcher::regName[src_first]);
1264 #endif
1265         }
1266       }
1267       return 0;
1268     } else if (dst_first_rc == rc_int) {
1269       // xmm -> gpr
1270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1272         // 64-bit
1273         if (cbuf) {
1274           MacroAssembler _masm(cbuf);
1275           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1276 #ifndef PRODUCT
1277         } else {
1278           st->print("movdq   %s, %s\t# spill",
1279                      Matcher::regName[dst_first],
1280                      Matcher::regName[src_first]);
1281 #endif
1282         }
1283       } else {
1284         // 32-bit
1285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1287         if (cbuf) {
1288           MacroAssembler _masm(cbuf);
1289           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1290 #ifndef PRODUCT
1291         } else {
1292           st->print("movdl   %s, %s\t# spill",
1293                      Matcher::regName[dst_first],
1294                      Matcher::regName[src_first]);
1295 #endif
1296         }
1297       }
1298       return 0;
1299     } else if (dst_first_rc == rc_float) {
1300       // xmm -> xmm
1301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1303         // 64-bit
1304         if (cbuf) {
1305           MacroAssembler _masm(cbuf);
1306           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1307 #ifndef PRODUCT
1308         } else {
1309           st->print("%s  %s, %s\t# spill",
1310                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1311                      Matcher::regName[dst_first],
1312                      Matcher::regName[src_first]);
1313 #endif
1314         }
1315       } else {
1316         // 32-bit
1317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1319         if (cbuf) {
1320           MacroAssembler _masm(cbuf);
1321           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1322 #ifndef PRODUCT
1323         } else {
1324           st->print("%s  %s, %s\t# spill",
1325                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1326                      Matcher::regName[dst_first],
1327                      Matcher::regName[src_first]);
1328 #endif
1329         }
1330       }
1331       return 0;
1332     }
1333   }
1334 
       // Reached only when no source/destination class pairing above matched.
1335   assert(0," foo ");
1336   Unimplemented();
1337   return 0;
1338 }
1339 
1340 #ifndef PRODUCT
     // Non-PRODUCT only: prints the spill copy to st by running
     // implementation() without a code buffer (cbuf == NULL).
1341 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1342   implementation(NULL, ra_, false, st);
1343 }
1344 #endif
1345 
1346 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
       // Emits the spill copy into cbuf by running implementation() without
       // an output stream (st == NULL).
1347   implementation(&cbuf, ra_, false, NULL);
1348 }
1349 
1350 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
       // Not computed via implementation(); forwarded to the generic
       // MachNode::size(ra_).
1351   return MachNode::size(ra_);
1352 }
1353 
1354 //=============================================================================
1355 #ifndef PRODUCT
     // Non-PRODUCT only: prints "leaq <reg>, [rsp + #<offset>]" for the box
     // lock, where offset is the stack offset of the first element of input
     // register mask 0 and reg is the first register allocated to this node.
1356 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1357 {
1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1359   int reg = ra_->get_reg_first(this);
1360   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1361             Matcher::regName[reg], offset);
1362 }
1363 #endif
1364 
1365 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1366 {
       // Hand-encodes "leaq reg, [rsp + offset]" into cbuf.  The two branches
       // differ only in the displacement width: 32-bit (ModRM mod 0x2) when
       // offset >= 0x80, otherwise 8-bit (ModRM mod 0x1).
       // Only the low three bits of reg go into ModRM; for encodings >= 8 the
       // REX_WR prefix is selected instead of REX_W.
1367   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1368   int reg = ra_->get_encode(this);
1369   if (offset >= 0x80) {
1370     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1371     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1372     emit_rm(cbuf, 0x2, reg & 7, 0x04);
         // Byte following ModRM: SIB with RSP as base.
1373     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1374     emit_d32(cbuf, offset);
1375   } else {
1376     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1377     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1378     emit_rm(cbuf, 0x1, reg & 7, 0x04);
         // Byte following ModRM: SIB with RSP as base.
1379     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1380     emit_d8(cbuf, offset);
1381   }
1382 }
1383 
1384 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1385 {
       // Matches BoxLockNode::emit(): prefix + opcode + ModRM + SIB is four
       // bytes, followed by a 1-byte displacement (offset < 0x80, 5 total) or
       // a 4-byte displacement (8 total).
1386   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1387   return (offset < 0x80) ? 5 : 8; // REX
1388 }
1389 
1390 //=============================================================================
1391 #ifndef PRODUCT
     // Non-PRODUCT only: prints the inline cache check performed at the
     // unverified entry point.  With UseCompressedClassPointers the klass is
     // shown as loaded into rscratch1 and decoded before the compare with rax;
     // otherwise rax is compared with the klass field directly.  A mismatch
     // jumps to SharedRuntime::_ic_miss_stub; the listing ends with the
     // alignment nops.
1392 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1393 {
1394   if (UseCompressedClassPointers) {
1395     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1396     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1397     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1398   } else {
1399     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1400                  "# Inline cache check");
1401   }
1402   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1403   st->print_cr("\tnop\t# nops to align entry point");
1404 }
1405 #endif
1406 
1407 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1408 {
       // Emits the inline cache check: compares rax with the klass of the
       // object in j_rarg0, jumps to the IC miss stub on mismatch, then pads
       // with nops so the code that follows is 4-byte aligned.
1409   MacroAssembler masm(&cbuf);
       // Remember the starting size so the bytes emitted here can be counted.
1410   uint insts_size = cbuf.insts_size();
1411   if (UseCompressedClassPointers) {
1412     masm.load_klass(rscratch1, j_rarg0);
1413     masm.cmpptr(rax, rscratch1);
1414   } else {
1415     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1416   }
1417 
1418   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1419 
1420   /* WARNING these NOPs are critical so that verified entry point is properly
1421      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
       // Bytes needed to reach the next multiple of 4, counted from the start
       // of this node's code (4 when already aligned; masked to 0 below).
1422   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1423   if (OptoBreakpoint) {
1424     // Leave space for int3
1425     nops_cnt -= 1;
1426   }
1427   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1428   if (nops_cnt > 0)
1429     masm.nop(nops_cnt);
1430 }
1431 
1432 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1433 {
       // The entry point's size is not derived analytically here; the request
       // is forwarded to the generic MachNode::size(ra_).
1434   return MachNode::size(ra_); // too many variables; just compute it
1435                               // the hard way
1436 }
1437 
1438 
1439 //=============================================================================
1440 uint size_exception_handler()
1441 {
       // Space reserved for the exception handler stub; used by
       // emit_exception_handler() both to start the stub and to assert that
       // the emitted code fits.
1442   // NativeCall instruction size is the same as NativeJump.
1443   // Note that this value is also credited (in output.cpp) to
1444   // the size of the code section.
1445   return NativeJump::instruction_size;
1446 }
1447 
1448 // Emit exception handler code.
     // The handler is a single jump to the entry point of
     // OptoRuntime::exception_blob(), placed in a stub of
     // size_exception_handler() bytes.
     // Returns the offset at which the handler starts, or 0 when the stub
     // could not be started (start_a_stub() returned NULL).
1449 int emit_exception_handler(CodeBuffer& cbuf)
1450 {
1451 
1452   // Note that the code buffer's insts_mark is always relative to insts.
1453   // That's why we must use the macroassembler to generate a handler.
1454   MacroAssembler _masm(&cbuf);
1455   address base =
1456   __ start_a_stub(size_exception_handler());
1457   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1458   int offset = __ offset();
1459   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1460   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1461   __ end_a_stub();
1462   return offset;
1463 }
1464 
1465 uint size_deopt_handler()
1466 {
       // Space reserved for the deopt handler stub; emit_deopt_handler()
       // asserts that the code it emits does not exceed this.
1467   // three 5 byte instructions
1468   return 15;
1469 }
1470 
1471 // Emit deopt handler code.
     // The handler pushes its own start address on the stack (via a call to
     // the next instruction followed by an in-place adjustment of the pushed
     // value) and then jumps to SharedRuntime::deopt_blob()->unpack().
     // Returns the offset at which the handler starts, or 0 when the stub
     // could not be started (start_a_stub() returned NULL).
1472 int emit_deopt_handler(CodeBuffer& cbuf)
1473 {
1474 
1475   // Note that the code buffer's insts_mark is always relative to insts.
1476   // That's why we must use the macroassembler to generate a handler.
1477   MacroAssembler _masm(&cbuf);
1478   address base =
1479   __ start_a_stub(size_deopt_handler());
1480   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1481   int offset = __ offset();
1482   address the_pc = (address) __ pc();
1483   Label next;
1484   // push a "the_pc" on the stack without destroying any registers
1485   // as they all may be live.
1486 
1487   // push address of "next"
1488   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1489   __ bind(next);
1490   // adjust it so it matches "the_pc"
       // (subtracts the number of bytes emitted since the handler start)
1491   __ subptr(Address(rsp, 0), __ offset() - offset);
1492   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1493   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1494   __ end_a_stub();
1495   return offset;
1496 }
1497 
1498 int Matcher::regnum_to_fpu_offset(int regnum)
1499 {
       // Maps a register number to its FP offset by subtracting 32.
       // NOTE(review): 32 is presumably the size of the first register
       // chunk -- confirm against the register definitions.
1500   return regnum - 32; // The FP registers are in the second chunk
1501 }
1502 
1503 // This is UltraSparc specific, true just means we have fast l2f conversion
     // Always returns true on this platform.
1504 const bool Matcher::convL2FSupported(void) {
1505   return true;
1506 }
1507 
1508 // Is this branch offset short enough that a short branch can be used?
1509 //
1510 // NOTE: If the platform does not provide any short branch variants, then
1511 //       this method should return false for offset 0.
     //
     //   rule    - matcher rule of the branch; jmpConUCF2_rule gets a
     //             narrower range
     //   br_size - size of the branch instruction, subtracted from offset
     //   offset  - distance to the target, relative to the branch address
     // Returns true when the adjusted offset lies in [-128, 127], or in
     // [-126, 125] for jmpConUCF2_rule.
1512 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1513   // The passed offset is relative to address of the branch.
1514   // On 86 a branch displacement is calculated relative to address
1515   // of a next instruction.
1516   offset -= br_size;
1517 
1518   // the short version of jmpConUCF2 contains multiple branches,
1519   // making the reach slightly less
1520   if (rule == jmpConUCF2_rule)
1521     return (-126 <= offset && offset <= 125);
1522   return (-128 <= offset && offset <= 127);
1523 }
1524 
1525 const bool Matcher::isSimpleConstant64(jlong value) {
       // Answers true for every value; the argument is not inspected (the
       // value-dependent test is kept below as a commented-out alternative).
1526   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1527   //return value == (int) value;  // Cf. storeImmL and immL32.
1528 
1529   // Probably always true, even if a temp register is required.
1530   return true;
1531 }
1532 
1533 // The ecx parameter to rep stosq for the ClearArray node is in words.
     // (Hence the count is not expressed in bytes.)
1534 const bool Matcher::init_array_count_is_in_bytes = false;
1535 
1536 // Threshold size for cleararray.
     // Defined as eight times BytesPerLong.
1537 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1538 
1539 // No additional cost for CMOVL.
1540 const int Matcher::long_cmove_cost() { return 0; }
1541 
1542 // No CMOVF/CMOVD with SSE2
     // The cost reported for a float conditional move is ConditionalMoveLimit.
1543 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1544 
1545 // Should the Matcher clone shifts on addressing modes, expecting them
1546 // to be subsumed into complex addressing expressions or compute them
1547 // into registers?  True for Intel but false for most RISCs
1548 const bool Matcher::clone_shift_expressions = true;
1549 
1550 // Do we need to mask the count passed to shift instructions or does
1551 // the cpu only look at the lower 5/6 bits anyway?
     // False here: no explicit masking of the shift count is requested.
1552 const bool Matcher::need_masked_shift_count = false;
1553 
1554 bool Matcher::narrow_oop_use_complex_address() {
       // Only meaningful with UseCompressedOops (asserted).  True when
       // LogMinObjAlignmentInBytes is at most 3.
       // NOTE(review): 3 is presumably the largest shift an x86 scaled-index
       // address can absorb -- confirm.
1555   assert(UseCompressedOops, "only for compressed oops code");
1556   return (LogMinObjAlignmentInBytes <= 3);
1557 }
1558 
1559 bool Matcher::narrow_klass_use_complex_address() {
       // Only meaningful with UseCompressedClassPointers (asserted).  True
       // when LogKlassAlignmentInBytes is at most 3.
       // NOTE(review): 3 is presumably the largest shift an x86 scaled-index
       // address can absorb -- confirm.
1560   assert(UseCompressedClassPointers, "only for compressed klass code");
1561   return (LogKlassAlignmentInBytes <= 3);
1562 }
1563 
1564 // Is it better to copy float constants, or load them directly from
1565 // memory?  Intel can load a float constant from a direct address,
1566 // requiring no extra registers.  Most RISCs will have to materialize
1567 // an address into a register first, so they would do better to copy
1568 // the constant from stack.
1569 const bool Matcher::rematerialize_float_constants = true; // XXX
1570 
1571 // If CPU can load and store mis-aligned doubles directly then no
1572 // fixup is needed.  Else we split the double into 2 integer pieces
1573 // and move it piece-by-piece.  Only happens when passing doubles into
1574 // C code as the Java calling convention forces doubles to be aligned.
1575 const bool Matcher::misaligned_doubles_ok = true;
1576 
1577 // No-op on amd64
1578 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1579 
1580 // Advertise here if the CPU requires explicit rounding operations to
1581 // implement the UseStrictFP mode.
1582 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1583 
1584 // Are floats converted to double when stored to stack during deoptimization?
1585 // On x64 they are stored without conversion, so we can use normal access.
1586 bool Matcher::float_in_double() { return false; }
1587 
1588 // Do ints take an entire long register or just half?
1589 const bool Matcher::int_in_long = true;
1590 
1591 // Return whether or not this register is ever used as an argument.
1592 // This function is used on startup to build the trampoline stubs in
1593 // generateOptoStub.  Registers not mentioned will be killed by the VM
1594 // call in the trampoline, and arguments in those registers will not be
1595 // available to the callee.
     // The set consists of both halves of RDI, RSI, RDX, RCX, R8, R9 and R12,
     // plus the first two slots of XMM0 through XMM7.
1596 bool Matcher::can_be_java_arg(int reg)
1597 {
1598   return
1599     reg ==  RDI_num || reg == RDI_H_num ||
1600     reg ==  RSI_num || reg == RSI_H_num ||
1601     reg ==  RDX_num || reg == RDX_H_num ||
1602     reg ==  RCX_num || reg == RCX_H_num ||
1603     reg ==   R8_num || reg ==  R8_H_num ||
1604     reg ==   R9_num || reg ==  R9_H_num ||
1605     reg ==  R12_num || reg == R12_H_num ||
1606     reg == XMM0_num || reg == XMM0b_num ||
1607     reg == XMM1_num || reg == XMM1b_num ||
1608     reg == XMM2_num || reg == XMM2b_num ||
1609     reg == XMM3_num || reg == XMM3b_num ||
1610     reg == XMM4_num || reg == XMM4b_num ||
1611     reg == XMM5_num || reg == XMM5b_num ||
1612     reg == XMM6_num || reg == XMM6b_num ||
1613     reg == XMM7_num || reg == XMM7b_num;
1614 }
1615 
// A register is reported as a spillable argument exactly when
// can_be_java_arg() accepts it; this function simply forwards the query.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
1620 
// Should long division by a constant use an assembly sequence?
// Always answers false; the divisor argument is not inspected.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
  return false;
}
1627 
// Register mask for the DIVI projection of divmodI
// (returns INT_RAX_REG_mask()).
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}
1632 
// Register mask for the MODI projection of divmodI
// (returns INT_RDX_REG_mask()).
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}
1637 
// Register mask for the DIVL projection of divmodL
// (returns LONG_RAX_REG_mask()).
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}
1642 
// Register mask for the MODL projection of divmodL
// (returns LONG_RDX_REG_mask()).
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}
1647 
// Register mask used to save SP across a method handle invoke
// (returns PTR_RBP_REG_mask()).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return PTR_RBP_REG_mask();
}
1651 
// Register mask for the result projection of mathExactI
// (returns INT_RAX_REG_mask()).
const RegMask Matcher::mathExactI_result_proj_mask() {
  return INT_RAX_REG_mask();
}
1655 
// Register mask for the flags projection of mathExactI
// (returns INT_FLAGS_mask()).
const RegMask Matcher::mathExactI_flags_proj_mask() {
  return INT_FLAGS_mask();
}
1659 
1660 %}
1661 
1662 //----------ENCODING BLOCK-----------------------------------------------------
1663 // This block specifies the encoding classes used by the compiler to
1664 // output byte streams.  Encoding classes are parameterized macros
1665 // used by Machine Instruction Nodes in order to generate the bit
1666 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
1669 // COND_INTER.  REG_INTER causes an operand to generate a function
1670 // which returns its register number when queried.  CONST_INTER causes
1671 // an operand to generate a function which returns the value of the
1672 // constant when queried.  MEMORY_INTER causes an operand to generate
1673 // four functions which return the Base Register, the Index Register,
1674 // the Scale Value, and the Offset Value of the operand when queried.
1675 // COND_INTER causes an operand to generate six functions which return
1676 // the encoding code (ie - encoding bits for the instruction)
1677 // associated with each basic boolean condition for a conditional
1678 // instruction.
1679 //
1680 // Instructions specify two basic values for encoding.  Again, a
1681 // function is available to check if the constant displacement is an
1682 // oop. They use the ins_encode keyword to specify their encoding
1683 // classes (which must be a sequence of enc_class names, and their
1684 // parameters, specified in the encoding block), and they use the
1685 // opcode keyword to specify, in order, their primary, secondary, and
1686 // tertiary opcode.  Only the opcode sections which a particular
1687 // instruction needs for encoding need to be specified.
1688 encode %{
1689   // Build emit functions for each basic byte or larger field in the
1690   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1691   // from C++ code in the enc_class source block.  Emit functions will
1692   // live in the main source block for now.  In future, we can
1693   // generalize this by adding a syntax that specifies the sizes of
1694   // fields in an order, so that the adlc can build the emit functions
1695   // automagically
1696 
1697   // Emit primary opcode
1698   enc_class OpcP
1699   %{
1700     emit_opcode(cbuf, $primary);
1701   %}
1702 
1703   // Emit secondary opcode
1704   enc_class OpcS
1705   %{
1706     emit_opcode(cbuf, $secondary);
1707   %}
1708 
1709   // Emit tertiary opcode
1710   enc_class OpcT
1711   %{
1712     emit_opcode(cbuf, $tertiary);
1713   %}
1714 
1715   // Emit opcode directly
1716   enc_class Opcode(immI d8)
1717   %{
1718     emit_opcode(cbuf, $d8$$constant);
1719   %}
1720 
1721   // Emit size prefix
1722   enc_class SizePrefix
1723   %{
1724     emit_opcode(cbuf, 0x66);
1725   %}
1726 
1727   enc_class reg(rRegI reg)
1728   %{
1729     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1730   %}
1731 
1732   enc_class reg_reg(rRegI dst, rRegI src)
1733   %{
1734     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1735   %}
1736 
1737   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1738   %{
1739     emit_opcode(cbuf, $opcode$$constant);
1740     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1741   %}
1742 
1743   enc_class cdql_enc(no_rax_rdx_RegI div)
1744   %{
1745     // Full implementation of Java idiv and irem; checks for
1746     // special case as described in JVM spec., p.243 & p.271.
1747     //
1748     //         normal case                           special case
1749     //
1750     // input : rax: dividend                         min_int
1751     //         reg: divisor                          -1
1752     //
1753     // output: rax: quotient  (= rax idiv reg)       min_int
1754     //         rdx: remainder (= rax irem reg)       0
1755     //
1756     //  Code sequnce:
1757     //
1758     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1759     //    5:   75 07/08                jne    e <normal>
1760     //    7:   33 d2                   xor    %edx,%edx
1761     //  [div >= 8 -> offset + 1]
1762     //  [REX_B]
1763     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1764     //    c:   74 03/04                je     11 <done>
1765     // 000000000000000e <normal>:
1766     //    e:   99                      cltd
1767     //  [div >= 8 -> offset + 1]
1768     //  [REX_B]
1769     //    f:   f7 f9                   idiv   $div
1770     // 0000000000000011 <done>:
1771 
1772     // cmp    $0x80000000,%eax
1773     emit_opcode(cbuf, 0x3d);
1774     emit_d8(cbuf, 0x00);
1775     emit_d8(cbuf, 0x00);
1776     emit_d8(cbuf, 0x00);
1777     emit_d8(cbuf, 0x80);
1778 
1779     // jne    e <normal>
1780     emit_opcode(cbuf, 0x75);
1781     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
1782 
1783     // xor    %edx,%edx
1784     emit_opcode(cbuf, 0x33);
1785     emit_d8(cbuf, 0xD2);
1786 
1787     // cmp    $0xffffffffffffffff,%ecx
1788     if ($div$$reg >= 8) {
1789       emit_opcode(cbuf, Assembler::REX_B);
1790     }
1791     emit_opcode(cbuf, 0x83);
1792     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1793     emit_d8(cbuf, 0xFF);
1794 
1795     // je     11 <done>
1796     emit_opcode(cbuf, 0x74);
1797     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
1798 
1799     // <normal>
1800     // cltd
1801     emit_opcode(cbuf, 0x99);
1802 
1803     // idivl (note: must be emitted by the user of this rule)
1804     // <done>
1805   %}
1806 
1807   enc_class cdqq_enc(no_rax_rdx_RegL div)
1808   %{
1809     // Full implementation of Java ldiv and lrem; checks for
1810     // special case as described in JVM spec., p.243 & p.271.
1811     //
1812     //         normal case                           special case
1813     //
1814     // input : rax: dividend                         min_long
1815     //         reg: divisor                          -1
1816     //
1817     // output: rax: quotient  (= rax idiv reg)       min_long
1818     //         rdx: remainder (= rax irem reg)       0
1819     //
1820     //  Code sequnce:
1821     //
1822     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1823     //    7:   00 00 80
1824     //    a:   48 39 d0                cmp    %rdx,%rax
1825     //    d:   75 08                   jne    17 <normal>
1826     //    f:   33 d2                   xor    %edx,%edx
1827     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1828     //   15:   74 05                   je     1c <done>
1829     // 0000000000000017 <normal>:
1830     //   17:   48 99                   cqto
1831     //   19:   48 f7 f9                idiv   $div
1832     // 000000000000001c <done>:
1833 
1834     // mov    $0x8000000000000000,%rdx
1835     emit_opcode(cbuf, Assembler::REX_W);
1836     emit_opcode(cbuf, 0xBA);
1837     emit_d8(cbuf, 0x00);
1838     emit_d8(cbuf, 0x00);
1839     emit_d8(cbuf, 0x00);
1840     emit_d8(cbuf, 0x00);
1841     emit_d8(cbuf, 0x00);
1842     emit_d8(cbuf, 0x00);
1843     emit_d8(cbuf, 0x00);
1844     emit_d8(cbuf, 0x80);
1845 
1846     // cmp    %rdx,%rax
1847     emit_opcode(cbuf, Assembler::REX_W);
1848     emit_opcode(cbuf, 0x39);
1849     emit_d8(cbuf, 0xD0);
1850 
1851     // jne    17 <normal>
1852     emit_opcode(cbuf, 0x75);
1853     emit_d8(cbuf, 0x08);
1854 
1855     // xor    %edx,%edx
1856     emit_opcode(cbuf, 0x33);
1857     emit_d8(cbuf, 0xD2);
1858 
1859     // cmp    $0xffffffffffffffff,$div
1860     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1861     emit_opcode(cbuf, 0x83);
1862     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1863     emit_d8(cbuf, 0xFF);
1864 
1865     // je     1e <done>
1866     emit_opcode(cbuf, 0x74);
1867     emit_d8(cbuf, 0x05);
1868 
1869     // <normal>
1870     // cqto
1871     emit_opcode(cbuf, Assembler::REX_W);
1872     emit_opcode(cbuf, 0x99);
1873 
1874     // idivq (note: must be emitted by the user of this rule)
1875     // <done>
1876   %}
1877 
1878   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1879   enc_class OpcSE(immI imm)
1880   %{
1881     // Emit primary opcode and set sign-extend bit
1882     // Check for 8-bit immediate, and set sign extend bit in opcode
1883     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1884       emit_opcode(cbuf, $primary | 0x02);
1885     } else {
1886       // 32-bit immediate
1887       emit_opcode(cbuf, $primary);
1888     }
1889   %}
1890 
1891   enc_class OpcSErm(rRegI dst, immI imm)
1892   %{
1893     // OpcSEr/m
1894     int dstenc = $dst$$reg;
1895     if (dstenc >= 8) {
1896       emit_opcode(cbuf, Assembler::REX_B);
1897       dstenc -= 8;
1898     }
1899     // Emit primary opcode and set sign-extend bit
1900     // Check for 8-bit immediate, and set sign extend bit in opcode
1901     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1902       emit_opcode(cbuf, $primary | 0x02);
1903     } else {
1904       // 32-bit immediate
1905       emit_opcode(cbuf, $primary);
1906     }
1907     // Emit r/m byte with secondary opcode, after primary opcode.
1908     emit_rm(cbuf, 0x3, $secondary, dstenc);
1909   %}
1910 
1911   enc_class OpcSErm_wide(rRegL dst, immI imm)
1912   %{
1913     // OpcSEr/m
1914     int dstenc = $dst$$reg;
1915     if (dstenc < 8) {
1916       emit_opcode(cbuf, Assembler::REX_W);
1917     } else {
1918       emit_opcode(cbuf, Assembler::REX_WB);
1919       dstenc -= 8;
1920     }
1921     // Emit primary opcode and set sign-extend bit
1922     // Check for 8-bit immediate, and set sign extend bit in opcode
1923     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1924       emit_opcode(cbuf, $primary | 0x02);
1925     } else {
1926       // 32-bit immediate
1927       emit_opcode(cbuf, $primary);
1928     }
1929     // Emit r/m byte with secondary opcode, after primary opcode.
1930     emit_rm(cbuf, 0x3, $secondary, dstenc);
1931   %}
1932 
1933   enc_class Con8or32(immI imm)
1934   %{
1935     // Check for 8-bit immediate, and set sign extend bit in opcode
1936     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1937       $$$emit8$imm$$constant;
1938     } else {
1939       // 32-bit immediate
1940       $$$emit32$imm$$constant;
1941     }
1942   %}
1943 
1944   enc_class opc2_reg(rRegI dst)
1945   %{
1946     // BSWAP
1947     emit_cc(cbuf, $secondary, $dst$$reg);
1948   %}
1949 
1950   enc_class opc3_reg(rRegI dst)
1951   %{
1952     // BSWAP
1953     emit_cc(cbuf, $tertiary, $dst$$reg);
1954   %}
1955 
1956   enc_class reg_opc(rRegI div)
1957   %{
1958     // INC, DEC, IDIV, IMOD, JMP indirect, ...
1959     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
1960   %}
1961 
1962   enc_class enc_cmov(cmpOp cop)
1963   %{
1964     // CMOV
1965     $$$emit8$primary;
1966     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1967   %}
1968 
1969   enc_class enc_PartialSubtypeCheck()
1970   %{
1971     Register Rrdi = as_Register(RDI_enc); // result register
1972     Register Rrax = as_Register(RAX_enc); // super class
1973     Register Rrcx = as_Register(RCX_enc); // killed
1974     Register Rrsi = as_Register(RSI_enc); // sub class
1975     Label miss;
1976     const bool set_cond_codes = true;
1977 
1978     MacroAssembler _masm(&cbuf);
1979     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
1980                                      NULL, &miss,
1981                                      /*set_cond_codes:*/ true);
1982     if ($primary) {
1983       __ xorptr(Rrdi, Rrdi);
1984     }
1985     __ bind(miss);
1986   %}
1987 
1988   enc_class clear_avx %{
1989     debug_only(int off0 = cbuf.insts_size());
1990     if (ra_->C->max_vector_size() > 16) {
1991       // Clear upper bits of YMM registers when current compiled code uses
1992       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1993       MacroAssembler _masm(&cbuf);
1994       __ vzeroupper();
1995     }
1996     debug_only(int off1 = cbuf.insts_size());
1997     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
1998   %}
1999 
2000   enc_class Java_To_Runtime(method meth) %{
2001     // No relocation needed
2002     MacroAssembler _masm(&cbuf);
2003     __ mov64(r10, (int64_t) $meth$$method);
2004     __ call(r10);
2005   %}
2006 
2007   enc_class Java_To_Interpreter(method meth)
2008   %{
2009     // CALL Java_To_Interpreter
2010     // This is the instruction starting address for relocation info.
2011     cbuf.set_insts_mark();
2012     $$$emit8$primary;
2013     // CALL directly to the runtime
2014     emit_d32_reloc(cbuf,
2015                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2016                    runtime_call_Relocation::spec(),
2017                    RELOC_DISP32);
2018   %}
2019 
2020   enc_class Java_Static_Call(method meth)
2021   %{
2022     // JAVA STATIC CALL
2023     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2024     // determine who we intended to call.
2025     cbuf.set_insts_mark();
2026     $$$emit8$primary;
2027 
2028     if (!_method) {
2029       emit_d32_reloc(cbuf,
2030                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2031                      runtime_call_Relocation::spec(),
2032                      RELOC_DISP32);
2033     } else if (_optimized_virtual) {
2034       emit_d32_reloc(cbuf,
2035                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2036                      opt_virtual_call_Relocation::spec(),
2037                      RELOC_DISP32);
2038     } else {
2039       emit_d32_reloc(cbuf,
2040                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2041                      static_call_Relocation::spec(),
2042                      RELOC_DISP32);
2043     }
2044     if (_method) {
2045       // Emit stub for static call.
2046       CompiledStaticCall::emit_to_interp_stub(cbuf);
2047     }
2048   %}
2049 
2050   enc_class Java_Dynamic_Call(method meth) %{
2051     MacroAssembler _masm(&cbuf);
2052     __ ic_call((address)$meth$$method);
2053   %}
2054 
2055   enc_class Java_Compiled_Call(method meth)
2056   %{
2057     // JAVA COMPILED CALL
2058     int disp = in_bytes(Method:: from_compiled_offset());
2059 
2060     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2061     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2062 
2063     // callq *disp(%rax)
2064     cbuf.set_insts_mark();
2065     $$$emit8$primary;
2066     if (disp < 0x80) {
2067       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2068       emit_d8(cbuf, disp); // Displacement
2069     } else {
2070       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2071       emit_d32(cbuf, disp); // Displacement
2072     }
2073   %}
2074 
2075   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2076   %{
2077     // SAL, SAR, SHR
2078     int dstenc = $dst$$reg;
2079     if (dstenc >= 8) {
2080       emit_opcode(cbuf, Assembler::REX_B);
2081       dstenc -= 8;
2082     }
2083     $$$emit8$primary;
2084     emit_rm(cbuf, 0x3, $secondary, dstenc);
2085     $$$emit8$shift$$constant;
2086   %}
2087 
2088   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2089   %{
2090     // SAL, SAR, SHR
2091     int dstenc = $dst$$reg;
2092     if (dstenc < 8) {
2093       emit_opcode(cbuf, Assembler::REX_W);
2094     } else {
2095       emit_opcode(cbuf, Assembler::REX_WB);
2096       dstenc -= 8;
2097     }
2098     $$$emit8$primary;
2099     emit_rm(cbuf, 0x3, $secondary, dstenc);
2100     $$$emit8$shift$$constant;
2101   %}
2102 
2103   enc_class load_immI(rRegI dst, immI src)
2104   %{
2105     int dstenc = $dst$$reg;
2106     if (dstenc >= 8) {
2107       emit_opcode(cbuf, Assembler::REX_B);
2108       dstenc -= 8;
2109     }
2110     emit_opcode(cbuf, 0xB8 | dstenc);
2111     $$$emit32$src$$constant;
2112   %}
2113 
2114   enc_class load_immL(rRegL dst, immL src)
2115   %{
2116     int dstenc = $dst$$reg;
2117     if (dstenc < 8) {
2118       emit_opcode(cbuf, Assembler::REX_W);
2119     } else {
2120       emit_opcode(cbuf, Assembler::REX_WB);
2121       dstenc -= 8;
2122     }
2123     emit_opcode(cbuf, 0xB8 | dstenc);
2124     emit_d64(cbuf, $src$$constant);
2125   %}
2126 
2127   enc_class load_immUL32(rRegL dst, immUL32 src)
2128   %{
2129     // same as load_immI, but this time we care about zeroes in the high word
2130     int dstenc = $dst$$reg;
2131     if (dstenc >= 8) {
2132       emit_opcode(cbuf, Assembler::REX_B);
2133       dstenc -= 8;
2134     }
2135     emit_opcode(cbuf, 0xB8 | dstenc);
2136     $$$emit32$src$$constant;
2137   %}
2138 
2139   enc_class load_immL32(rRegL dst, immL32 src)
2140   %{
2141     int dstenc = $dst$$reg;
2142     if (dstenc < 8) {
2143       emit_opcode(cbuf, Assembler::REX_W);
2144     } else {
2145       emit_opcode(cbuf, Assembler::REX_WB);
2146       dstenc -= 8;
2147     }
2148     emit_opcode(cbuf, 0xC7);
2149     emit_rm(cbuf, 0x03, 0x00, dstenc);
2150     $$$emit32$src$$constant;
2151   %}
2152 
2153   enc_class load_immP31(rRegP dst, immP32 src)
2154   %{
2155     // same as load_immI, but this time we care about zeroes in the high word
2156     int dstenc = $dst$$reg;
2157     if (dstenc >= 8) {
2158       emit_opcode(cbuf, Assembler::REX_B);
2159       dstenc -= 8;
2160     }
2161     emit_opcode(cbuf, 0xB8 | dstenc);
2162     $$$emit32$src$$constant;
2163   %}
2164 
2165   enc_class load_immP(rRegP dst, immP src)
2166   %{
2167     int dstenc = $dst$$reg;
2168     if (dstenc < 8) {
2169       emit_opcode(cbuf, Assembler::REX_W);
2170     } else {
2171       emit_opcode(cbuf, Assembler::REX_WB);
2172       dstenc -= 8;
2173     }
2174     emit_opcode(cbuf, 0xB8 | dstenc);
2175     // This next line should be generated from ADLC
2176     if ($src->constant_reloc() != relocInfo::none) {
2177       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2178     } else {
2179       emit_d64(cbuf, $src$$constant);
2180     }
2181   %}
2182 
  // Emit the constant operand as a 32-bit immediate.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2188 
2189   enc_class Con32F_as_bits(immF src)
2190   %{
2191     // Output Float immediate bits
2192     jfloat jf = $src$$constant;
2193     jint jf_as_bits = jint_cast(jf);
2194     emit_d32(cbuf, jf_as_bits);
2195   %}
2196 
  // Emit the constant operand as a 16-bit immediate.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2202 
2203   // How is this different from Con32??? XXX
2204   enc_class Con_d32(immI src)
2205   %{
2206     emit_d32(cbuf,$src$$constant);
2207   %}
2208 
2209   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2210     // Output immediate memory reference
2211     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2212     emit_d32(cbuf, 0x00);
2213   %}
2214 
2215   enc_class lock_prefix()
2216   %{
2217     if (os::is_MP()) {
2218       emit_opcode(cbuf, 0xF0); // lock
2219     }
2220   %}
2221 
2222   enc_class REX_mem(memory mem)
2223   %{
2224     if ($mem$$base >= 8) {
2225       if ($mem$$index < 8) {
2226         emit_opcode(cbuf, Assembler::REX_B);
2227       } else {
2228         emit_opcode(cbuf, Assembler::REX_XB);
2229       }
2230     } else {
2231       if ($mem$$index >= 8) {
2232         emit_opcode(cbuf, Assembler::REX_X);
2233       }
2234     }
2235   %}
2236 
2237   enc_class REX_mem_wide(memory mem)
2238   %{
2239     if ($mem$$base >= 8) {
2240       if ($mem$$index < 8) {
2241         emit_opcode(cbuf, Assembler::REX_WB);
2242       } else {
2243         emit_opcode(cbuf, Assembler::REX_WXB);
2244       }
2245     } else {
2246       if ($mem$$index < 8) {
2247         emit_opcode(cbuf, Assembler::REX_W);
2248       } else {
2249         emit_opcode(cbuf, Assembler::REX_WX);
2250       }
2251     }
2252   %}
2253 
2254   // for byte regs
2255   enc_class REX_breg(rRegI reg)
2256   %{
2257     if ($reg$$reg >= 4) {
2258       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2259     }
2260   %}
2261 
2262   // for byte regs
2263   enc_class REX_reg_breg(rRegI dst, rRegI src)
2264   %{
2265     if ($dst$$reg < 8) {
2266       if ($src$$reg >= 4) {
2267         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2268       }
2269     } else {
2270       if ($src$$reg < 8) {
2271         emit_opcode(cbuf, Assembler::REX_R);
2272       } else {
2273         emit_opcode(cbuf, Assembler::REX_RB);
2274       }
2275     }
2276   %}
2277 
2278   // for byte regs
2279   enc_class REX_breg_mem(rRegI reg, memory mem)
2280   %{
2281     if ($reg$$reg < 8) {
2282       if ($mem$$base < 8) {
2283         if ($mem$$index >= 8) {
2284           emit_opcode(cbuf, Assembler::REX_X);
2285         } else if ($reg$$reg >= 4) {
2286           emit_opcode(cbuf, Assembler::REX);
2287         }
2288       } else {
2289         if ($mem$$index < 8) {
2290           emit_opcode(cbuf, Assembler::REX_B);
2291         } else {
2292           emit_opcode(cbuf, Assembler::REX_XB);
2293         }
2294       }
2295     } else {
2296       if ($mem$$base < 8) {
2297         if ($mem$$index < 8) {
2298           emit_opcode(cbuf, Assembler::REX_R);
2299         } else {
2300           emit_opcode(cbuf, Assembler::REX_RX);
2301         }
2302       } else {
2303         if ($mem$$index < 8) {
2304           emit_opcode(cbuf, Assembler::REX_RB);
2305         } else {
2306           emit_opcode(cbuf, Assembler::REX_RXB);
2307         }
2308       }
2309     }
2310   %}
2311 
2312   enc_class REX_reg(rRegI reg)
2313   %{
2314     if ($reg$$reg >= 8) {
2315       emit_opcode(cbuf, Assembler::REX_B);
2316     }
2317   %}
2318 
2319   enc_class REX_reg_wide(rRegI reg)
2320   %{
2321     if ($reg$$reg < 8) {
2322       emit_opcode(cbuf, Assembler::REX_W);
2323     } else {
2324       emit_opcode(cbuf, Assembler::REX_WB);
2325     }
2326   %}
2327 
2328   enc_class REX_reg_reg(rRegI dst, rRegI src)
2329   %{
2330     if ($dst$$reg < 8) {
2331       if ($src$$reg >= 8) {
2332         emit_opcode(cbuf, Assembler::REX_B);
2333       }
2334     } else {
2335       if ($src$$reg < 8) {
2336         emit_opcode(cbuf, Assembler::REX_R);
2337       } else {
2338         emit_opcode(cbuf, Assembler::REX_RB);
2339       }
2340     }
2341   %}
2342 
2343   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2344   %{
2345     if ($dst$$reg < 8) {
2346       if ($src$$reg < 8) {
2347         emit_opcode(cbuf, Assembler::REX_W);
2348       } else {
2349         emit_opcode(cbuf, Assembler::REX_WB);
2350       }
2351     } else {
2352       if ($src$$reg < 8) {
2353         emit_opcode(cbuf, Assembler::REX_WR);
2354       } else {
2355         emit_opcode(cbuf, Assembler::REX_WRB);
2356       }
2357     }
2358   %}
2359 
2360   enc_class REX_reg_mem(rRegI reg, memory mem)
2361   %{
2362     if ($reg$$reg < 8) {
2363       if ($mem$$base < 8) {
2364         if ($mem$$index >= 8) {
2365           emit_opcode(cbuf, Assembler::REX_X);
2366         }
2367       } else {
2368         if ($mem$$index < 8) {
2369           emit_opcode(cbuf, Assembler::REX_B);
2370         } else {
2371           emit_opcode(cbuf, Assembler::REX_XB);
2372         }
2373       }
2374     } else {
2375       if ($mem$$base < 8) {
2376         if ($mem$$index < 8) {
2377           emit_opcode(cbuf, Assembler::REX_R);
2378         } else {
2379           emit_opcode(cbuf, Assembler::REX_RX);
2380         }
2381       } else {
2382         if ($mem$$index < 8) {
2383           emit_opcode(cbuf, Assembler::REX_RB);
2384         } else {
2385           emit_opcode(cbuf, Assembler::REX_RXB);
2386         }
2387       }
2388     }
2389   %}
2390 
2391   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2392   %{
2393     if ($reg$$reg < 8) {
2394       if ($mem$$base < 8) {
2395         if ($mem$$index < 8) {
2396           emit_opcode(cbuf, Assembler::REX_W);
2397         } else {
2398           emit_opcode(cbuf, Assembler::REX_WX);
2399         }
2400       } else {
2401         if ($mem$$index < 8) {
2402           emit_opcode(cbuf, Assembler::REX_WB);
2403         } else {
2404           emit_opcode(cbuf, Assembler::REX_WXB);
2405         }
2406       }
2407     } else {
2408       if ($mem$$base < 8) {
2409         if ($mem$$index < 8) {
2410           emit_opcode(cbuf, Assembler::REX_WR);
2411         } else {
2412           emit_opcode(cbuf, Assembler::REX_WRX);
2413         }
2414       } else {
2415         if ($mem$$index < 8) {
2416           emit_opcode(cbuf, Assembler::REX_WRB);
2417         } else {
2418           emit_opcode(cbuf, Assembler::REX_WRXB);
2419         }
2420       }
2421     }
2422   %}
2423 
2424   enc_class reg_mem(rRegI ereg, memory mem)
2425   %{
2426     // High registers handle in encode_RegMem
2427     int reg = $ereg$$reg;
2428     int base = $mem$$base;
2429     int index = $mem$$index;
2430     int scale = $mem$$scale;
2431     int disp = $mem$$disp;
2432     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2433 
2434     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2435   %}
2436 
2437   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2438   %{
2439     int rm_byte_opcode = $rm_opcode$$constant;
2440 
2441     // High registers handle in encode_RegMem
2442     int base = $mem$$base;
2443     int index = $mem$$index;
2444     int scale = $mem$$scale;
2445     int displace = $mem$$disp;
2446 
2447     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2448                                             // working with static
2449                                             // globals
2450     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2451                   disp_reloc);
2452   %}
2453 
2454   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2455   %{
2456     int reg_encoding = $dst$$reg;
2457     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2458     int index        = 0x04;            // 0x04 indicates no index
2459     int scale        = 0x00;            // 0x00 indicates no scale
2460     int displace     = $src1$$constant; // 0x00 indicates no displacement
2461     relocInfo::relocType disp_reloc = relocInfo::none;
2462     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2463                   disp_reloc);
2464   %}
2465 
2466   enc_class neg_reg(rRegI dst)
2467   %{
2468     int dstenc = $dst$$reg;
2469     if (dstenc >= 8) {
2470       emit_opcode(cbuf, Assembler::REX_B);
2471       dstenc -= 8;
2472     }
2473     // NEG $dst
2474     emit_opcode(cbuf, 0xF7);
2475     emit_rm(cbuf, 0x3, 0x03, dstenc);
2476   %}
2477 
2478   enc_class neg_reg_wide(rRegI dst)
2479   %{
2480     int dstenc = $dst$$reg;
2481     if (dstenc < 8) {
2482       emit_opcode(cbuf, Assembler::REX_W);
2483     } else {
2484       emit_opcode(cbuf, Assembler::REX_WB);
2485       dstenc -= 8;
2486     }
2487     // NEG $dst
2488     emit_opcode(cbuf, 0xF7);
2489     emit_rm(cbuf, 0x3, 0x03, dstenc);
2490   %}
2491 
2492   enc_class setLT_reg(rRegI dst)
2493   %{
2494     int dstenc = $dst$$reg;
2495     if (dstenc >= 8) {
2496       emit_opcode(cbuf, Assembler::REX_B);
2497       dstenc -= 8;
2498     } else if (dstenc >= 4) {
2499       emit_opcode(cbuf, Assembler::REX);
2500     }
2501     // SETLT $dst
2502     emit_opcode(cbuf, 0x0F);
2503     emit_opcode(cbuf, 0x9C);
2504     emit_rm(cbuf, 0x3, 0x0, dstenc);
2505   %}
2506 
2507   enc_class setNZ_reg(rRegI dst)
2508   %{
2509     int dstenc = $dst$$reg;
2510     if (dstenc >= 8) {
2511       emit_opcode(cbuf, Assembler::REX_B);
2512       dstenc -= 8;
2513     } else if (dstenc >= 4) {
2514       emit_opcode(cbuf, Assembler::REX);
2515     }
2516     // SETNZ $dst
2517     emit_opcode(cbuf, 0x0F);
2518     emit_opcode(cbuf, 0x95);
2519     emit_rm(cbuf, 0x3, 0x0, dstenc);
2520   %}
2521 
2522 
2523   // Compare the lonogs and set -1, 0, or 1 into dst
2524   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2525   %{
2526     int src1enc = $src1$$reg;
2527     int src2enc = $src2$$reg;
2528     int dstenc = $dst$$reg;
2529 
2530     // cmpq $src1, $src2
2531     if (src1enc < 8) {
2532       if (src2enc < 8) {
2533         emit_opcode(cbuf, Assembler::REX_W);
2534       } else {
2535         emit_opcode(cbuf, Assembler::REX_WB);
2536       }
2537     } else {
2538       if (src2enc < 8) {
2539         emit_opcode(cbuf, Assembler::REX_WR);
2540       } else {
2541         emit_opcode(cbuf, Assembler::REX_WRB);
2542       }
2543     }
2544     emit_opcode(cbuf, 0x3B);
2545     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2546 
2547     // movl $dst, -1
2548     if (dstenc >= 8) {
2549       emit_opcode(cbuf, Assembler::REX_B);
2550     }
2551     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2552     emit_d32(cbuf, -1);
2553 
2554     // jl,s done
2555     emit_opcode(cbuf, 0x7C);
2556     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2557 
2558     // setne $dst
2559     if (dstenc >= 4) {
2560       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2561     }
2562     emit_opcode(cbuf, 0x0F);
2563     emit_opcode(cbuf, 0x95);
2564     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2565 
2566     // movzbl $dst, $dst
2567     if (dstenc >= 4) {
2568       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2569     }
2570     emit_opcode(cbuf, 0x0F);
2571     emit_opcode(cbuf, 0xB6);
2572     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2573   %}
2574 
2575   enc_class Push_ResultXD(regD dst) %{
2576     MacroAssembler _masm(&cbuf);
2577     __ fstp_d(Address(rsp, 0));
2578     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2579     __ addptr(rsp, 8);
2580   %}
2581 
2582   enc_class Push_SrcXD(regD src) %{
2583     MacroAssembler _masm(&cbuf);
2584     __ subptr(rsp, 8);
2585     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2586     __ fld_d(Address(rsp, 0));
2587   %}
2588 
2589 
2590   // obj: object to lock
2591   // box: box address (header location) -- killed
2592   // tmp: rax -- killed
2593   // scr: rbx -- killed
2594   //
2595   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2596   // from i486.ad.  See that file for comments.
2597   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2598   // use the shorter encoding.  (Movl clears the high-order 32-bits).
2599 
2600 
2601   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2602   %{
         // Emits the inline lock-acquire sequence for $obj using the lock record
         // at $box.  EmitSync selects one of three code shapes:
         //   EmitSync & 1 : minimal stub -- store unused_mark into the box and
         //                  compare rsp against NULL_WORD;
         //   EmitSync & 2 : stack-lock CAS plus a recursive-lock check only;
         //   otherwise    : full path that additionally handles an inflated mark.
         // NOTE(review): the outcome appears to be reported through the condition
         // flags left by the last emitted instruction (compare the ZF comments in
         // Fast_Unlock) -- confirm at the instruct that uses this encoding.
2603     Register objReg = as_Register((int)$obj$$reg);
2604     Register boxReg = as_Register((int)$box$$reg);
2605     Register tmpReg = as_Register($tmp$$reg);
2606     Register scrReg = as_Register($scr$$reg);
2607     MacroAssembler masm(&cbuf);
2608
2609     // Verify uniqueness of register assignments -- necessary but not sufficient
2610     assert (objReg != boxReg && objReg != tmpReg &&
2611             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2612
         // Optional statistics: bump the total-entry counter when counters exist.
2613     if (_counters != NULL) {
2614       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2615     }
2616     if (EmitSync & 1) {
2617         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2618         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2619         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
2620     } else
2621     if (EmitSync & 2) {
2622         Label DONE_LABEL;
2623         if (UseBiasedLocking) {
2624            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2625           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2626         }
2627         // QQQ was movl...
             // tmpReg = mark word with bit 0 set; saved in the box as the displaced
             // header, then CAS'ed against [obj] with boxReg as the new value.
2628         masm.movptr(tmpReg, 0x1);
2629         masm.orptr(tmpReg, Address(objReg, 0));
2630         masm.movptr(Address(boxReg, 0), tmpReg);
2631         if (os::is_MP()) {
2632           masm.lock();
2633         }
2634         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2635         masm.jcc(Assembler::equal, DONE_LABEL);
2636
2637         // Recursive locking
             // (mark - rsp) & (7 - page_size): for a power-of-two page size this is
             // zero only when the difference has its low 3 bits clear and is
             // smaller than one page.  The result is stored into the box.
2638         masm.subptr(tmpReg, rsp);
2639         masm.andptr(tmpReg, 7 - os::vm_page_size());
2640         masm.movptr(Address(boxReg, 0), tmpReg);
2641
2642         masm.bind(DONE_LABEL);
2643         masm.nop(); // avoid branch to branch
2644     } else {
             // NOTE(review): Egress is declared but never bound or referenced in
             // this block.
2645         Label DONE_LABEL, IsInflated, Egress;
2646
2647         masm.movptr(tmpReg, Address(objReg, 0)) ;
2648         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2649         masm.jcc   (Assembler::notZero, IsInflated) ;
2650
2651         // it's stack-locked, biased or neutral
2652         // TODO: optimize markword triage order to reduce the number of
2653         // conditional branches in the most common cases.
2654         // Beware -- there's a subtle invariant that fetch of the markword
2655         // at [FETCH], below, will never observe a biased encoding (*101b).
2656         // If this invariant is not held we'll suffer exclusion (safety) failure.
2657
2658         if (UseBiasedLocking && !UseOptoBiasInlining) {
2659           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2660           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2661         }
2662
2663         // was q will it destroy high?
2664         masm.orl   (tmpReg, 1) ;
2665         masm.movptr(Address(boxReg, 0), tmpReg) ;
2666         if (os::is_MP()) { masm.lock(); }
2667         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
             // Count the fast-path entry only when the CAS above succeeded (equal).
2668         if (_counters != NULL) {
2669            masm.cond_inc32(Assembler::equal,
2670                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2671         }
2672         masm.jcc   (Assembler::equal, DONE_LABEL);
2673
2674         // Recursive locking
             // Same (mark - rsp) & (7 - page_size) test as in the EmitSync & 2 shape.
2675         masm.subptr(tmpReg, rsp);
2676         masm.andptr(tmpReg, 7 - os::vm_page_size());
2677         masm.movptr(Address(boxReg, 0), tmpReg);
2678         if (_counters != NULL) {
2679            masm.cond_inc32(Assembler::equal,
2680                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2681         }
2682         masm.jmp   (DONE_LABEL) ;
2683
2684         masm.bind  (IsInflated) ;
2685         // It's inflated
2686
2687         // TODO: someday avoid the ST-before-CAS penalty by
2688         // relocating (deferring) the following ST.
2689         // We should also think about trying a CAS without having
2690         // fetched _owner.  If the CAS is successful we may
2691         // avoid an RTO->RTS upgrade on the $line.
2692         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2693         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2694
             // boxReg takes over the mark value (bit 0x02 is set on this path); the
             // "-2" in the field offsets below presumably cancels that tag bit so
             // the access lands on the ObjectMonitor field -- confirm.
2695         masm.mov    (boxReg, tmpReg) ;
2696         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2697         masm.testptr(tmpReg, tmpReg) ;
2698         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
2699
2700         // It's inflated and appears unlocked
             // tmpReg is zero here (tested above), so the CAS swaps the owner
             // field from 0 to r15_thread.
2701         if (os::is_MP()) { masm.lock(); }
2702         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2703         // Intentional fall-through into DONE_LABEL ...
2704
2705         masm.bind  (DONE_LABEL) ;
2706         masm.nop   () ;                 // avoid jmp to jmp
2707     }
2708   %}
2709 
2710   // obj: object to unlock
2711   // box: box address (displaced header location), killed
2712   // RBX: killed tmp; cannot be obj nor box
2713   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2714   %{
         // Emits the inline lock-release sequence for $obj / $box.  EmitSync
         // selects the code shape:
         //   EmitSync & 4 : minimal stub -- only compares rsp with 0;
         //   EmitSync & 8 : stack-lock release only (recursive check + CAS);
         //   otherwise    : full path that also handles an inflated mark word.
         // Within the full path, EmitSync & 65536 drops the successor-check code
         // and EmitSync & 32768 appends a trailing nop.
2715
2716     Register objReg = as_Register($obj$$reg);
2717     Register boxReg = as_Register($box$$reg);
2718     Register tmpReg = as_Register($tmp$$reg);
2719     MacroAssembler masm(&cbuf);
2720
2721     if (EmitSync & 4) {
2722        masm.cmpptr(rsp, 0) ;
2723     } else
2724     if (EmitSync & 8) {
2725        Label DONE_LABEL;
2726        if (UseBiasedLocking) {
2727          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2728        }
2729
2730        // Check whether the displaced header is 0
2731        //(=> recursive unlock)
2732        masm.movptr(tmpReg, Address(boxReg, 0));
2733        masm.testptr(tmpReg, tmpReg);
2734        masm.jcc(Assembler::zero, DONE_LABEL);
2735
2736        // If not recursive lock, reset the header to displaced header
2737        if (os::is_MP()) {
2738          masm.lock();
2739        }
2740        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2741        masm.bind(DONE_LABEL);
2742        masm.nop(); // avoid branch to branch
2743     } else {
2744        Label DONE_LABEL, Stacked, CheckSucc ;
2745
2746        if (UseBiasedLocking && !UseOptoBiasInlining) {
2747          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2748        }
2749
            // tmpReg = current mark word.  A zero displaced header in the box ends
            // the sequence immediately; otherwise bit 0x02 of the mark picks the
            // inflated path (fall through) or the Stacked path.
2750        masm.movptr(tmpReg, Address(objReg, 0)) ;
2751        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
2752        masm.jcc   (Assembler::zero, DONE_LABEL) ;
2753        masm.testl (tmpReg, 0x02) ;
2754        masm.jcc   (Assembler::zero, Stacked) ;
2755
2756        // It's inflated
            // boxReg = (owner ^ r15_thread) | recursions: non-zero means either a
            // different owner or a non-zero recursion count, so bail out.  The
            // "-2" in each offset presumably cancels the 0x02 tag bit still
            // present in tmpReg -- confirm.
2757        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2758        masm.xorptr(boxReg, r15_thread) ;
2759        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2760        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
            // boxReg = cxq | EntryList: non-zero diverts to CheckSucc; otherwise
            // the owner field is simply cleared.
2761        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2762        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2763        masm.jcc   (Assembler::notZero, CheckSucc) ;
2764        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2765        masm.jmp   (DONE_LABEL) ;
2766
2767        if ((EmitSync & 65536) == 0) {
2768          Label LSuccess, LGoSlowPath ;
2769          masm.bind  (CheckSucc) ;
2770          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2771          masm.jcc   (Assembler::zero, LGoSlowPath) ;
2772
2773          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
2774          // the explicit ST;MEMBAR combination, but masm doesn't currently support
2775          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
2776          // are all faster when the write buffer is populated.
2777          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2778          if (os::is_MP()) {
2779             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
2780          }
              // Re-read succ after the store + locked add; non-zero => LSuccess.
2781          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2782          masm.jcc   (Assembler::notZero, LSuccess) ;
2783
              // Try to swap the owner field from 0 (in rax/box) back to r15_thread;
              // a failed CAS (notEqual) goes to LSuccess, a successful one falls
              // through to the slow path.
2784          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
2785          if (os::is_MP()) { masm.lock(); }
2786          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
2787          masm.jcc   (Assembler::notEqual, LSuccess) ;
2788          // Intentional fall-through into slow-path
2789
2790          masm.bind  (LGoSlowPath) ;
2791          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
2792          masm.jmp   (DONE_LABEL) ;
2793
2794          masm.bind  (LSuccess) ;
2795          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
2796          masm.jmp   (DONE_LABEL) ;
2797        }
2798
2799        masm.bind  (Stacked) ;
2800        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
2801        if (os::is_MP()) { masm.lock(); }
2802        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2803
            // With the successor-check code omitted, CheckSucc still has to be
            // bound somewhere: it lands here, at the same address as DONE_LABEL.
2804        if (EmitSync & 65536) {
2805           masm.bind (CheckSucc) ;
2806        }
2807        masm.bind(DONE_LABEL);
2808        if (EmitSync & 32768) {
2809           masm.nop();                      // avoid branch to branch
2810        }
2811     }
2812   %}
2813 
2814 
2815   enc_class enc_rethrow()
2816   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9 + 32-bit displacement) to
         // OptoRuntime::rethrow_stub(), recorded as a runtime-call relocation.
2817     cbuf.set_insts_mark();
2818     emit_opcode(cbuf, 0xE9); // jmp entry
2819     emit_d32_reloc(cbuf,
                        // insts_end() is sampled after the opcode byte was emitted; the
                        // extra -4 accounts for the displacement bytes themselves.
2820                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
2821                    runtime_call_Relocation::spec(),
2822                    RELOC_DISP32);
2823   %}
2824 
2825 %}
2826 
2827 
2828 
2829 //----------FRAME--------------------------------------------------------------
2830 // Definition of frame structure and management information.
2831 //
2832 //  S T A C K   L A Y O U T    Allocators stack-slot number
2833 //                             |   (to get allocators register number
2834 //  G  Owned by    |        |  v    add OptoReg::stack0())
2835 //  r   CALLER     |        |
2836 //  o     |        +--------+      pad to even-align allocators stack-slot
2837 //  w     V        |  pad0  |        numbers; owned by CALLER
2838 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2839 //  h     ^        |   in   |  5
2840 //        |        |  args  |  4   Holes in incoming args owned by SELF
2841 //  |     |        |        |  3
2842 //  |     |        +--------+
2843 //  V     |        | old out|      Empty on Intel, window on Sparc
2844 //        |    old |preserve|      Must be even aligned.
2845 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2846 //        |        |   in   |  3   area for Intel ret address
2847 //     Owned by    |preserve|      Empty on Sparc.
2848 //       SELF      +--------+
2849 //        |        |  pad2  |  2   pad to align old SP
2850 //        |        +--------+  1
2851 //        |        | locks  |  0
2852 //        |        +--------+----> OptoReg::stack0(), even aligned
2853 //        |        |  pad1  | 11   pad to align new SP
2854 //        |        +--------+
2855 //        |        |        | 10
2856 //        |        | spills |  9   spills
2857 //        V        |        |  8   (pad0 slot for callee)
2858 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2859 //        ^        |  out   |  7
2860 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2861 //     Owned by    +--------+
2862 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2863 //        |    new |preserve|      Must be even-aligned.
2864 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2865 //        |        |        |
2866 //
2867 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2868 //         known from SELF's arguments and the Java calling convention.
2869 //         Region 6-7 is determined per call site.
2870 // Note 2: If the calling convention leaves holes in the incoming argument
2871 //         area, those holes are owned by SELF.  Holes in the outgoing area
2872 //         are owned by the CALLEE.  Holes should not be necessary in the
2873 //         incoming area, as the Java calling convention is completely under
2874 //         the control of the AD file.  Doubles can be sorted and packed to
2875 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2876 //         varargs C calling conventions.
2877 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2878 //         even aligned with pad0 as needed.
2879 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2880 //         region 6-11 is even aligned; it may be padded out more so that
2881 //         the region from SP to FP meets the minimum stack alignment.
2882 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2883 //         alignment.  Region 11, pad1, may be dynamically extended so that
2884 //         SP meets the minimum alignment.
2885 
2886 frame
2887 %{
       // Frame layout and calling-convention parameters for this port.  The
       // nested %{ %} bodies (calling_convention, c_calling_convention,
       // return_value) are C++ fragments.
2888   // What direction does stack grow in (assumed to be same for C & Java)
2889   stack_direction(TOWARDS_LOW);
2890
2891   // These two registers define part of the calling convention
2892   // between compiled code and the interpreter.
2893   inline_cache_reg(RAX);                // Inline Cache Register
2894   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2895                                         // calling interpreter
2896
2897   // Optional: name the operand used by cisc-spilling to access
2898   // [stack_pointer + offset]
2899   cisc_spilling_operand_name(indOffset32);
2900
2901   // Number of stack slots consumed by locking an object
2902   sync_stack_slots(2);
2903
2904   // Compiled code's Frame Pointer
2905   frame_pointer(RSP);
2906
2907   // Interpreter stores its frame pointer in a register which is
2908   // stored to the stack by I2CAdaptors.
2909   // I2CAdaptors convert from interpreted java to compiled java.
2910   interpreter_frame_pointer(RBP);
2911
2912   // Stack alignment requirement
2913   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2914
2915   // Number of stack slots between incoming argument block and the start of
2916   // a new frame.  The PROLOG must add this many slots to the stack.  The
2917   // EPILOG must remove this many slots.  amd64 needs two slots for
2918   // return address.
       // NOTE(review): the base value below is 4 slots, not 2; the other two
       // presumably hold the saved frame pointer -- confirm.  Two more slots
       // are added when VerifyStackAtCalls is set.
2919   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
2920
2921   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2922   // for calls to C.  Supports the var-args backing area for register parms.
2923   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
2924
2925   // The after-PROLOG location of the return address.  Location of
2926   // return address specifies a type (REG or STACK) and a number
2927   // representing the register number (i.e. - use a register name) or
2928   // stack slot.
2929   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2930   // Otherwise, it is above the locks and verification slot and alignment word
       // Computed as: (preserve slots + fixed slots), rounded up to the stack
       // alignment in slots, minus 2.
2931   return_addr(STACK - 2 +
2932               round_to((Compile::current()->in_preserve_stack_slots() +
2933                         Compile::current()->fixed_slots()),
2934                        stack_alignment_in_slots()));
2935
2936   // Body of function which returns an integer array locating
2937   // arguments either in registers or in stack slots.  Passed an array
2938   // of ideal registers called "sig" and a "length" count.  Stack-slot
2939   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2940   // arguments for a CALLEE.  Incoming stack arguments are
2941   // automatically biased by the preserve_stack_slots field above.
2942
2943   calling_convention
2944   %{
2945     // No difference between ingoing/outgoing just pass false
2946     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2947   %}
2948
2949   c_calling_convention
2950   %{
2951     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
2952     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
2953   %}
2954
2955   // Location of compiled Java return values.  Same as C for now.
2956   return_value
2957   %{
         // Looks up the (hi, lo) register pair for ideal_reg in two parallel
         // tables indexed by the ideal register opcode; the per-entry comments
         // name the opcode each slot stands for.  Slots 0 and 1 are fillers.
         // NOTE(review): the assert admits only Op_RegI..Op_RegL; if Op_RegN
         // precedes Op_RegI as the table comments suggest, the Op_RegN entries
         // are unreachable when asserts are enabled -- confirm.
2958     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2959            "only return normal values");
2960
2961     static const int lo[Op_RegL + 1] = {
2962       0,
2963       0,
2964       RAX_num,  // Op_RegN
2965       RAX_num,  // Op_RegI
2966       RAX_num,  // Op_RegP
2967       XMM0_num, // Op_RegF
2968       XMM0_num, // Op_RegD
2969       RAX_num   // Op_RegL
2970     };
         // OptoReg::Bad presumably marks values with no upper half -- confirm.
2971     static const int hi[Op_RegL + 1] = {
2972       0,
2973       0,
2974       OptoReg::Bad, // Op_RegN
2975       OptoReg::Bad, // Op_RegI
2976       RAX_H_num,    // Op_RegP
2977       OptoReg::Bad, // Op_RegF
2978       XMM0b_num,    // Op_RegD
2979       RAX_H_num     // Op_RegL
2980     };
2981     // Excluded flags and vector registers.
2982     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
2983     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
2984   %}
2985 %}
2986 
2987 //----------ATTRIBUTES---------------------------------------------------------
2988 //----------Operand Attributes-------------------------------------------------
2989 op_attrib op_cost(0);        // Required cost attribute
                                  // (default 0; operands set their own value,
                                  // e.g. immI uses op_cost(10))
2990
2991 //----------Instruction Attributes---------------------------------------------
2992 ins_attrib ins_cost(100);       // Required cost attribute
2993 ins_attrib ins_size(8);         // Required size attribute (in bits)
2994 ins_attrib ins_short_branch(0); // Required flag: is this instruction
2995                                 // a non-matching short branch variant
2996                                 // of some long branch?
2997 ins_attrib ins_alignment(1);    // Required alignment attribute (must
2998                                 // be a power of 2) specifies the
2999                                 // alignment that some part of the
3000                                 // instruction (not necessarily the
3001                                 // start) requires.  If > 1, a
3002                                 // compute_padding() function must be
3003                                 // provided for the instruction
3004 
3005 //----------OPERANDS-----------------------------------------------------------
3006 // Operand definitions must precede instruction definitions for correct parsing
3007 // in the ADLC because operands constitute user defined types which are used in
3008 // instruction definitions.
3009 
3010 //----------Simple Operands----------------------------------------------------
3011 // Immediate Operands
3012 // Integer Immediate
3013 operand immI()
3014 %{
       // No predicate: accepts every ConI node.  Its op_cost of 10 is higher
       // than that of the value-restricted forms such as immI0 (0) or immI8 (5).
3015   match(ConI);
3016
3017   op_cost(10);
3018   format %{ %}
3019   interface(CONST_INTER);
3020 %}
3021 
3022 // Constant for test vs zero
3023 operand immI0()
3024 %{
       // Accepts a ConI only when its value is exactly 0; op_cost 0.
3025   predicate(n->get_int() == 0);
3026   match(ConI);
3027
3028   op_cost(0);
3029   format %{ %}
3030   interface(CONST_INTER);
3031 %}
3032 
3033 // Constant for increment
3034 operand immI1()
3035 %{
       // Accepts a ConI only when its value is exactly 1; op_cost 0.
3036   predicate(n->get_int() == 1);
3037   match(ConI);
3038
3039   op_cost(0);
3040   format %{ %}
3041   interface(CONST_INTER);
3042 %}
3043 
3044 // Constant for decrement
3045 operand immI_M1()
3046 %{
       // Accepts a ConI only when its value is exactly -1; op_cost 0.
3047   predicate(n->get_int() == -1);
3048   match(ConI);
3049
3050   op_cost(0);
3051   format %{ %}
3052   interface(CONST_INTER);
3053 %}
3054 
3055 // Valid scale values for addressing modes
3056 operand immI2()
3057 %{
       // Accepts a ConI in the closed range [0, 3].  No op_cost is given here,
       // so the op_attrib default applies.
3058   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3059   match(ConI);
3060
3061   format %{ %}
3062   interface(CONST_INTER);
3063 %}
3064 
3065 operand immI8()
3066 %{
       // Integer immediate that fits a signed byte: -0x80 <= value < 0x80.
       // op_cost 5.
3067   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
3068   match(ConI);
3069
3070   op_cost(5);
3071   format %{ %}
3072   interface(CONST_INTER);
3073 %}
3074 
3075 operand immI16()
3076 %{
       // Integer immediate that fits a signed 16-bit value:
       // -32768 <= value <= 32767.  op_cost 10.
3077   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3078   match(ConI);
3079
3080   op_cost(10);
3081   format %{ %}
3082   interface(CONST_INTER);
3083 %}
3084 
3085 // Constant for long shifts
3086 operand immI_32()
3087 %{
       // Accepts a ConI only when its value is exactly 32; op_cost 0.
3088   predicate( n->get_int() == 32 );
3089   match(ConI);
3090
3091   op_cost(0);
3092   format %{ %}
3093   interface(CONST_INTER);
3094 %}
3095 
3096 // Constant for long shifts
3097 operand immI_64()
3098 %{
       // Accepts a ConI only when its value is exactly 64; op_cost 0.
3099   predicate( n->get_int() == 64 );
3100   match(ConI);
3101
3102   op_cost(0);
3103   format %{ %}
3104   interface(CONST_INTER);
3105 %}
3106 
3107 // Pointer Immediate
3108 operand immP()
3109 %{
       // No predicate: accepts every ConP node.  op_cost 10.
3110   match(ConP);
3111
3112   op_cost(10);
3113   format %{ %}
3114   interface(CONST_INTER);
3115 %}
3116 
3117 // NULL Pointer Immediate
3118 operand immP0()
3119 %{
       // Accepts a ConP only when get_ptr() is 0; op_cost 5 (cheaper than immP).
3120   predicate(n->get_ptr() == 0);
3121   match(ConP);
3122
3123   op_cost(5);
3124   format %{ %}
3125   interface(CONST_INTER);
3126 %}
3127 
3128 // Narrow Pointer Immediate
3129 operand immN() %{
       // No predicate: accepts every ConN node (note ConN, not ConP).
       // op_cost 10.
3130   match(ConN);
3131
3132   op_cost(10);
3133   format %{ %}
3134   interface(CONST_INTER);
3135 %}
3136 
3137 operand immNKlass() %{
       // No predicate: accepts every ConNKlass node.  op_cost 10.
3138   match(ConNKlass);
3139
3140   op_cost(10);
3141   format %{ %}
3142   interface(CONST_INTER);
3143 %}
3144 
3145 // Narrow NULL Pointer Immediate
3146 operand immN0() %{
       // Accepts a ConN only when get_narrowcon() is 0; op_cost 5 (cheaper
       // than immN).
3147   predicate(n->get_narrowcon() == 0);
3148   match(ConN);
3149
3150   op_cost(5);
3151   format %{ %}
3152   interface(CONST_INTER);
3153 %}
3154 
3155 operand immP31()
3156 %{
       // Pointer constant that needs no relocation (reloc() == relocInfo::none)
       // and whose value has no bits set at position 31 or above, i.e. it fits
       // in 31 bits.  op_cost 5.
3157   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3158             && (n->get_ptr() >> 31) == 0);
3159   match(ConP);
3160
3161   op_cost(5);
3162   format %{ %}
3163   interface(CONST_INTER);
3164 %}
3165 
3166 
3167 // Long Immediate
3168 operand immL()
3169 %{
       // No predicate: accepts every ConL node.  op_cost 20, the highest among
       // the long immediates defined here.
3170   match(ConL);
3171
3172   op_cost(20);
3173   format %{ %}
3174   interface(CONST_INTER);
3175 %}
3176 
3177 // Long Immediate 8-bit
3178 operand immL8()
3179 %{
       // Long constant that fits a signed byte: -0x80 <= value < 0x80.
       // op_cost 5.
3180   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3181   match(ConL);
3182
3183   op_cost(5);
3184   format %{ %}
3185   interface(CONST_INTER);
3186 %}
3187 
3188 // Long Immediate 32-bit unsigned
3189 operand immUL32()
3190 %{
       // Accepts a ConL whose value survives a round trip through
       // (unsigned int), i.e. it is representable as an unsigned 32-bit value.
       // op_cost 10.
3191   predicate(n->get_long() == (unsigned int) (n->get_long()));
3192   match(ConL);
3193
3194   op_cost(10);
3195   format %{ %}
3196   interface(CONST_INTER);
3197 %}
3198 
3199 // Long Immediate 32-bit signed
3200 operand immL32()
3201 %{
       // Accepts a ConL whose value survives a round trip through (int), i.e.
       // it is representable as a signed 32-bit value.  op_cost 15.
3202   predicate(n->get_long() == (int) (n->get_long()));
3203   match(ConL);
3204
3205   op_cost(15);
3206   format %{ %}
3207   interface(CONST_INTER);
3208 %}
3209 
3210 // Long Immediate zero
3211 operand immL0()
3212 %{
       // Accepts a ConL only when its value is exactly 0; op_cost 10.
3213   predicate(n->get_long() == 0L);
3214   match(ConL);
3215
3216   op_cost(10);
3217   format %{ %}
3218   interface(CONST_INTER);
3219 %}
3220 
3221 // Constant for increment
3222 operand immL1()
3223 %{
       // Accepts a ConL only when its value is exactly 1.  No op_cost is given
       // here, so the op_attrib default applies.
3224   predicate(n->get_long() == 1);
3225   match(ConL);
3226
3227   format %{ %}
3228   interface(CONST_INTER);
3229 %}
3230 
3231 // Constant for decrement
3232 operand immL_M1()
3233 %{
       // Accepts a ConL only when its value is exactly -1.  No op_cost is given
       // here, so the op_attrib default applies.
3234   predicate(n->get_long() == -1);
3235   match(ConL);
3236
3237   format %{ %}
3238   interface(CONST_INTER);
3239 %}
3240 
3241 // Long Immediate: the value 10
3242 operand immL10()
3243 %{
       // Accepts a ConL only when its value is exactly 10.  No op_cost is given
       // here, so the op_attrib default applies.
3244   predicate(n->get_long() == 10);
3245   match(ConL);
3246
3247   format %{ %}
3248   interface(CONST_INTER);
3249 %}
3250 
3251 // Long immediate from 0 to 127.
3252 // Used for a shorter form of long mul by 10.
3253 operand immL_127()
3254 %{
       // Accepts a ConL in the half-open range [0, 0x80), i.e. 0..127.
       // op_cost 10.
3255   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3256   match(ConL);
3257
3258   op_cost(10);
3259   format %{ %}
3260   interface(CONST_INTER);
3261 %}
3262 
3263 // Long Immediate: low 32-bit mask
3264 operand immL_32bits()
3265 %{
       // Accepts a ConL only when its value is exactly 0xFFFFFFFF (all of the
       // low 32 bits set, nothing above).  op_cost 20.
3266   predicate(n->get_long() == 0xFFFFFFFFL);
3267   match(ConL);
3268   op_cost(20);
3269
3270   format %{ %}
3271   interface(CONST_INTER);
3272 %}
3273 
3274 // Float Immediate zero
3275 operand immF0()
3276 %{
       // Accepts a ConF only when jint_cast() of the float is 0.  The test is
       // on the cast integer, not a floating-point comparison -- presumably the
       // raw bit pattern, which would exclude -0.0f; confirm jint_cast.
       // op_cost 5.
3277   predicate(jint_cast(n->getf()) == 0);
3278   match(ConF);
3279
3280   op_cost(5);
3281   format %{ %}
3282   interface(CONST_INTER);
3283 %}
3284 
3285 // Float Immediate
3286 operand immF()
3287 %{
       // No predicate: accepts every ConF node.  op_cost 15.
3288   match(ConF);
3289
3290   op_cost(15);
3291   format %{ %}
3292   interface(CONST_INTER);
3293 %}
3294 
3295 // Double Immediate zero
3296 operand immD0()
3297 %{
       // Accepts a ConD only when jlong_cast() of the double is 0.  The test is
       // on the cast integer, not a floating-point comparison -- presumably the
       // raw bit pattern, which would exclude -0.0; confirm jlong_cast.
       // op_cost 5.
3298   predicate(jlong_cast(n->getd()) == 0);
3299   match(ConD);
3300
3301   op_cost(5);
3302   format %{ %}
3303   interface(CONST_INTER);
3304 %}
3305 
3306 // Double Immediate
3307 operand immD()
3308 %{
       // No predicate: accepts every ConD node.  op_cost 15.
3309   match(ConD);
3310
3311   op_cost(15);
3312   format %{ %}
3313   interface(CONST_INTER);
3314 %}
3315 
3316 // Immediates for special shifts (sign extend)
3317 
3318 // Constant shift counts (16 and 24)
3319 operand immI_16()
3320 %{
       // Accepts a ConI only when its value is exactly 16.  No op_cost is given
       // here, so the op_attrib default applies.
3321   predicate(n->get_int() == 16);
3322   match(ConI);
3323
3324   format %{ %}
3325   interface(CONST_INTER);
3326 %}
3327 
3328 operand immI_24()
3329 %{
       // Accepts a ConI only when its value is exactly 24.  No op_cost is given
       // here, so the op_attrib default applies.
3330   predicate(n->get_int() == 24);
3331   match(ConI);
3332
3333   format %{ %}
3334   interface(CONST_INTER);
3335 %}
3336 
3337 // Constant for byte-wide masking
3338 operand immI_255()
3339 %{
       // Accepts a ConI only when its value is exactly 255 (0xFF).
3340   predicate(n->get_int() == 255);
3341   match(ConI);
3342
3343   format %{ %}
3344   interface(CONST_INTER);
3345 %}
3346 
3347 // Constant for short-wide masking
3348 operand immI_65535()
3349 %{
       // Accepts a ConI only when its value is exactly 65535 (0xFFFF).
3350   predicate(n->get_int() == 65535);
3351   match(ConI);
3352
3353   format %{ %}
3354   interface(CONST_INTER);
3355 %}
3356 
3357 // Constant for byte-wide masking
3358 operand immL_255()
3359 %{
       // Long counterpart of immI_255: accepts a ConL equal to 255 (0xFF).
3360   predicate(n->get_long() == 255);
3361   match(ConL);
3362
3363   format %{ %}
3364   interface(CONST_INTER);
3365 %}
3366 
3367 // Constant for short-wide masking
3368 operand immL_65535()
3369 %{
       // Long counterpart of immI_65535: accepts a ConL equal to 65535 (0xFFFF).
3370   predicate(n->get_long() == 65535);
3371   match(ConL);
3372
3373   format %{ %}
3374   interface(CONST_INTER);
3375 %}
3376 
3377 // Register Operands
3378 // Integer Register
3379 operand rRegI()
3380 %{
       // General int register operand, allocated from register class int_reg.
       // Besides a plain RegI it also accepts the five single-register int
       // operands listed below.
3381   constraint(ALLOC_IN_RC(int_reg));
3382   match(RegI);
3383
3384   match(rax_RegI);
3385   match(rbx_RegI);
3386   match(rcx_RegI);
3387   match(rdx_RegI);
3388   match(rdi_RegI);
3389
3390   format %{ %}
3391   interface(REG_INTER);
3392 %}
3393 
3394 // Special Registers
3395 operand rax_RegI()
3396 %{
       // Int operand allocated from register class int_rax_reg; also accepts an
       // rRegI.  Format string is "RAX".
3397   constraint(ALLOC_IN_RC(int_rax_reg));
3398   match(RegI);
3399   match(rRegI);
3400
3401   format %{ "RAX" %}
3402   interface(REG_INTER);
3403 %}
3404 
3405 // Special Registers
3406 operand rbx_RegI()
3407 %{
       // Int operand allocated from register class int_rbx_reg; also accepts an
       // rRegI.  Format string is "RBX".
3408   constraint(ALLOC_IN_RC(int_rbx_reg));
3409   match(RegI);
3410   match(rRegI);
3411
3412   format %{ "RBX" %}
3413   interface(REG_INTER);
3414 %}
3415 
3416 operand rcx_RegI()
3417 %{
       // Int operand allocated from register class int_rcx_reg; also accepts an
       // rRegI.  Format string is "RCX".
3418   constraint(ALLOC_IN_RC(int_rcx_reg));
3419   match(RegI);
3420   match(rRegI);
3421
3422   format %{ "RCX" %}
3423   interface(REG_INTER);
3424 %}
3425 
3426 operand rdx_RegI()
3427 %{
       // Int operand allocated from register class int_rdx_reg; also accepts an
       // rRegI.  Format string is "RDX".
3428   constraint(ALLOC_IN_RC(int_rdx_reg));
3429   match(RegI);
3430   match(rRegI);
3431
3432   format %{ "RDX" %}
3433   interface(REG_INTER);
3434 %}
3435 
3436 operand rdi_RegI()
3437 %{
       // Int operand allocated from register class int_rdi_reg; also accepts an
       // rRegI.  Format string is "RDI".
3438   constraint(ALLOC_IN_RC(int_rdi_reg));
3439   match(RegI);
3440   match(rRegI);
3441
3442   format %{ "RDI" %}
3443   interface(REG_INTER);
3444 %}
3445 
3446 operand no_rcx_RegI()
3447 %{
       // Int operand allocated from register class int_no_rcx_reg.  Accepts the
       // rax/rbx/rdx/rdi single-register operands; rcx_RegI is deliberately
       // absent from the list.
3448   constraint(ALLOC_IN_RC(int_no_rcx_reg));
3449   match(RegI);
3450   match(rax_RegI);
3451   match(rbx_RegI);
3452   match(rdx_RegI);
3453   match(rdi_RegI);
3454
3455   format %{ %}
3456   interface(REG_INTER);
3457 %}
3458 
3459 operand no_rax_rdx_RegI()
3460 %{
       // Int operand allocated from register class int_no_rax_rdx_reg.  Accepts
       // the rbx/rcx/rdi single-register operands; rax_RegI and rdx_RegI are
       // absent from the list.
3461   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
3462   match(RegI);
3463   match(rbx_RegI);
3464   match(rcx_RegI);
3465   match(rdi_RegI);
3466
3467   format %{ %}
3468   interface(REG_INTER);
3469 %}
3470 
3471 // Pointer Register
3472 operand any_RegP()
3473 %{
       // Pointer operand allocated from register class any_reg.  Accepts every
       // single-register pointer operand listed below as well as rRegP.
3474   constraint(ALLOC_IN_RC(any_reg));
3475   match(RegP);
3476   match(rax_RegP);
3477   match(rbx_RegP);
3478   match(rdi_RegP);
3479   match(rsi_RegP);
3480   match(rbp_RegP);
3481   match(r15_RegP);
3482   match(rRegP);
3483
3484   format %{ %}
3485   interface(REG_INTER);
3486 %}
3487 
3488 operand rRegP()
3489 %{
       // General pointer register operand, allocated from register class
       // ptr_reg.  Accepts the single-register pointer operands listed below,
       // including r15_RegP.
3490   constraint(ALLOC_IN_RC(ptr_reg));
3491   match(RegP);
3492   match(rax_RegP);
3493   match(rbx_RegP);
3494   match(rdi_RegP);
3495   match(rsi_RegP);
3496   match(rbp_RegP);
3497   match(r15_RegP);  // See Q&A below about r15_RegP.
3498
3499   format %{ %}
3500   interface(REG_INTER);
3501 %}
3502 
3503 operand rRegN() %{
       // Register operand for RegN values.  It is allocated from int_reg, the
       // same register class rRegI uses.
3504   constraint(ALLOC_IN_RC(int_reg));
3505   match(RegN);
3506
3507   format %{ %}
3508   interface(REG_INTER);
3509 %}
3510 
3511 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3512 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3513 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3514 // The output of an instruction is controlled by the allocator, which respects
3515 // register class masks, not match rules.  Unless an instruction mentions
3516 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3517 // by the allocator as an input.
3518 
3519 operand no_rax_RegP()
3520 %{
       // Pointer operand allocated from register class ptr_no_rax_reg.  Accepts
       // the rbx/rsi/rdi single-register pointer operands.
3521   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
3522   match(RegP);
3523   match(rbx_RegP);
3524   match(rsi_RegP);
3525   match(rdi_RegP);
3526
3527   format %{ %}
3528   interface(REG_INTER);
3529 %}
3530 
3531 operand no_rbp_RegP()
3532 %{
       // Pointer operand allocated from register class ptr_no_rbp_reg.  Accepts
       // the rbx/rsi/rdi single-register pointer operands.
       // NOTE(review): the match list is the same as no_rax_RegP's, so
       // rax_RegP is not listed although the name only excludes rbp -- confirm
       // whether that is intended.
3533   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
3534   match(RegP);
3535   match(rbx_RegP);
3536   match(rsi_RegP);
3537   match(rdi_RegP);
3538
3539   format %{ %}
3540   interface(REG_INTER);
3541 %}
3542 
3543 operand no_rax_rbx_RegP()
3544 %{
       // Pointer operand allocated from register class ptr_no_rax_rbx_reg.
       // Accepts only the rsi/rdi single-register pointer operands.
3545   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
3546   match(RegP);
3547   match(rsi_RegP);
3548   match(rdi_RegP);
3549
3550   format %{ %}
3551   interface(REG_INTER);
3552 %}
3553 
3554 // Special Registers
3555 // Return a pointer value
3556 operand rax_RegP()
3557 %{
       // Pointer operand allocated from register class ptr_rax_reg; also
       // accepts an rRegP.
3558   constraint(ALLOC_IN_RC(ptr_rax_reg));
3559   match(RegP);
3560   match(rRegP);
3561
3562   format %{ %}
3563   interface(REG_INTER);
3564 %}
3565 
3566 // Special Registers
3567 // Return a compressed pointer value
3568 operand rax_RegN()
3569 %{
       // RegN operand allocated from register class int_rax_reg (the class
       // rax_RegI uses); also accepts an rRegN.
3570   constraint(ALLOC_IN_RC(int_rax_reg));
3571   match(RegN);
3572   match(rRegN);
3573
3574   format %{ %}
3575   interface(REG_INTER);
3576 %}
3577 
3578 // Used in AtomicAdd
3579 operand rbx_RegP()
3580 %{
       // Pointer operand allocated from register class ptr_rbx_reg; also
       // accepts an rRegP.
3581   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3582   match(RegP);
3583   match(rRegP);
3584
3585   format %{ %}
3586   interface(REG_INTER);
3587 %}
3588 
3589 operand rsi_RegP()
3590 %{
       // Pointer operand allocated from register class ptr_rsi_reg; also
       // accepts an rRegP.
3591   constraint(ALLOC_IN_RC(ptr_rsi_reg));
3592   match(RegP);
3593   match(rRegP);
3594
3595   format %{ %}
3596   interface(REG_INTER);
3597 %}
3598 
3599 // Used in rep stosq
3600 operand rdi_RegP()
3601 %{
       // Pointer operand allocated from register class ptr_rdi_reg; also
       // accepts an rRegP.
3602   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3603   match(RegP);
3604   match(rRegP);
3605
3606   format %{ %}
3607   interface(REG_INTER);
3608 %}
3609 
3610 operand rbp_RegP()
3611 %{
       // Pointer operand allocated from register class ptr_rbp_reg; also
       // accepts an rRegP.
3612   constraint(ALLOC_IN_RC(ptr_rbp_reg));
3613   match(RegP);
3614   match(rRegP);
3615
3616   format %{ %}
3617   interface(REG_INTER);
3618 %}
3619 
3620 operand r15_RegP()
3621 %{
       // Pointer operand allocated from register class ptr_r15_reg; also
       // accepts an rRegP.  The Q&A comment that follows rRegN describes r15
       // as the read-only TLS register.
3622   constraint(ALLOC_IN_RC(ptr_r15_reg));
3623   match(RegP);
3624   match(rRegP);
3625
3626   format %{ %}
3627   interface(REG_INTER);
3628 %}
3629 
3630 operand rRegL()
3631 %{
       // General long register operand, allocated from register class
       // long_reg.  Also accepts rax_RegL and rdx_RegL; rcx_RegL is not listed.
3632   constraint(ALLOC_IN_RC(long_reg));
3633   match(RegL);
3634   match(rax_RegL);
3635   match(rdx_RegL);
3636
3637   format %{ %}
3638   interface(REG_INTER);
3639 %}
3640 
3641 // Special Registers: long operand allocated from long_no_rax_rdx_reg (presumably excludes RAX and RDX, per the class name)
3642 operand no_rax_rdx_RegL()
3643 %{
3644   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3645   match(RegL);
3646   match(rRegL);  // a general long operand is also accepted
3647 
3648   format %{ %}
3649   interface(REG_INTER);
3650 %}
3651 
3652 operand no_rax_RegL()  // Long operand intended to avoid RAX (per its name)
3653 %{
3654   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): uses the no-RAX-and-no-RDX class although rdx_RegL is matched below -- confirm intended
3655   match(RegL);
3656   match(rRegL);
3657   match(rdx_RegL);
3658 
3659   format %{ %}
3660   interface(REG_INTER);
3661 %}
3662 
3663 operand no_rcx_RegL()  // Long operand allocated from long_no_rcx_reg (presumably excludes RCX, per the class name)
3664 %{
3665   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3666   match(RegL);
3667   match(rRegL);  // a general long operand is also accepted
3668 
3669   format %{ %}
3670   interface(REG_INTER);
3671 %}
3672 
3673 operand rax_RegL()  // Long operand constrained to register class long_rax_reg; printed as "RAX"
3674 %{
3675   constraint(ALLOC_IN_RC(long_rax_reg));
3676   match(RegL);
3677   match(rRegL);  // a general long operand is also accepted
3678 
3679   format %{ "RAX" %}
3680   interface(REG_INTER);
3681 %}
3682 
3683 operand rcx_RegL()  // Long operand constrained to register class long_rcx_reg
3684 %{
3685   constraint(ALLOC_IN_RC(long_rcx_reg));
3686   match(RegL);
3687   match(rRegL);  // a general long operand is also accepted
3688 
3689   format %{ %}
3690   interface(REG_INTER);
3691 %}
3692 
3693 operand rdx_RegL()  // Long operand constrained to register class long_rdx_reg
3694 %{
3695   constraint(ALLOC_IN_RC(long_rdx_reg));
3696   match(RegL);
3697   match(rRegL);  // a general long operand is also accepted
3698 
3699   format %{ %}
3700   interface(REG_INTER);
3701 %}
3702 
3703 // Flags register operand (class int_flags), used as output of compare instructions; printed as "RFLAGS"
3704 operand rFlagsReg()
3705 %{
3706   constraint(ALLOC_IN_RC(int_flags));
3707   match(RegFlags);
3708 
3709   format %{ "RFLAGS" %}
3710   interface(REG_INTER);
3711 %}
3712 
3713 // Flags register operand (same int_flags class as rFlagsReg), used as output of FLOATING POINT compare instructions; the _U suffix presumably also covers unsigned compares -- confirm against users
3714 operand rFlagsRegU()
3715 %{
3716   constraint(ALLOC_IN_RC(int_flags));
3717   match(RegFlags);
3718 
3719   format %{ "RFLAGS_U" %}
3720   interface(REG_INTER);
3721 %}
3722 
3723 operand rFlagsRegUCF() %{  // Flags register operand (class int_flags) printed as "RFLAGS_U_CF"
3724   constraint(ALLOC_IN_RC(int_flags));
3725   match(RegFlags);
3726   predicate(false);  // constant-false predicate: presumably keeps the matcher from picking this operand on its own, so it is only used where an instruct names it explicitly
3727 
3728   format %{ "RFLAGS_U_CF" %}
3729   interface(REG_INTER);
3730 %}
3731 
3732 // Float register operand: matches RegF and is allocated from register class float_reg
3733 operand regF()
3734 %{
3735   constraint(ALLOC_IN_RC(float_reg));
3736   match(RegF);
3737 
3738   format %{ %}
3739   interface(REG_INTER);
3740 %}
3741 
3742 // Double register operand: matches RegD and is allocated from register class double_reg
3743 operand regD()
3744 %{
3745   constraint(ALLOC_IN_RC(double_reg));
3746   match(RegD);
3747 
3748   format %{ %}
3749   interface(REG_INTER);
3750 %}
3751 
3752 //----------Memory Operands----------------------------------------------------
3753 // Direct Memory Operand
3754 // operand direct(immP addr)
3755 // %{
3756 //   match(addr);
3757 
3758 //   format %{ "[$addr]" %}
3759 //   interface(MEMORY_INTER) %{
3760 //     base(0xFFFFFFFF);
3761 //     index(0x4);
3762 //     scale(0x0);
3763 //     disp($addr);
3764 //   %}
3765 // %}
3766 
3767 // Indirect Memory Operand: address is the pointer register itself, [reg]
3768 operand indirect(any_RegP reg)
3769 %{
3770   constraint(ALLOC_IN_RC(ptr_reg));
3771   match(reg);
3772 
3773   format %{ "[$reg]" %}
3774   interface(MEMORY_INTER) %{
3775     base($reg);
3776     index(0x4);  // 0x4: no index register
3777     scale(0x0);  // no scale
3778     disp(0x0);   // no displacement
3779   %}
3780 %}
3781 
3782 // Indirect Memory Plus Short Offset Operand: [reg + off] where off is an immL8 constant
3783 operand indOffset8(any_RegP reg, immL8 off)
3784 %{
3785   constraint(ALLOC_IN_RC(ptr_reg));
3786   match(AddP reg off);
3787 
3788   format %{ "[$reg + $off (8-bit)]" %}
3789   interface(MEMORY_INTER) %{
3790     base($reg);
3791     index(0x4);  // 0x4: no index register
3792     scale(0x0);  // no scale
3793     disp($off);
3794   %}
3795 %}
3796 
3797 // Indirect Memory Plus Long Offset Operand: [reg + off] where off is an immL32 constant
3798 operand indOffset32(any_RegP reg, immL32 off)
3799 %{
3800   constraint(ALLOC_IN_RC(ptr_reg));
3801   match(AddP reg off);
3802 
3803   format %{ "[$reg + $off (32-bit)]" %}
3804   interface(MEMORY_INTER) %{
3805     base($reg);
3806     index(0x4);  // 0x4: no index register
3807     scale(0x0);  // no scale
3808     disp($off);
3809   %}
3810 %}
3811 
3812 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
3813 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3814 %{
3815   constraint(ALLOC_IN_RC(ptr_reg));
3816   match(AddP (AddP reg lreg) off);
3817 
3818   op_cost(10);
3819   format %{"[$reg + $off + $lreg]" %}
3820   interface(MEMORY_INTER) %{
3821     base($reg);
3822     index($lreg);
3823     scale(0x0);  // index is used unscaled
3824     disp($off);
3825   %}
3826 %}
3827 
3828 // Indirect Memory Plus Index Register Operand (no offset): [reg + lreg]
3829 operand indIndex(any_RegP reg, rRegL lreg)
3830 %{
3831   constraint(ALLOC_IN_RC(ptr_reg));
3832   match(AddP reg lreg);
3833 
3834   op_cost(10);
3835   format %{"[$reg + $lreg]" %}
3836   interface(MEMORY_INTER) %{
3837     base($reg);
3838     index($lreg);
3839     scale(0x0);  // index is used unscaled
3840     disp(0x0);   // no displacement
3841   %}
3842 %}
3843 
3844 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)]
3845 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3846 %{
3847   constraint(ALLOC_IN_RC(ptr_reg));
3848   match(AddP reg (LShiftL lreg scale));
3849 
3850   op_cost(10);
3851   format %{"[$reg + $lreg << $scale]" %}
3852   interface(MEMORY_INTER) %{
3853     base($reg);
3854     index($lreg);
3855     scale($scale);  // the matched shift count becomes the address scale
3856     disp(0x0);      // no displacement
3857   %}
3858 %}
3859 
3860 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
3861 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3862 %{
3863   constraint(ALLOC_IN_RC(ptr_reg));
3864   match(AddP (AddP reg (LShiftL lreg scale)) off);
3865 
3866   op_cost(10);
3867   format %{"[$reg + $off + $lreg << $scale]" %}
3868   interface(MEMORY_INTER) %{
3869     base($reg);
3870     index($lreg);
3871     scale($scale);  // the matched shift count becomes the address scale
3872     disp($off);
3873   %}
3874 %}
3875 
3876 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: like indIndexScaleOffset, but the index is an int register widened by ConvI2L
3877 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3878 %{
3879   constraint(ALLOC_IN_RC(ptr_reg));
3880   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // walks n's inputs (presumably down to the ConvI2L of the match rule -- confirm) and requires that node's long type to have a lower bound >= 0
3881   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3882 
3883   op_cost(10);
3884   format %{"[$reg + $off + $idx << $scale]" %}
3885   interface(MEMORY_INTER) %{
3886     base($reg);
3887     index($idx);    // the int register is used directly as the index
3888     scale($scale);
3889     disp($off);
3890   %}
3891 %}
3892 
3893 // Indirect Narrow Oop Plus Offset Operand: [R12 + (reg << 3) + off], matched only when the narrow-oop shift is times_8
3894 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
3895 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
3896 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3897   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3898   constraint(ALLOC_IN_RC(ptr_reg));
3899   match(AddP (DecodeN reg) off);
3900 
3901   op_cost(10);
3902   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3903   interface(MEMORY_INTER) %{
3904     base(0xc); // R12
3905     index($reg);  // the narrow oop itself is the index, so the DecodeN is folded into the address
3906     scale(0x3);   // index << 3
3907     disp($off);
3908   %}
3909 %}
3910 
3911 // Indirect Narrow Oop Operand: [reg] on a DecodeN, matched only when the narrow-oop shift is 0
3912 operand indirectNarrow(rRegN reg)
3913 %{
3914   predicate(Universe::narrow_oop_shift() == 0);
3915   constraint(ALLOC_IN_RC(ptr_reg));
3916   match(DecodeN reg);
3917 
3918   format %{ "[$reg]" %}
3919   interface(MEMORY_INTER) %{
3920     base($reg);  // the narrow register is used directly as the base
3921     index(0x4);  // 0x4: no index register
3922     scale(0x0);  // no scale
3923     disp(0x0);   // no displacement
3924   %}
3925 %}
3926 
3927 // Indirect Narrow Oop Plus Short Offset Operand: [reg + off] on a DecodeN, matched only when the narrow-oop shift is 0
3928 operand indOffset8Narrow(rRegN reg, immL8 off)
3929 %{
3930   predicate(Universe::narrow_oop_shift() == 0);
3931   constraint(ALLOC_IN_RC(ptr_reg));
3932   match(AddP (DecodeN reg) off);
3933 
3934   format %{ "[$reg + $off (8-bit)]" %}
3935   interface(MEMORY_INTER) %{
3936     base($reg);  // the narrow register is used directly as the base
3937     index(0x4);  // 0x4: no index register
3938     scale(0x0);  // no scale
3939     disp($off);
3940   %}
3941 %}
3942 
3943 // Indirect Narrow Oop Plus Long Offset Operand: [reg + off] on a DecodeN, matched only when the narrow-oop shift is 0
3944 operand indOffset32Narrow(rRegN reg, immL32 off)
3945 %{
3946   predicate(Universe::narrow_oop_shift() == 0);
3947   constraint(ALLOC_IN_RC(ptr_reg));
3948   match(AddP (DecodeN reg) off);
3949 
3950   format %{ "[$reg + $off (32-bit)]" %}
3951   interface(MEMORY_INTER) %{
3952     base($reg);  // the narrow register is used directly as the base
3953     index(0x4);  // 0x4: no index register
3954     scale(0x0);  // no scale
3955     disp($off);
3956   %}
3957 %}
3958 
3959 // Indirect Narrow Oop Plus Index Register Plus Offset Operand: [reg + lreg + off], matched only when the narrow-oop shift is 0
3960 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3961 %{
3962   predicate(Universe::narrow_oop_shift() == 0);
3963   constraint(ALLOC_IN_RC(ptr_reg));
3964   match(AddP (AddP (DecodeN reg) lreg) off);
3965 
3966   op_cost(10);
3967   format %{"[$reg + $off + $lreg]" %}
3968   interface(MEMORY_INTER) %{
3969     base($reg);  // the narrow register is used directly as the base
3970     index($lreg);
3971     scale(0x0);  // index is used unscaled
3972     disp($off);
3973   %}
3974 %}
3975 
3976 // Indirect Narrow Oop Plus Index Register Operand (no offset): [reg + lreg], matched only when the narrow-oop shift is 0
3977 operand indIndexNarrow(rRegN reg, rRegL lreg)
3978 %{
3979   predicate(Universe::narrow_oop_shift() == 0);
3980   constraint(ALLOC_IN_RC(ptr_reg));
3981   match(AddP (DecodeN reg) lreg);
3982 
3983   op_cost(10);
3984   format %{"[$reg + $lreg]" %}
3985   interface(MEMORY_INTER) %{
3986     base($reg);  // the narrow register is used directly as the base
3987     index($lreg);
3988     scale(0x0);  // index is used unscaled
3989     disp(0x0);   // no displacement
3990   %}
3991 %}
3992 
3993 // Indirect Narrow Oop Times Scale Plus Index Register: [reg + (lreg << scale)], matched only when the narrow-oop shift is 0
3994 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
3995 %{
3996   predicate(Universe::narrow_oop_shift() == 0);
3997   constraint(ALLOC_IN_RC(ptr_reg));
3998   match(AddP (DecodeN reg) (LShiftL lreg scale));
3999 
4000   op_cost(10);
4001   format %{"[$reg + $lreg << $scale]" %}
4002   interface(MEMORY_INTER) %{
4003     base($reg);     // the narrow register is used directly as the base
4004     index($lreg);
4005     scale($scale);  // the matched shift count becomes the address scale
4006     disp(0x0);      // no displacement
4007   %}
4008 %}
4009 
4010 // Indirect Narrow Oop Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off], matched only when the narrow-oop shift is 0
4011 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4012 %{
4013   predicate(Universe::narrow_oop_shift() == 0);
4014   constraint(ALLOC_IN_RC(ptr_reg));
4015   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4016 
4017   op_cost(10);
4018   format %{"[$reg + $off + $lreg << $scale]" %}
4019   interface(MEMORY_INTER) %{
4020     base($reg);     // the narrow register is used directly as the base
4021     index($lreg);
4022     scale($scale);  // the matched shift count becomes the address scale
4023     disp($off);
4024   %}
4025 %}
4026 
4027 // Indirect Narrow Oop Times Scale Plus Positive Index Register Plus Offset Operand: the index is an int register widened by ConvI2L
4028 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4029 %{
4030   constraint(ALLOC_IN_RC(ptr_reg));
4031   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // needs a zero narrow-oop shift and, on the node reached via in(2)->in(3)->in(1) (presumably the ConvI2L -- confirm), a long type with lower bound >= 0
4032   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4033 
4034   op_cost(10);
4035   format %{"[$reg + $off + $idx << $scale]" %}
4036   interface(MEMORY_INTER) %{
4037     base($reg);     // the narrow register is used directly as the base
4038     index($idx);    // the int register is used directly as the index
4039     scale($scale);
4040     disp($off);
4041   %}
4042 %}
4043 
4044 //----------Special Memory Operands--------------------------------------------
4045 // Stack Slot Operand - This operand is used for loading and storing temporary
4046 //                      values on the stack where a match requires a value to
4047 //                      flow through memory.  This variant takes a pointer stack slot (sRegP).
4048 operand stackSlotP(sRegP reg)
4049 %{
4050   constraint(ALLOC_IN_RC(stack_slots));
4051   // No match rule because this operand is only generated in matching
4052 
4053   format %{ "[$reg]" %}
4054   interface(MEMORY_INTER) %{
4055     base(0x4);   // RSP
4056     index(0x4);  // No Index
4057     scale(0x0);  // No Scale
4058     disp($reg);  // Stack Offset
4059   %}
4060 %}
4061 
4062 operand stackSlotI(sRegI reg)  // Stack slot operand for an int stack slot (sRegI): RSP-relative, displacement taken from $reg
4063 %{
4064   constraint(ALLOC_IN_RC(stack_slots));
4065   // No match rule because this operand is only generated in matching
4066 
4067   format %{ "[$reg]" %}
4068   interface(MEMORY_INTER) %{
4069     base(0x4);   // RSP
4070     index(0x4);  // No Index
4071     scale(0x0);  // No Scale
4072     disp($reg);  // Stack Offset
4073   %}
4074 %}
4075 
4076 operand stackSlotF(sRegF reg)  // Stack slot operand for a float stack slot (sRegF): RSP-relative, displacement taken from $reg
4077 %{
4078   constraint(ALLOC_IN_RC(stack_slots));
4079   // No match rule because this operand is only generated in matching
4080 
4081   format %{ "[$reg]" %}
4082   interface(MEMORY_INTER) %{
4083     base(0x4);   // RSP
4084     index(0x4);  // No Index
4085     scale(0x0);  // No Scale
4086     disp($reg);  // Stack Offset
4087   %}
4088 %}
4089 
4090 operand stackSlotD(sRegD reg)  // Stack slot operand for a double stack slot (sRegD): RSP-relative, displacement taken from $reg
4091 %{
4092   constraint(ALLOC_IN_RC(stack_slots));
4093   // No match rule because this operand is only generated in matching
4094 
4095   format %{ "[$reg]" %}
4096   interface(MEMORY_INTER) %{
4097     base(0x4);   // RSP
4098     index(0x4);  // No Index
4099     scale(0x0);  // No Scale
4100     disp($reg);  // Stack Offset
4101   %}
4102 %}
4103 operand stackSlotL(sRegL reg)  // Stack slot operand for a long stack slot (sRegL): RSP-relative, displacement taken from $reg
4104 %{
4105   constraint(ALLOC_IN_RC(stack_slots));
4106   // No match rule because this operand is only generated in matching
4107 
4108   format %{ "[$reg]" %}
4109   interface(MEMORY_INTER) %{
4110     base(0x4);   // RSP
4111     index(0x4);  // No Index
4112     scale(0x0);  // No Scale
4113     disp($reg);  // Stack Offset
4114   %}
4115 %}
4116 
4117 //----------Conditional Branch Operands----------------------------------------
4118 // Comparison Op  - This is the operation of the comparison, and is limited to
4119 //                  the following set of codes:
4120 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4121 //
4122 // Other attributes of the comparison, such as unsignedness, are specified
4123 // by the comparison instruction that sets a condition code flags register.
4124 // That result is represented by a flags operand whose subtype is appropriate
4125 // to the unsignedness (etc.) of the comparison.
4126 //
4127 // Later, the instruction which matches both the Comparison Op (a Bool) and
4128 // the flags (produced by the Cmp) specifies the coding of the comparison op
4129 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4130 
4131 // Comparison Code, signed compare: maps each Bool test to an x86 condition-code value and its mnemonic suffix
4132 operand cmpOp()
4133 %{
4134   match(Bool);
4135 
4136   format %{ "" %}
4137   interface(COND_INTER) %{
4138     equal(0x4, "e");
4139     not_equal(0x5, "ne");
4140     less(0xC, "l");            // signed conditions: l / ge / le / g
4141     greater_equal(0xD, "ge");
4142     less_equal(0xE, "le");
4143     greater(0xF, "g");
4144     overflow(0x0, "o");
4145     no_overflow(0x1, "no");
4146   %}
4147 %}
4148 
4149 // Comparison Code, unsigned compare.  Used by FP also, with
4150 // C2 (unordered) turned into GT or LT already.  The other bits
4151 // C0 and C3 are turned into Carry & Zero flags.
4152 operand cmpOpU()
4153 %{
4154   match(Bool);
4155 
4156   format %{ "" %}
4157   interface(COND_INTER) %{
4158     equal(0x4, "e");
4159     not_equal(0x5, "ne");
4160     less(0x2, "b");            // unsigned conditions: b / nb / be / nbe (below / not below / ...)
4161     greater_equal(0x3, "nb");
4162     less_equal(0x6, "be");
4163     greater(0x7, "nbe");
4164     overflow(0x0, "o");
4165     no_overflow(0x1, "no");
4166   %}
4167 %}
4168 
4169 
4170 // Floating comparisons that don't require any fixup for the unordered case: only lt, ge, le and gt tests are accepted
4171 operand cmpOpUCF() %{
4172   match(Bool);
4173   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4174             n->as_Bool()->_test._test == BoolTest::ge ||
4175             n->as_Bool()->_test._test == BoolTest::le ||
4176             n->as_Bool()->_test._test == BoolTest::gt);
4177   format %{ "" %}
4178   interface(COND_INTER) %{
4179     equal(0x4, "e");
4180     not_equal(0x5, "ne");
4181     less(0x2, "b");            // same unsigned-style encodings as cmpOpU
4182     greater_equal(0x3, "nb");
4183     less_equal(0x6, "be");
4184     greater(0x7, "nbe");
4185     overflow(0x0, "o");
4186     no_overflow(0x1, "no");
4187   %}
4188 %}
4189 
4190 
4191 // Floating comparisons that can be fixed up with extra conditional jumps: only ne and eq tests are accepted
4192 operand cmpOpUCF2() %{
4193   match(Bool);
4194   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4195             n->as_Bool()->_test._test == BoolTest::eq);
4196   format %{ "" %}
4197   interface(COND_INTER) %{
4198     equal(0x4, "e");
4199     not_equal(0x5, "ne");
4200     less(0x2, "b");            // same unsigned-style encodings as cmpOpU
4201     greater_equal(0x3, "nb");
4202     less_equal(0x6, "be");
4203     greater(0x7, "nbe");
4204     overflow(0x0, "o");
4205     no_overflow(0x1, "no");
4206   %}
4207 %}
4208 
4209 
4210 //----------OPERAND CLASSES----------------------------------------------------
4211 // Operand Classes are groups of operands that are used to simplify
4212 // instruction definitions by not requiring the AD writer to specify separate
4213 // instructions for every form of operand when the instruction accepts
4214 // multiple operand types with the same basic encoding and format.  The classic
4215 // case of this is memory operands.
4216 
4217 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // plain-pointer addressing forms
4218                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4219                indCompressedOopOffset,  // R12-based compressed-oop form
4220                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // narrow-oop forms matched when the narrow-oop shift is 0
4221                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4222                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
4223 
4224 //----------PIPELINE-----------------------------------------------------------
4225 // Rules which define the behavior of the target architectures pipeline.
4226 pipeline %{
4227 
4228 //----------ATTRIBUTES---------------------------------------------------------
4229 attributes %{
4230   variable_size_instructions;        // Instructions are variable size
4231   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4232   instruction_unit_size = 1;         // Instruction size is measured in units of 1 byte
4233   instruction_fetch_unit_size = 16;  // The processor fetches one line
4234   instruction_fetch_units = 1;       // of 16 bytes
4235 
4236   // List of nop instructions
4237   nops( MachNop );
4238 %}
4239 
4240 //----------RESOURCES----------------------------------------------------------
4241 // Resources are the functional units available to the machine
4242 
4243 // Generic P2/P3 pipeline
4244 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4245 // 3 instructions decoded per cycle.
4246 // 2 load/store ops per cycle (NOTE(review): three units MS0-MS2 are declared below -- confirm), 1 branch, 1 FPU,
4247 // 3 ALU op, only ALU0 handles mul instructions.
4248 resources( D0, D1, D2, DECODE = D0 | D1 | D2,     // DECODE: any of the three decoders
4249            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // MEM: any of the three memory units
4250            BR, FPU,
4251            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALU: any of the three ALUs
4252 
4253 //----------PIPELINE DESCRIPTION-----------------------------------------------
4254 // Pipeline Description specifies the stages in the machine's pipeline
4255 
4256 // Generic P2/P3 pipeline
4257 pipe_desc(S0, S1, S2, S3, S4, S5);
4258 
4259 //----------PIPELINE CLASSES---------------------------------------------------
4260 // Pipeline Classes describe the stages in which input and output are
4261 // referenced by the hardware pipeline.
4262 
4263 // Naming convention: ialu or fpu
4264 // Then: _reg
4265 // Then: _reg if there is a 2nd register
4266 // Then: _long if it's a pair of instructions implementing a long
4267 // Then: _fat if it requires the big decoder
4268 //   Or: _mem if it requires the big decoder and a memory unit.
4269 
4270 // Integer ALU reg operation
4271 pipe_class ialu_reg(rRegI dst)
4272 %{
4273     single_instruction;
4274     dst    : S4(write);
4275     dst    : S3(read);
4276     DECODE : S0;        // any decoder
4277     ALU    : S3;        // any alu
4278 %}
4279 
4280 // Long ALU reg operation
4281 pipe_class ialu_reg_long(rRegL dst)
4282 %{
4283     instruction_count(2);
4284     dst    : S4(write);
4285     dst    : S3(read);
4286     DECODE : S0(2);     // any 2 decoders
4287     ALU    : S3(2);     // both alus
4288 %}
4289 
4290 // Integer ALU reg operation using big decoder
4291 pipe_class ialu_reg_fat(rRegI dst)
4292 %{
4293     single_instruction;
4294     dst    : S4(write);
4295     dst    : S3(read);
4296     D0     : S0;        // big decoder only
4297     ALU    : S3;        // any alu
4298 %}
4299 
4300 // Long ALU reg operation using big decoder
4301 pipe_class ialu_reg_long_fat(rRegL dst)
4302 %{
4303     instruction_count(2);
4304     dst    : S4(write);
4305     dst    : S3(read);
4306     D0     : S0(2);     // big decoder only; twice
4307     ALU    : S3(2);     // any 2 alus
4308 %}
4309 
4310 // Integer ALU reg-reg operation
4311 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4312 %{
4313     single_instruction;
4314     dst    : S4(write);
4315     src    : S3(read);
4316     DECODE : S0;        // any decoder
4317     ALU    : S3;        // any alu
4318 %}
4319 
4320 // Long ALU reg-reg operation
4321 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4322 %{
4323     instruction_count(2);
4324     dst    : S4(write);
4325     src    : S3(read);
4326     DECODE : S0(2);     // any 2 decoders
4327     ALU    : S3(2);     // both alus
4328 %}
4329 
4330 // Integer ALU operation using the big decoder; despite the reg_reg name, src is declared as a memory operand
4331 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4332 %{
4333     single_instruction;
4334     dst    : S4(write);
4335     src    : S3(read);
4336     D0     : S0;        // big decoder only
4337     ALU    : S3;        // any alu
4338 %}
4339 
4340 // Long ALU reg-reg operation
4341 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4342 %{
4343     instruction_count(2);
4344     dst    : S4(write);
4345     src    : S3(read);
4346     D0     : S0(2);     // big decoder only; twice
4347     ALU    : S3(2);     // both alus
4348 %}
4349 
4350 // Integer ALU reg-mem operation
4351 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4352 %{
4353     single_instruction;
4354     dst    : S5(write);
4355     mem    : S3(read);
4356     D0     : S0;        // big decoder only
4357     ALU    : S4;        // any alu
4358     MEM    : S3;        // any mem
4359 %}
4360 
4361 // Integer mem operation (prefetch)
4362 pipe_class ialu_mem(memory mem)
4363 %{
4364     single_instruction;
4365     mem    : S3(read);
4366     D0     : S0;        // big decoder only
4367     MEM    : S3;        // any mem
4368 %}
4369 
4370 // Integer Store to Memory
4371 pipe_class ialu_mem_reg(memory mem, rRegI src)
4372 %{
4373     single_instruction;
4374     mem    : S3(read);
4375     src    : S5(read);
4376     D0     : S0;        // big decoder only
4377     ALU    : S4;        // any alu
4378     MEM    : S3;
4379 %}
4380 
4381 // // Long Store to Memory
4382 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4383 // %{
4384 //     instruction_count(2);
4385 //     mem    : S3(read);
4386 //     src    : S5(read);
4387 //     D0     : S0(2);          // big decoder only; twice
4388 //     ALU    : S4(2);     // any 2 alus
4389 //     MEM    : S3(2);  // Both mems
4390 // %}
4391 
4392 // Integer store to memory with no register source (immediate form, judging by the class name)
4393 pipe_class ialu_mem_imm(memory mem)
4394 %{
4395     single_instruction;
4396     mem    : S3(read);
4397     D0     : S0;        // big decoder only
4398     ALU    : S4;        // any alu
4399     MEM    : S3;        // any mem
4400 %}
4401 
4402 // Integer ALU0 reg-reg operation
4403 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4404 %{
4405     single_instruction;
4406     dst    : S4(write);
4407     src    : S3(read);
4408     D0     : S0;        // Big decoder only
4409     ALU0   : S3;        // only alu0
4410 %}
4411 
4412 // Integer ALU0 reg-mem operation
4413 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4414 %{
4415     single_instruction;
4416     dst    : S5(write);
4417     mem    : S3(read);
4418     D0     : S0;        // big decoder only
4419     ALU0   : S4;        // ALU0 only
4420     MEM    : S3;        // any mem
4421 %}
4422 
4423 // Integer ALU reg-reg operation
4424 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4425 %{
4426     single_instruction;
4427     cr     : S4(write);
4428     src1   : S3(read);
4429     src2   : S3(read);
4430     DECODE : S0;        // any decoder
4431     ALU    : S3;        // any alu
4432 %}
4433 
4434 // Integer ALU reg-imm operation
4435 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4436 %{
4437     single_instruction;
4438     cr     : S4(write);
4439     src1   : S3(read);
4440     DECODE : S0;        // any decoder
4441     ALU    : S3;        // any alu
4442 %}
4443 
4444 // Integer ALU reg-mem operation
4445 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4446 %{
4447     single_instruction;
4448     cr     : S4(write);
4449     src1   : S3(read);
4450     src2   : S3(read);
4451     D0     : S0;        // big decoder only
4452     ALU    : S4;        // any alu
4453     MEM    : S3;
4454 %}
4455 
4456 // Four-instruction sequence that reads p, q and y (a compare-less-than idiom, judging by the class name)
4457 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4458 %{
4459     instruction_count(4);
4460     y      : S4(read);
4461     q      : S3(read);
4462     p      : S3(read);
4463     DECODE : S0(4);     // any decoder; count of 4 matches instruction_count
4464 %}
4465 
4466 // Conditional move reg-reg
4467 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4468 %{
4469     single_instruction;
4470     dst    : S4(write);
4471     src    : S3(read);
4472     cr     : S3(read);
4473     DECODE : S0;        // any decoder
4474 %}
4475 
4476 // Conditional move reg-mem
4477 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4478 %{
4479     single_instruction;
4480     dst    : S4(write);
4481     src    : S3(read);
4482     cr     : S3(read);
4483     DECODE : S0;        // any decoder
4484     MEM    : S3;
4485 %}
4486 
4487 // Conditional move reg-reg long: dst written in S4, src and flags read in S3
4488 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4489 %{
4490     single_instruction;
4491     dst    : S4(write);
4492     src    : S3(read);
4493     cr     : S3(read);
4494     DECODE : S0(2);     // any 2 decoders (NOTE(review): the class is marked single_instruction yet uses a decoder count of 2 -- confirm)
4495 %}
4496 
4497 // XXX
4498 // // Conditional move double reg-reg
4499 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4500 // %{
4501 //     single_instruction;
4502 //     dst    : S4(write);
4503 //     src    : S3(read);
4504 //     cr     : S3(read);
4505 //     DECODE : S0;     // any decoder
4506 // %}
4507 
4508 // Float single-register operation: dst is only declared as read (S3); no write stage is listed
4509 pipe_class fpu_reg(regD dst)
4510 %{
4511     instruction_count(2);
4512     dst    : S3(read);
4513     DECODE : S0(2);     // any 2 decoders
4514     FPU    : S3;
4515 %}
4516 
4517 // Float reg-reg operation
4518 pipe_class fpu_reg_reg(regD dst, regD src)
4519 %{
4520     instruction_count(2);
4521     dst    : S4(write);
4522     src    : S3(read);
4523     DECODE : S0(2);     // any 2 decoders
4524     FPU    : S3;
4525 %}
4526 
4527 // Float three-register operation: dst written in S4, src1 and src2 read in S3
4528 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4529 %{
4530     instruction_count(3);
4531     dst    : S4(write);
4532     src1   : S3(read);
4533     src2   : S3(read);
4534     DECODE : S0(3);     // any 3 decoders
4535     FPU    : S3(2);
4536 %}
4537 
4538 // Float four-register operation: dst written in S4, src1..src3 read in S3
4539 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4540 %{
4541     instruction_count(4);
4542     dst    : S4(write);
4543     src1   : S3(read);
4544     src2   : S3(read);
4545     src3   : S3(read);
4546     DECODE : S0(4);     // any decoder; count of 4 matches instruction_count
4547     FPU    : S3(2);
4548 %}
4549 
4550 // Float operation with one memory source (src1) and two register sources (src2, src3)
4551 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4552 %{
4553     instruction_count(4);
4554     dst    : S4(write);
4555     src1   : S3(read);
4556     src2   : S3(read);
4557     src3   : S3(read);
4558     DECODE : S1(3);     // any 3 decoders
4559     D0     : S0;        // Big decoder only
4560     FPU    : S3(2);
4561     MEM    : S3;        // any mem
4562 %}
4563 
4564 // Float reg-mem operation
4565 pipe_class fpu_reg_mem(regD dst, memory mem)
4566 %{
4567     instruction_count(2);
4568     dst    : S5(write);
4569     mem    : S3(read);
4570     D0     : S0;        // big decoder only
4571     DECODE : S1;        // any decoder for FPU POP
4572     FPU    : S4;
4573     MEM    : S3;        // any mem
4574 %}
4575 
4576 // Float reg-mem operation
4577 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4578 %{
4579     instruction_count(3);
4580     dst    : S5(write);
4581     src1   : S3(read);
4582     mem    : S3(read);
4583     D0     : S0;        // big decoder only
4584     DECODE : S1(2);     // any decoder for FPU POP
4585     FPU    : S4;
4586     MEM    : S3;        // any mem
4587 %}
4588 
4589 // Float mem-reg operation
4590 pipe_class fpu_mem_reg(memory mem, regD src)
4591 %{
4592     instruction_count(2);
4593     src    : S5(read);
4594     mem    : S3(read);
4595     DECODE : S0;        // any decoder for FPU PUSH
4596     D0     : S1;        // big decoder only
4597     FPU    : S4;
4598     MEM    : S3;        // any mem
4599 %}
4600 
4601 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float operation with a memory operand and two register sources; all three are declared as read in S3
4602 %{
4603     instruction_count(3);
4604     src1   : S3(read);
4605     src2   : S3(read);
4606     mem    : S3(read);
4607     DECODE : S0(2);     // any decoder for FPU PUSH
4608     D0     : S1;        // big decoder only
4609     FPU    : S4;
4610     MEM    : S3;        // any mem
4611 %}
4612 
4613 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float operation with two memory operands (mem, src2) and one register source (src1)
4614 %{
4615     instruction_count(3);
4616     src1   : S3(read);
4617     src2   : S3(read);
4618     mem    : S4(read);  // mem is referenced one stage after the sources
4619     DECODE : S0;        // any decoder for FPU PUSH
4620     D0     : S0(2);     // big decoder only
4621     FPU    : S4;
4622     MEM    : S3(2);     // any mem
4623 %}
4624 
4625 pipe_class fpu_mem_mem(memory dst, memory src1)  // Memory-to-memory float class: uses only D0 and MEM, no FPU resource is listed
4626 %{
4627     instruction_count(2);
4628     src1   : S3(read);
4629     dst    : S4(read);  // dst is declared as a read, one stage after src1
4630     D0     : S0(2);     // big decoder only
4631     MEM    : S3(2);     // any mem
4632 %}
4633 
4634 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float operation whose destination and both sources are memory operands
4635 %{
4636     instruction_count(3);
4637     src1   : S3(read);
4638     src2   : S3(read);
4639     dst    : S4(read);  // dst is declared as a read, one stage after the sources
4640     D0     : S0(3);     // big decoder only
4641     FPU    : S4;
4642     MEM    : S3(3);     // any mem
4643 %}
4644 
4645 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float operation with a memory operand and one register source (plus a constant, judging by the class name)
4646 %{
4647     instruction_count(3);
4648     src1   : S4(read);
4649     mem    : S4(read);
4650     DECODE : S0;        // any decoder for FPU PUSH
4651     D0     : S0(2);     // big decoder only
4652     FPU    : S4;
4653     MEM    : S3(2);     // any mem
4654 %}
4655 
4656 // Float load constant
4657 pipe_class fpu_reg_con(regD dst)
4658 %{
4659     instruction_count(2);
4660     dst    : S5(write);
4661     D0     : S0;        // big decoder only for the load
4662     DECODE : S1;        // any decoder for FPU POP
4663     FPU    : S4;
4664     MEM    : S3;        // any mem
4665 %}
4666 
4667 // Float load constant combined with a register source: dst written in S5, src read in S3
4668 pipe_class fpu_reg_reg_con(regD dst, regD src)
4669 %{
4670     instruction_count(3);
4671     dst    : S5(write);
4672     src    : S3(read);
4673     D0     : S0;        // big decoder only for the load
4674     DECODE : S1(2);     // any decoder for FPU POP
4675     FPU    : S4;
4676     MEM    : S3;        // any mem
4677 %}
4678 
4679 // UnConditional branch
4680 pipe_class pipe_jmp(label labl)
4681 %{
4682     single_instruction;
4683     BR   : S3;
4684 %}
4685 
4686 // Conditional branch
4687 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4688 %{
4689     single_instruction;
4690     cr    : S1(read);
4691     BR    : S3;
4692 %}
4693 
4694 // Allocation idiom (compare-and-exchange): one serialized instruction with a fixed latency of 6
4695 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4696 %{
4697     instruction_count(1); force_serialization;
4698     fixed_latency(6);
4699     heap_ptr : S3(read);
4700     DECODE   : S0(3);
4701     D0       : S2;
4702     MEM      : S3;
4703     ALU      : S3(2);
4704     dst      : S5(write);
4705     BR       : S5;      // the branch unit is also reserved, in S5
4706 %}
4707 
4708 // Generic big/slow expanded idiom
4709 pipe_class pipe_slow()
4710 %{
4711     instruction_count(10); multiple_bundles; force_serialization;
4712     fixed_latency(100);
4713     D0  : S0(2);
4714     MEM : S3(2);
4715 %}
4716 
4717 // The real do-nothing guy
4718 pipe_class empty()
4719 %{
4720     instruction_count(0);
4721 %}
4722 
4723 // Define the class for the Nop node
     // Binds MachNop to the "empty" pipe_class, i.e. a nop is scheduled as
     // zero instructions that use no pipeline resources.
4724 define
4725 %{
4726    MachNop = empty;
4727 %}
4728 
4729 %}
4730 
4731 //----------INSTRUCTIONS-------------------------------------------------------
4732 //
4733 // match      -- States which machine-independent subtree may be replaced
4734 //               by this instruction.
4735 // ins_cost   -- The estimated cost of this instruction is used by instruction
4736 //               selection to identify a minimum cost tree of machine
4737 //               instructions that matches a tree of machine-independent
4738 //               instructions.
4739 // format     -- A string providing the disassembly for this instruction.
4740 //               The value of an instruction's operand may be inserted
4741 //               by referring to it with a '$' prefix.
4742 // opcode     -- Three instruction opcodes may be provided.  These are referred
4743 //               to within an encode class as $primary, $secondary, and $tertiary
4744 //               respectively.  The primary opcode is commonly used to
4745 //               indicate the type of machine instruction, while secondary
4746 //               and tertiary are often used for prefix options or addressing
4747 //               modes.
4748 // ins_encode -- A list of encode classes with parameters. The encode class
4749 //               name must have been defined in an 'enc_class' specification
4750 //               in the encode section of the architecture description.
4751 
4752 
4753 //----------Load/Store/Move Instructions---------------------------------------
4754 //----------Load Instructions--------------------------------------------------
4755 
4756 // Load Byte (8 bit signed)
     // Matches an ideal LoadB and emits one movsbl, which sign-extends the
     // byte at $mem into the 32-bit register $dst.
4757 instruct loadB(rRegI dst, memory mem)
4758 %{
4759   match(Set dst (LoadB mem));
4760 
4761   ins_cost(125);
4762   format %{ "movsbl  $dst, $mem\t# byte" %}
4763 
4764   ins_encode %{
4765     __ movsbl($dst$$Register, $mem$$Address);
4766   %}
4767 
4768   ins_pipe(ialu_reg_mem);
4769 %}
4770 
4771 // Load Byte (8 bit signed) into Long Register
     // Folds ConvI2L(LoadB) into a single movsbq, so the widening to 64 bits
     // happens as part of the load instead of in a separate instruction.
4772 instruct loadB2L(rRegL dst, memory mem)
4773 %{
4774   match(Set dst (ConvI2L (LoadB mem)));
4775 
4776   ins_cost(125);
4777   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4778 
4779   ins_encode %{
4780     __ movsbq($dst$$Register, $mem$$Address);
4781   %}
4782 
4783   ins_pipe(ialu_reg_mem);
4784 %}
4785 
4786 // Load Unsigned Byte (8 bit UNsigned)
     // Matches an ideal LoadUB and emits one movzbl, which zero-extends the
     // byte at $mem into the 32-bit register $dst.
4787 instruct loadUB(rRegI dst, memory mem)
4788 %{
4789   match(Set dst (LoadUB mem));
4790 
4791   ins_cost(125);
4792   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4793 
4794   ins_encode %{
4795     __ movzbl($dst$$Register, $mem$$Address);
4796   %}
4797 
4798   ins_pipe(ialu_reg_mem);
4799 %}
4800 
4801 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUB) into a single movzbq; the value is never
     // negative, so zero-extension to 64 bits gives the same result as the
     // int-to-long conversion.
4802 instruct loadUB2L(rRegL dst, memory mem)
4803 %{
4804   match(Set dst (ConvI2L (LoadUB mem)));
4805 
4806   ins_cost(125);
4807   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4808 
4809   ins_encode %{
4810     __ movzbq($dst$$Register, $mem$$Address);
4811   %}
4812 
4813   ins_pipe(ialu_reg_mem);
4814 %}
4815 
4816 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
     // Two-instruction sequence: a zero-extending byte load followed by an
     // in-place andl with the immediate mask. The andl modifies the condition
     // codes, which is why the flags register is declared as killed.
4817 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
4818   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
4819   effect(KILL cr);
4820 
4821   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
4822             "andl    $dst, $mask" %}
4823   ins_encode %{
4824     // Load first, then mask the destination register in place.
4825     __ movzbq($dst$$Register, $mem$$Address);
4826     __ andl($dst$$Register, $mask$$constant);
4827   %}
4828   ins_pipe(ialu_reg_mem);
4829 %}
4830 
4831 // Load Short (16 bit signed)
     // Matches an ideal LoadS and emits one movswl, which sign-extends the
     // 16-bit value at $mem into the 32-bit register $dst.
4832 instruct loadS(rRegI dst, memory mem)
4833 %{
4834   match(Set dst (LoadS mem));
4835 
4836   ins_cost(125);
4837   format %{ "movswl $dst, $mem\t# short" %}
4838 
4839   ins_encode %{
4840     __ movswl($dst$$Register, $mem$$Address);
4841   %}
4842 
4843   ins_pipe(ialu_reg_mem);
4844 %}
4845 
4846 // Load Short (16 bit signed) to Byte (8 bit signed)
     // (x << 24) >> 24 keeps only the sign-extended low byte of the loaded
     // short, so the whole tree is replaced by a sign-extending byte load from
     // the same address (on little-endian x86 the low byte is stored at $mem).
4847 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4848   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
4849 
4850   ins_cost(125);
4851   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4852   ins_encode %{
4853     __ movsbl($dst$$Register, $mem$$Address);
4854   %}
4855   ins_pipe(ialu_reg_mem);
4856 %}
4857 
4858 // Load Short (16 bit signed) into Long Register
     // Folds ConvI2L(LoadS) into a single movswq (sign-extend 16 -> 64 bits).
4859 instruct loadS2L(rRegL dst, memory mem)
4860 %{
4861   match(Set dst (ConvI2L (LoadS mem)));
4862 
4863   ins_cost(125);
4864   format %{ "movswq $dst, $mem\t# short -> long" %}
4865 
4866   ins_encode %{
4867     __ movswq($dst$$Register, $mem$$Address);
4868   %}
4869 
4870   ins_pipe(ialu_reg_mem);
4871 %}
4872 
4873 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches an ideal LoadUS and emits one movzwl, which zero-extends the
     // 16-bit value at $mem into the 32-bit register $dst.
4874 instruct loadUS(rRegI dst, memory mem)
4875 %{
4876   match(Set dst (LoadUS mem));
4877 
4878   ins_cost(125);
4879   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
4880 
4881   ins_encode %{
4882     __ movzwl($dst$$Register, $mem$$Address);
4883   %}
4884 
4885   ins_pipe(ialu_reg_mem);
4886 %}
4887 
4888 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // The shift pair (x << 24) >> 24 discards everything but the low byte and
     // sign-extends it, so the signedness of the 16-bit load is irrelevant and
     // a sign-extending byte load from $mem is emitted instead.
4889 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4890   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
4891 
4892   ins_cost(125);
4893   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
4894   ins_encode %{
4895     __ movsbl($dst$$Register, $mem$$Address);
4896   %}
4897   ins_pipe(ialu_reg_mem);
4898 %}
4899 
4900 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUS) into a single movzwq (zero-extend 16 -> 64 bits).
4901 instruct loadUS2L(rRegL dst, memory mem)
4902 %{
4903   match(Set dst (ConvI2L (LoadUS mem)));
4904 
4905   ins_cost(125);
4906   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
4907 
4908   ins_encode %{
4909     __ movzwq($dst$$Register, $mem$$Address);
4910   %}
4911 
4912   ins_pipe(ialu_reg_mem);
4913 %}
4914 
4915 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // Masking with 0xFF keeps only the low byte, so the load, the AndI and the
     // ConvI2L collapse into one zero-extending byte load. No ins_cost is
     // given, so the ADLC default cost applies.
4916 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
4917   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
4918 
4919   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
4920   ins_encode %{
4921     __ movzbq($dst$$Register, $mem$$Address);
4922   %}
4923   ins_pipe(ialu_reg_mem);
4924 %}
4925 
4926 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Two-instruction sequence: a zero-extending 16-bit load followed by an
     // in-place andl with the immediate mask. The andl modifies the condition
     // codes, which is why the flags register is declared as killed.
4927 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
4928   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
4929   effect(KILL cr);
4930 
4931   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
4932             "andl    $dst, $mask" %}
4933   ins_encode %{
4934     // Load first, then mask the destination register in place.
4935     __ movzwq($dst$$Register, $mem$$Address);
4936     __ andl($dst$$Register, $mask$$constant);
4937   %}
4938   ins_pipe(ialu_reg_mem);
4939 %}
4940 
4941 // Load Integer
     // Matches an ideal LoadI and emits a plain 32-bit movl from $mem to $dst.
4942 instruct loadI(rRegI dst, memory mem)
4943 %{
4944   match(Set dst (LoadI mem));
4945 
4946   ins_cost(125);
4947   format %{ "movl    $dst, $mem\t# int" %}
4948 
4949   ins_encode %{
4950     __ movl($dst$$Register, $mem$$Address);
4951   %}
4952 
4953   ins_pipe(ialu_reg_mem);
4954 %}
4955 
4956 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // (x << 24) >> 24 on a loaded int yields its sign-extended low byte, so
     // the tree is replaced by a sign-extending byte load from $mem.
4957 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4958   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
4959 
4960   ins_cost(125);
4961   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
4962   ins_encode %{
4963     __ movsbl($dst$$Register, $mem$$Address);
4964   %}
4965   ins_pipe(ialu_reg_mem);
4966 %}
4967 
4968 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // AndI of a loaded int with the immI_255 mask keeps only the low byte, so
     // a zero-extending byte load from $mem replaces both nodes.
4969 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
4970   match(Set dst (AndI (LoadI mem) mask));
4971 
4972   ins_cost(125);
4973   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
4974   ins_encode %{
4975     __ movzbl($dst$$Register, $mem$$Address);
4976   %}
4977   ins_pipe(ialu_reg_mem);
4978 %}
4979 
4980 // Load Integer (32 bit signed) to Short (16 bit signed)
     // (x << 16) >> 16 on a loaded int yields its sign-extended low 16 bits, so
     // the tree is replaced by a sign-extending 16-bit load from $mem.
4981 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
4982   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
4983 
4984   ins_cost(125);
4985   format %{ "movswl  $dst, $mem\t# int -> short" %}
4986   ins_encode %{
4987     __ movswl($dst$$Register, $mem$$Address);
4988   %}
4989   ins_pipe(ialu_reg_mem);
4990 %}
4991 
4992 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // AndI of a loaded int with the immI_65535 mask keeps only the low 16
     // bits, so a zero-extending 16-bit load from $mem replaces both nodes.
4993 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
4994   match(Set dst (AndI (LoadI mem) mask));
4995 
4996   ins_cost(125);
4997   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
4998   ins_encode %{
4999     __ movzwl($dst$$Register, $mem$$Address);
5000   %}
5001   ins_pipe(ialu_reg_mem);
5002 %}
5003 
5004 // Load Integer into Long Register
     // Folds ConvI2L(LoadI) into a single movslq (sign-extend 32 -> 64 bits).
5005 instruct loadI2L(rRegL dst, memory mem)
5006 %{
5007   match(Set dst (ConvI2L (LoadI mem)));
5008 
5009   ins_cost(125);
5010   format %{ "movslq  $dst, $mem\t# int -> long" %}
5011 
5012   ins_encode %{
5013     __ movslq($dst$$Register, $mem$$Address);
5014   %}
5015 
5016   ins_pipe(ialu_reg_mem);
5017 %}
5018 
5019 // Load Integer with mask 0xFF into Long Register
     // The mask leaves only the low byte, which is non-negative, so load, AndI
     // and ConvI2L collapse into one zero-extending byte load. No ins_cost is
     // given, so the ADLC default cost applies.
5020 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5021   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5022 
5023   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5024   ins_encode %{
5025     __ movzbq($dst$$Register, $mem$$Address);
5026   %}
5027   ins_pipe(ialu_reg_mem);
5028 %}
5029 
5030 // Load Integer with mask 0xFFFF into Long Register
     // The mask leaves only the low 16 bits, which is non-negative, so load,
     // AndI and ConvI2L collapse into one zero-extending 16-bit load. No
     // ins_cost is given, so the ADLC default cost applies.
5031 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5032   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5033 
5034   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5035   ins_encode %{
5036     __ movzwq($dst$$Register, $mem$$Address);
5037   %}
5038   ins_pipe(ialu_reg_mem);
5039 %}
5040 
5041 // Load Integer with a 32-bit mask into Long Register
     // Emits movl + andl for ConvI2L(AndI(LoadI mem, mask)). Both instructions
     // are 32-bit operations and therefore leave the upper half of $dst zeroed.
     // That equals the required sign-extension only while bit 31 of the masked
     // value is clear. When the mask itself has bit 31 set, the masked int can
     // be negative, so the result is explicitly sign-extended with movslq.
     // (The format string below only shows the unconditional movl/andl pair.)
5042 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5043   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5044   effect(KILL cr);
5045 
5046   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
5047             "andl    $dst, $mask" %}
5048   ins_encode %{
5049     Register Rdst = $dst$$Register;
5050     __ movl(Rdst, $mem$$Address);
5051     __ andl(Rdst, $mask$$constant);
         if ((int32_t)$mask$$constant < 0) {
           // A mask with the sign bit set can produce a negative int; ConvI2L
           // requires that value to be sign-extended, not zero-extended.
           __ movslq(Rdst, Rdst);
         }
5052   %}
5053   ins_pipe(ialu_reg_mem);
5054 %}
5055 
5056 // Load Unsigned Integer into Long Register
     // AndL(ConvI2L(LoadI), immL_32bits) is the int treated as unsigned. A
     // 32-bit movl into a 64-bit register clears the upper half on x86-64, so
     // the single load already produces the zero-extended value.
5057 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
5058 %{
5059   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5060 
5061   ins_cost(125);
5062   format %{ "movl    $dst, $mem\t# uint -> long" %}
5063 
5064   ins_encode %{
5065     __ movl($dst$$Register, $mem$$Address);
5066   %}
5067 
5068   ins_pipe(ialu_reg_mem);
5069 %}
5070 
5071 // Load Long
     // Matches an ideal LoadL and emits a 64-bit movq from $mem to $dst.
5072 instruct loadL(rRegL dst, memory mem)
5073 %{
5074   match(Set dst (LoadL mem));
5075 
5076   ins_cost(125);
5077   format %{ "movq    $dst, $mem\t# long" %}
5078 
5079   ins_encode %{
5080     __ movq($dst$$Register, $mem$$Address);
5081   %}
5082 
5083   ins_pipe(ialu_reg_mem); // XXX
5084 %}
5085 
5086 // Load Range
     // Matches an ideal LoadRange (presumably an array-length load -- confirm)
     // into an int register. Encoded by hand: REX prefix as needed, primary
     // opcode 0x8B (MOV r32, r/m32) via OpcP, then the ModRM/SIB bytes. The
     // enc_classes used here are defined elsewhere in this file.
5087 instruct loadRange(rRegI dst, memory mem)
5088 %{
5089   match(Set dst (LoadRange mem));
5090 
5091   ins_cost(125); // XXX
5092   format %{ "movl    $dst, $mem\t# range" %}
5093   opcode(0x8B);
5094   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5095   ins_pipe(ialu_reg_mem);
5096 %}
5097 
5098 // Load Pointer
     // Matches an ideal LoadP. Same opcode (0x8B) as the 32-bit loads, but the
     // REX prefix comes from the *_wide enc_class (defined elsewhere in this
     // file), matching the 64-bit "movq" shown in the format.
5099 instruct loadP(rRegP dst, memory mem)
5100 %{
5101   match(Set dst (LoadP mem));
5102 
5103   ins_cost(125); // XXX
5104   format %{ "movq    $dst, $mem\t# ptr" %}
5105   opcode(0x8B);
5106   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5107   ins_pipe(ialu_reg_mem); // XXX
5108 %}
5109 
5110 // Load Compressed Pointer
     // Matches an ideal LoadN into a narrow-oop register; the compressed value
     // is 32 bits wide, so a plain movl is emitted and no decoding is done here.
5111 instruct loadN(rRegN dst, memory mem)
5112 %{
5113    match(Set dst (LoadN mem));
5114 
5115    ins_cost(125); // XXX
5116    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5117    ins_encode %{
5118      __ movl($dst$$Register, $mem$$Address);
5119    %}
5120    ins_pipe(ialu_reg_mem); // XXX
5121 %}
5122 
5123 
5124 // Load Klass Pointer
     // Matches an ideal LoadKlass into a pointer register; encoded exactly like
     // loadP (opcode 0x8B with the *_wide REX enc_class, i.e. "movq").
5125 instruct loadKlass(rRegP dst, memory mem)
5126 %{
5127   match(Set dst (LoadKlass mem));
5128 
5129   ins_cost(125); // XXX
5130   format %{ "movq    $dst, $mem\t# class" %}
5131   opcode(0x8B);
5132   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5133   ins_pipe(ialu_reg_mem); // XXX
5134 %}
5135 
5136 // Load narrow Klass Pointer
     // Matches an ideal LoadNKlass into a narrow register; the compressed klass
     // value is loaded with a 32-bit movl and is not decoded here.
5137 instruct loadNKlass(rRegN dst, memory mem)
5138 %{
5139   match(Set dst (LoadNKlass mem));
5140 
5141   ins_cost(125); // XXX
5142   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5143   ins_encode %{
5144     __ movl($dst$$Register, $mem$$Address);
5145   %}
5146   ins_pipe(ialu_reg_mem); // XXX
5147 %}
5148 
5149 // Load Float
     // Matches an ideal LoadF into an XMM register via the macro assembler's
     // movflt helper (shown as "movss" in the format). Costed slightly higher
     // (145) than the integer loads (125).
5150 instruct loadF(regF dst, memory mem)
5151 %{
5152   match(Set dst (LoadF mem));
5153 
5154   ins_cost(145); // XXX
5155   format %{ "movss   $dst, $mem\t# float" %}
5156   ins_encode %{
5157     __ movflt($dst$$XMMRegister, $mem$$Address);
5158   %}
5159   ins_pipe(pipe_slow); // XXX
5160 %}
5161 
5162 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is off. Its encode body is
     // identical to loadD's (both call movdbl); only the predicate and the
     // mnemonic printed by the format ("movlpd") differ.
     // NOTE(review): presumably movdbl itself picks movlpd vs. movsd from the
     // same flag -- confirm in the macro assembler.
5163 instruct loadD_partial(regD dst, memory mem)
5164 %{
5165   predicate(!UseXmmLoadAndClearUpper);
5166   match(Set dst (LoadD mem));
5167 
5168   ins_cost(145); // XXX
5169   format %{ "movlpd  $dst, $mem\t# double" %}
5170   ins_encode %{
5171     __ movdbl($dst$$XMMRegister, $mem$$Address);
5172   %}
5173   ins_pipe(pipe_slow); // XXX
5174 %}
5175 
5176 instruct loadD(regD dst, memory mem)
5177 %{
       // Load Double, variant selected when UseXmmLoadAndClearUpper is on.
       // Same encode body as loadD_partial; the format prints "movsd".
5178   predicate(UseXmmLoadAndClearUpper);
5179   match(Set dst (LoadD mem));
5180 
5181   ins_cost(145); // XXX
5182   format %{ "movsd   $dst, $mem\t# double" %}
5183   ins_encode %{
5184     __ movdbl($dst$$XMMRegister, $mem$$Address);
5185   %}
5186   ins_pipe(pipe_slow); // XXX
5187 %}
5188 
5189 // Load Effective Address
     // The match rule is the memory operand itself ("Set dst mem"): the address
     // expression described by indOffset8 is computed into $dst with LEA
     // (opcode 0x8D) instead of being dereferenced. Cost 110 is below the 125
     // used for real loads.
5190 instruct leaP8(rRegP dst, indOffset8 mem)
5191 %{
5192   match(Set dst mem);
5193 
5194   ins_cost(110); // XXX
5195   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5196   opcode(0x8D);
5197   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5198   ins_pipe(ialu_reg_reg_fat);
5199 %}
5200 
5201 instruct leaP32(rRegP dst, indOffset32 mem)
5202 %{
       // LEA of an indOffset32 address expression; same encoding scheme as
       // leaP8 (opcode 0x8D with the *_wide REX enc_class).
5203   match(Set dst mem);
5204 
5205   ins_cost(110);
5206   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5207   opcode(0x8D);
5208   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5209   ins_pipe(ialu_reg_reg_fat);
5210 %}
5211 
5212 // instruct leaPIdx(rRegP dst, indIndex mem)
5213 // %{
5214 //   match(Set dst mem);
5215 
5216 //   ins_cost(110);
5217 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5218 //   opcode(0x8D);
5219 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5220 //   ins_pipe(ialu_reg_reg_fat);
5221 // %}
5222 
5223 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5224 %{
       // LEA of an indIndexOffset address expression into $dst (opcode 0x8D
       // with the *_wide REX enc_class, printed as "leaq").
5225   match(Set dst mem);
5226 
5227   ins_cost(110);
5228   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5229   opcode(0x8D);
5230   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5231   ins_pipe(ialu_reg_reg_fat);
5232 %}
5233 
5234 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5235 %{
       // LEA of an indIndexScale address expression into $dst (opcode 0x8D
       // with the *_wide REX enc_class, printed as "leaq").
5236   match(Set dst mem);
5237 
5238   ins_cost(110);
5239   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5240   opcode(0x8D);
5241   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5242   ins_pipe(ialu_reg_reg_fat);
5243 %}
5244 
5245 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5246 %{
       // LEA of an indIndexScaleOffset address expression into $dst (opcode
       // 0x8D with the *_wide REX enc_class, printed as "leaq").
5247   match(Set dst mem);
5248 
5249   ins_cost(110);
5250   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5251   opcode(0x8D);
5252   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5253   ins_pipe(ialu_reg_reg_fat);
5254 %}
5255 
5256 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5257 %{
       // LEA of an indPosIndexScaleOffset address expression into $dst (opcode
       // 0x8D with the *_wide REX enc_class, printed as "leaq").
5258   match(Set dst mem);
5259 
5260   ins_cost(110);
5261   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5262   opcode(0x8D);
5263   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5264   ins_pipe(ialu_reg_reg_fat);
5265 %}
5266 
5267 // Load Effective Address which uses Narrow (32-bits) oop
     // Only enabled when compressed oops are in use with a non-zero shift; the
     // indCompressedOopOffset operand (defined elsewhere) describes the address
     // expression that LEA (opcode 0x8D) materializes into $dst.
5268 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5269 %{
5270   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5271   match(Set dst mem);
5272 
5273   ins_cost(110);
5274   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5275   opcode(0x8D);
5276   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5277   ins_pipe(ialu_reg_reg_fat);
5278 %}
5279 
5280 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5281 %{
       // "Narrow" counterpart of leaP8: LEA of an indOffset8Narrow address
       // expression. Only enabled when the narrow-oop shift is zero.
5282   predicate(Universe::narrow_oop_shift() == 0);
5283   match(Set dst mem);
5284 
5285   ins_cost(110); // XXX
5286   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5287   opcode(0x8D);
5288   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5289   ins_pipe(ialu_reg_reg_fat);
5290 %}
5291 
5292 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5293 %{
       // "Narrow" counterpart of leaP32: LEA of an indOffset32Narrow address
       // expression. Only enabled when the narrow-oop shift is zero.
5294   predicate(Universe::narrow_oop_shift() == 0);
5295   match(Set dst mem);
5296 
5297   ins_cost(110);
5298   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5299   opcode(0x8D);
5300   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5301   ins_pipe(ialu_reg_reg_fat);
5302 %}
5303 
5304 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5305 %{
       // "Narrow" counterpart of leaPIdxOff: LEA of an indIndexOffsetNarrow
       // address expression. Only enabled when the narrow-oop shift is zero.
5306   predicate(Universe::narrow_oop_shift() == 0);
5307   match(Set dst mem);
5308 
5309   ins_cost(110);
5310   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5311   opcode(0x8D);
5312   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5313   ins_pipe(ialu_reg_reg_fat);
5314 %}
5315 
5316 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5317 %{
       // "Narrow" counterpart of leaPIdxScale: LEA of an indIndexScaleNarrow
       // address expression. Only enabled when the narrow-oop shift is zero.
5318   predicate(Universe::narrow_oop_shift() == 0);
5319   match(Set dst mem);
5320 
5321   ins_cost(110);
5322   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5323   opcode(0x8D);
5324   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5325   ins_pipe(ialu_reg_reg_fat);
5326 %}
5327 
5328 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5329 %{
       // "Narrow" counterpart of leaPIdxScaleOff: LEA of an
       // indIndexScaleOffsetNarrow address expression. Only enabled when the
       // narrow-oop shift is zero.
5330   predicate(Universe::narrow_oop_shift() == 0);
5331   match(Set dst mem);
5332 
5333   ins_cost(110);
5334   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5335   opcode(0x8D);
5336   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5337   ins_pipe(ialu_reg_reg_fat);
5338 %}
5339 
5340 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5341 %{
       // "Narrow" counterpart of leaPPosIdxScaleOff: LEA of an
       // indPosIndexScaleOffsetNarrow address expression. Only enabled when the
       // narrow-oop shift is zero.
5342   predicate(Universe::narrow_oop_shift() == 0);
5343   match(Set dst mem);
5344 
5345   ins_cost(110);
5346   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5347   opcode(0x8D);
5348   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5349   ins_pipe(ialu_reg_reg_fat);
5350 %}
5351 
5352 instruct loadConI(rRegI dst, immI src)
5353 %{
       // Materialize an arbitrary int constant in $dst. The bytes are produced
       // by the load_immI enc_class (defined elsewhere in this file). No
       // ins_cost is given, so the ADLC default cost applies.
5354   match(Set dst src);
5355 
5356   format %{ "movl    $dst, $src\t# int" %}
5357   ins_encode(load_immI(dst, src));
5358   ins_pipe(ialu_reg_fat); // XXX
5359 %}
5360 
5361 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5362 %{
       // Int zero: cheaper (cost 50) special case that clears $dst by xor-ing it
       // with itself (opcode 0x33, XOR r32, r/m32). XOR modifies the condition
       // codes, hence the KILL of the flags register.
5363   match(Set dst src);
5364   effect(KILL cr);
5365 
5366   ins_cost(50);
5367   format %{ "xorl    $dst, $dst\t# int" %}
5368   opcode(0x33); /* + rd */
5369   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5370   ins_pipe(ialu_reg);
5371 %}
5372 
5373 instruct loadConL(rRegL dst, immL src)
5374 %{
       // General 64-bit constant load via the load_immL enc_class (defined
       // elsewhere). At cost 150 it is the most expensive long-constant rule
       // here, so the cheaper loadConL0 / loadConUL32 / loadConL32 forms are
       // chosen whenever their operand matches.
5375   match(Set dst src);
5376 
5377   ins_cost(150);
5378   format %{ "movq    $dst, $src\t# long" %}
5379   ins_encode(load_immL(dst, src));
5380   ins_pipe(ialu_reg);
5381 %}
5382 
5383 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5384 %{
       // Long zero. Uses the 32-bit xor form (non-wide REX enc_class, opcode
       // 0x33), as the "xorl" in the format shows; on x86-64 a 32-bit register
       // write also clears the upper 32 bits, so the full 64-bit register ends
       // up zero. XOR modifies the condition codes, hence KILL cr.
5385   match(Set dst src);
5386   effect(KILL cr);
5387 
5388   ins_cost(50);
5389   format %{ "xorl    $dst, $dst\t# long" %}
5390   opcode(0x33); /* + rd */
5391   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5392   ins_pipe(ialu_reg); // XXX
5393 %}
5394 
5395 instruct loadConUL32(rRegL dst, immUL32 src)
5396 %{
       // Long constant that fits in an unsigned 32-bit value: loaded with a
       // 32-bit "movl" (per the format) through the load_immUL32 enc_class
       // (defined elsewhere). Cost 60, cheaper than the general loadConL.
5397   match(Set dst src);
5398 
5399   ins_cost(60);
5400   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5401   ins_encode(load_immUL32(dst, src));
5402   ins_pipe(ialu_reg);
5403 %}
5404 
5405 instruct loadConL32(rRegL dst, immL32 src)
5406 %{
       // Long constant matching the immL32 operand: loaded with "movq" (per the
       // format) through the load_immL32 enc_class (defined elsewhere).
       // Cost 70, between loadConUL32 (60) and the general loadConL (150).
5407   match(Set dst src);
5408 
5409   ins_cost(70);
5410   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5411   ins_encode(load_immL32(dst, src));
5412   ins_pipe(ialu_reg);
5413 %}
5414 
5415 instruct loadConP(rRegP dst, immP con) %{
       // General pointer constant load via the load_immP enc_class (defined
       // elsewhere). No ins_cost is given, so the ADLC default cost applies.
5416   match(Set dst con);
5417 
5418   format %{ "movq    $dst, $con\t# ptr" %}
5419   ins_encode(load_immP(dst, con));
5420   ins_pipe(ialu_reg_fat); // XXX
5421 %}
5422 
5423 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5424 %{
       // NULL pointer constant: clears $dst with the 32-bit xor form (opcode
       // 0x33, non-wide REX enc_class), which on x86-64 also zeroes the upper
       // 32 bits. XOR modifies the condition codes, hence KILL cr.
5425   match(Set dst src);
5426   effect(KILL cr);
5427 
5428   ins_cost(50);
5429   format %{ "xorl    $dst, $dst\t# ptr" %}
5430   opcode(0x33); /* + rd */
5431   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5432   ins_pipe(ialu_reg);
5433 %}
5434 
5435 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5436 %{
       // Pointer constant that is a positive 32-bit value (immP31): loaded with
       // a 32-bit "movl" (per the format) via the load_immP31 enc_class, which
       // is defined elsewhere in this file.
       // NOTE(review): a plain movl would not modify flags, so KILL cr looks
       // merely conservative -- confirm against load_immP31 before relaxing it.
5437   match(Set dst src);
5438   effect(KILL cr);
5439 
5440   ins_cost(60);
5441   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5442   ins_encode(load_immP31(dst, src));
5443   ins_pipe(ialu_reg);
5444 %}
5445 
5446 instruct loadConF(regF dst, immF con) %{
       // Float constant: $constantaddress($con) yields the address of $con in
       // the constant table, and movflt loads it from there into $dst.
5447   match(Set dst con);
5448   ins_cost(125);
5449   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5450   ins_encode %{
5451     __ movflt($dst$$XMMRegister, $constantaddress($con));
5452   %}
5453   ins_pipe(pipe_slow);
5454 %}
5455 
5456 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Compressed NULL: clears $dst by xor-ing the register with itself. The
       // constant operand $src only selects this rule and is never encoded, so
       // the format prints "$dst, $dst" to match the instruction actually
       // emitted. XOR modifies the condition codes, hence KILL cr.
5457   match(Set dst src);
5458   effect(KILL cr);
5459   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5460   ins_encode %{
5461     __ xorq($dst$$Register, $dst$$Register);
5462   %}
5463   ins_pipe(ialu_reg);
5464 %}
5465 
5466 instruct loadConN(rRegN dst, immN src) %{
       // Non-NULL compressed oop constant: set_narrow_oop materializes the
       // narrow form of the oop in $dst. A NULL constant must never get here
       // (the immN0 rule loadConN0 handles it), so that case is a hard error.
5467   match(Set dst src);
5468 
5469   ins_cost(125);
5470   format %{ "movl    $dst, $src\t# compressed ptr" %}
5471   ins_encode %{
5472     // Positive case first; NULL is rejected in the else branch.
5473     if ((address)$src$$constant != NULL) {
5474       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5475     } else {
5476       ShouldNotReachHere();
5477     }
5478   %}
5479   ins_pipe(ialu_reg_fat); // XXX
5480 %}
5481 
5482 instruct loadConNKlass(rRegN dst, immNKlass src) %{
       // Compressed klass constant: set_narrow_klass materializes the narrow
       // form of the Klass* in $dst. A NULL klass constant is treated as a hard
       // error.
5483   match(Set dst src);
5484 
5485   ins_cost(125);
5486   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5487   ins_encode %{
5488     // Positive case first; NULL is rejected in the else branch.
5489     if ((address)$src$$constant != NULL) {
5490       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5491     } else {
5492       ShouldNotReachHere();
5493     }
5494   %}
5495   ins_pipe(ialu_reg_fat); // XXX
5496 %}
5497 
5498 instruct loadConF0(regF dst, immF0 src)
5499 %{
       // Float 0.0: zeroes the XMM register by xor-ing it with itself rather
       // than loading from the constant table (cost 100 vs. 125 for loadConF).
5500   match(Set dst src);
5501   ins_cost(100);
5502 
5503   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5504   ins_encode %{
5505     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5506   %}
5507   ins_pipe(pipe_slow);
5508 %}
5509 
5510 // Use the same format since predicate() can not be used here.
     // Double constant: $constantaddress($con) yields the address of $con in
     // the constant table, and movdbl loads it from there into $dst. The format
     // always prints "movsd", whichever instruction movdbl actually selects.
5511 instruct loadConD(regD dst, immD con) %{
5512   match(Set dst con);
5513   ins_cost(125);
5514   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5515   ins_encode %{
5516     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5517   %}
5518   ins_pipe(pipe_slow);
5519 %}
5520 
5521 instruct loadConD0(regD dst, immD0 src)
5522 %{
       // Double 0.0: zeroes the XMM register by xor-ing it with itself rather
       // than loading from the constant table (cost 100 vs. 125 for loadConD).
5523   match(Set dst src);
5524   ins_cost(100);
5525 
5526   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5527   ins_encode %{
5528     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5529   %}
5530   ins_pipe(pipe_slow);
5531 %}
5532 
5533 instruct loadSSI(rRegI dst, stackSlotI src)
5534 %{
       // Reload an int from a stack slot into a register: the stackSlotI
       // operand is used as the memory operand of a 32-bit MOV (opcode 0x8B).
5535   match(Set dst src);
5536 
5537   ins_cost(125);
5538   format %{ "movl    $dst, $src\t# int stk" %}
5539   opcode(0x8B);
5540   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5541   ins_pipe(ialu_reg_mem);
5542 %}
5543 
5544 instruct loadSSL(rRegL dst, stackSlotL src)
5545 %{
       // Reload a long from a stack slot: same opcode (0x8B) as loadSSI but with
       // the *_wide REX enc_class, printed as "movq".
5546   match(Set dst src);
5547 
5548   ins_cost(125);
5549   format %{ "movq    $dst, $src\t# long stk" %}
5550   opcode(0x8B);
5551   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5552   ins_pipe(ialu_reg_mem);
5553 %}
5554 
5555 instruct loadSSP(rRegP dst, stackSlotP src)
5556 %{
       // Reload a pointer from a stack slot; encoded identically to loadSSL
       // (opcode 0x8B with the *_wide REX enc_class, printed as "movq").
5557   match(Set dst src);
5558 
5559   ins_cost(125);
5560   format %{ "movq    $dst, $src\t# ptr stk" %}
5561   opcode(0x8B);
5562   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5563   ins_pipe(ialu_reg_mem);
5564 %}
5565 
5566 instruct loadSSF(regF dst, stackSlotF src)
5567 %{
       // Reload a float from a stack slot into an XMM register. The slot is
       // addressed explicitly as rsp + the operand's displacement.
5568   match(Set dst src);
5569 
5570   ins_cost(125);
5571   format %{ "movss   $dst, $src\t# float stk" %}
5572   ins_encode %{
5573     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5574   %}
5575   ins_pipe(pipe_slow); // XXX
5576 %}
5577 
5578 // Use the same format since predicate() can not be used here.
     // Reload a double from a stack slot into an XMM register. The slot is
     // addressed explicitly as rsp + the operand's displacement; the format
     // always prints "movsd", whichever instruction movdbl actually selects.
5579 instruct loadSSD(regD dst, stackSlotD src)
5580 %{
5581   match(Set dst src);
5582 
5583   ins_cost(125);
5584   format %{ "movsd   $dst, $src\t# double stk" %}
5585   ins_encode  %{
5586     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5587   %}
5588   ins_pipe(pipe_slow); // XXX
5589 %}
5590 
5591 // Prefetch instructions.
5592 // Must be safe to execute with invalid address (cannot fault).
5593 
5594 instruct prefetchr( memory mem ) %{
       // Read prefetch, selected when ReadPrefetchInstr == 3: emits the
       // assembler's prefetchr on $mem. The four prefetchr* rules have mutually
       // exclusive predicates (values 0..3), so exactly one matches PrefetchRead.
5595   predicate(ReadPrefetchInstr==3);
5596   match(PrefetchRead mem);
5597   ins_cost(125);
5598 
5599   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5600   ins_encode %{
5601     __ prefetchr($mem$$Address);
5602   %}
5603   ins_pipe(ialu_mem);
5604 %}
5605 
5606 instruct prefetchrNTA( memory mem ) %{
       // Read prefetch, selected when ReadPrefetchInstr == 0: emits prefetchnta
       // on $mem.
5607   predicate(ReadPrefetchInstr==0);
5608   match(PrefetchRead mem);
5609   ins_cost(125);
5610 
5611   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5612   ins_encode %{
5613     __ prefetchnta($mem$$Address);
5614   %}
5615   ins_pipe(ialu_mem);
5616 %}
5617 
5618 instruct prefetchrT0( memory mem ) %{
       // Read prefetch, selected when ReadPrefetchInstr == 1: emits prefetcht0
       // on $mem.
5619   predicate(ReadPrefetchInstr==1);
5620   match(PrefetchRead mem);
5621   ins_cost(125);
5622 
5623   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5624   ins_encode %{
5625     __ prefetcht0($mem$$Address);
5626   %}
5627   ins_pipe(ialu_mem);
5628 %}
5629 
5630 instruct prefetchrT2( memory mem ) %{
       // Read prefetch, selected when ReadPrefetchInstr == 2: emits prefetcht2
       // on $mem.
5631   predicate(ReadPrefetchInstr==2);
5632   match(PrefetchRead mem);
5633   ins_cost(125);
5634 
5635   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5636   ins_encode %{
5637     __ prefetcht2($mem$$Address);
5638   %}
5639   ins_pipe(ialu_mem);
5640 %}
5641 
5642 instruct prefetchwNTA( memory mem ) %{
       // Write prefetch. Unlike the read and allocation variants there is no
       // predicate: every PrefetchWrite is emitted as prefetchnta on $mem.
5643   match(PrefetchWrite mem);
5644   ins_cost(125);
5645 
5646   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5647   ins_encode %{
5648     __ prefetchnta($mem$$Address);
5649   %}
5650   ins_pipe(ialu_mem);
5651 %}
5652 
5653 // Prefetch instructions for allocation.
5654 
5655 instruct prefetchAlloc( memory mem ) %{
       // Allocation prefetch, selected when AllocatePrefetchInstr == 3: emits
       // prefetchw on $mem. The four prefetchAlloc* rules have mutually
       // exclusive predicates (values 0..3).
5656   predicate(AllocatePrefetchInstr==3);
5657   match(PrefetchAllocation mem);
5658   ins_cost(125);
5659 
5660   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5661   ins_encode %{
5662     __ prefetchw($mem$$Address);
5663   %}
5664   ins_pipe(ialu_mem);
5665 %}
5666 
5667 instruct prefetchAllocNTA( memory mem ) %{
       // Allocation prefetch, selected when AllocatePrefetchInstr == 0: emits
       // prefetchnta on $mem.
5668   predicate(AllocatePrefetchInstr==0);
5669   match(PrefetchAllocation mem);
5670   ins_cost(125);
5671 
5672   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5673   ins_encode %{
5674     __ prefetchnta($mem$$Address);
5675   %}
5676   ins_pipe(ialu_mem);
5677 %}
5678 
5679 instruct prefetchAllocT0( memory mem ) %{
       // Allocation prefetch, selected when AllocatePrefetchInstr == 1: emits
       // prefetcht0 on $mem.
5680   predicate(AllocatePrefetchInstr==1);
5681   match(PrefetchAllocation mem);
5682   ins_cost(125);
5683 
5684   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5685   ins_encode %{
5686     __ prefetcht0($mem$$Address);
5687   %}
5688   ins_pipe(ialu_mem);
5689 %}
5690 
5691 instruct prefetchAllocT2( memory mem ) %{
       // Allocation prefetch, selected when AllocatePrefetchInstr == 2: emits
       // prefetcht2 on $mem.
5692   predicate(AllocatePrefetchInstr==2);
5693   match(PrefetchAllocation mem);
5694   ins_cost(125);
5695 
5696   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5697   ins_encode %{
5698     __ prefetcht2($mem$$Address);
5699   %}
5700   ins_pipe(ialu_mem);
5701 %}
5702 
5703 //----------Store Instructions-------------------------------------------------
5704 
5705 // Store Byte
     // Matches an ideal StoreB of an int register. Opcode 0x88 is the 8-bit
     // MOV r/m8, r8; the REX prefix comes from the byte-register enc_class
     // REX_breg_mem (defined elsewhere in this file).
5706 instruct storeB(memory mem, rRegI src)
5707 %{
5708   match(Set mem (StoreB mem src));
5709 
5710   ins_cost(125); // XXX
5711   format %{ "movb    $mem, $src\t# byte" %}
5712   opcode(0x88);
5713   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5714   ins_pipe(ialu_mem_reg);
5715 %}
5716 
5717 // Store Char/Short
     // Matches an ideal StoreC. Same opcode (0x89) as the 32-bit store, but
     // preceded by the SizePrefix enc_class to select the 16-bit operand size
     // ("movw" in the format).
5718 instruct storeC(memory mem, rRegI src)
5719 %{
5720   match(Set mem (StoreC mem src));
5721 
5722   ins_cost(125); // XXX
5723   format %{ "movw    $mem, $src\t# char/short" %}
5724   opcode(0x89);
5725   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5726   ins_pipe(ialu_mem_reg);
5727 %}
5728 
5729 // Store Integer
     // Matches an ideal StoreI and emits a 32-bit MOV r/m32, r32 (opcode 0x89).
5730 instruct storeI(memory mem, rRegI src)
5731 %{
5732   match(Set mem (StoreI mem src));
5733 
5734   ins_cost(125); // XXX
5735   format %{ "movl    $mem, $src\t# int" %}
5736   opcode(0x89);
5737   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5738   ins_pipe(ialu_mem_reg);
5739 %}
5740 
5741 // Store Long
     // Matches an ideal StoreL: opcode 0x89 with the *_wide REX enc_class,
     // printed as the 64-bit "movq".
5742 instruct storeL(memory mem, rRegL src)
5743 %{
5744   match(Set mem (StoreL mem src));
5745 
5746   ins_cost(125); // XXX
5747   format %{ "movq    $mem, $src\t# long" %}
5748   opcode(0x89);
5749   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5750   ins_pipe(ialu_mem_reg); // XXX
5751 %}
5752 
5753 // Store Pointer
     // Matches an ideal StoreP; encoded like storeL (opcode 0x89, *_wide REX).
     // The source uses the any_RegP operand class rather than rRegP.
5754 instruct storeP(memory mem, any_RegP src)
5755 %{
5756   match(Set mem (StoreP mem src));
5757 
5758   ins_cost(125); // XXX
5759   format %{ "movq    $mem, $src\t# ptr" %}
5760   opcode(0x89);
5761   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5762   ins_pipe(ialu_mem_reg);
5763 %}
5764 
5765 instruct storeImmP0(memory mem, immP0 zero)
5766 %{
       // Store a NULL pointer by storing r12. Only valid under the predicate:
       // with compressed oops on and both the narrow-oop base and the
       // narrow-klass base NULL, r12 (the heap-base register per the format
       // string) holds zero and can be stored directly as a register.
5767   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5768   match(Set mem (StoreP mem zero));
5769 
5770   ins_cost(125); // XXX
5771   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5772   ins_encode %{
5773     __ movq($mem$$Address, r12);
5774   %}
5775   ins_pipe(ialu_mem_reg);
5776 %}
5777 
5778 // Store NULL Pointer, mark word, or other simple pointer constant.
     // The constant must match immP31, so it can be emitted as the 32-bit
     // immediate (Con32) of MOV r/m64, imm32 (opcode C7 /0 with the *_wide REX
     // enc_class). Cost 150, above the register stores (125).
5779 instruct storeImmP(memory mem, immP31 src)
5780 %{
5781   match(Set mem (StoreP mem src));
5782 
5783   ins_cost(150); // XXX
5784   format %{ "movq    $mem, $src\t# ptr" %}
5785   opcode(0xC7); /* C7 /0 */
5786   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5787   ins_pipe(ialu_mem_imm);
5788 %}
5789 
5790 // Store Compressed Pointer
     // Matches an ideal StoreN of a narrow-oop register; the already-compressed
     // 32-bit value is written with a plain movl.
5791 instruct storeN(memory mem, rRegN src)
5792 %{
5793   match(Set mem (StoreN mem src));
5794 
5795   ins_cost(125); // XXX
5796   format %{ "movl    $mem, $src\t# compressed ptr" %}
5797   ins_encode %{
5798     __ movl($mem$$Address, $src$$Register);
5799   %}
5800   ins_pipe(ialu_mem_reg);
5801 %}
5802 
5803 instruct storeNKlass(memory mem, rRegN src)
5804 %{
       // Store Compressed Klass Pointer: matches an ideal StoreNKlass of a
       // narrow register and writes the 32-bit value with a plain movl.
5805   match(Set mem (StoreNKlass mem src));
5806 
5807   ins_cost(125); // XXX
5808   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5809   ins_encode %{
5810     __ movl($mem$$Address, $src$$Register);
5811   %}
5812   ins_pipe(ialu_mem_reg);
5813 %}
5814 
5815 instruct storeImmN0(memory mem, immN0 zero)
5816 %{
       // Stores a zero compressed pointer (immN0) by writing R12 with movl.
       // Only matched when both the narrow oop base and the narrow klass base
       // are NULL (the "R12_heapbase==0" case named in the format string).
5817   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5818   match(Set mem (StoreN mem zero));
5819
5820   ins_cost(125); // XXX
5821   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5822   ins_encode %{
5823     __ movl($mem$$Address, r12);
5824   %}
5825   ins_pipe(ialu_mem_reg);
5826 %}
5827 
5828 instruct storeImmN(memory mem, immN src)
5829 %{
       // Stores a compressed-pointer constant (immN) to memory. A non-NULL
       // constant is emitted through set_narrow_oop; a NULL constant is
       // written as a plain 32-bit zero.
5830   match(Set mem (StoreN mem src));
5831
5832   ins_cost(150); // XXX
5833   format %{ "movl    $mem, $src\t# compressed ptr" %}
5834   ins_encode %{
5835     address oop_con = (address)$src$$constant;
5836     if (oop_con != NULL) {
5837       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5838     } else {
5839       __ movl($mem$$Address, (int32_t)0);
5840     }
5841   %}
5842   ins_pipe(ialu_mem_imm);
5843 %}
5844 
5845 instruct storeImmNKlass(memory mem, immNKlass src)
5846 %{
       // Stores a compressed klass constant (immNKlass) to memory; the
       // constant is cast to Klass* and emitted through set_narrow_klass.
5847   match(Set mem (StoreNKlass mem src));
5848
5849   ins_cost(150); // XXX
5850   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5851   ins_encode %{
5852     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5853   %}
5854   ins_pipe(ialu_mem_imm);
5855 %}
5856 
5857 // Store Integer Immediate
5858 instruct storeImmI0(memory mem, immI0 zero)
5859 %{
       // Stores int zero (immI0) by writing R12 with movl. The predicate limits
       // this to compressed oops with NULL narrow oop and narrow klass bases,
       // the case the format string labels "R12_heapbase==0".
5860   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5861   match(Set mem (StoreI mem zero));
5862
5863   ins_cost(125); // XXX
5864   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
5865   ins_encode %{
5866     __ movl($mem$$Address, r12);
5867   %}
5868   ins_pipe(ialu_mem_reg);
5869 %}
5870 
5871 instruct storeImmI(memory mem, immI src)
5872 %{
       // Stores an int constant (immI) as an immediate: opcode 0xC7 /0 with the
       // REX_mem prefix, followed by a 32-bit constant (Con32).
5873   match(Set mem (StoreI mem src));
5874
5875   ins_cost(150);
5876   format %{ "movl    $mem, $src\t# int" %}
5877   opcode(0xC7); /* C7 /0 */
5878   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5879   ins_pipe(ialu_mem_imm);
5880 %}
5881 
5882 // Store Long Immediate
5883 instruct storeImmL0(memory mem, immL0 zero)
5884 %{
       // Stores long zero (immL0) by writing R12 with movq. The predicate
       // limits this to compressed oops with NULL narrow oop and narrow klass
       // bases, the case the format string labels "R12_heapbase==0".
5885   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5886   match(Set mem (StoreL mem zero));
5887
5888   ins_cost(125); // XXX
5889   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
5890   ins_encode %{
5891     __ movq($mem$$Address, r12);
5892   %}
5893   ins_pipe(ialu_mem_reg);
5894 %}
5895 
5896 instruct storeImmL(memory mem, immL32 src)
5897 %{
       // Stores a long constant of operand class immL32 as an immediate:
       // opcode 0xC7 /0 with the REX_mem_wide prefix and a 32-bit constant
       // (Con32).
5898   match(Set mem (StoreL mem src));
5899
5900   ins_cost(150);
5901   format %{ "movq    $mem, $src\t# long" %}
5902   opcode(0xC7); /* C7 /0 */
5903   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5904   ins_pipe(ialu_mem_imm);
5905 %}
5906 
5907 // Store Short/Char Immediate
5908 instruct storeImmC0(memory mem, immI0 zero)
5909 %{
       // Stores a zero short/char (StoreC of immI0) by writing R12 with movw.
       // The predicate limits this to compressed oops with NULL narrow oop and
       // narrow klass bases ("R12_heapbase==0" in the format string).
5910   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5911   match(Set mem (StoreC mem zero));
5912
5913   ins_cost(125); // XXX
5914   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
5915   ins_encode %{
5916     __ movw($mem$$Address, r12);
5917   %}
5918   ins_pipe(ialu_mem_reg);
5919 %}
5920 
5921 instruct storeImmI16(memory mem, immI16 src)
5922 %{
       // Stores a 16-bit constant (immI16) for StoreC; enabled only when
       // UseStoreImmI16 is set. The encoding is the 0xC7 /0 immediate store
       // preceded by SizePrefix, with a 16-bit constant (Con16).
5923   predicate(UseStoreImmI16);
5924   match(Set mem (StoreC mem src));
5925
5926   ins_cost(150);
5927   format %{ "movw    $mem, $src\t# short/char" %}
5928   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
5929   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
5930   ins_pipe(ialu_mem_imm);
5931 %}
5932 
5933 // Store Byte Immediate
5934 instruct storeImmB0(memory mem, immI0 zero)
5935 %{
       // Stores a zero byte (StoreB of immI0) by writing R12 with movb. The
       // predicate limits this to compressed oops with NULL narrow oop and
       // narrow klass bases ("R12_heapbase==0" in the format string).
5936   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5937   match(Set mem (StoreB mem zero));
5938
5939   ins_cost(125); // XXX
       // The listing annotation says "byte": this rule matches StoreB, not the
       // short/char StoreC rule it was evidently copied from.
5940   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
5941   ins_encode %{
5942     __ movb($mem$$Address, r12);
5943   %}
5944   ins_pipe(ialu_mem_reg);
5945 %}
5946 
5947 instruct storeImmB(memory mem, immI8 src)
5948 %{
       // Stores a byte constant (immI8) as an immediate: opcode 0xC6 /0 with
       // the REX_mem prefix; the constant is emitted via Con8or32.
5949   match(Set mem (StoreB mem src));
5950
5951   ins_cost(150); // XXX
5952   format %{ "movb    $mem, $src\t# byte" %}
5953   opcode(0xC6); /* C6 /0 */
5954   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
5955   ins_pipe(ialu_mem_imm);
5956 %}
5957 
5958 // Store CMS card-mark Immediate
5959 instruct storeImmCM0_reg(memory mem, immI0 zero)
5960 %{
       // Card-mark store of zero (StoreCM of immI0) done by writing R12 with
       // movb. The predicate limits this to compressed oops with NULL narrow
       // oop and narrow klass bases ("R12_heapbase==0" in the format string).
5961   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5962   match(Set mem (StoreCM mem zero));
5963
5964   ins_cost(125); // XXX
5965   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
5966   ins_encode %{
5967     __ movb($mem$$Address, r12);
5968   %}
5969   ins_pipe(ialu_mem_reg);
5970 %}
5971 
5972 instruct storeImmCM0(memory mem, immI0 src)
5973 %{
       // Card-mark store of zero (StoreCM of immI0) as an immediate byte store:
       // opcode 0xC6 /0 with the REX_mem prefix. It has no predicate and a
       // higher cost (150) than the R12-based rule storeImmCM0_reg (125).
5974   match(Set mem (StoreCM mem src));
5975
5976   ins_cost(150); // XXX
5977   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
5978   opcode(0xC6); /* C6 /0 */
5979   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
5980   ins_pipe(ialu_mem_imm);
5981 %}
5982 
5983 // Store Float
5984 instruct storeF(memory mem, regF src)
5985 %{
       // Matches a StoreF of a float register (regF) and emits movflt from the
       // XMM register to the memory operand (printed as movss).
5986   match(Set mem (StoreF mem src));
5987
5988   ins_cost(95); // XXX
5989   format %{ "movss   $mem, $src\t# float" %}
5990   ins_encode %{
5991     __ movflt($mem$$Address, $src$$XMMRegister);
5992   %}
5993   ins_pipe(pipe_slow); // XXX
5994 %}
5995 
5996 // Store immediate Float value (it is faster than store from XMM register)
5997 instruct storeF0(memory mem, immF0 zero)
5998 %{
       // Stores float 0.0 (immF0) by writing R12 with a 32-bit movl rather than
       // going through an XMM register. The predicate limits this to compressed
       // oops with NULL narrow oop and narrow klass bases ("R12_heapbase==0").
5999   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6000   match(Set mem (StoreF mem zero));
6001
6002   ins_cost(25); // XXX
6003   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6004   ins_encode %{
6005     __ movl($mem$$Address, r12);
6006   %}
6007   ins_pipe(ialu_mem_reg);
6008 %}
6009 
6010 instruct storeF_imm(memory mem, immF src)
6011 %{
       // Stores a float constant (immF) with an integer immediate store:
       // opcode 0xC7 /0 with the REX_mem prefix, the constant being emitted by
       // Con32F_as_bits (printed as movl).
6012   match(Set mem (StoreF mem src));
6013
6014   ins_cost(50);
6015   format %{ "movl    $mem, $src\t# float" %}
6016   opcode(0xC7); /* C7 /0 */
6017   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6018   ins_pipe(ialu_mem_imm);
6019 %}
6020 
6021 // Store Double
6022 instruct storeD(memory mem, regD src)
6023 %{
       // Matches a StoreD of a double register (regD) and emits movdbl from the
       // XMM register to the memory operand (printed as movsd).
6024   match(Set mem (StoreD mem src));
6025
6026   ins_cost(95); // XXX
6027   format %{ "movsd   $mem, $src\t# double" %}
6028   ins_encode %{
6029     __ movdbl($mem$$Address, $src$$XMMRegister);
6030   %}
6031   ins_pipe(pipe_slow); // XXX
6032 %}
6033 
6034 // Store immediate double 0.0 (it is faster than store from XMM register)
6035 instruct storeD0_imm(memory mem, immD0 src)
6036 %{
       // Stores double 0.0 (immD0) with an immediate store: opcode 0xC7 /0 with
       // the REX_mem_wide prefix and the constant emitted by Con32F_as_bits.
       //
       // The predicate is the exact complement of the one on the R12-based
       // rule storeD0 (UseCompressedOops with NULL narrow oop base AND NULL
       // narrow klass base). It therefore also has to accept a non-NULL narrow
       // klass base; without that term neither zero-store rule matches in that
       // configuration. The encoding does not use R12, so accepting the extra
       // case is safe.
6037   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL) || (Universe::narrow_klass_base() != NULL));
6038   match(Set mem (StoreD mem src));
6039
6040   ins_cost(50);
6041   format %{ "movq    $mem, $src\t# double 0." %}
6042   opcode(0xC7); /* C7 /0 */
6043   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6044   ins_pipe(ialu_mem_imm);
6045 %}
6046 
6047 instruct storeD0(memory mem, immD0 zero)
6048 %{
       // Stores double 0.0 (immD0) by writing R12 with movq. The predicate
       // limits this to compressed oops with NULL narrow oop and narrow klass
       // bases, the case the format string labels "R12_heapbase==0".
6049   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6050   match(Set mem (StoreD mem zero));
6051
6052   ins_cost(25); // XXX
6053   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6054   ins_encode %{
6055     __ movq($mem$$Address, r12);
6056   %}
6057   ins_pipe(ialu_mem_reg);
6058 %}
6059 
6060 instruct storeSSI(stackSlotI dst, rRegI src)
6061 %{
       // Copies an int register into an int stack slot (plain "Set dst src"
       // with a stackSlotI destination). Encoded as opcode 0x89 with the
       // REX_reg_mem prefix.
6062   match(Set dst src);
6063
6064   ins_cost(100);
6065   format %{ "movl    $dst, $src\t# int stk" %}
6066   opcode(0x89);
6067   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6068   ins_pipe( ialu_mem_reg );
6069 %}
6070 
6071 instruct storeSSL(stackSlotL dst, rRegL src)
6072 %{
       // Copies a long register into a long stack slot (plain "Set dst src"
       // with a stackSlotL destination). Encoded as opcode 0x89 with the
       // REX_reg_mem_wide prefix.
6073   match(Set dst src);
6074
6075   ins_cost(100);
6076   format %{ "movq    $dst, $src\t# long stk" %}
6077   opcode(0x89);
6078   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6079   ins_pipe(ialu_mem_reg);
6080 %}
6081 
6082 instruct storeSSP(stackSlotP dst, rRegP src)
6083 %{
       // Copies a pointer register into a pointer stack slot (plain
       // "Set dst src" with a stackSlotP destination). Encoded as opcode 0x89
       // with the REX_reg_mem_wide prefix.
6084   match(Set dst src);
6085
6086   ins_cost(100);
6087   format %{ "movq    $dst, $src\t# ptr stk" %}
6088   opcode(0x89);
6089   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6090   ins_pipe(ialu_mem_reg);
6091 %}
6092 
6093 instruct storeSSF(stackSlotF dst, regF src)
6094 %{
       // Copies a float register into a float stack slot. The slot is addressed
       // as rsp plus the operand's displacement and written with movflt.
6095   match(Set dst src);
6096
6097   ins_cost(95); // XXX
6098   format %{ "movss   $dst, $src\t# float stk" %}
6099   ins_encode %{
6100     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6101   %}
6102   ins_pipe(pipe_slow); // XXX
6103 %}
6104 
6105 instruct storeSSD(stackSlotD dst, regD src)
6106 %{
       // Copies a double register into a double stack slot. The slot is
       // addressed as rsp plus the operand's displacement and written with
       // movdbl.
6107   match(Set dst src);
6108
6109   ins_cost(95); // XXX
6110   format %{ "movsd   $dst, $src\t# double stk" %}
6111   ins_encode %{
6112     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6113   %}
6114   ins_pipe(pipe_slow); // XXX
6115 %}
6116 
6117 //----------BSWAP Instructions-------------------------------------------------
6118 instruct bytes_reverse_int(rRegI dst) %{
       // In-place byte reversal of an int register (ReverseBytesI with dst as
       // both input and output). Encoded from the opcode pair 0x0F, 0xC8 with
       // the REX_reg prefix (printed as bswapl).
6119   match(Set dst (ReverseBytesI dst));
6120
6121   format %{ "bswapl  $dst" %}
6122   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6123   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6124   ins_pipe( ialu_reg );
6125 %}
6126 
6127 instruct bytes_reverse_long(rRegL dst) %{
       // In-place byte reversal of a long register (ReverseBytesL with dst as
       // both input and output). Same opcode pair as the int form but with the
       // REX_reg_wide prefix (printed as bswapq).
6128   match(Set dst (ReverseBytesL dst));
6129
6130   format %{ "bswapq  $dst" %}
6131   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6132   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6133   ins_pipe( ialu_reg);
6134 %}
6135 
6136 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // In-place ReverseBytesUS: a 32-bit bswapl followed by a logical right
       // shift by 16 (shrl). The flags register is declared killed.
6137   match(Set dst (ReverseBytesUS dst));
6138   effect(KILL cr);
6139
6140   format %{ "bswapl  $dst\n\t"
6141             "shrl    $dst,16\n\t" %}
6142   ins_encode %{
6143     __ bswapl($dst$$Register);
6144     __ shrl($dst$$Register, 16);
6145   %}
6146   ins_pipe( ialu_reg );
6147 %}
6148 
6149 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // In-place ReverseBytesS: a 32-bit bswapl followed by an arithmetic
       // right shift by 16 (sarl). The flags register is declared killed.
6150   match(Set dst (ReverseBytesS dst));
6151   effect(KILL cr);
6152
       // The listing prints "sarl" so that it names the instruction the
       // encoding actually emits, as the unsigned sibling does with "shrl".
6153   format %{ "bswapl  $dst\n\t"
6154             "sarl    $dst,16\n\t" %}
6155   ins_encode %{
6156     __ bswapl($dst$$Register);
6157     __ sarl($dst$$Register, 16);
6158   %}
6159   ins_pipe( ialu_reg );
6160 %}
6161 
6162 //---------- Zeros Count Instructions ------------------------------------------
6163 
6164 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI implemented with a single lzcntl; selected only
       // when UseCountLeadingZerosInstruction is set. Flags are killed.
6165   predicate(UseCountLeadingZerosInstruction);
6166   match(Set dst (CountLeadingZerosI src));
6167   effect(KILL cr);
6168
6169   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6170   ins_encode %{
6171     __ lzcntl($dst$$Register, $src$$Register);
6172   %}
6173   ins_pipe(ialu_reg);
6174 %}
6175 
6176 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI fallback used when UseCountLeadingZerosInstruction
       // is off. bsrl is followed by a fix-up: when the "not zero" branch is
       // not taken, dst is forced to -1. The value is then negated and
       // BitsPerInt - 1 is added.
6177   predicate(!UseCountLeadingZerosInstruction);
6178   match(Set dst (CountLeadingZerosI src));
6179   effect(KILL cr);
6180
6181   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6182             "jnz     skip\n\t"
6183             "movl    $dst, -1\n"
6184       "skip:\n\t"
6185             "negl    $dst\n\t"
6186             "addl    $dst, 31" %}
6187   ins_encode %{
6188     Register dstReg = $dst$$Register;
6189     Register srcReg = $src$$Register;
6190     Label haveBitIndex;
6191     __ bsrl(dstReg, srcReg);
6192     __ jccb(Assembler::notZero, haveBitIndex);
6193     __ movl(dstReg, -1);            // reached only when the branch falls through
6194     __ bind(haveBitIndex);
6195     __ negl(dstReg);
6196     __ addl(dstReg, BitsPerInt - 1);
6197   %}
6198   ins_pipe(ialu_reg);
6199 %}
6200 
6201 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL implemented with a single lzcntq; selected only
       // when UseCountLeadingZerosInstruction is set. The source is a long
       // register, the result an int register. Flags are killed.
6202   predicate(UseCountLeadingZerosInstruction);
6203   match(Set dst (CountLeadingZerosL src));
6204   effect(KILL cr);
6205
6206   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6207   ins_encode %{
6208     __ lzcntq($dst$$Register, $src$$Register);
6209   %}
6210   ins_pipe(ialu_reg);
6211 %}
6212 
6213 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL fallback used when UseCountLeadingZerosInstruction
       // is off. bsrq is followed by a fix-up: when the "not zero" branch is
       // not taken, dst is forced to -1. The value is then negated and
       // BitsPerLong - 1 is added.
6214   predicate(!UseCountLeadingZerosInstruction);
6215   match(Set dst (CountLeadingZerosL src));
6216   effect(KILL cr);
6217
6218   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6219             "jnz     skip\n\t"
6220             "movl    $dst, -1\n"
6221       "skip:\n\t"
6222             "negl    $dst\n\t"
6223             "addl    $dst, 63" %}
6224   ins_encode %{
6225     Register dstReg = $dst$$Register;
6226     Register srcReg = $src$$Register;
6227     Label haveBitIndex;
6228     __ bsrq(dstReg, srcReg);
6229     __ jccb(Assembler::notZero, haveBitIndex);
6230     __ movl(dstReg, -1);            // reached only when the branch falls through
6231     __ bind(haveBitIndex);
6232     __ negl(dstReg);
6233     __ addl(dstReg, BitsPerLong - 1);
6234   %}
6235   ins_pipe(ialu_reg);
6236 %}
6237 
6238 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl. When the "not zero" branch after bsfl is
       // not taken, dst is loaded with BitsPerInt instead. Flags are killed.
6239   match(Set dst (CountTrailingZerosI src));
6240   effect(KILL cr);
6241
6242   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6243             "jnz     done\n\t"
6244             "movl    $dst, 32\n"
6245       "done:" %}
6246   ins_encode %{
6247     Register dstReg = $dst$$Register;
6248     Label haveBitIndex;
6249     __ bsfl(dstReg, $src$$Register);
6250     __ jccb(Assembler::notZero, haveBitIndex);
6251     __ movl(dstReg, BitsPerInt);    // reached only when the branch falls through
6252     __ bind(haveBitIndex);
6253   %}
6254   ins_pipe(ialu_reg);
6255 %}
6256 
6257 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq. When the "not zero" branch after bsfq is
       // not taken, dst is loaded with BitsPerLong instead. Flags are killed.
6258   match(Set dst (CountTrailingZerosL src));
6259   effect(KILL cr);
6260
6261   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6262             "jnz     done\n\t"
6263             "movl    $dst, 64\n"
6264       "done:" %}
6265   ins_encode %{
6266     Register dstReg = $dst$$Register;
6267     Label haveBitIndex;
6268     __ bsfq(dstReg, $src$$Register);
6269     __ jccb(Assembler::notZero, haveBitIndex);
6270     __ movl(dstReg, BitsPerLong);   // reached only when the branch falls through
6271     __ bind(haveBitIndex);
6272   %}
6273   ins_pipe(ialu_reg);
6274 %}
6275 
6276 
6277 //---------- Population Count Instructions -------------------------------------
6278 
6279 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI of an int register via popcntl; selected only when
       // UsePopCountInstruction is set. Flags are killed.
6280   predicate(UsePopCountInstruction);
6281   match(Set dst (PopCountI src));
6282   effect(KILL cr);
6283
6284   format %{ "popcnt  $dst, $src" %}
6285   ins_encode %{
6286     __ popcntl($dst$$Register, $src$$Register);
6287   %}
6288   ins_pipe(ialu_reg);
6289 %}
6290 
6291 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI with the LoadI folded in: popcntl reads its source directly
       // from the memory operand. Requires UsePopCountInstruction.
6292   predicate(UsePopCountInstruction);
6293   match(Set dst (PopCountI (LoadI mem)));
6294   effect(KILL cr);
6295
6296   format %{ "popcnt  $dst, $mem" %}
6297   ins_encode %{
6298     __ popcntl($dst$$Register, $mem$$Address);
6299   %}
6300   ins_pipe(ialu_reg);
6301 %}
6302 
6303 // Note: Long.bitCount(long) returns an int.
6304 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL of a long register via popcntq, producing its result in an
       // int register. Requires UsePopCountInstruction; flags are killed.
6305   predicate(UsePopCountInstruction);
6306   match(Set dst (PopCountL src));
6307   effect(KILL cr);
6308
6309   format %{ "popcnt  $dst, $src" %}
6310   ins_encode %{
6311     __ popcntq($dst$$Register, $src$$Register);
6312   %}
6313   ins_pipe(ialu_reg);
6314 %}
6315 
6316 // Note: Long.bitCount(long) returns an int.
6317 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL with the LoadL folded in: popcntq reads its source directly
       // from the memory operand and writes an int register. Requires
       // UsePopCountInstruction.
6318   predicate(UsePopCountInstruction);
6319   match(Set dst (PopCountL (LoadL mem)));
6320   effect(KILL cr);
6321
6322   format %{ "popcnt  $dst, $mem" %}
6323   ins_encode %{
6324     __ popcntq($dst$$Register, $mem$$Address);
6325   %}
6326   ins_pipe(ialu_reg);
6327 %}
6328 
6329 
6330 //----------MemBar Instructions-----------------------------------------------
6331 // Memory barrier flavors
6332 
6333 instruct membar_acquire()
6334 %{
       // MemBarAcquire is matched but emits no code: zero cost, zero size and
       // an empty encoding.
6335   match(MemBarAcquire);
6336   ins_cost(0);
6337
6338   size(0);
6339   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6340   ins_encode();
6341   ins_pipe(empty);
6342 %}
6343 
6344 instruct membar_acquire_lock()
6345 %{
       // MemBarAcquireLock is matched but emits no code. The format string
       // gives the reason: a CMPXCHG in the preceding FastLock.
6346   match(MemBarAcquireLock);
6347   ins_cost(0);
6348
6349   size(0);
6350   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6351   ins_encode();
6352   ins_pipe(empty);
6353 %}
6354 
6355 instruct membar_release()
6356 %{
       // MemBarRelease is matched but emits no code: zero cost, zero size and
       // an empty encoding.
6357   match(MemBarRelease);
6358   ins_cost(0);
6359
6360   size(0);
6361   format %{ "MEMBAR-release ! (empty encoding)" %}
6362   ins_encode();
6363   ins_pipe(empty);
6364 %}
6365 
6366 instruct membar_release_lock()
6367 %{
       // MemBarReleaseLock is matched but emits no code. The format string
       // gives the reason: a FastUnlock follows.
6368   match(MemBarReleaseLock);
6369   ins_cost(0);
6370
6371   size(0);
6372   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6373   ins_encode();
6374   ins_pipe(empty);
6375 %}
6376 
6377 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile that emits a real barrier through
       // membar(Assembler::StoreLoad). Flags are declared killed and the cost
       // is high (400). The format template prints the "lock addl" form when
       // os::is_MP() is true and an empty-encoding note otherwise.
6378   match(MemBarVolatile);
6379   effect(KILL cr);
6380   ins_cost(400);
6381
6382   format %{
6383     $$template
6384     if (os::is_MP()) {
6385       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6386     } else {
6387       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6388     }
6389   %}
6390   ins_encode %{
6391     __ membar(Assembler::StoreLoad);
6392   %}
6393   ins_pipe(pipe_slow);
6394 %}
6395 
6396 instruct unnecessary_membar_volatile()
6397 %{
       // Zero-cost, zero-size alternative for MemBarVolatile, selected when
       // Matcher::post_store_load_barrier(n) reports that the barrier is not
       // needed for this node. Nothing is emitted.
6398   match(MemBarVolatile);
6399   predicate(Matcher::post_store_load_barrier(n));
6400   ins_cost(0);
6401
6402   size(0);
6403   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6404   ins_encode();
6405   ins_pipe(empty);
6406 %}
6407 
6408 instruct membar_storestore() %{
       // MemBarStoreStore is matched but emits no code: zero cost, zero size
       // and an empty encoding.
6409   match(MemBarStoreStore);
6410   ins_cost(0);
6411
6412   size(0);
6413   format %{ "MEMBAR-storestore (empty encoding)" %}
6414   ins_encode( );
6415   ins_pipe(empty);
6416 %}
6417 
6418 //----------Move Instructions--------------------------------------------------
6419 
6420 instruct castX2P(rRegP dst, rRegL src)
6421 %{
       // CastX2P (long -> pointer): a register copy with movptr, skipped
       // entirely when source and destination are the same register.
6422   match(Set dst (CastX2P src));
6423
6424   format %{ "movq    $dst, $src\t# long->ptr" %}
6425   ins_encode %{
6426     if ($dst$$reg != $src$$reg) {
6427       __ movptr($dst$$Register, $src$$Register);
6428     }
6429   %}
6430   ins_pipe(ialu_reg_reg); // XXX
6431 %}
6432 
6433 instruct castP2X(rRegL dst, rRegP src)
6434 %{
       // CastP2X (pointer -> long): a register copy with movptr, skipped
       // entirely when source and destination are the same register.
6435   match(Set dst (CastP2X src));
6436
6437   format %{ "movq    $dst, $src\t# ptr -> long" %}
6438   ins_encode %{
6439     if ($dst$$reg != $src$$reg) {
6440       __ movptr($dst$$Register, $src$$Register);
6441     }
6442   %}
6443   ins_pipe(ialu_reg_reg); // XXX
6444 %}
6445 
6446 // Convert oop into int for vectors alignment masking
6447 instruct convP2I(rRegI dst, rRegP src)
6448 %{
       // Matches the combined tree ConvL2I(CastP2X src) and implements it with
       // one 32-bit register move (movl) from the pointer register.
6449   match(Set dst (ConvL2I (CastP2X src)));
6450
6451   format %{ "movl    $dst, $src\t# ptr -> int" %}
6452   ins_encode %{
6453     __ movl($dst$$Register, $src$$Register);
6454   %}
6455   ins_pipe(ialu_reg_reg); // XXX
6456 %}
6457 
6458 // Convert compressed oop into int for vectors alignment masking
6459 // in case of 32bit oops (heap < 4Gb).
6460 instruct convN2I(rRegI dst, rRegN src)
6461 %{
       // Matches ConvL2I(CastP2X(DecodeN src)) and implements the whole tree
       // with one movl from the compressed-pointer register. Only valid, per
       // the predicate, when the narrow oop shift is 0.
6462   predicate(Universe::narrow_oop_shift() == 0);
6463   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6464
6465   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6466   ins_encode %{
6467     __ movl($dst$$Register, $src$$Register);
6468   %}
6469   ins_pipe(ialu_reg_reg); // XXX
6470 %}
6471 
6472 // Convert oop pointer into compressed form
6473 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for a pointer whose type is not known to be NotNull (see the
       // predicate). The source is copied into dst when the registers differ,
       // then encode_heap_oop is applied to dst in place. Flags are killed.
6474   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6475   match(Set dst (EncodeP src));
6476   effect(KILL cr);
6477   format %{ "encode_heap_oop $dst,$src" %}
6478   ins_encode %{
6479     Register dstReg = $dst$$Register;
6480     Register srcReg = $src$$Register;
6481     if (dstReg != srcReg) {
6482       __ movq(dstReg, srcReg);
6483     }
6484     __ encode_heap_oop(dstReg);
6485   %}
6486   ins_pipe(ialu_reg_long);
6487 %}
6488 
6489 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for a pointer whose type is NotNull (see the predicate);
       // delegates to the two-register encode_heap_oop_not_null. Flags are
       // killed.
6490   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6491   match(Set dst (EncodeP src));
6492   effect(KILL cr);
6493   format %{ "encode_heap_oop_not_null $dst,$src" %}
6494   ins_encode %{
6495     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6496   %}
6497   ins_pipe(ialu_reg_long);
6498 %}
6499 
6500 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for a result type that is neither NotNull nor Constant (see
       // the predicate). The source is copied into dst when the registers
       // differ, then decode_heap_oop is applied to dst in place. Flags are
       // killed.
6501   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6502             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6503   match(Set dst (DecodeN src));
6504   effect(KILL cr);
6505   format %{ "decode_heap_oop $dst,$src" %}
6506   ins_encode %{
6507     Register dstReg = $dst$$Register;
6508     Register srcReg = $src$$Register;
6509     if (dstReg != srcReg) {
6510       __ movq(dstReg, srcReg);
6511     }
6512     __ decode_heap_oop(dstReg);
6513   %}
6514   ins_pipe(ialu_reg_long);
6515 %}
6516 
6517 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for a result type that is NotNull or Constant (see the
       // predicate). Uses the in-place one-register decode_heap_oop_not_null
       // when src and dst coincide, and the two-register form otherwise.
       // Flags are killed.
6518   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6519             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6520   match(Set dst (DecodeN src));
6521   effect(KILL cr);
6522   format %{ "decode_heap_oop_not_null $dst,$src" %}
6523   ins_encode %{
6524     Register dstReg = $dst$$Register;
6525     Register srcReg = $src$$Register;
6526     if (dstReg == srcReg) {
6527       __ decode_heap_oop_not_null(dstReg);
6528     } else {
6529       __ decode_heap_oop_not_null(dstReg, srcReg);
6530     }
6531   %}
6532   ins_pipe(ialu_reg_long);
6533 %}
6534 
6535 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodePKlass: delegates to the two-register encode_klass_not_null.
       // There is no predicate, so this is the only rule for the node here.
       // Flags are killed.
6536   match(Set dst (EncodePKlass src));
6537   effect(KILL cr);
6538   format %{ "encode_klass_not_null $dst,$src" %}
6539   ins_encode %{
6540     __ encode_klass_not_null($dst$$Register, $src$$Register);
6541   %}
6542   ins_pipe(ialu_reg_long);
6543 %}
6544 
6545 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeNKlass: uses the in-place one-register decode_klass_not_null
       // when src and dst coincide, and the two-register form otherwise.
       // Flags are killed.
6546   match(Set dst (DecodeNKlass src));
6547   effect(KILL cr);
6548   format %{ "decode_klass_not_null $dst,$src" %}
6549   ins_encode %{
6550     Register dstReg = $dst$$Register;
6551     Register srcReg = $src$$Register;
6552     if (dstReg == srcReg) {
6553       __ decode_klass_not_null(dstReg);
6554     } else {
6555       __ decode_klass_not_null(dstReg, srcReg);
6556     }
6557   %}
6558   ins_pipe(ialu_reg_long);
6559 %}
6560 
6561 
6562 //----------Conditional Move---------------------------------------------------
6563 // Jump
6564 // dummy instruction for generating temp registers
6565 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Jump on (switch_val << shift). predicate(false) means the matcher
       // never selects this rule. dest is a TEMP register: it receives the
       // address of $constantaddress via lea, and the jump goes indirectly
       // through [dest + switch_val * scale].
6566   match(Jump (LShiftL switch_val shift));
6567   ins_cost(350);
6568   predicate(false);
6569   effect(TEMP dest);
6570
6571   format %{ "leaq    $dest, [$constantaddress]\n\t"
6572             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6573   ins_encode %{
6574     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6575     // to do that and the compiler is using that register as one it can allocate.
6576     // So we build it all by hand.
6577     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6578     // ArrayAddress dispatch(table, index);
6579     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6580     __ lea($dest$$Register, $constantaddress);
6581     __ jmp(dispatch);
6582   %}
6583   ins_pipe(pipe_jmp);
6584 %}
6585 
6586 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump on ((switch_val << shift) + offset). dest is a TEMP register: it
       // receives the address of $constantaddress via lea, and the jump goes
       // indirectly through [dest + switch_val * scale + offset], the offset
       // being passed as an int displacement.
6587   match(Jump (AddL (LShiftL switch_val shift) offset));
6588   ins_cost(350);
6589   effect(TEMP dest);
6590
6591   format %{ "leaq    $dest, [$constantaddress]\n\t"
6592             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6593   ins_encode %{
6594     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6595     // to do that and the compiler is using that register as one it can allocate.
6596     // So we build it all by hand.
6597     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6598     // ArrayAddress dispatch(table, index);
6599     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6600     __ lea($dest$$Register, $constantaddress);
6601     __ jmp(dispatch);
6602   %}
6603   ins_pipe(pipe_jmp);
6604 %}
6605 
6606 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Jump on an unscaled switch_val. dest is a TEMP register: it receives
       // the address of $constantaddress via lea, and the jump goes indirectly
       // through [dest + switch_val] (scale factor times_1).
6607   match(Jump switch_val);
6608   ins_cost(350);
6609   effect(TEMP dest);
6610
6611   format %{ "leaq    $dest, [$constantaddress]\n\t"
6612             "jmp     [$dest + $switch_val]\n\t" %}
6613   ins_encode %{
6614     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6615     // to do that and the compiler is using that register as one it can allocate.
6616     // So we build it all by hand.
6617     // Address index(noreg, switch_reg, Address::times_1);
6618     // ArrayAddress dispatch(table, index);
6619     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6620     __ lea($dest$$Register, $constantaddress);
6621     __ jmp(dispatch);
6622   %}
6623   ins_pipe(pipe_jmp);
6624 %}
6625 
6626 // Conditional move
6627 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6628 %{
       // Conditional move of an int register under a signed condition
       // (cmpOp / rFlagsReg). dst is both an input and the result. Encoded
       // from the opcode pair 0x0F, 0x40 through enc_cmov(cop).
6629   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6630
6631   ins_cost(200); // XXX
6632   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6633   opcode(0x0F, 0x40);
6634   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6635   ins_pipe(pipe_cmov_reg);
6636 %}
6637 
6638 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Conditional move of an int register under an unsigned condition
       // (cmpOpU / rFlagsRegU). Same encoding as the signed form: opcode pair
       // 0x0F, 0x40 through enc_cmov(cop).
6639   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6640
6641   ins_cost(200); // XXX
6642   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6643   opcode(0x0F, 0x40);
6644   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6645   ins_pipe(pipe_cmov_reg);
6646 %}
6647 
6648 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Int register conditional move for cmpOpUCF / rFlagsRegUCF operands.
       // It has no encoding of its own and expands into cmovI_regU.
6649   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6650   ins_cost(200);
6651   expand %{
6652     cmovI_regU(cop, cr, dst, src);
6653   %}
6654 %}
6655 
6656 // Conditional move
6657 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // Signed conditional move of an int with the LoadI folded in: the
       // source is a memory operand. Encoded from the opcode pair 0x0F, 0x40
       // through enc_cmov(cop) with reg_mem addressing.
6658   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6659
6660   ins_cost(250); // XXX
6661   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6662   opcode(0x0F, 0x40);
6663   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6664   ins_pipe(pipe_cmov_mem);
6665 %}
6666 
6667 // Conditional move
6668 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6669 %{
       // Unsigned conditional move of an int with the LoadI folded in: the
       // source is a memory operand. Encoded from the opcode pair 0x0F, 0x40
       // through enc_cmov(cop) with reg_mem addressing.
6670   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6671
6672   ins_cost(250); // XXX
6673   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6674   opcode(0x0F, 0x40);
6675   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6676   ins_pipe(pipe_cmov_mem);
6677 %}
6678 
6679 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Int conditional move from memory for cmpOpUCF / rFlagsRegUCF
       // operands. It has no encoding of its own and expands into cmovI_memU.
6680   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6681   ins_cost(250);
6682   expand %{
6683     cmovI_memU(cop, cr, dst, src);
6684   %}
6685 %}
6686 
6687 // Conditional move
6688 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6689 %{
       // Signed conditional move between compressed-pointer registers (CMoveN).
       // Uses the non-wide REX_reg_reg prefix and is printed as cmovl. Encoded
       // from the opcode pair 0x0F, 0x40 through enc_cmov(cop).
6690   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6691
6692   ins_cost(200); // XXX
6693   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6694   opcode(0x0F, 0x40);
6695   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6696   ins_pipe(pipe_cmov_reg);
6697 %}
6698 
6699 // Conditional move
6700 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6701 %{
       // Unsigned conditional move between compressed-pointer registers
       // (CMoveN with cmpOpU / rFlagsRegU). Same encoding as the signed form.
6702   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6703
6704   ins_cost(200); // XXX
6705   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6706   opcode(0x0F, 0x40);
6707   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6708   ins_pipe(pipe_cmov_reg);
6709 %}
6710 
6711 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Compressed-pointer conditional move for cmpOpUCF / rFlagsRegUCF
       // operands. It has no encoding of its own and expands into cmovN_regU.
6712   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6713   ins_cost(200);
6714   expand %{
6715     cmovN_regU(cop, cr, dst, src);
6716   %}
6717 %}
6718 
6719 // Conditional move
6720 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6721 %{
       // Signed conditional move between pointer registers (CMoveP). Uses the
       // REX_reg_reg_wide prefix and is printed as cmovq. Encoded from the
       // opcode pair 0x0F, 0x40 through enc_cmov(cop).
6722   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6723
6724   ins_cost(200); // XXX
6725   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6726   opcode(0x0F, 0x40);
6727   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6728   ins_pipe(pipe_cmov_reg);  // XXX
6729 %}
6730 
6731 // Conditional move
6732 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6733 %{
       // Unsigned conditional move between pointer registers (CMoveP with
       // cmpOpU / rFlagsRegU). Same wide encoding as the signed form.
6734   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6735
6736   ins_cost(200); // XXX
6737   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6738   opcode(0x0F, 0x40);
6739   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6740   ins_pipe(pipe_cmov_reg); // XXX
6741 %}
6742 
6743 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Pointer conditional move for cmpOpUCF / rFlagsRegUCF operands. It has
       // no encoding of its own and expands into cmovP_regU.
6744   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6745   ins_cost(200);
6746   expand %{
6747     cmovP_regU(cop, cr, dst, src);
6748   %}
6749 %}
6750 
6751 // DISABLED: Requires the ADLC to emit a bottom_type call that
6752 // correctly meets the two pointer arguments; one is an incoming
6753 // register but the other is a memory operand.  ALSO appears to
6754 // be buggy with implicit null checks.
6755 //
6756 //// Conditional move
6757 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6758 //%{
6759 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6760 //  ins_cost(250);
6761 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6762 //  opcode(0x0F,0x40);
6763 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6764 //  ins_pipe( pipe_cmov_mem );
6765 //%}
6766 //
6767 //// Conditional move
6768 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6769 //%{
6770 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6771 //  ins_cost(250);
6772 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6773 //  opcode(0x0F,0x40);
6774 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6775 //  ins_pipe( pipe_cmov_mem );
6776 //%}
6777 
6778 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6779 %{
       // Signed conditional move between long registers (CMoveL). Uses the
       // REX_reg_reg_wide prefix and is printed as cmovq. Encoded from the
       // opcode pair 0x0F, 0x40 through enc_cmov(cop).
6780   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6781
6782   ins_cost(200); // XXX
6783   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6784   opcode(0x0F, 0x40);
6785   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6786   ins_pipe(pipe_cmov_reg);  // XXX
6787 %}
6788 
6789 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6790 %{
       // Signed conditional move of a long with the LoadL folded in: the
       // source is a memory operand. Wide REX prefix with reg_mem addressing.
       // NOTE(review): cost is 200 here, while the int memory forms in this
       // file use 250 -- confirm whether the difference is intended.
6791   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6792
6793   ins_cost(200); // XXX
6794   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6795   opcode(0x0F, 0x40);
6796   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6797   ins_pipe(pipe_cmov_mem);  // XXX
6798 %}
6799 
6800 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6801 %{
       // Long conditional move, register source, unsigned condition
       // (cmpOpU / rFlagsRegU).  Encoding list is the same as cmovL_reg;
       // only the operand classes and the format comment differ.
6802   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6803 
6804   ins_cost(200); // XXX
6805   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6806   opcode(0x0F, 0x40);
6807   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6808   ins_pipe(pipe_cmov_reg); // XXX
6809 %}
6810 
6811 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
       // Long conditional move for the cmpOpUCF / rFlagsRegUCF operand pair.
       // Expands to cmovL_regU with the same operands; no encoding of its own.
6812   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6813   ins_cost(200);
6814   expand %{
6815     cmovL_regU(cop, cr, dst, src);
6816   %}
6817 %}
6818 
6819 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6820 %{
       // Long conditional move from memory (LoadL folded in) under an
       // unsigned condition (cmpOpU / rFlagsRegU).
6821   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6822 
6823   ins_cost(200); // XXX
6824   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6825   opcode(0x0F, 0x40);
6826   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6827   ins_pipe(pipe_cmov_mem); // XXX
6828 %}
6829 
6830 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
       // Memory-source long conditional move for the cmpOpUCF / rFlagsRegUCF
       // operand pair.  Expands to cmovL_memU with the same operands.
6831   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6832   ins_cost(200);
6833   expand %{
6834     cmovL_memU(cop, cr, dst, src);
6835   %}
6836 %}
6837 
6838 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6839 %{
       // Float conditional move under a signed condition (cmpOp).
       // Implemented as a short branch around a register-to-register move
       // rather than as a cmov instruction: if $cop does not hold, the move
       // is skipped and $dst keeps its old value.
6840   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6841 
6842   ins_cost(200); // XXX
6843   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6844             "movss     $dst, $src\n"
6845     "skip:" %}
6846   ins_encode %{
6847     Label Lskip;
6848     // Invert sense of branch from sense of CMOV
         // (the condition code is flipped by XOR-ing it with 1; jccb is the
         // short-displacement jump, the target is bound two lines below).
6849     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6850     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6851     __ bind(Lskip);
6852   %}
6853   ins_pipe(pipe_slow);
6854 %}
6855 
6856 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
6857 // %{
6858 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
6859 
6860 //   ins_cost(200); // XXX
6861 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6862 //             "movss     $dst, $src\n"
6863 //     "skip:" %}
6864 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
6865 //   ins_pipe(pipe_slow);
6866 // %}
6867 
6868 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
6869 %{
       // Float conditional move under an unsigned condition (cmpOpU).
       // Same branch-around-a-move sequence as cmovF_reg; only the operand
       // classes and the format comment differ.
6870   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6871 
6872   ins_cost(200); // XXX
6873   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
6874             "movss     $dst, $src\n"
6875     "skip:" %}
6876   ins_encode %{
6877     Label Lskip;
6878     // Invert sense of branch from sense of CMOV
         // (condition code XOR 1), so the move runs only when $cop holds.
6879     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6880     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6881     __ bind(Lskip);
6882   %}
6883   ins_pipe(pipe_slow);
6884 %}
6885 
6886 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
       // Float conditional move for the cmpOpUCF / rFlagsRegUCF operand pair.
       // Expands to cmovF_regU with the same operands; no encoding of its own.
6887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6888   ins_cost(200);
6889   expand %{
6890     cmovF_regU(cop, cr, dst, src);
6891   %}
6892 %}
6893 
6894 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
6895 %{
       // Double conditional move under a signed condition (cmpOp).
       // Emitted as a short branch on the inverted condition around a
       // register-to-register double move, so $dst is only overwritten when
       // $cop holds.
6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6897 
6898   ins_cost(200); // XXX
6899   format %{ "jn$cop    skip\t# signed cmove double\n\t"
6900             "movsd     $dst, $src\n"
6901     "skip:" %}
6902   ins_encode %{
6903     Label Lskip;
6904     // Invert sense of branch from sense of CMOV
         // (condition code XOR 1 selects the opposite condition).
6905     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6906     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6907     __ bind(Lskip);
6908   %}
6909   ins_pipe(pipe_slow);
6910 %}
6911 
6912 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
6913 %{
       // Double conditional move under an unsigned condition (cmpOpU).
       // Same branch-around-a-move sequence as cmovD_reg; only the operand
       // classes and the format comment differ.
6914   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6915 
6916   ins_cost(200); // XXX
6917   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
6918             "movsd     $dst, $src\n"
6919     "skip:" %}
6920   ins_encode %{
6921     Label Lskip;
6922     // Invert sense of branch from sense of CMOV
         // (condition code XOR 1), so the move runs only when $cop holds.
6923     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6924     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6925     __ bind(Lskip);
6926   %}
6927   ins_pipe(pipe_slow);
6928 %}
6929 
6930 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
       // Double conditional move for the cmpOpUCF / rFlagsRegUCF operand pair.
       // Expands to cmovD_regU with the same operands; no encoding of its own.
6931   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6932   ins_cost(200);
6933   expand %{
6934     cmovD_regU(cop, cr, dst, src);
6935   %}
6936 %}
6937 
6938 //----------Arithmetic Instructions--------------------------------------------
6939 //----------Addition Instructions----------------------------------------------
6940 
6941 instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
6942 %{
       // AddExactI, register + register.  $dst is restricted to the rax
       // operand class (rax_RegI).  Unlike the plain AddI rules, the match
       // has no "Set dst" and the flags are declared DEF rather than KILL,
       // i.e. the flags written by addl are a result of this node
       // (presumably consumed by a following overflow check -- confirm).
6943   match(AddExactI dst src);
6944   effect(DEF cr);
6945 
6946   format %{ "addl    $dst, $src\t# addExact int" %}
6947   ins_encode %{
6948     __ addl($dst$$Register, $src$$Register);
6949   %}
6950   ins_pipe(ialu_reg_reg);
6951 %}
6952 
6953 instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
6954 %{
       // AddExactI, register + int immediate.  $dst is restricted to the rax
       // operand class; the flags are a DEF of this node, not a KILL.
       // NOTE(review): the pipeline class is ialu_reg_reg although the second
       // operand is an immediate (addI_rReg_imm uses ialu_reg) -- confirm.
6955   match(AddExactI dst src);
6956   effect(DEF cr);
6957 
6958   format %{ "addl    $dst, $src\t# addExact int" %}
6959   ins_encode %{
6960     __ addl($dst$$Register, $src$$constant);
6961   %}
6962   ins_pipe(ialu_reg_reg);
6963 %}
6964 
6965 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
6966 %{
       // Int add, register += register (two-address form: $dst is both an
       // input and the result).  The flags register is declared killed.
6967   match(Set dst (AddI dst src));
6968   effect(KILL cr);
6969 
6970   format %{ "addl    $dst, $src\t# int" %}
6971   opcode(0x03);
6972   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
6973   ins_pipe(ialu_reg_reg);
6974 %}
6975 
6976 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
6977 %{
       // Int add, register += int immediate.  Flags are killed.
       // Opcode 0x81 with /0 as the ModRM opcode extension; the Con8or32
       // encoder name suggests the immediate is emitted as 8 or 32 bits
       // depending on its value (encoder defined elsewhere -- confirm).
6978   match(Set dst (AddI dst src));
6979   effect(KILL cr);
6980 
6981   format %{ "addl    $dst, $src\t# int" %}
6982   opcode(0x81, 0x00); /* /0 id */
6983   ins_encode(OpcSErm(dst, src), Con8or32(src));
6984   ins_pipe( ialu_reg );
6985 %}
6986 
6987 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
6988 %{
       // Int add, register += memory: the LoadI of the second input is
       // folded into the add.  Flags are killed.  Cost 125.
6989   match(Set dst (AddI dst (LoadI src)));
6990   effect(KILL cr);
6991 
6992   ins_cost(125); // XXX
6993   format %{ "addl    $dst, $src\t# int" %}
6994   opcode(0x03);
6995   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6996   ins_pipe(ialu_reg_mem);
6997 %}
6998 
6999 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7000 %{
       // Int read-modify-write add: matches a StoreI to $dst of
       // (LoadI $dst) + $src and emits one add with a memory destination.
       // Flags are killed.  Cost 150.
7001   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7002   effect(KILL cr);
7003 
7004   ins_cost(150); // XXX
7005   format %{ "addl    $dst, $src\t# int" %}
7006   opcode(0x01); /* Opcode 01 /r */
       // Note the operand order: the register ($src) is passed first and the
       // memory operand ($dst) second to both encoders.
7007   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7008   ins_pipe(ialu_mem_reg);
7009 %}
7010 
7011 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7012 %{
       // Int read-modify-write add of an immediate: matches a StoreI to $dst
       // of (LoadI $dst) + $src.  Flags are killed.  Cost 125.
       // The /0 opcode extension is passed directly to RM_opc_mem.
7013   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7014   effect(KILL cr);
7015 
7016   ins_cost(125); // XXX
7017   format %{ "addl    $dst, $src\t# int" %}
7018   opcode(0x81); /* Opcode 81 /0 id */
7019   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7020   ins_pipe(ialu_mem_imm);
7021 %}
7022 
7023 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7024 %{
       // Int increment of a register: matches AddI with the immI1 operand
       // and emits incl.  Only selectable when the UseIncDec flag is set.
       // $src takes part in matching only; it is not referenced by the
       // encoding.  Flags are killed.
7025   predicate(UseIncDec);
7026   match(Set dst (AddI dst src));
7027   effect(KILL cr);
7028 
7029   format %{ "incl    $dst\t# int" %}
7030   opcode(0xFF, 0x00); // FF /0
7031   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7032   ins_pipe(ialu_reg);
7033 %}
7034 
7035 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7036 %{
       // Int increment of a memory location: matches a StoreI to $dst of
       // (LoadI $dst) + immI1 and emits incl on the memory operand.
       // Only selectable when UseIncDec is set.  Flags are killed.
7037   predicate(UseIncDec);
7038   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7039   effect(KILL cr);
7040 
7041   ins_cost(125); // XXX
7042   format %{ "incl    $dst\t# int" %}
7043   opcode(0xFF); /* Opcode FF /0 */
7044   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7045   ins_pipe(ialu_mem_imm);
7046 %}
7047 
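7048 // Matches AddI with a -1 immediate (immI_M1): C2 canonicalizes x - c into x + (-c), so a decrement reaches the matcher as an add.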
7049 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7050 %{
       // Int decrement of a register: matches AddI whose second input is the
       // immI_M1 operand and emits decl.  Only selectable when UseIncDec is
       // set.  $src is used for matching only.  Flags are killed.
7051   predicate(UseIncDec);
7052   match(Set dst (AddI dst src));
7053   effect(KILL cr);
7054 
7055   format %{ "decl    $dst\t# int" %}
7056   opcode(0xFF, 0x01); // FF /1
7057   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7058   ins_pipe(ialu_reg);
7059 %}
7060 
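7061 // Matches AddI with a -1 immediate (immI_M1): C2 canonicalizes x - c into x + (-c), so a decrement reaches the matcher as an add.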
7062 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7063 %{
       // Int decrement of a memory location: matches a StoreI to $dst of
       // (LoadI $dst) + immI_M1 and emits decl on the memory operand.
       // Only selectable when UseIncDec is set.  Flags are killed.
7064   predicate(UseIncDec);
7065   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7066   effect(KILL cr);
7067 
7068   ins_cost(125); // XXX
7069   format %{ "decl    $dst\t# int" %}
7070   opcode(0xFF); /* Opcode FF /1 */
7071   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7072   ins_pipe(ialu_mem_imm);
7073 %}
7074 
7075 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7076 %{
       // Three-operand int add via LEA: $dst = $src0 + $src1 with $dst free
       // to differ from $src0.  No flags operand or effect is declared, in
       // contrast to the addl rules.  Cost 110.
7077   match(Set dst (AddI src0 src1));
7078 
7079   ins_cost(110);
7080   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7081   opcode(0x8D); /* 0x8D /r */
       // The leading 0x67 byte is the prefix shown as "addr32" in the format.
7082   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7083   ins_pipe(ialu_reg_reg);
7084 %}
7085 
7086 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7087 %{
       // Long add, register += register (two-address form).  Flags are
       // killed.  Same opcode as addI_rReg, with the *_wide REX encoder.
7088   match(Set dst (AddL dst src));
7089   effect(KILL cr);
7090 
7091   format %{ "addq    $dst, $src\t# long" %}
7092   opcode(0x03);
7093   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7094   ins_pipe(ialu_reg_reg);
7095 %}
7096 
7097 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7098 %{
       // Long add, register += immediate.  The immediate operand class is
       // immL32, not a general long constant.  Flags are killed.
7099   match(Set dst (AddL dst src));
7100   effect(KILL cr);
7101 
7102   format %{ "addq    $dst, $src\t# long" %}
7103   opcode(0x81, 0x00); /* /0 id */
7104   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7105   ins_pipe( ialu_reg );
7106 %}
7107 
7108 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7109 %{
       // Long add, register += memory: the LoadL of the second input is
       // folded into the add.  Flags are killed.  Cost 125.
7110   match(Set dst (AddL dst (LoadL src)));
7111   effect(KILL cr);
7112 
7113   ins_cost(125); // XXX
7114   format %{ "addq    $dst, $src\t# long" %}
7115   opcode(0x03);
7116   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7117   ins_pipe(ialu_reg_mem);
7118 %}
7119 
7120 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7121 %{
       // Long read-modify-write add: matches a StoreL to $dst of
       // (LoadL $dst) + $src and emits one add with a memory destination.
       // Flags are killed.  Cost 150.
7122   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7123   effect(KILL cr);
7124 
7125   ins_cost(150); // XXX
7126   format %{ "addq    $dst, $src\t# long" %}
7127   opcode(0x01); /* Opcode 01 /r */
       // Register ($src) first, memory ($dst) second in both encoders.
7128   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7129   ins_pipe(ialu_mem_reg);
7130 %}
7131 
7132 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7133 %{
       // Long read-modify-write add of an immL32 immediate: matches a StoreL
       // to $dst of (LoadL $dst) + $src.  Flags are killed.  Cost 125.
7134   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7135   effect(KILL cr);
7136 
7137   ins_cost(125); // XXX
7138   format %{ "addq    $dst, $src\t# long" %}
7139   opcode(0x81); /* Opcode 81 /0 id */
7140   ins_encode(REX_mem_wide(dst),
7141              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7142   ins_pipe(ialu_mem_imm);
7143 %}
7144 
7145 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7146 %{
       // Long increment of a register: matches AddL with the immL1 operand
       // and emits incq.  Only selectable when UseIncDec is set.
       // $dst is a long register (rRegL), consistent with decL_rReg and the
       // other AddL rules; it was previously declared rRegI, which does not
       // agree with the AddL match or the 64-bit incq encoding.
       // $src is used for matching only.  Flags are killed.
7147   predicate(UseIncDec);
7148   match(Set dst (AddL dst src));
7149   effect(KILL cr);
7150 
7151   format %{ "incq    $dst\t# long" %}
7152   opcode(0xFF, 0x00); // FF /0
7153   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7154   ins_pipe(ialu_reg);
7155 %}
7156 
7157 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7158 %{
       // Long increment of a memory location: matches a StoreL to $dst of
       // (LoadL $dst) + immL1 and emits incq on the memory operand.
       // Only selectable when UseIncDec is set.  Flags are killed.
7159   predicate(UseIncDec);
7160   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7161   effect(KILL cr);
7162 
7163   ins_cost(125); // XXX
7164   format %{ "incq    $dst\t# long" %}
7165   opcode(0xFF); /* Opcode FF /0 */
7166   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7167   ins_pipe(ialu_mem_imm);
7168 %}
7169 
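7170 // Matches AddL with a -1 immediate (immL_M1): C2 canonicalizes x - c into x + (-c), so a decrement reaches the matcher as an add.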
7171 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7172 %{
       // Long decrement of a register: matches AddL whose second input is
       // the immL_M1 operand and emits decq.  Only selectable when UseIncDec
       // is set.  $src is used for matching only.  Flags are killed.
7173   predicate(UseIncDec);
7174   match(Set dst (AddL dst src));
7175   effect(KILL cr);
7176 
7177   format %{ "decq    $dst\t# long" %}
7178   opcode(0xFF, 0x01); // FF /1
7179   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7180   ins_pipe(ialu_reg);
7181 %}
7182 
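7183 // Matches AddL with a -1 immediate (immL_M1): C2 canonicalizes x - c into x + (-c), so a decrement reaches the matcher as an add.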
7184 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7185 %{
       // Long decrement of a memory location: matches a StoreL to $dst of
       // (LoadL $dst) + immL_M1 and emits decq on the memory operand.
       // Only selectable when UseIncDec is set.  Flags are killed.
7186   predicate(UseIncDec);
7187   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7188   effect(KILL cr);
7189 
7190   ins_cost(125); // XXX
7191   format %{ "decq    $dst\t# long" %}
7192   opcode(0xFF); /* Opcode FF /1 */
7193   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7194   ins_pipe(ialu_mem_imm);
7195 %}
7196 
7197 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7198 %{
       // Three-operand long add via LEA: $dst = $src0 + $src1 with $dst free
       // to differ from $src0.  No flags operand or effect is declared.
       // Cost 110.
7199   match(Set dst (AddL src0 src1));
7200 
7201   ins_cost(110);
7202   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7203   opcode(0x8D); /* 0x8D /r */
7204   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7205   ins_pipe(ialu_reg_reg);
7206 %}
7207 
7208 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7209 %{
       // Pointer add: pointer register += long register offset.
       // Uses the same opcode and encoders as addL_rReg.  Flags are killed.
7210   match(Set dst (AddP dst src));
7211   effect(KILL cr);
7212 
7213   format %{ "addq    $dst, $src\t# ptr" %}
7214   opcode(0x03);
7215   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7216   ins_pipe(ialu_reg_reg);
7217 %}
7218 
7219 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7220 %{
       // Pointer add: pointer register += immL32 immediate offset.
       // Uses the same opcode and encoders as addL_rReg_imm.  Flags are
       // killed.
7221   match(Set dst (AddP dst src));
7222   effect(KILL cr);
7223 
7224   format %{ "addq    $dst, $src\t# ptr" %}
7225   opcode(0x81, 0x00); /* /0 id */
7226   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7227   ins_pipe( ialu_reg );
7228 %}
7229 
7230 // XXX addP mem ops ????
7231 
7232 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7233 %{
       // Three-operand pointer add via LEA: $dst = $src0 + $src1 with $dst
       // free to differ from $src0.  No flags operand or effect is declared.
       // Cost 110.
7234   match(Set dst (AddP src0 src1));
7235 
7236   ins_cost(110);
7237   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7238   opcode(0x8D); /* 0x8D /r */
7239   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7240   ins_pipe(ialu_reg_reg);
7241 %}
7242 
7243 instruct checkCastPP(rRegP dst)
7244 %{
       // CheckCastPP is matched in place ($dst is both input and result) and
       // generates no machine code: size is 0 and the encoding is empty.
7245   match(Set dst (CheckCastPP dst));
7246 
7247   size(0);
7248   format %{ "# checkcastPP of $dst" %}
7249   ins_encode(/* empty encoding */);
7250   ins_pipe(empty);
7251 %}
7252 
7253 instruct castPP(rRegP dst)
7254 %{
       // CastPP is matched in place ($dst is both input and result) and
       // generates no machine code: size is 0 and the encoding is empty.
7255   match(Set dst (CastPP dst));
7256 
7257   size(0);
7258   format %{ "# castPP of $dst" %}
7259   ins_encode(/* empty encoding */);
7260   ins_pipe(empty);
7261 %}
7262 
7263 instruct castII(rRegI dst)
7264 %{
       // CastII is matched in place and generates no machine code: size is 0
       // and the encoding is empty.  Unlike the pointer casts next to it,
       // this rule also states an explicit cost of 0.
7265   match(Set dst (CastII dst));
7266 
7267   size(0);
7268   format %{ "# castII of $dst" %}
7269   ins_encode(/* empty encoding */);
7270   ins_cost(0);
7271   ins_pipe(empty);
7272 %}
7273 
7274 // LoadP-locked same as a regular LoadP when used with compare-swap
7275 instruct loadPLocked(rRegP dst, memory mem)
7276 %{
       // LoadPLocked is emitted as an ordinary 64-bit register load
       // (opcode 0x8B, movq in the format); no lock prefix or flags effect
       // appears in this rule.  Cost 125.
7277   match(Set dst (LoadPLocked mem));
7278 
7279   ins_cost(125); // XXX
7280   format %{ "movq    $dst, $mem\t# ptr locked" %}
7281   opcode(0x8B);
7282   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7283   ins_pipe(ialu_reg_mem); // XXX
7284 %}
7285 
7286 // Conditional-store of the updated heap-top.
7287 // Used during allocation of the shared heap.
7288 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7289 
7290 instruct storePConditional(memory heap_top_ptr,
7291                            rax_RegP oldval, rRegP newval,
7292                            rFlagsReg cr)
7293 %{
       // Conditional pointer store whose result is the flags register
       // (Set cr ...).  $oldval is restricted to rax, the register cmpxchg
       // compares against; $newval is stored on a match.
       // NOTE(review): unlike storeIConditional / storeLConditional, no
       // KILL of $oldval is declared here, although cmpxchg writes rax when
       // the comparison fails -- confirm this is intentional.
7294   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7295 
7296   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7297             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7298   opcode(0x0F, 0xB1);
7299   ins_encode(lock_prefix,
7300              REX_reg_mem_wide(newval, heap_top_ptr),
7301              OpcP, OpcS,
7302              reg_mem(newval, heap_top_ptr));
7303   ins_pipe(pipe_cmpxchg);
7304 %}
7305 
7306 // Conditional-store of an int value.
7307 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7308 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7309 %{
       // Conditional int store whose result is the flags register.
       // $oldval is restricted to rax and is declared killed; $newval is the
       // value stored when rax equals the memory contents.
7310   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7311   effect(KILL oldval);
7312 
7313   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7314   opcode(0x0F, 0xB1);
       // Emission order: lock prefix, REX, the two opcode bytes, ModRM.
7315   ins_encode(lock_prefix,
7316              REX_reg_mem(newval, mem),
7317              OpcP, OpcS,
7318              reg_mem(newval, mem));
7319   ins_pipe(pipe_cmpxchg);
7320 %}
7321 
7322 // Conditional-store of a long value.
7323 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7324 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7325 %{
       // Conditional long store whose result is the flags register.
       // Same shape as storeIConditional, using the *_wide REX encoder
       // (cmpxchgq in the format).  $oldval (rax) is declared killed.
7326   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7327   effect(KILL oldval);
7328 
7329   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7330   opcode(0x0F, 0xB1);
7331   ins_encode(lock_prefix,
7332              REX_reg_mem_wide(newval, mem),
7333              OpcP, OpcS,
7334              reg_mem(newval, mem));
7335   ins_pipe(pipe_cmpxchg);
7336 %}
7337 
7338 
7339 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7340 instruct compareAndSwapP(rRegI res,
7341                          memory mem_ptr,
7342                          rax_RegP oldval, rRegP newval,
7343                          rFlagsReg cr)
7344 %{
       // Pointer compare-and-swap producing an int result in $res.
       // The sequence is cmpxchgq followed by sete/movzbl, which turn the
       // equality flag into a 0/1 value.  $oldval is restricted to rax and
       // is killed together with the flags.  The rule is only available
       // when VM_Version::supports_cx8() is true.
7345   predicate(VM_Version::supports_cx8());
7346   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7347   effect(KILL cr, KILL oldval);
7348 
7349   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7350             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7351             "sete    $res\n\t"
7352             "movzbl  $res, $res" %}
7353   opcode(0x0F, 0xB1);
7354   ins_encode(lock_prefix,
7355              REX_reg_mem_wide(newval, mem_ptr),
7356              OpcP, OpcS,
7357              reg_mem(newval, mem_ptr),
7358              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7359              REX_reg_breg(res, res), // movzbl
7360              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7361   ins_pipe( pipe_cmpxchg );
7362 %}
7363 
7364 instruct compareAndSwapL(rRegI res,
7365                          memory mem_ptr,
7366                          rax_RegL oldval, rRegL newval,
7367                          rFlagsReg cr)
7368 %{
       // Long compare-and-swap producing an int result in $res.
       // cmpxchgq followed by sete/movzbl to turn the equality flag into a
       // 0/1 value.  $oldval (rax) and the flags are killed.  Only available
       // when VM_Version::supports_cx8() is true.
7369   predicate(VM_Version::supports_cx8());
7370   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7371   effect(KILL cr, KILL oldval);
7372 
7373   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7374             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7375             "sete    $res\n\t"
7376             "movzbl  $res, $res" %}
7377   opcode(0x0F, 0xB1);
7378   ins_encode(lock_prefix,
7379              REX_reg_mem_wide(newval, mem_ptr),
7380              OpcP, OpcS,
7381              reg_mem(newval, mem_ptr),
7382              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7383              REX_reg_breg(res, res), // movzbl
7384              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7385   ins_pipe( pipe_cmpxchg );
7386 %}
7387 
7388 instruct compareAndSwapI(rRegI res,
7389                          memory mem_ptr,
7390                          rax_RegI oldval, rRegI newval,
7391                          rFlagsReg cr)
7392 %{
       // Int compare-and-swap producing an int result in $res.
       // cmpxchgl (non-wide REX encoder) followed by sete/movzbl to turn the
       // equality flag into a 0/1 value.  $oldval (rax) and the flags are
       // killed.  No predicate: unlike the P and L variants this rule does
       // not depend on supports_cx8().
7393   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7394   effect(KILL cr, KILL oldval);
7395 
7396   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7397             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7398             "sete    $res\n\t"
7399             "movzbl  $res, $res" %}
7400   opcode(0x0F, 0xB1);
7401   ins_encode(lock_prefix,
7402              REX_reg_mem(newval, mem_ptr),
7403              OpcP, OpcS,
7404              reg_mem(newval, mem_ptr),
7405              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7406              REX_reg_breg(res, res), // movzbl
7407              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7408   ins_pipe( pipe_cmpxchg );
7409 %}
7410 
7411 
7412 instruct compareAndSwapN(rRegI res,
7413                           memory mem_ptr,
7414                           rax_RegN oldval, rRegN newval,
7415                           rFlagsReg cr) %{
       // Compare-and-swap on rRegN / rax_RegN operands, producing an int
       // result in $res.  Encoded exactly like compareAndSwapI (cmpxchgl,
       // non-wide REX encoder, then sete/movzbl).  $oldval (rax) and the
       // flags are killed.
7416   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7417   effect(KILL cr, KILL oldval);
7418 
7419   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7420             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7421             "sete    $res\n\t"
7422             "movzbl  $res, $res" %}
7423   opcode(0x0F, 0xB1);
7424   ins_encode(lock_prefix,
7425              REX_reg_mem(newval, mem_ptr),
7426              OpcP, OpcS,
7427              reg_mem(newval, mem_ptr),
7428              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7429              REX_reg_breg(res, res), // movzbl
7430              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7431   ins_pipe( pipe_cmpxchg );
7432 %}
7433 
7434 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
       // GetAndAddI whose fetched value is unused (predicate:
       // result_not_used()).  Because no old value has to be returned, a
       // plain ADDL of the immediate to memory is emitted instead of XADD;
       // $dummy only stands in for the unused result.  Flags are killed.
7435   predicate(n->as_LoadStore()->result_not_used());
7436   match(Set dummy (GetAndAddI mem add));
7437   effect(KILL cr);
7438   format %{ "ADDL  [$mem],$add" %}
7439   ins_encode %{
         // The lock prefix is emitted only when os::is_MP() reports true.
7440     if (os::is_MP()) { __ lock(); }
7441     __ addl($mem$$Address, $add$$constant);
7442   %}
7443   ins_pipe( pipe_cmpxchg );
7444 %}
7445 
7446 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
       // GetAndAddI with a used result: $newval carries the addend in and is
       // also the register the match result is set into.  Emitted as XADDL.
       // Flags are killed.
7447   match(Set newval (GetAndAddI mem newval));
7448   effect(KILL cr);
7449   format %{ "XADDL  [$mem],$newval" %}
7450   ins_encode %{
         // The lock prefix is emitted only when os::is_MP() reports true.
7451     if (os::is_MP()) { __ lock(); }
7452     __ xaddl($mem$$Address, $newval$$Register);
7453   %}
7454   ins_pipe( pipe_cmpxchg );
7455 %}
7456 
7457 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
       // GetAndAddL whose fetched value is unused (predicate:
       // result_not_used()).  A plain ADDQ of the immL32 immediate to memory
       // is emitted instead of XADD; $dummy only stands in for the unused
       // result.  Flags are killed.
7458   predicate(n->as_LoadStore()->result_not_used());
7459   match(Set dummy (GetAndAddL mem add));
7460   effect(KILL cr);
7461   format %{ "ADDQ  [$mem],$add" %}
7462   ins_encode %{
         // The lock prefix is emitted only when os::is_MP() reports true.
7463     if (os::is_MP()) { __ lock(); }
7464     __ addq($mem$$Address, $add$$constant);
7465   %}
7466   ins_pipe( pipe_cmpxchg );
7467 %}
7468 
7469 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
       // GetAndAddL with a used result: $newval carries the addend in and is
       // also the register the match result is set into.  Emitted as XADDQ.
       // Flags are killed.
7470   match(Set newval (GetAndAddL mem newval));
7471   effect(KILL cr);
7472   format %{ "XADDQ  [$mem],$newval" %}
7473   ins_encode %{
         // The lock prefix is emitted only when os::is_MP() reports true.
7474     if (os::is_MP()) { __ lock(); }
7475     __ xaddq($mem$$Address, $newval$$Register);
7476   %}
7477   ins_pipe( pipe_cmpxchg );
7478 %}
7479 
7480 instruct xchgI( memory mem, rRegI newval) %{
       // GetAndSetI: $newval carries the value to store and is also the
       // register the match result is set into.  Emitted as a 32-bit XCHG.
       // No flags operand is declared and no explicit lock prefix is
       // emitted (x86 XCHG with a memory operand is implicitly locked per
       // the Intel SDM).
7481   match(Set newval (GetAndSetI mem newval));
7482   format %{ "XCHGL  $newval,[$mem]" %}
7483   ins_encode %{
7484     __ xchgl($newval$$Register, $mem$$Address);
7485   %}
7486   ins_pipe( pipe_cmpxchg );
7487 %}
7488 
7489 instruct xchgL( memory mem, rRegL newval) %{
       // GetAndSetL: $newval carries the value to store and is also the
       // register the match result is set into.  Emitted as a 64-bit XCHG
       // (xchgq); the format mnemonic is XCHGQ to match what is encoded
       // (it previously printed XCHGL).  No flags operand is declared and
       // no explicit lock prefix is emitted.
7490   match(Set newval (GetAndSetL mem newval));
7491   format %{ "XCHGQ  $newval,[$mem]" %}
7492   ins_encode %{
7493     __ xchgq($newval$$Register, $mem$$Address);
7494   %}
7495   ins_pipe( pipe_cmpxchg );
7496 %}
7497 
7498 instruct xchgP( memory mem, rRegP newval) %{
       // GetAndSetP: $newval carries the pointer to store and is also the
       // register the match result is set into.  Emitted as a 64-bit XCHG.
       // No flags operand is declared and no explicit lock prefix is
       // emitted.
7499   match(Set newval (GetAndSetP mem newval));
7500   format %{ "XCHGQ  $newval,[$mem]" %}
7501   ins_encode %{
7502     __ xchgq($newval$$Register, $mem$$Address);
7503   %}
7504   ins_pipe( pipe_cmpxchg );
7505 %}
7506 
7507 instruct xchgN( memory mem, rRegN newval) %{
       // GetAndSetN on an rRegN operand: $newval carries the value to store
       // and is also the register the match result is set into.  Emitted as
       // a 32-bit XCHG.  The format string now has the opening '[' that was
       // missing around $mem, matching the other xchg rules.  No flags
       // operand is declared and no explicit lock prefix is emitted.
7508   match(Set newval (GetAndSetN mem newval));
7509   format %{ "XCHGL  $newval,[$mem]" %}
7510   ins_encode %{
7511     __ xchgl($newval$$Register, $mem$$Address);
7512   %}
7513   ins_pipe( pipe_cmpxchg );
7514 %}
7515 
7516 //----------Subtraction Instructions-------------------------------------------
7517 
7518 // Integer Subtraction Instructions
7519 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7520 %{
       // Int subtract, register -= register (two-address form: $dst is both
       // an input and the result).  Flags are killed.
7521   match(Set dst (SubI dst src));
7522   effect(KILL cr);
7523 
7524   format %{ "subl    $dst, $src\t# int" %}
7525   opcode(0x2B);
7526   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7527   ins_pipe(ialu_reg_reg);
7528 %}
7529 
7530 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7531 %{
       // Int subtract, register -= int immediate.  Opcode 0x81 with /5 as
       // the ModRM opcode extension.  Flags are killed.
7532   match(Set dst (SubI dst src));
7533   effect(KILL cr);
7534 
7535   format %{ "subl    $dst, $src\t# int" %}
7536   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7537   ins_encode(OpcSErm(dst, src), Con8or32(src));
7538   ins_pipe(ialu_reg);
7539 %}
7540 
7541 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7542 %{
       // Int subtract, register -= memory: the LoadI of the second input is
       // folded into the subtract.  Flags are killed.  Cost 125.
7543   match(Set dst (SubI dst (LoadI src)));
7544   effect(KILL cr);
7545 
7546   ins_cost(125);
7547   format %{ "subl    $dst, $src\t# int" %}
7548   opcode(0x2B);
7549   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7550   ins_pipe(ialu_reg_mem);
7551 %}
7552 
7553 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7554 %{
       // Int read-modify-write subtract: matches a StoreI to $dst of
       // (LoadI $dst) - $src and emits one subtract with a memory
       // destination.  Flags are killed.  Cost 150.
7555   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7556   effect(KILL cr);
7557 
7558   ins_cost(150);
7559   format %{ "subl    $dst, $src\t# int" %}
7560   opcode(0x29); /* Opcode 29 /r */
       // Register ($src) first, memory ($dst) second in both encoders.
7561   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7562   ins_pipe(ialu_mem_reg);
7563 %}
7564 
7565 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7566 %{
       // Int read-modify-write subtract of an immediate: matches a StoreI to
       // $dst of (LoadI $dst) - $src.  The /5 opcode extension is passed
       // directly to RM_opc_mem.  Flags are killed.  Cost 125.
7567   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7568   effect(KILL cr);
7569 
7570   ins_cost(125); // XXX
7571   format %{ "subl    $dst, $src\t# int" %}
7572   opcode(0x81); /* Opcode 81 /5 id */
7573   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7574   ins_pipe(ialu_mem_imm);
7575 %}
7576 
7577 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7578 %{
       // Long subtract, register -= register (two-address form).  Same
       // opcode as subI_rReg, with the *_wide REX encoder.  Flags are killed.
7579   match(Set dst (SubL dst src));
7580   effect(KILL cr);
7581 
7582   format %{ "subq    $dst, $src\t# long" %}
7583   opcode(0x2B);
7584   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7585   ins_pipe(ialu_reg_reg);
7586 %}
7587 
7588 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7589 %{
       // Long subtract, register -= immL32 immediate.  Opcode 0x81 with /5
       // as the ModRM opcode extension, wide (64-bit) form.
       // $dst is a long register (rRegL), consistent with subL_rReg and
       // addL_rReg_imm; it was previously declared rRegI, which does not
       // agree with the SubL match or the subq encoding.  Flags are killed.
7590   match(Set dst (SubL dst src));
7591   effect(KILL cr);
7592 
7593   format %{ "subq    $dst, $src\t# long" %}
7594   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7595   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7596   ins_pipe(ialu_reg);
7597 %}
7598 
7599 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7600 %{
       // Long subtract, register -= memory: the LoadL of the second input
       // is folded into the subtract.  Flags are killed.  Cost 125.
7601   match(Set dst (SubL dst (LoadL src)));
7602   effect(KILL cr);
7603 
7604   ins_cost(125);
7605   format %{ "subq    $dst, $src\t# long" %}
7606   opcode(0x2B);
7607   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7608   ins_pipe(ialu_reg_mem);
7609 %}
7610 
7611 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7612 %{
       // Long read-modify-write subtract: matches a StoreL to $dst of
       // (LoadL $dst) - $src and emits one subtract with a memory
       // destination.  Flags are killed.  Cost 150.
7613   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7614   effect(KILL cr);
7615 
7616   ins_cost(150);
7617   format %{ "subq    $dst, $src\t# long" %}
7618   opcode(0x29); /* Opcode 29 /r */
       // Register ($src) first, memory ($dst) second in both encoders.
7619   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7620   ins_pipe(ialu_mem_reg);
7621 %}
7622 
7623 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7624 %{
       // Long read-modify-write subtract of an immL32 immediate: matches a
       // StoreL to $dst of (LoadL $dst) - $src.  The /5 opcode extension is
       // passed directly to RM_opc_mem.  Flags are killed.  Cost 125.
7625   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7626   effect(KILL cr);
7627 
7628   ins_cost(125); // XXX
7629   format %{ "subq    $dst, $src\t# long" %}
7630   opcode(0x81); /* Opcode 81 /5 id */
7631   ins_encode(REX_mem_wide(dst),
7632              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7633   ins_pipe(ialu_mem_imm);
7634 %}
7635 
7636 // Subtract from a pointer
7637 // XXX hmpf???
7638 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7639 %{
       // Matches a pointer plus a negated int, AddP dst (0 - src), and emits
       // a single subtract instead of a negate followed by an add.  $zero is
       // used for matching only.  Flags are killed.
       // NOTE(review): $src is an int register (rRegI) while the encoding
       // uses the wide REX form (subq in the format); whether the upper half
       // of $src is guaranteed to be valid here is not visible in this
       // rule -- confirm.
7640   match(Set dst (AddP dst (SubI zero src)));
7641   effect(KILL cr);
7642 
7643   format %{ "subq    $dst, $src\t# ptr - int" %}
7644   opcode(0x2B);
7645   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7646   ins_pipe(ialu_reg_reg);
7647 %}
7648 
7649 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7650 %{
       // Int negate of a register: matches 0 - $dst and emits negl in place.
       // $zero is used for matching only.  Flags are killed.
7651   match(Set dst (SubI zero dst));
7652   effect(KILL cr);
7653 
7654   format %{ "negl    $dst\t# int" %}
7655   opcode(0xF7, 0x03);  // Opcode F7 /3
7656   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7657   ins_pipe(ialu_reg);
7658 %}
7659 
7660 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7661 %{
       // Int negate of a memory location: matches a StoreI to $dst of
       // 0 - (LoadI $dst) and emits negl on the memory operand.  The /3
       // opcode extension is taken from the secondary opcode.  Flags are
       // killed.
       // NOTE(review): no ins_cost is given and the pipeline class is
       // ialu_reg, whereas the other read-modify-write memory rules in this
       // file state a cost and use ialu_mem_imm / ialu_mem_reg -- confirm.
7662   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7663   effect(KILL cr);
7664 
7665   format %{ "negl    $dst\t# int" %}
7666   opcode(0xF7, 0x03);  // Opcode F7 /3
7667   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
7668   ins_pipe(ialu_reg);
7669 %}
7670 
7671 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7672 %{
       // Long negate of a register: matches 0 - $dst and emits negq in
       // place.  $zero is used for matching only.  Flags are killed.
7673   match(Set dst (SubL zero dst));
7674   effect(KILL cr);
7675 
7676   format %{ "negq    $dst\t# long" %}
7677   opcode(0xF7, 0x03);  // Opcode F7 /3
7678   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7679   ins_pipe(ialu_reg);
7680 %}
7681 
7682 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7683 %{
       // Long negate of a memory location: matches a StoreL to $dst of
       // 0 - (LoadL $dst) and emits negq on the memory operand.  The /3
       // opcode extension is taken from the secondary opcode.  Flags are
       // killed.
       // NOTE(review): no ins_cost is given and the pipeline class is
       // ialu_reg, whereas the other read-modify-write memory rules in this
       // file state a cost and use ialu_mem_imm / ialu_mem_reg -- confirm.
7684   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7685   effect(KILL cr);
7686 
7687   format %{ "negq    $dst\t# long" %}
7688   opcode(0xF7, 0x03);  // Opcode F7 /3
7689   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
7690   ins_pipe(ialu_reg);
7691 %}
7692 
7693 
7694 //----------Multiplication/Division Instructions-------------------------------
7695 // Integer Multiplication Instructions
7696 // Multiply Register
7697 
7698 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7699 %{
       // Int multiply, register *= register (two-address form), using the
       // two-byte opcode 0F AF.  Flags are killed.  Cost 300.
7700   match(Set dst (MulI dst src));
7701   effect(KILL cr);
7702 
7703   ins_cost(300);
7704   format %{ "imull   $dst, $src\t# int" %}
7705   opcode(0x0F, 0xAF);
7706   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7707   ins_pipe(ialu_reg_reg_alu0);
7708 %}
7709 
7710 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7711 %{
       // Int multiply by an immediate in three-operand form:
       // $dst = $src * $imm, so $dst need not equal $src.  Flags are killed.
       // Cost 300.
7712   match(Set dst (MulI src imm));
7713   effect(KILL cr);
7714 
7715   ins_cost(300);
7716   format %{ "imull   $dst, $src, $imm\t# int" %}
7717   opcode(0x69); /* 69 /r id */
7718   ins_encode(REX_reg_reg(dst, src),
7719              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7720   ins_pipe(ialu_reg_reg_alu0);
7721 %}
7722 
7723 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7724 %{
       // Int multiply, register *= memory: the LoadI of the second input is
       // folded into the multiply.  Flags are killed.  Cost 350.
7725   match(Set dst (MulI dst (LoadI src)));
7726   effect(KILL cr);
7727 
7728   ins_cost(350);
7729   format %{ "imull   $dst, $src\t# int" %}
7730   opcode(0x0F, 0xAF);
7731   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7732   ins_pipe(ialu_reg_mem_alu0);
7733 %}
7734 
7735 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7736 %{
       // Int multiply of a loaded value by an immediate, three-operand form:
       // $dst = [$src] * $imm with the LoadI folded in.  Flags are killed.
       // Cost 300.
7737   match(Set dst (MulI (LoadI src) imm));
7738   effect(KILL cr);
7739 
7740   ins_cost(300);
7741   format %{ "imull   $dst, $src, $imm\t# int" %}
7742   opcode(0x69); /* 69 /r id */
7743   ins_encode(REX_reg_mem(dst, src),
7744              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7745   ins_pipe(ialu_reg_mem_alu0);
7746 %}
7747 
7748 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7749 %{
       // Long multiply, register *= register (two-address form).  Same
       // opcode bytes as mulI_rReg, with the *_wide REX encoder.  Flags are
       // killed.  Cost 300.
7750   match(Set dst (MulL dst src));
7751   effect(KILL cr);
7752 
7753   ins_cost(300);
7754   format %{ "imulq   $dst, $src\t# long" %}
7755   opcode(0x0F, 0xAF);
7756   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7757   ins_pipe(ialu_reg_reg_alu0);
7758 %}
7759 
7760 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7761 %{
       // Long multiply by an immL32 immediate in three-operand form:
       // $dst = $src * $imm.  Flags are killed.  Cost 300.
7762   match(Set dst (MulL src imm));
7763   effect(KILL cr);
7764 
7765   ins_cost(300);
7766   format %{ "imulq   $dst, $src, $imm\t# long" %}
7767   opcode(0x69); /* 69 /r id */
7768   ins_encode(REX_reg_reg_wide(dst, src),
7769              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7770   ins_pipe(ialu_reg_reg_alu0);
7771 %}
7772 
7773 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7774 %{
7775   match(Set dst (MulL dst (LoadL src)));  // in-place: dst = dst * [src]; the long load is folded into the multiply
7776   effect(KILL cr);                        // the flags operand is declared clobbered
7777 
7778   ins_cost(350);
7779   format %{ "imulq   $dst, $src\t# long" %}
7780   opcode(0x0F, 0xAF);
7781   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));  // two-byte opcode: primary 0x0F then secondary 0xAF
7782   ins_pipe(ialu_reg_mem_alu0);
7783 %}
7784 
7785 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7786 %{
7787   match(Set dst (MulL (LoadL src) imm));  // dst = [src] * imm; dst is output only
7788   effect(KILL cr);                        // the flags operand is declared clobbered
7789 
7790   ins_cost(300);
7791   format %{ "imulq   $dst, $src, $imm\t# long" %}
7792   opcode(0x69); /* 69 /r id */
7793   ins_encode(REX_reg_mem_wide(dst, src),
7794              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));  // NOTE(review): OpcSE/Con8or32 presumably choose between the imm8 and imm32 forms; enc_classes are not visible here
7795   ins_pipe(ialu_reg_mem_alu0);
7796 %}
7797 
7798 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7799 %{
7800   match(Set dst (MulHiL src rax));  // result operand is pinned to the rdx_RegL class; one factor is pinned to rax_RegL
7801   effect(USE_KILL rax, KILL cr);    // rax is consumed as an input and then clobbered
7802 
7803   ins_cost(300);
7804   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
7805   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7806   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only $src is encoded; rax and rdx are implicit operands of this form
7807   ins_pipe(ialu_reg_reg_alu0);
7808 %}
7809 
7810 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7811                    rFlagsReg cr)
7812 %{
7813   match(Set rax (DivI rax div));  // dividend arrives in rax and the quotient replaces it
7814   effect(KILL rdx, KILL cr);      // rdx is clobbered; it is not a result of this instruct
7815 
7816   ins_cost(30*100+10*100); // XXX
7817   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7818             "jne,s   normal\n\t"
7819             "xorl    rdx, rdx\n\t"
7820             "cmpl    $div, -1\n\t"
7821             "je,s    done\n"
7822     "normal: cdql\n\t"
7823             "idivl   $div\n"
7824     "done:"        %}
7825   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7826   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // NOTE(review): cdql_enc presumably emits the compare/branch/cdql prologue shown in the format (skipping idivl for 0x80000000 / -1); enc_class not visible here
7827   ins_pipe(ialu_reg_reg_alu0);
7828 %}
7829 
7830 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7831                    rFlagsReg cr)
7832 %{
7833   match(Set rax (DivL rax div));  // dividend arrives in rax and the quotient replaces it
7834   effect(KILL rdx, KILL cr);      // rdx is clobbered; it is not a result of this instruct
7835 
7836   ins_cost(30*100+10*100); // XXX
7837   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7838             "cmpq    rax, rdx\n\t"
7839             "jne,s   normal\n\t"
7840             "xorl    rdx, rdx\n\t"
7841             "cmpq    $div, -1\n\t"
7842             "je,s    done\n"
7843     "normal: cdqq\n\t"
7844             "idivq   $div\n"
7845     "done:"        %}
7846   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7847   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // NOTE(review): cdqq_enc presumably emits the compare/branch/cdqq prologue shown in the format (skipping idivq for min_long / -1); enc_class not visible here
7848   ins_pipe(ialu_reg_reg_alu0);
7849 %}
7850 
7851 // Integer DIVMOD with Register, both quotient and mod results
7852 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7853                              rFlagsReg cr)
7854 %{
7855   match(DivModI rax div);  // no Set: rax and rdx are both kept as results, so neither is killed below
7856   effect(KILL cr);
7857 
7858   ins_cost(30*100+10*100); // XXX
7859   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7860             "jne,s   normal\n\t"
7861             "xorl    rdx, rdx\n\t"
7862             "cmpl    $div, -1\n\t"
7863             "je,s    done\n"
7864     "normal: cdql\n\t"
7865             "idivl   $div\n"
7866     "done:"        %}
7867   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7868   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // same encoding list as divI_rReg
7869   ins_pipe(pipe_slow);     // divI_rReg uses ialu_reg_reg_alu0 instead
7870 %}
7871 
7872 // Long DIVMOD with Register, both quotient and mod results
7873 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7874                              rFlagsReg cr)
7875 %{
7876   match(DivModL rax div);  // no Set: rax and rdx are both kept as results, so neither is killed below
7877   effect(KILL cr);
7878 
7879   ins_cost(30*100+10*100); // XXX
7880   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7881             "cmpq    rax, rdx\n\t"
7882             "jne,s   normal\n\t"
7883             "xorl    rdx, rdx\n\t"
7884             "cmpq    $div, -1\n\t"
7885             "je,s    done\n"
7886     "normal: cdqq\n\t"
7887             "idivq   $div\n"
7888     "done:"        %}
7889   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7890   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // same encoding list as divL_rReg
7891   ins_pipe(pipe_slow);     // divL_rReg uses ialu_reg_reg_alu0 instead
7892 %}
7893 
7894 //----------- DivL-By-Constant-Expansions--------------------------------------
7895 // DivI cases are handled by the compiler
7896 
7897 // Magic constant, reciprocal of 10: 0x6666666666666667 == ceil(2^66 / 10)
7898 instruct loadConL_0x6666666666666667(rRegL dst)
7899 %{
7900   effect(DEF dst);  // no match rule: only instantiated from the divL_10 expand
7901 
7902   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
7903   ins_encode(load_immL(dst, 0x6666666666666667));  // the format text above must spell this same 16-digit constant
7904   ins_pipe(ialu_reg);
7905 %}
7906 
7907 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7908 %{
7909   effect(DEF dst, USE src, USE_KILL rax, KILL cr);  // no match rule: expand-only helper (used by divL_10); rax is read then clobbered
7910 
7911   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
7912   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7913   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // same opcode and encoding list as mulHiL_rReg
7914   ins_pipe(ialu_reg_reg_alu0);
7915 %}
7916 
7917 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
7918 %{
7919   effect(USE_DEF dst, KILL cr);  // no match rule: expand-only helper (used by divL_10); dst is shifted in place
7920 
7921   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
7922   opcode(0xC1, 0x7); /* C1 /7 ib */
7923   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // shift count is hard-coded: 0x3F == 63
7924   ins_pipe(ialu_reg);
7925 %}
7926 
7927 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
7928 %{
7929   effect(USE_DEF dst, KILL cr);  // no match rule: expand-only helper (used by divL_10); dst is shifted in place
7930 
7931   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
7932   opcode(0xC1, 0x7); /* C1 /7 ib */
7933   ins_encode(reg_opc_imm_wide(dst, 0x2));  // shift count is hard-coded to 2
7934   ins_pipe(ialu_reg);
7935 %}
7936 
7937 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
7938 %{
7939   match(Set dst (DivL src div));  // long division by the immL10 constant, replaced by multiply-high and shifts
7940 
7941   ins_cost((5+8)*100);
7942   expand %{
7943     rax_RegL rax;                     // Killed temp
7944     rFlagsReg cr;                     // Killed
7945     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
7946     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src (high half lands in dst)
7947     sarL_rReg_63(src, cr);            // sarq  src, 63  (every bit becomes a copy of the sign bit)
7948     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
7949     subL_rReg(dst, src, cr);          // long subtract: rdx -= src
7950   %}
7951 %}
7952 
7953 //-----------------------------------------------------------------------------
7954 
7955 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
7956                    rFlagsReg cr)
7957 %{
7958   match(Set rdx (ModI rax div));  // dividend arrives in rax; the remainder is produced in rdx
7959   effect(KILL rax, KILL cr);      // rax is clobbered; it is not a result of this instruct
7960 
7961   ins_cost(300); // XXX
7962   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
7963             "jne,s   normal\n\t"
7964             "xorl    rdx, rdx\n\t"
7965             "cmpl    $div, -1\n\t"
7966             "je,s    done\n"
7967     "normal: cdql\n\t"
7968             "idivl   $div\n"
7969     "done:"        %}
7970   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7971   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // same encoding list as divI_rReg; only the result register differs
7972   ins_pipe(ialu_reg_reg_alu0);
7973 %}
7974 
7975 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
7976                    rFlagsReg cr)
7977 %{
7978   match(Set rdx (ModL rax div));  // dividend arrives in rax; the remainder is produced in rdx
7979   effect(KILL rax, KILL cr);      // rax is clobbered; it is not a result of this instruct
7980 
7981   ins_cost(300); // XXX
7982   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
7983             "cmpq    rax, rdx\n\t"
7984             "jne,s   normal\n\t"
7985             "xorl    rdx, rdx\n\t"
7986             "cmpq    $div, -1\n\t"
7987             "je,s    done\n"
7988     "normal: cdqq\n\t"
7989             "idivq   $div\n"
7990     "done:"        %}
7991   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7992   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // same encoding list as divL_rReg; only the result register differs
7993   ins_pipe(ialu_reg_reg_alu0);
7994 %}
7995 
7996 // Integer Shift Instructions
7997 // Shift Left by one
7998 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
7999 %{
8000   match(Set dst (LShiftI dst shift));  // in-place int left shift; shift is restricted to the immI1 operand class
8001   effect(KILL cr);
8002 
8003   format %{ "sall    $dst, $shift" %}
8004   opcode(0xD1, 0x4); /* D1 /4 */
8005   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form carries no immediate byte
8006   ins_pipe(ialu_reg);
8007 %}
8008 
8009 // Shift Left by one
8010 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8011 %{
8012   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8013   effect(KILL cr);
8014 
8015   format %{ "sall    $dst, $shift\t" %}
8016   opcode(0xD1, 0x4); /* D1 /4 */
8017   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x4) is handed to the ModRM enc_class; no immediate byte
8018   ins_pipe(ialu_mem_imm);
8019 %}
8020 
8021 // Shift Left by 8-bit immediate
8022 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8023 %{
8024   match(Set dst (LShiftI dst shift));  // in-place int left shift by an immI8 constant
8025   effect(KILL cr);
8026 
8027   format %{ "sall    $dst, $shift" %}
8028   opcode(0xC1, 0x4); /* C1 /4 ib */
8029   ins_encode(reg_opc_imm(dst, shift));  // NOTE(review): a single enc_class; presumably emits prefix, opcode, ModRM and the imm8 itself - confirm against its definition
8030   ins_pipe(ialu_reg);
8031 %}
8032 
8033 // Shift Left by 8-bit immediate
8034 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8035 %{
8036   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8037   effect(KILL cr);
8038 
8039   format %{ "sall    $dst, $shift" %}
8040   opcode(0xC1, 0x4); /* C1 /4 ib */
8041   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8042   ins_pipe(ialu_mem_imm);
8043 %}
8044 
8045 // Shift Left by variable
8046 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8047 %{
8048   match(Set dst (LShiftI dst shift));  // in-place int left shift; the variable count is pinned to the rcx_RegI operand class
8049   effect(KILL cr);
8050 
8051   format %{ "sall    $dst, $shift" %}
8052   opcode(0xD3, 0x4); /* D3 /4 */
8053   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8054   ins_pipe(ialu_reg_reg);
8055 %}
8056 
8057 // Shift Left by variable
8058 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8059 %{
8060   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8061   effect(KILL cr);
8062 
8063   format %{ "sall    $dst, $shift" %}
8064   opcode(0xD3, 0x4); /* D3 /4 */
8065   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8066   ins_pipe(ialu_mem_reg);
8067 %}
8068 
8069 // Arithmetic shift right by one
8070 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8071 %{
8072   match(Set dst (RShiftI dst shift));  // in-place int arithmetic right shift; shift is restricted to the immI1 operand class
8073   effect(KILL cr);
8074 
8075   format %{ "sarl    $dst, $shift" %}
8076   opcode(0xD1, 0x7); /* D1 /7 */
8077   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form carries no immediate byte
8078   ins_pipe(ialu_reg);
8079 %}
8080 
8081 // Arithmetic shift right by one
8082 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8083 %{
8084   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8085   effect(KILL cr);
8086 
8087   format %{ "sarl    $dst, $shift" %}
8088   opcode(0xD1, 0x7); /* D1 /7 */
8089   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x7) is handed to the ModRM enc_class; no immediate byte
8090   ins_pipe(ialu_mem_imm);
8091 %}
8092 
8093 // Arithmetic Shift Right by 8-bit immediate
8094 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8095 %{
8096   match(Set dst (RShiftI dst shift));  // in-place int arithmetic right shift by an immI8 constant
8097   effect(KILL cr);
8098 
8099   format %{ "sarl    $dst, $shift" %}
8100   opcode(0xC1, 0x7); /* C1 /7 ib */
8101   ins_encode(reg_opc_imm(dst, shift));
8102   ins_pipe(ialu_reg);                  // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
8103 %}
8104 
8105 // Arithmetic Shift Right by 8-bit immediate
8106 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8107 %{
8108   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8109   effect(KILL cr);
8110 
8111   format %{ "sarl    $dst, $shift" %}
8112   opcode(0xC1, 0x7); /* C1 /7 ib */
8113   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8114   ins_pipe(ialu_mem_imm);
8115 %}
8116 
8117 // Arithmetic Shift Right by variable
8118 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8119 %{
8120   match(Set dst (RShiftI dst shift));  // in-place int arithmetic right shift; the variable count is pinned to the rcx_RegI operand class
8121   effect(KILL cr);
8122 
8123   format %{ "sarl    $dst, $shift" %}
8124   opcode(0xD3, 0x7); /* D3 /7 */
8125   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8126   ins_pipe(ialu_reg_reg);
8127 %}
8128 
8129 // Arithmetic Shift Right by variable
8130 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8131 %{
8132   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8133   effect(KILL cr);
8134 
8135   format %{ "sarl    $dst, $shift" %}
8136   opcode(0xD3, 0x7); /* D3 /7 */
8137   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8138   ins_pipe(ialu_mem_reg);
8139 %}
8140 
8141 // Logical shift right by one
8142 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8143 %{
8144   match(Set dst (URShiftI dst shift));  // in-place int logical right shift; shift is restricted to the immI1 operand class
8145   effect(KILL cr);
8146 
8147   format %{ "shrl    $dst, $shift" %}
8148   opcode(0xD1, 0x5); /* D1 /5 */
8149   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form carries no immediate byte
8150   ins_pipe(ialu_reg);
8151 %}
8152 
8153 // Logical shift right by one
8154 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8155 %{
8156   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8157   effect(KILL cr);
8158 
8159   format %{ "shrl    $dst, $shift" %}
8160   opcode(0xD1, 0x5); /* D1 /5 */
8161   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x5) is handed to the ModRM enc_class; no immediate byte
8162   ins_pipe(ialu_mem_imm);
8163 %}
8164 
8165 // Logical Shift Right by 8-bit immediate
8166 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8167 %{
8168   match(Set dst (URShiftI dst shift));  // in-place int logical right shift by an immI8 constant
8169   effect(KILL cr);
8170 
8171   format %{ "shrl    $dst, $shift" %}
8172   opcode(0xC1, 0x5); /* C1 /5 ib */
8173   ins_encode(reg_opc_imm(dst, shift));  // NOTE(review): a single enc_class; presumably emits prefix, opcode, ModRM and the imm8 itself - confirm against its definition
8174   ins_pipe(ialu_reg);
8175 %}
8176 
8177 // Logical Shift Right by 8-bit immediate
8178 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8179 %{
8180   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8181   effect(KILL cr);
8182 
8183   format %{ "shrl    $dst, $shift" %}
8184   opcode(0xC1, 0x5); /* C1 /5 ib */
8185   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8186   ins_pipe(ialu_mem_imm);
8187 %}
8188 
8189 // Logical Shift Right by variable
8190 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8191 %{
8192   match(Set dst (URShiftI dst shift));  // in-place int logical right shift; the variable count is pinned to the rcx_RegI operand class
8193   effect(KILL cr);
8194 
8195   format %{ "shrl    $dst, $shift" %}
8196   opcode(0xD3, 0x5); /* D3 /5 */
8197   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8198   ins_pipe(ialu_reg_reg);
8199 %}
8200 
8201 // Logical Shift Right by variable
8202 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8203 %{
8204   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back on the same memory operand
8205   effect(KILL cr);
8206 
8207   format %{ "shrl    $dst, $shift" %}
8208   opcode(0xD3, 0x5); /* D3 /5 */
8209   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8210   ins_pipe(ialu_mem_reg);
8211 %}
8212 
8213 // Long Shift Instructions
8214 // Shift Left by one
8215 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8216 %{
8217   match(Set dst (LShiftL dst shift));  // in-place long left shift; the count is still an int-typed operand (immI1)
8218   effect(KILL cr);
8219 
8220   format %{ "salq    $dst, $shift" %}
8221   opcode(0xD1, 0x4); /* D1 /4 */
8222   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // same opcode as salI_rReg_1; only the REX enc_class is the _wide variant
8223   ins_pipe(ialu_reg);
8224 %}
8225 
8226 // Shift Left by one
8227 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8228 %{
8229   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8230   effect(KILL cr);
8231 
8232   format %{ "salq    $dst, $shift" %}
8233   opcode(0xD1, 0x4); /* D1 /4 */
8234   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x4) is handed to the ModRM enc_class; no immediate byte
8235   ins_pipe(ialu_mem_imm);
8236 %}
8237 
8238 // Shift Left by 8-bit immediate
8239 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8240 %{
8241   match(Set dst (LShiftL dst shift));  // in-place long left shift by an immI8 constant
8242   effect(KILL cr);
8243 
8244   format %{ "salq    $dst, $shift" %}
8245   opcode(0xC1, 0x4); /* C1 /4 ib */
8246   ins_encode(reg_opc_imm_wide(dst, shift));  // NOTE(review): a single enc_class; presumably emits the wide prefix, opcode, ModRM and the imm8 itself - confirm against its definition
8247   ins_pipe(ialu_reg);
8248 %}
8249 
8250 // Shift Left by 8-bit immediate
8251 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8252 %{
8253   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8254   effect(KILL cr);
8255 
8256   format %{ "salq    $dst, $shift" %}
8257   opcode(0xC1, 0x4); /* C1 /4 ib */
8258   ins_encode(REX_mem_wide(dst), OpcP,
8259              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8260   ins_pipe(ialu_mem_imm);
8261 %}
8262 
8263 // Shift Left by variable
8264 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8265 %{
8266   match(Set dst (LShiftL dst shift));  // in-place long left shift; the variable count is an int pinned to the rcx_RegI operand class
8267   effect(KILL cr);
8268 
8269   format %{ "salq    $dst, $shift" %}
8270   opcode(0xD3, 0x4); /* D3 /4 */
8271   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8272   ins_pipe(ialu_reg_reg);
8273 %}
8274 
8275 // Shift Left by variable
8276 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8277 %{
8278   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8279   effect(KILL cr);
8280 
8281   format %{ "salq    $dst, $shift" %}
8282   opcode(0xD3, 0x4); /* D3 /4 */
8283   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8284   ins_pipe(ialu_mem_reg);
8285 %}
8286 
8287 // Arithmetic shift right by one
8288 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8289 %{
8290   match(Set dst (RShiftL dst shift));  // in-place long arithmetic right shift; the count is still an int-typed operand (immI1)
8291   effect(KILL cr);
8292 
8293   format %{ "sarq    $dst, $shift" %}
8294   opcode(0xD1, 0x7); /* D1 /7 */
8295   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // same opcode as sarI_rReg_1; only the REX enc_class is the _wide variant
8296   ins_pipe(ialu_reg);
8297 %}
8298 
8299 // Arithmetic shift right by one
8300 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8301 %{
8302   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8303   effect(KILL cr);
8304 
8305   format %{ "sarq    $dst, $shift" %}
8306   opcode(0xD1, 0x7); /* D1 /7 */
8307   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x7) is handed to the ModRM enc_class; no immediate byte
8308   ins_pipe(ialu_mem_imm);
8309 %}
8310 
8311 // Arithmetic Shift Right by 8-bit immediate
8312 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8313 %{
8314   match(Set dst (RShiftL dst shift));  // in-place long arithmetic right shift by an immI8 constant
8315   effect(KILL cr);
8316 
8317   format %{ "sarq    $dst, $shift" %}
8318   opcode(0xC1, 0x7); /* C1 /7 ib */
8319   ins_encode(reg_opc_imm_wide(dst, shift));
8320   ins_pipe(ialu_reg);                  // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
8321 %}
8322 
8323 // Arithmetic Shift Right by 8-bit immediate
8324 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8325 %{
8326   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8327   effect(KILL cr);
8328 
8329   format %{ "sarq    $dst, $shift" %}
8330   opcode(0xC1, 0x7); /* C1 /7 ib */
8331   ins_encode(REX_mem_wide(dst), OpcP,
8332              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8333   ins_pipe(ialu_mem_imm);
8334 %}
8335 
8336 // Arithmetic Shift Right by variable
8337 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8338 %{
8339   match(Set dst (RShiftL dst shift));  // in-place long arithmetic right shift; the variable count is an int pinned to the rcx_RegI operand class
8340   effect(KILL cr);
8341 
8342   format %{ "sarq    $dst, $shift" %}
8343   opcode(0xD3, 0x7); /* D3 /7 */
8344   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8345   ins_pipe(ialu_reg_reg);
8346 %}
8347 
8348 // Arithmetic Shift Right by variable
8349 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8350 %{
8351   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8352   effect(KILL cr);
8353 
8354   format %{ "sarq    $dst, $shift" %}
8355   opcode(0xD3, 0x7); /* D3 /7 */
8356   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8357   ins_pipe(ialu_mem_reg);
8358 %}
8359 
8360 // Logical shift right by one
8361 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8362 %{
8363   match(Set dst (URShiftL dst shift));  // in-place long logical right shift; the count is still an int-typed operand (immI1)
8364   effect(KILL cr);
8365 
8366   format %{ "shrq    $dst, $shift" %}
8367   opcode(0xD1, 0x5); /* D1 /5 */
8368   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // same opcode as shrI_rReg_1; only the REX enc_class is the _wide variant
8369   ins_pipe(ialu_reg);
8370 %}
8371 
8372 // Logical shift right by one
8373 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8374 %{
8375   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8376   effect(KILL cr);
8377 
8378   format %{ "shrq    $dst, $shift" %}
8379   opcode(0xD1, 0x5); /* D1 /5 */
8380   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // the secondary opcode (0x5) is handed to the ModRM enc_class; no immediate byte
8381   ins_pipe(ialu_mem_imm);
8382 %}
8383 
8384 // Logical Shift Right by 8-bit immediate
8385 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8386 %{
8387   match(Set dst (URShiftL dst shift));  // in-place long logical right shift by an immI8 constant
8388   effect(KILL cr);
8389 
8390   format %{ "shrq    $dst, $shift" %}
8391   opcode(0xC1, 0x5); /* C1 /5 ib */
8392   ins_encode(reg_opc_imm_wide(dst, shift));  // NOTE(review): a single enc_class; presumably emits the wide prefix, opcode, ModRM and the imm8 itself - confirm against its definition
8393   ins_pipe(ialu_reg);
8394 %}
8395 
8396 
8397 // Logical Shift Right by 8-bit immediate
8398 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8399 %{
8400   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8401   effect(KILL cr);
8402 
8403   format %{ "shrq    $dst, $shift" %}
8404   opcode(0xC1, 0x5); /* C1 /5 ib */
8405   ins_encode(REX_mem_wide(dst), OpcP,
8406              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift constant is emitted last, after the ModRM/memory bytes
8407   ins_pipe(ialu_mem_imm);
8408 %}
8409 
8410 // Logical Shift Right by variable
8411 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8412 %{
8413   match(Set dst (URShiftL dst shift));  // in-place long logical right shift; the variable count is an int pinned to the rcx_RegI operand class
8414   effect(KILL cr);
8415 
8416   format %{ "shrq    $dst, $shift" %}
8417   opcode(0xD3, 0x5); /* D3 /5 */
8418   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8419   ins_pipe(ialu_reg_reg);
8420 %}
8421 
8422 // Logical Shift Right by variable
8423 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8424 %{
8425   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back on the same long memory operand
8426   effect(KILL cr);
8427 
8428   format %{ "shrq    $dst, $shift" %}
8429   opcode(0xD3, 0x5); /* D3 /5 */
8430   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded: the count register is implicit in the D3 form
8431   ins_pipe(ialu_mem_reg);
8432 %}
8433 
8434 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8435 // This idiom is used by the compiler for the i2b bytecode.
8436 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8437 %{
8438   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // the same constant operand must appear in both shifts
8439 
8440   format %{ "movsbl  $dst, $src\t# i2b" %}
8441   opcode(0x0F, 0xBE);
8442   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // uses the _breg REX variant, unlike i2s (presumably because src is read as a byte register)
8443   ins_pipe(ialu_reg_reg);
8444 %}
8445 
8446 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8447 // This idiom is used by the compiler for the i2s bytecode.
8448 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8449 %{
8450   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // the same constant operand must appear in both shifts
8451 
8452   format %{ "movswl  $dst, $src\t# i2s" %}
8453   opcode(0x0F, 0xBF);
8454   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // no flags operand: this instruct declares no KILL effect
8455   ins_pipe(ialu_reg_reg);
8456 %}
8457 
8458 // ROL/ROR instructions
8459 
8460 // ROL expand
8461 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
8462   effect(KILL cr, USE_DEF dst);  // no match rule: expand-only helper, instantiated by rolI_rReg_i1
8463 
8464   format %{ "roll    $dst" %}
8465   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8466   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no count operand is encoded in the D1 form
8467   ins_pipe(ialu_reg);
8468 %}
8469 
8470 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
8471   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper, instantiated by rolI_rReg_i8
8472 
8473   format %{ "roll    $dst, $shift" %}
8474   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8475   ins_encode( reg_opc_imm(dst, shift) );  // same enc_class as the immediate int shifts (e.g. salI_rReg_imm)
8476   ins_pipe(ialu_reg);
8477 %}
8478 
8479 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8480 %{
8481   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper; dst is drawn from no_rcx_RegI so it cannot share the count register
8482 
8483   format %{ "roll    $dst, $shift" %}
8484   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8485   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8486   ins_pipe(ialu_reg_reg);
8487 %}
8488 // end of ROL expand
8489 
8490 // Rotate Left by one
8491 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8492 %{
8493   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // OR of a left shift (immI1) and a logical right shift (immI_M1, presumably the constant -1) of the same dst
8494 
8495   expand %{
8496     rolI_rReg_imm1(dst, cr);  // the KILL of cr is declared on the expanded helper, not here
8497   %}
8498 %}
8499 
8500 // Rotate Left by 8-bit immediate
8501 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8502 %{
8503   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift constants sum to a multiple of 32
8504   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8505 
8506   expand %{
8507     rolI_rReg_imm8(dst, lshift, cr);  // rotate left by the left-shift amount; rshift is implied by the predicate
8508   %}
8509 %}
8510 
8511 // Rotate Left by variable
8512 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8513 %{
8514   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));  // left shift by s OR-ed with logical right shift by (0 - s)
8515 
8516   expand %{
8517     rolI_rReg_CL(dst, shift, cr);  // same expansion as rolI_rReg_Var_C32
8518   %}
8519 %}
8520 
8521 // Rotate Left by variable
8522 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8523 %{
8524   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));  // left shift by s OR-ed with logical right shift by (32 - s)
8525 
8526   expand %{
8527     rolI_rReg_CL(dst, shift, cr);  // same expansion as rolI_rReg_Var_C0
8528   %}
8529 %}
8530 
8531 // ROR expand
8532 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8533 %{
8534   effect(USE_DEF dst, KILL cr);  // no match rule: expand-only helper, instantiated by rorI_rReg_i1
8535 
8536   format %{ "rorl    $dst" %}
8537   opcode(0xD1, 0x1); /* D1 /1 */
8538   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no count operand is encoded in the D1 form
8539   ins_pipe(ialu_reg);
8540 %}
8541 
8542 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8543 %{
8544   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper, instantiated by rorI_rReg_i8
8545 
8546   format %{ "rorl    $dst, $shift" %}
8547   opcode(0xC1, 0x1); /* C1 /1 ib */
8548   ins_encode(reg_opc_imm(dst, shift));  // same enc_class as the immediate int shifts (e.g. salI_rReg_imm)
8549   ins_pipe(ialu_reg);
8550 %}
8551 
8552 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8553 %{
8554   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper; dst is drawn from no_rcx_RegI so it cannot share the count register
8555 
8556   format %{ "rorl    $dst, $shift" %}
8557   opcode(0xD3, 0x1); /* D3 /1 */
8558   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8559   ins_pipe(ialu_reg_reg);
8560 %}
8561 // end of ROR expand
8562 
8563 // Rotate Right by one
8564 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8565 %{
8566   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // OR of a logical right shift (immI1) and a left shift (immI_M1, presumably the constant -1) of the same dst
8567 
8568   expand %{
8569     rorI_rReg_imm1(dst, cr);  // the KILL of cr is declared on the expanded helper, not here
8570   %}
8571 %}
8572 
8573 // Rotate Right by 8-bit immediate
8574 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8575 %{
8576   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift constants sum to a multiple of 32
8577   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8578 
8579   expand %{
8580     rorI_rReg_imm8(dst, rshift, cr);  // rotate right by the right-shift amount; lshift is implied by the predicate
8581   %}
8582 %}
8583 
8584 // Rotate Right by variable
8585 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8586 %{
8587   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));  // logical right shift by s OR-ed with left shift by (0 - s)
8588 
8589   expand %{
8590     rorI_rReg_CL(dst, shift, cr);  // same expansion as rorI_rReg_Var_C32
8591   %}
8592 %}
8593 
8594 // Rotate Right by variable
8595 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8596 %{
8597   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));  // logical right shift by s OR-ed with left shift by (32 - s)
8598 
8599   expand %{
8600     rorI_rReg_CL(dst, shift, cr);  // same expansion as rorI_rReg_Var_C0
8601   %}
8602 %}
8603 
8604 // for long rotate
8605 // ROL expand
8606 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
8607   effect(USE_DEF dst, KILL cr);  // no match rule: expand-only helper, instantiated by rolL_rReg_i1
8608 
8609   format %{ "rolq    $dst" %}
8610   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8611   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // same opcode as rolI_rReg_imm1; only the REX enc_class is the _wide variant
8612   ins_pipe(ialu_reg);
8613 %}
8614 
8615 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
8616   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper, instantiated by rolL_rReg_i8
8617 
8618   format %{ "rolq    $dst, $shift" %}
8619   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8620   ins_encode( reg_opc_imm_wide(dst, shift) );  // same enc_class as the immediate long shifts (e.g. salL_rReg_imm)
8621   ins_pipe(ialu_reg);
8622 %}
8623 
8624 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8625 %{
8626   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper; dst is drawn from no_rcx_RegL so it cannot share the count register
8627 
8628   format %{ "rolq    $dst, $shift" %}
8629   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8630   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8631   ins_pipe(ialu_reg_reg);
8632 %}
8633 // end of ROL expand
8634 
8635 // Rotate Left by one
8636 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8637 %{
8638   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // OR of a left shift (immI1) and a logical right shift (immI_M1, presumably the constant -1) of the same long dst
8639 
8640   expand %{
8641     rolL_rReg_imm1(dst, cr);  // the KILL of cr is declared on the expanded helper, not here
8642   %}
8643 %}
8644 
8645 // Rotate Left by 8-bit immediate
8646 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8647 %{
8648   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when the two shift constants sum to a multiple of 64
8649   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8650 
8651   expand %{
8652     rolL_rReg_imm8(dst, lshift, cr);  // rotate left by the left-shift amount; rshift is implied by the predicate
8653   %}
8654 %}
8655 
8656 // Rotate Left by variable
8657 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8658 %{
8659   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));  // left shift by s OR-ed with logical right shift by (0 - s); the count arithmetic is an int SubI
8660 
8661   expand %{
8662     rolL_rReg_CL(dst, shift, cr);  // same expansion as rolL_rReg_Var_C64
8663   %}
8664 %}
8665 
8666 // Rotate Left by variable
8667 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8668 %{
8669   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));  // left shift by s OR-ed with logical right shift by (64 - s); the count arithmetic is an int SubI
8670 
8671   expand %{
8672     rolL_rReg_CL(dst, shift, cr);  // same expansion as rolL_rReg_Var_C0
8673   %}
8674 %}
8675 
8676 // ROR expand
8677 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8678 %{
8679   effect(USE_DEF dst, KILL cr);  // no match rule: expand-only helper, instantiated by rorL_rReg_i1
8680 
8681   format %{ "rorq    $dst" %}
8682   opcode(0xD1, 0x1); /* D1 /1 */
8683   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // same opcode as rorI_rReg_imm1; only the REX enc_class is the _wide variant
8684   ins_pipe(ialu_reg);
8685 %}
8686 
8687 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8688 %{
8689   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper, instantiated by rorL_rReg_i8
8690 
8691   format %{ "rorq    $dst, $shift" %}
8692   opcode(0xC1, 0x1); /* C1 /1 ib */
8693   ins_encode(reg_opc_imm_wide(dst, shift));  // same enc_class as the immediate long shifts (e.g. salL_rReg_imm)
8694   ins_pipe(ialu_reg);
8695 %}
8696 
8697 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8698 %{
8699   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule: expand-only helper; dst is drawn from no_rcx_RegL so it cannot share the count register
8700 
8701   format %{ "rorq    $dst, $shift" %}
8702   opcode(0xD3, 0x1); /* D3 /1 */
8703   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the count register is implicit in the D3 form
8704   ins_pipe(ialu_reg_reg);
8705 %}
8706 // end of ROR expand
8707 
8708 // Rotate Right by one
8709 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8710 %{
8711   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // OR of a logical right shift (immI1) and a left shift (immI_M1, presumably the constant -1) of the same long dst
8712 
8713   expand %{
8714     rorL_rReg_imm1(dst, cr);  // the KILL of cr is declared on the expanded helper, not here
8715   %}
8716 %}
8717 
8718 // Rotate Right by 8-bit immediate
8719 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8720 %{
8721   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when the two shift constants sum to a multiple of 64
8722   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8723 
8724   expand %{
8725     rorL_rReg_imm8(dst, rshift, cr);  // rotate right by the right-shift amount; lshift is implied by the predicate
8726   %}
8727 %}
8728 
8729 // Rotate Right by variable
8730 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8731 %{
8732   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));  // logical right shift by s OR-ed with left shift by (0 - s); the count arithmetic is an int SubI
8733 
8734   expand %{
8735     rorL_rReg_CL(dst, shift, cr);  // same expansion as rorL_rReg_Var_C64
8736   %}
8737 %}
8738 
8739 // Rotate Right by variable
8740 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8741 %{
8742   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));  // logical right shift by s OR-ed with left shift by (64 - s); the count arithmetic is an int SubI
8743 
8744   expand %{
8745     rorL_rReg_CL(dst, shift, cr);  // same expansion as rorL_rReg_Var_C0
8746   %}
8747 %}
8748 
8749 // Logical Instructions
8750 
8751 // Integer Logical Instructions
8752 
8753 // And Instructions
8754 // And Register with Register
8755 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8756 %{
8757   match(Set dst (AndI dst src));  // in-place int AND: dst = dst AND src
8758   effect(KILL cr);                // the flags operand is declared clobbered
8759 
8760   format %{ "andl    $dst, $src\t# int" %}
8761   opcode(0x23);
8762   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));  // single-byte opcode: only the primary is emitted
8763   ins_pipe(ialu_reg_reg);
8764 %}
8765 
8766 // And Register with Immediate 255: emitted as movzbl of dst onto itself rather than an andl
8767 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8768 %{
8769   match(Set dst (AndI dst src)); // no flags operand and no KILL effect are declared for this rule
8770 
8771   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8772   opcode(0x0F, 0xB6); /* two opcode bytes: 0F B6 */
8773   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // uses the REX_reg_breg enc_class (defined outside this view) instead of REX_reg_reg
8774   ins_pipe(ialu_reg);
8775 %}
8776 
8777 // And Register with Immediate 255 and promote to long: one movzbl from src into dst covers both the mask and the ConvI2L
8778 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8779 %{
8780   match(Set dst (ConvI2L (AndI src mask))); // src is not overwritten; result goes to a separate long register
8781 
8782   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8783   opcode(0x0F, 0xB6); /* two opcode bytes: 0F B6 */
8784   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8785   ins_pipe(ialu_reg);
8786 %}
8787 
8788 // And Register with Immediate 65535: emitted as movzwl of dst onto itself rather than an andl
8789 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8790 %{
8791   match(Set dst (AndI dst src)); // no flags operand and no KILL effect are declared for this rule
8792 
8793   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8794   opcode(0x0F, 0xB7); /* two opcode bytes: 0F B7 */
8795   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8796   ins_pipe(ialu_reg);
8797 %}
8798 
8799 // And Register with Immediate 65535 and promote to long: one movzwl from src into dst covers both the mask and the ConvI2L
8800 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8801 %{
8802   match(Set dst (ConvI2L (AndI src mask))); // src is not overwritten; result goes to a separate long register
8803 
8804   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8805   opcode(0x0F, 0xB7); /* two opcode bytes: 0F B7 */
8806   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8807   ins_pipe(ialu_reg);
8808 %}
8809 
8810 // And Register with Immediate (int): dst = dst & src for a general immI constant
8811 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8812 %{
8813   match(Set dst (AndI dst src));
8814   effect(KILL cr); // flags operand is declared killed
8815 
8816   format %{ "andl    $dst, $src\t# int" %}
8817   opcode(0x81, 0x04); /* Opcode 81 /4 */
8818   ins_encode(OpcSErm(dst, src), Con8or32(src)); // enc_classes defined outside this view; NOTE(review): names suggest an 8- or 32-bit immediate form is selected -- confirm there
8819   ins_pipe(ialu_reg);
8820 %}
8821 
8822 // And Register with Memory (int): dst = dst & LoadI(src), with the load folded into the instruction
8823 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8824 %{
8825   match(Set dst (AndI dst (LoadI src)));
8826   effect(KILL cr); // flags operand is declared killed
8827 
8828   ins_cost(125); // explicit cost for the memory-operand form
8829   format %{ "andl    $dst, $src\t# int" %}
8830   opcode(0x23); /* primary opcode 0x23, same as andI_rReg */
8831   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8832   ins_pipe(ialu_reg_mem);
8833 %}
8834 
8835 // And Memory with Register (int): matches a StoreI to dst of (LoadI dst) & src, i.e. load, and, store on one memory operand
8836 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8837 %{
8838   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8839   effect(KILL cr); // flags operand is declared killed
8840 
8841   ins_cost(150);
8842   format %{ "andl    $dst, $src\t# int" %}
8843   opcode(0x21); /* Opcode 21 /r */
8844   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
8845   ins_pipe(ialu_mem_reg);
8846 %}
8847 
8848 // And Memory with Immediate (int): matches a StoreI to dst of (LoadI dst) & src for an immI constant
8849 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
8850 %{
8851   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8852   effect(KILL cr); // flags operand is declared killed
8853 
8854   ins_cost(125);
8855   format %{ "andl    $dst, $src\t# int" %}
8856   opcode(0x81, 0x4); /* Opcode 81 /4 id */
8857   ins_encode(REX_mem(dst), OpcSE(src),
8858              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x4 given to opcode() above
8859   ins_pipe(ialu_mem_imm);
8860 %}
8861 
8862 // Or Instructions
8863 // Or Register with Register (int): dst = dst | src
8864 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8865 %{
8866   match(Set dst (OrI dst src));
8867   effect(KILL cr); // flags operand is declared killed
8868 
8869   format %{ "orl     $dst, $src\t# int" %}
8870   opcode(0x0B); /* primary opcode 0x0B */
8871   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8872   ins_pipe(ialu_reg_reg);
8873 %}
8874 
8875 // Or Register with Immediate (int): dst = dst | src for an immI constant
8876 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8877 %{
8878   match(Set dst (OrI dst src));
8879   effect(KILL cr); // flags operand is declared killed
8880 
8881   format %{ "orl     $dst, $src\t# int" %}
8882   opcode(0x81, 0x01); /* Opcode 81 /1 id */
8883   ins_encode(OpcSErm(dst, src), Con8or32(src)); // same enc_class pair as andI_rReg_imm; only the secondary opcode differs
8884   ins_pipe(ialu_reg);
8885 %}
8886 
8887 // Or Register with Memory (int): dst = dst | LoadI(src), with the load folded into the instruction
8888 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8889 %{
8890   match(Set dst (OrI dst (LoadI src)));
8891   effect(KILL cr); // flags operand is declared killed
8892 
8893   ins_cost(125); // explicit cost for the memory-operand form
8894   format %{ "orl     $dst, $src\t# int" %}
8895   opcode(0x0B); /* primary opcode 0x0B, same as orI_rReg */
8896   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8897   ins_pipe(ialu_reg_mem);
8898 %}
8899 
8900 // Or Memory with Register (int): matches a StoreI to dst of (LoadI dst) | src on one memory operand
8901 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8902 %{
8903   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8904   effect(KILL cr); // flags operand is declared killed
8905 
8906   ins_cost(150);
8907   format %{ "orl     $dst, $src\t# int" %}
8908   opcode(0x09); /* Opcode 09 /r */
8909   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
8910   ins_pipe(ialu_mem_reg);
8911 %}
8912 
8913 // Or Memory with Immediate (int): matches a StoreI to dst of (LoadI dst) | src for an immI constant
8914 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
8915 %{
8916   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8917   effect(KILL cr); // flags operand is declared killed
8918 
8919   ins_cost(125);
8920   format %{ "orl     $dst, $src\t# int" %}
8921   opcode(0x81, 0x1); /* Opcode 81 /1 id */
8922   ins_encode(REX_mem(dst), OpcSE(src),
8923              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x1 given to opcode() above
8924   ins_pipe(ialu_mem_imm);
8925 %}
8926 
8927 // Xor Instructions
8928 // Xor Register with Register (int): dst = dst ^ src
8929 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8930 %{
8931   match(Set dst (XorI dst src));
8932   effect(KILL cr); // flags operand is declared killed
8933 
8934   format %{ "xorl    $dst, $src\t# int" %}
8935   opcode(0x33); /* primary opcode 0x33 */
8936   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8937   ins_pipe(ialu_reg_reg);
8938 %}
8939 
8940 // Xor Register with Immediate -1 (int): dst ^ -1 is emitted as a bitwise NOT of dst via the assembler's notl
8941 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
8942   match(Set dst (XorI dst imm)); // no flags operand and no KILL effect are declared for this rule
8943 
8944   format %{ "notl   $dst" %}
8945   ins_encode %{
8946      __ notl($dst$$Register); // format text above names the same 32-bit mnemonic that is emitted here
8947   %}
8948   ins_pipe(ialu_reg);
8949 %}
8950 
8951 // Xor Register with Immediate (int): dst = dst ^ src for a general immI constant
8952 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8953 %{
8954   match(Set dst (XorI dst src));
8955   effect(KILL cr); // flags operand is declared killed
8956 
8957   format %{ "xorl    $dst, $src\t# int" %}
8958   opcode(0x81, 0x06); /* Opcode 81 /6 id */
8959   ins_encode(OpcSErm(dst, src), Con8or32(src)); // same enc_class pair as andI_rReg_imm; only the secondary opcode differs
8960   ins_pipe(ialu_reg);
8961 %}
8962 
8963 // Xor Register with Memory (int): dst = dst ^ LoadI(src), with the load folded into the instruction
8964 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8965 %{
8966   match(Set dst (XorI dst (LoadI src)));
8967   effect(KILL cr); // flags operand is declared killed
8968 
8969   ins_cost(125); // explicit cost for the memory-operand form
8970   format %{ "xorl    $dst, $src\t# int" %}
8971   opcode(0x33); /* primary opcode 0x33, same as xorI_rReg */
8972   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8973   ins_pipe(ialu_reg_mem);
8974 %}
8975 
8976 // Xor Memory with Register (int): matches a StoreI to dst of (LoadI dst) ^ src on one memory operand
8977 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8978 %{
8979   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8980   effect(KILL cr); // flags operand is declared killed
8981 
8982   ins_cost(150);
8983   format %{ "xorl    $dst, $src\t# int" %}
8984   opcode(0x31); /* Opcode 31 /r */
8985   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
8986   ins_pipe(ialu_mem_reg);
8987 %}
8988 
8989 // Xor Memory with Immediate (int): matches a StoreI to dst of (LoadI dst) ^ src for an immI constant
8990 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
8991 %{
8992   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8993   effect(KILL cr); // flags operand is declared killed
8994 
8995   ins_cost(125);
8996   format %{ "xorl    $dst, $src\t# int" %}
8997   opcode(0x81, 0x6); /* Opcode 81 /6 id */
8998   ins_encode(REX_mem(dst), OpcSE(src),
8999              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x6 given to opcode() above
9000   ins_pipe(ialu_mem_imm);
9001 %}
9002 
9003 
9004 // Long Logical Instructions
9005 
9006 // And Instructions
9007 // And Register with Register (long): dst = dst & src
9008 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9009 %{
9010   match(Set dst (AndL dst src));
9011   effect(KILL cr); // flags operand is declared killed
9012 
9013   format %{ "andq    $dst, $src\t# long" %}
9014   opcode(0x23); /* same primary opcode as andI_rReg; the _wide REX enc_class below is what differs */
9015   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9016   ins_pipe(ialu_reg_reg);
9017 %}
9018 
9019 // And Register with Immediate 255 (long): emitted as movzbq of dst onto itself rather than an andq
9020 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9021 %{
9022   match(Set dst (AndL dst src)); // no flags operand and no KILL effect are declared for this rule
9023 
9024   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9025   opcode(0x0F, 0xB6); /* two opcode bytes: 0F B6 */
9026   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9027   ins_pipe(ialu_reg);
9028 %}
9029 
9030 // And Register with Immediate 65535 (long): emitted as movzwq of dst onto itself rather than an andq
9031 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9032 %{
9033   match(Set dst (AndL dst src)); // no flags operand and no KILL effect are declared for this rule
9034 
9035   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9036   opcode(0x0F, 0xB7); /* two opcode bytes: 0F B7 */
9037   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9038   ins_pipe(ialu_reg);
9039 %}
9040 
9041 // And Register with Immediate (long): dst = dst & src; the constant operand is restricted to the immL32 class
9042 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9043 %{
9044   match(Set dst (AndL dst src));
9045   effect(KILL cr); // flags operand is declared killed
9046 
9047   format %{ "andq    $dst, $src\t# long" %}
9048   opcode(0x81, 0x04); /* Opcode 81 /4 */
9049   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // _wide counterpart of the enc_class used by andI_rReg_imm
9050   ins_pipe(ialu_reg);
9051 %}
9052 
9053 // And Register with Memory (long): dst = dst & LoadL(src), with the load folded into the instruction
9054 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9055 %{
9056   match(Set dst (AndL dst (LoadL src)));
9057   effect(KILL cr); // flags operand is declared killed
9058 
9059   ins_cost(125); // explicit cost for the memory-operand form
9060   format %{ "andq    $dst, $src\t# long" %}
9061   opcode(0x23); /* primary opcode 0x23, same as andL_rReg */
9062   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9063   ins_pipe(ialu_reg_mem);
9064 %}
9065 
9066 // And Memory with Register (long): matches a StoreL to dst of (LoadL dst) & src on one memory operand
9067 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9068 %{
9069   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9070   effect(KILL cr); // flags operand is declared killed
9071 
9072   ins_cost(150);
9073   format %{ "andq    $dst, $src\t# long" %}
9074   opcode(0x21); /* Opcode 21 /r */
9075   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9076   ins_pipe(ialu_mem_reg);
9077 %}
9078 
9079 // And Memory with Immediate (long): matches a StoreL to dst of (LoadL dst) & src for an immL32 constant
9080 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9081 %{
9082   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9083   effect(KILL cr); // flags operand is declared killed
9084 
9085   ins_cost(125);
9086   format %{ "andq    $dst, $src\t# long" %}
9087   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9088   ins_encode(REX_mem_wide(dst), OpcSE(src),
9089              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x4 given to opcode() above
9090   ins_pipe(ialu_mem_imm);
9091 %}
9092 
9093 // Or Instructions
9094 // Or Register with Register (long): dst = dst | src
9095 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9096 %{
9097   match(Set dst (OrL dst src));
9098   effect(KILL cr); // flags operand is declared killed
9099 
9100   format %{ "orq     $dst, $src\t# long" %}
9101   opcode(0x0B); /* same primary opcode as orI_rReg; the _wide REX enc_class below is what differs */
9102   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9103   ins_pipe(ialu_reg_reg);
9104 %}
9105 
9106 // Or a long register with a pointer reinterpreted by CastP2X. Use any_RegP to match R15 (TLS register) without spilling.
9107 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9108   match(Set dst (OrL dst (CastP2X src))); // the cast is folded into the rule; the encoding is identical to orL_rReg
9109   effect(KILL cr);
9110 
9111   format %{ "orq     $dst, $src\t# long" %}
9112   opcode(0x0B);
9113   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9114   ins_pipe(ialu_reg_reg);
9115 %}
9116 
9117 
9118 // Or Register with Immediate (long): dst = dst | src; the constant operand is restricted to the immL32 class
9119 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9120 %{
9121   match(Set dst (OrL dst src));
9122   effect(KILL cr); // flags operand is declared killed
9123 
9124   format %{ "orq     $dst, $src\t# long" %}
9125   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9126   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // _wide counterpart of the enc_class used by orI_rReg_imm
9127   ins_pipe(ialu_reg);
9128 %}
9129 
9130 // Or Register with Memory (long): dst = dst | LoadL(src), with the load folded into the instruction
9131 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9132 %{
9133   match(Set dst (OrL dst (LoadL src)));
9134   effect(KILL cr); // flags operand is declared killed
9135 
9136   ins_cost(125); // explicit cost for the memory-operand form
9137   format %{ "orq     $dst, $src\t# long" %}
9138   opcode(0x0B); /* primary opcode 0x0B, same as orL_rReg */
9139   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9140   ins_pipe(ialu_reg_mem);
9141 %}
9142 
9143 // Or Memory with Register (long): matches a StoreL to dst of (LoadL dst) | src on one memory operand
9144 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9145 %{
9146   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9147   effect(KILL cr); // flags operand is declared killed
9148 
9149   ins_cost(150);
9150   format %{ "orq     $dst, $src\t# long" %}
9151   opcode(0x09); /* Opcode 09 /r */
9152   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9153   ins_pipe(ialu_mem_reg);
9154 %}
9155 
9156 // Or Memory with Immediate (long): matches a StoreL to dst of (LoadL dst) | src for an immL32 constant
9157 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9158 %{
9159   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9160   effect(KILL cr); // flags operand is declared killed
9161 
9162   ins_cost(125);
9163   format %{ "orq     $dst, $src\t# long" %}
9164   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9165   ins_encode(REX_mem_wide(dst), OpcSE(src),
9166              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x1 given to opcode() above
9167   ins_pipe(ialu_mem_imm);
9168 %}
9169 
9170 // Xor Instructions
9171 // Xor Register with Register (long): dst = dst ^ src
9172 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9173 %{
9174   match(Set dst (XorL dst src));
9175   effect(KILL cr); // flags operand is declared killed
9176 
9177   format %{ "xorq    $dst, $src\t# long" %}
9178   opcode(0x33); /* same primary opcode as xorI_rReg; the _wide REX enc_class below is what differs */
9179   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9180   ins_pipe(ialu_reg_reg);
9181 %}
9182 
9183 // Xor Register with Immediate -1 (long): dst ^ -1 is emitted as a bitwise NOT of dst via the assembler's notq
9184 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9185   match(Set dst (XorL dst imm)); // no flags operand and no KILL effect are declared for this rule
9186 
9187   format %{ "notq   $dst" %}
9188   ins_encode %{
9189      __ notq($dst$$Register); // the immediate operand itself is never encoded
9190   %}
9191   ins_pipe(ialu_reg);
9192 %}
9193 
9194 // Xor Register with Immediate (long): dst = dst ^ src; the constant operand is restricted to the immL32 class
9195 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9196 %{
9197   match(Set dst (XorL dst src));
9198   effect(KILL cr); // flags operand is declared killed
9199 
9200   format %{ "xorq    $dst, $src\t# long" %}
9201   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9202   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // _wide counterpart of the enc_class used by xorI_rReg_imm
9203   ins_pipe(ialu_reg);
9204 %}
9205 
9206 // Xor Register with Memory (long): dst = dst ^ LoadL(src), with the load folded into the instruction
9207 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9208 %{
9209   match(Set dst (XorL dst (LoadL src)));
9210   effect(KILL cr); // flags operand is declared killed
9211 
9212   ins_cost(125); // explicit cost for the memory-operand form
9213   format %{ "xorq    $dst, $src\t# long" %}
9214   opcode(0x33); /* primary opcode 0x33, same as xorL_rReg */
9215   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9216   ins_pipe(ialu_reg_mem);
9217 %}
9218 
9219 // Xor Memory with Register (long): matches a StoreL to dst of (LoadL dst) ^ src on one memory operand
9220 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9221 %{
9222   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9223   effect(KILL cr); // flags operand is declared killed
9224 
9225   ins_cost(150);
9226   format %{ "xorq    $dst, $src\t# long" %}
9227   opcode(0x31); /* Opcode 31 /r */
9228   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9229   ins_pipe(ialu_mem_reg);
9230 %}
9231 
9232 // Xor Memory with Immediate (long): matches a StoreL to dst of (LoadL dst) ^ src for an immL32 constant
9233 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9234 %{
9235   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9236   effect(KILL cr); // flags operand is declared killed
9237 
9238   ins_cost(125);
9239   format %{ "xorq    $dst, $src\t# long" %}
9240   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9241   ins_encode(REX_mem_wide(dst), OpcSE(src),
9242              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the 0x6 given to opcode() above
9243   ins_pipe(ialu_mem_imm);
9244 %}
9245 
9246 // Convert Int to Boolean: three-instruction sequence (testl, setnz, movzbl) producing the Conv2B result in dst
9247 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9248 %{
9249   match(Set dst (Conv2B src));
9250   effect(KILL cr); // the testl below writes the flags
9251 
9252   format %{ "testl   $src, $src\t# ci2b\n\t"
9253             "setnz   $dst\n\t"
9254             "movzbl  $dst, $dst" %}
9255   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9256              setNZ_reg(dst),
9257              REX_reg_breg(dst, dst), // movzbl
9258              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9259   ins_pipe(pipe_slow); // XXX
9260 %}
9261 
9262 // Convert Pointer to Boolean: same sequence as convI2B but the test uses the wide (testq) REX form on a pointer register
9263 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9264 %{
9265   match(Set dst (Conv2B src));
9266   effect(KILL cr); // the testq below writes the flags
9267 
9268   format %{ "testq   $src, $src\t# cp2b\n\t"
9269             "setnz   $dst\n\t"
9270             "movzbl  $dst, $dst" %}
9271   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9272              setNZ_reg(dst),
9273              REX_reg_breg(dst, dst), // movzbl
9274              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9275   ins_pipe(pipe_slow); // XXX
9276 %}
9277 
9278 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9279 %{
9280   match(Set dst (CmpLTMask p q)); // general register/register form; per the format text: compare, setlt, zero-extend, then negate
9281   effect(KILL cr);
9282 
9283   ins_cost(400); // higher than the cost of 100 declared by cmpLTMask0
9284   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9285             "setlt   $dst\n\t"
9286             "movzbl  $dst, $dst\n\t"
9287             "negl    $dst" %}
9288   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9289              setLT_reg(dst),
9290              REX_reg_breg(dst, dst), // movzbl
9291              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9292              neg_reg(dst)); // negl: turns the 0/1 produced above into 0/-1
9293   ins_pipe(pipe_slow);
9294 %}
9295 
9296 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9297 %{
9298   match(Set dst (CmpLTMask dst zero)); // compare-against-zero special case, done in place on dst
9299   effect(KILL cr);
9300 
9301   ins_cost(100);
9302   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9303   ins_encode %{
9304   __ sarl($dst$$Register, 31); // arithmetic shift right by 31 replicates the sign bit across the register
9305   %}
9306   ins_pipe(ialu_reg);
9307 %}
9308 
9309 /* Better to save a register than avoid a branch: matches p = ((p < q mask) & y) + (p - q) and emits subtract / conditional skip / add */
9310 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9311 %{
9312   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9313   effect(KILL cr);
9314   ins_cost(300);
9315   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
9316             "jge    done\n\t"
9317             "addl   $p,$y\n"
9318             "done:  " %}
9319   ins_encode %{
9320     Register Rp = $p$$Register;
9321     Register Rq = $q$$Register;
9322     Register Ry = $y$$Register;
9323     Label done;
9324     __ subl(Rp, Rq); // p -= q; the flags of this subtraction drive the branch
9325     __ jccb(Assembler::greaterEqual, done); // short branch: skip the add when the greaterEqual condition holds
9326     __ addl(Rp, Ry); // otherwise p += y
9327     __ bind(done);
9328   %}
9329   ins_pipe(pipe_cmplt);
9330 %}
9331 
9332 /* Better to save a register than avoid a branch: matches y = (p < q mask) & y and emits compare / conditional skip / clear */
9333 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9334 %{
9335   match(Set y (AndI (CmpLTMask p q) y));
9336   effect(KILL cr);
9337 
9338   ins_cost(300);
9339 
9340   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
9341             "jlt      done\n\t"
9342             "xorl     $y, $y\n"
9343             "done:  " %}
9344   ins_encode %{
9345     Register Rp = $p$$Register;
9346     Register Rq = $q$$Register;
9347     Register Ry = $y$$Register;
9348     Label done;
9349     __ cmpl(Rp, Rq);
9350     __ jccb(Assembler::less, done); // short branch: y is left untouched when the less condition holds
9351     __ xorl(Ry, Ry); // otherwise y is cleared to zero
9352     __ bind(done);
9353   %}
9354   ins_pipe(pipe_cmplt);
9355 %}
9356 
9357 
9358 //---------- FP Instructions------------------------------------------------
9359 
9360 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9361 %{
9362   match(Set cr (CmpF src1 src2)); // float register/register compare whose result is the rFlagsRegU operand
9363 
9364   ins_cost(145); // costlier than cmpF_cc_reg_CF (100), which emits no fixup
9365   format %{ "ucomiss $src1, $src2\n\t"
9366             "jnp,s   exit\n\t"
9367             "pushfq\t# saw NaN, set CF\n\t"
9368             "andq    [rsp], #0xffffff2b\n\t"
9369             "popfq\n"
9370     "exit:" %}
9371   ins_encode %{
9372     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9373     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9374   %}
9375   ins_pipe(pipe_slow);
9376 %}
9377 
9378 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9379   match(Set cr (CmpF src1 src2)); // same ideal pattern as cmpF_cc_reg, but the result operand class is rFlagsRegUCF
9380 
9381   ins_cost(100);
9382   format %{ "ucomiss $src1, $src2" %}
9383   ins_encode %{
9384     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); // bare compare: no emit_cmpfp_fixup call in this variant
9385   %}
9386   ins_pipe(pipe_slow);
9387 %}
9388 
9389 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9390 %{
9391   match(Set cr (CmpF src1 (LoadF src2))); // float compare with the second operand loaded straight from memory
9392 
9393   ins_cost(145); // costlier than cmpF_cc_memCF (100), which emits no fixup
9394   format %{ "ucomiss $src1, $src2\n\t"
9395             "jnp,s   exit\n\t"
9396             "pushfq\t# saw NaN, set CF\n\t"
9397             "andq    [rsp], #0xffffff2b\n\t"
9398             "popfq\n"
9399     "exit:" %}
9400   ins_encode %{
9401     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9402     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9403   %}
9404   ins_pipe(pipe_slow);
9405 %}
9406 
9407 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9408   match(Set cr (CmpF src1 (LoadF src2))); // same ideal pattern as cmpF_cc_mem, but the result operand class is rFlagsRegUCF
9409 
9410   ins_cost(100);
9411   format %{ "ucomiss $src1, $src2" %}
9412   ins_encode %{
9413     __ ucomiss($src1$$XMMRegister, $src2$$Address); // bare compare: no emit_cmpfp_fixup call in this variant
9414   %}
9415   ins_pipe(pipe_slow);
9416 %}
9417 
9418 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
9419   match(Set cr (CmpF src con)); // float compare against a constant, addressed through $constantaddress
9420 
9421   ins_cost(145); // costlier than cmpF_cc_immCF (100), which emits no fixup
9422   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9423             "jnp,s   exit\n\t"
9424             "pushfq\t# saw NaN, set CF\n\t"
9425             "andq    [rsp], #0xffffff2b\n\t"
9426             "popfq\n"
9427     "exit:" %}
9428   ins_encode %{
9429     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9430     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9431   %}
9432   ins_pipe(pipe_slow);
9433 %}
9434 
9435 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
9436   match(Set cr (CmpF src con)); // same ideal pattern as cmpF_cc_imm, but the result operand class is rFlagsRegUCF
9437   ins_cost(100);
9438   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9439   ins_encode %{
9440     __ ucomiss($src$$XMMRegister, $constantaddress($con)); // bare compare: no emit_cmpfp_fixup call in this variant
9441   %}
9442   ins_pipe(pipe_slow);
9443 %}
9444 
9445 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9446 %{
9447   match(Set cr (CmpD src1 src2)); // double register/register compare whose result is the rFlagsRegU operand
9448 
9449   ins_cost(145); // costlier than cmpD_cc_reg_CF (100), which emits no fixup
9450   format %{ "ucomisd $src1, $src2\n\t"
9451             "jnp,s   exit\n\t"
9452             "pushfq\t# saw NaN, set CF\n\t"
9453             "andq    [rsp], #0xffffff2b\n\t"
9454             "popfq\n"
9455     "exit:" %}
9456   ins_encode %{
9457     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9458     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9459   %}
9460   ins_pipe(pipe_slow);
9461 %}
9462 
9463 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
9464   match(Set cr (CmpD src1 src2)); // same ideal pattern as cmpD_cc_reg, but the result operand class is rFlagsRegUCF
9465 
9466   ins_cost(100);
9467   format %{ "ucomisd $src1, $src2" %}
9468   ins_encode %{
9469     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); // bare compare: no emit_cmpfp_fixup call in this variant
9470   %}
9471   ins_pipe(pipe_slow);
9472 %}
9473 
9474 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9475 %{
9476   match(Set cr (CmpD src1 (LoadD src2))); // double compare with the second operand loaded straight from memory
9477 
9478   ins_cost(145); // costlier than cmpD_cc_memCF (100), which emits no fixup
9479   format %{ "ucomisd $src1, $src2\n\t"
9480             "jnp,s   exit\n\t"
9481             "pushfq\t# saw NaN, set CF\n\t"
9482             "andq    [rsp], #0xffffff2b\n\t"
9483             "popfq\n"
9484     "exit:" %}
9485   ins_encode %{
9486     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9487     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9488   %}
9489   ins_pipe(pipe_slow);
9490 %}
9491 
9492 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
9493   match(Set cr (CmpD src1 (LoadD src2))); // same ideal pattern as cmpD_cc_mem, but the result operand class is rFlagsRegUCF
9494 
9495   ins_cost(100);
9496   format %{ "ucomisd $src1, $src2" %}
9497   ins_encode %{
9498     __ ucomisd($src1$$XMMRegister, $src2$$Address); // bare compare: no emit_cmpfp_fixup call in this variant
9499   %}
9500   ins_pipe(pipe_slow);
9501 %}
9502 
9503 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
9504   match(Set cr (CmpD src con)); // double compare against a constant, addressed through $constantaddress
9505 
9506   ins_cost(145); // costlier than cmpD_cc_immCF (100), which emits no fixup
9507   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9508             "jnp,s   exit\n\t"
9509             "pushfq\t# saw NaN, set CF\n\t"
9510             "andq    [rsp], #0xffffff2b\n\t"
9511             "popfq\n"
9512     "exit:" %}
9513   ins_encode %{
9514     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9515     emit_cmpfp_fixup(_masm); // helper defined outside this view; the format text above describes the NaN flag fixup it is expected to emit
9516   %}
9517   ins_pipe(pipe_slow);
9518 %}
9519 
9520 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
9521   match(Set cr (CmpD src con)); // same ideal pattern as cmpD_cc_imm, but the result operand class is rFlagsRegUCF
9522   ins_cost(100);
9523   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
9524   ins_encode %{
9525     __ ucomisd($src$$XMMRegister, $constantaddress($con)); // bare compare: no emit_cmpfp_fixup call in this variant
9526   %}
9527   ins_pipe(pipe_slow);
9528 %}
9529 
9530 // Compare into -1,0,1: three-way float compare (CmpF3) of two registers, integer result in dst
9531 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9532 %{
9533   match(Set dst (CmpF3 src1 src2));
9534   effect(KILL cr); // the compare writes the flags
9535 
9536   ins_cost(275);
9537   format %{ "ucomiss $src1, $src2\n\t"
9538             "movl    $dst, #-1\n\t"
9539             "jp,s    done\n\t"
9540             "jb,s    done\n\t"
9541             "setne   $dst\n\t"
9542             "movzbl  $dst, $dst\n"
9543     "done:" %}
9544   ins_encode %{
9545     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9546     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9547   %}
9548   ins_pipe(pipe_slow);
9549 %}
9550 
9551 // Compare into -1,0,1: three-way float compare (CmpF3) of a register against a float loaded from memory
9552 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9553 %{
9554   match(Set dst (CmpF3 src1 (LoadF src2)));
9555   effect(KILL cr); // the compare writes the flags
9556 
9557   ins_cost(275);
9558   format %{ "ucomiss $src1, $src2\n\t"
9559             "movl    $dst, #-1\n\t"
9560             "jp,s    done\n\t"
9561             "jb,s    done\n\t"
9562             "setne   $dst\n\t"
9563             "movzbl  $dst, $dst\n"
9564     "done:" %}
9565   ins_encode %{
9566     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9567     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9568   %}
9569   ins_pipe(pipe_slow);
9570 %}
9571 
9572 // Compare into -1,0,1: three-way float compare (CmpF3) of a register against a constant addressed through $constantaddress
9573 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
9574   match(Set dst (CmpF3 src con));
9575   effect(KILL cr); // the compare writes the flags
9576 
9577   ins_cost(275);
9578   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9579             "movl    $dst, #-1\n\t"
9580             "jp,s    done\n\t"
9581             "jb,s    done\n\t"
9582             "setne   $dst\n\t"
9583             "movzbl  $dst, $dst\n"
9584     "done:" %}
9585   ins_encode %{
9586     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9587     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9588   %}
9589   ins_pipe(pipe_slow);
9590 %}
9591 
9592 // Compare into -1,0,1: three-way double compare (CmpD3) of two registers, integer result in dst
9593 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9594 %{
9595   match(Set dst (CmpD3 src1 src2));
9596   effect(KILL cr); // the compare writes the flags
9597 
9598   ins_cost(275);
9599   format %{ "ucomisd $src1, $src2\n\t"
9600             "movl    $dst, #-1\n\t"
9601             "jp,s    done\n\t"
9602             "jb,s    done\n\t"
9603             "setne   $dst\n\t"
9604             "movzbl  $dst, $dst\n"
9605     "done:" %}
9606   ins_encode %{
9607     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9608     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9609   %}
9610   ins_pipe(pipe_slow);
9611 %}
9612 
9613 // Compare into -1,0,1: three-way double compare (CmpD3) of a register against a double loaded from memory
9614 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9615 %{
9616   match(Set dst (CmpD3 src1 (LoadD src2)));
9617   effect(KILL cr); // the compare writes the flags
9618 
9619   ins_cost(275);
9620   format %{ "ucomisd $src1, $src2\n\t"
9621             "movl    $dst, #-1\n\t"
9622             "jp,s    done\n\t"
9623             "jb,s    done\n\t"
9624             "setne   $dst\n\t"
9625             "movzbl  $dst, $dst\n"
9626     "done:" %}
9627   ins_encode %{
9628     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9629     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9630   %}
9631   ins_pipe(pipe_slow);
9632 %}
9633 
9634 // Compare into -1,0,1: three-way double compare (CmpD3) of a register against a constant addressed through $constantaddress
9635 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
9636   match(Set dst (CmpD3 src con));
9637   effect(KILL cr); // the compare writes the flags
9638 
9639   ins_cost(275);
9640   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9641             "movl    $dst, #-1\n\t"
9642             "jp,s    done\n\t"
9643             "jb,s    done\n\t"
9644             "setne   $dst\n\t"
9645             "movzbl  $dst, $dst\n"
9646     "done:" %}
9647   ins_encode %{
9648     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9649     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this view; per the format text, dst stays -1 on the parity (unordered) and below branches
9650   %}
9651   ins_pipe(pipe_slow);
9652 %}
9653 
9654 // -----------Trig and Transcendental Instructions-----------------------------
9655 instruct cosD_reg(regD dst) %{
9656   match(Set dst (CosD dst)); // cosine computed in place: dst is both the argument and the result
9657 
9658   format %{ "dcos   $dst\n\t" %}
9659   opcode(0xD9, 0xFF); /* D9 FF is the x87 FCOS encoding */
9660   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // Push_SrcXD / Push_ResultXD are defined outside this view; presumably they move dst to and from the x87 stack around the two opcode bytes
9661   ins_pipe( pipe_slow );
9662 %}
9663 
9664 instruct sinD_reg(regD dst) %{
9665   match(Set dst (SinD dst)); // sine computed in place: dst is both the argument and the result
9666 
9667   format %{ "dsin   $dst\n\t" %}
9668   opcode(0xD9, 0xFE); /* D9 FE is the x87 FSIN encoding */
9669   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // Push_SrcXD / Push_ResultXD are defined outside this view; presumably they move dst to and from the x87 stack around the two opcode bytes
9670   ins_pipe( pipe_slow );
9671 %}
9672 
9673 instruct tanD_reg(regD dst) %{
9674   match(Set dst (TanD dst)); // tangent computed in place: dst is both the argument and the result
9675 
9676   format %{ "dtan   $dst\n\t" %}
9677   ins_encode( Push_SrcXD(dst),
9678               Opcode(0xD9), Opcode(0xF2),   //fptan
9679               Opcode(0xDD), Opcode(0xD8),   //fstp st -- pops the extra value fptan leaves on top of the x87 stack
9680               Push_ResultXD(dst) );
9681   ins_pipe( pipe_slow );
9682 %}
9683 
9684 instruct log10D_reg(regD dst) %{
9685   // The source and result Double operands in XMM registers; the computation itself uses x87 opcodes
9686   match(Set dst (Log10D dst));
9687   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9688   // fyl2x        ; compute log_10(2) * log_2(x), which equals log_10(x)
9689   format %{ "fldlg2\t\t\t#Log10\n\t"
9690             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
9691          %}
9692    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
9693               Push_SrcXD(dst),              // the constant is pushed first, then the argument from dst
9694               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9695               Push_ResultXD(dst));
9696 
9697   ins_pipe( pipe_slow );
9698 %}
9699 
9700 instruct logD_reg(regD dst) %{
9701   // The source and result Double operands in XMM registers; the computation itself uses x87 opcodes
9702   match(Set dst (LogD dst));
9703   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9704   // fyl2x        ; compute log_e(2) * log_2(x), which equals log_e(x)
9705   format %{ "fldln2\t\t\t#Log_e\n\t"
9706             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
9707          %}
9708   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9709               Push_SrcXD(dst),              // the constant is pushed first, then the argument from dst
9710               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9711               Push_ResultXD(dst));
9712   ins_pipe( pipe_slow );
9713 %}
9714 
9715 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9716   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9717   effect(KILL rax, KILL rdx, KILL rcx, KILL cr); // rax, rdx, rcx and the flags are all declared killed
9718   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9719   ins_encode %{
9720     __ subptr(rsp, 8); // reserve an 8-byte stack slot used to move doubles between XMM and x87
9721     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9722     __ fld_d(Address(rsp, 0)); // the exponent (src1) is pushed first
9723     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9724     __ fld_d(Address(rsp, 0)); // then the base (src0), which ends up on top of the x87 stack
9725     __ fast_pow(); // macro-assembler routine defined outside this view
9726     __ fstp_d(Address(rsp, 0)); // pop the x87 result into the stack slot
9727     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9728     __ addptr(rsp, 8); // release the stack slot
9729   %}
9730   ins_pipe( pipe_slow );
9731 %}
9732 
9733 instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9734   match(Set dst (ExpD src)); // dst = exp(src)
9735   effect(KILL rax, KILL rcx, KILL rdx, KILL cr); // rax, rcx, rdx and the flags are all declared killed
9736   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
9737   ins_encode %{
9738     __ subptr(rsp, 8); // reserve an 8-byte stack slot used to move doubles between XMM and x87
9739     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9740     __ fld_d(Address(rsp, 0)); // push the argument onto the x87 stack
9741     __ fast_exp(); // macro-assembler routine defined outside this view
9742     __ fstp_d(Address(rsp, 0)); // pop the x87 result into the stack slot
9743     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9744     __ addptr(rsp, 8); // release the stack slot
9745   %}
9746   ins_pipe( pipe_slow );
9747 %}
9748 
9749 //----------Arithmetic Conversion Instructions---------------------------------
9750 
9751 instruct roundFloat_nop(regF dst)
9752 %{
9753   match(Set dst (RoundFloat dst)); // RoundFloat on an XMM float register is matched in place
9754 
9755   ins_cost(0); // zero cost
9756   ins_encode(); // empty encoding list: no bytes are emitted for this rule
9757   ins_pipe(empty);
9758 %}
9759 
9760 instruct roundDouble_nop(regD dst)
9761 %{
9762   match(Set dst (RoundDouble dst)); // RoundDouble on an XMM double register is matched in place
9763 
9764   ins_cost(0); // zero cost
9765   ins_encode(); // empty encoding list: no bytes are emitted for this rule
9766   ins_pipe(empty);
9767 %}
9768 
9769 instruct convF2D_reg_reg(regD dst, regF src)
9770 %{
9771   match(Set dst (ConvF2D src)); // float -> double conversion, register to register
9772 
9773   format %{ "cvtss2sd $dst, $src" %}
9774   ins_encode %{
9775     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
9776   %}
9777   ins_pipe(pipe_slow); // XXX
9778 %}
9779 
// ConvF2D with the LoadF folded in: cvtss2sd reads its source from memory.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister dbl_out = $dst$$XMMRegister;
    __ cvtss2sd(dbl_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9790 
// ConvD2F, register source: a single cvtsd2ss from src into dst.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    XMMRegister dbl_in  = $src$$XMMRegister;
    __ cvtsd2ss(flt_out, dbl_in);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9801 
// ConvD2F with the LoadD folded in: cvtsd2ss reads its source from memory.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    __ cvtsd2ss(flt_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9812 
9813 // XXX do mem variants
// ConvF2I: float register to int register.
// Fast path is a single cvttss2sil. When its result equals 0x80000000 the
// source is stored to a fresh 8-byte stack slot, the f2i_fixup stub is
// called, and the slot is popped into dst. Flags are killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    int_out = $dst$$Register;
    XMMRegister flt_in  = $src$$XMMRegister;
    Label L_converted;

    __ cvttss2sil(int_out, flt_in);
    __ cmpl(int_out, 0x80000000);
    __ jccb(Assembler::notEqual, L_converted);
    // Slow path: hand the original value to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), flt_in);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
    __ pop(int_out);
    __ bind(L_converted);
  %}
  ins_pipe(pipe_slow);
%}
9840 
// ConvF2L: float register to long register.
// Fast path is a single cvttss2siq. The result is compared (cmp64) with the
// 64-bit word at StubRoutines::x86::double_sign_flip(); on equality the source
// is stored to a fresh 8-byte stack slot, the f2l_fixup stub is called, and
// the slot is popped into dst. Flags are killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    long_out = $dst$$Register;
    XMMRegister flt_in   = $src$$XMMRegister;
    Label L_converted;

    __ cvttss2siq(long_out, flt_in);
    __ cmp64(long_out,
             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
    __ jccb(Assembler::notEqual, L_converted);
    // Slow path: hand the original value to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), flt_in);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
    __ pop(long_out);
    __ bind(L_converted);
  %}
  ins_pipe(pipe_slow);
%}
9868 
// ConvD2I: double register to int register.
// Fast path is a single cvttsd2sil. When its result equals 0x80000000 the
// source is stored to a fresh 8-byte stack slot, the d2i_fixup stub is
// called, and the slot is popped into dst. Flags are killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    int_out = $dst$$Register;
    XMMRegister dbl_in  = $src$$XMMRegister;
    Label L_converted;

    __ cvttsd2sil(int_out, dbl_in);
    __ cmpl(int_out, 0x80000000);
    __ jccb(Assembler::notEqual, L_converted);
    // Slow path: hand the original value to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), dbl_in);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
    __ pop(int_out);
    __ bind(L_converted);
  %}
  ins_pipe(pipe_slow);
%}
9895 
// ConvD2L: double register to long register.
// Fast path is a single cvttsd2siq. The result is compared (cmp64) with the
// 64-bit word at StubRoutines::x86::double_sign_flip(); on equality the source
// is stored to a fresh 8-byte stack slot, the d2l_fixup stub is called, and
// the slot is popped into dst. Flags are killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Register    long_out = $dst$$Register;
    XMMRegister dbl_in   = $src$$XMMRegister;
    Label L_converted;

    __ cvttsd2siq(long_out, dbl_in);
    __ cmp64(long_out,
             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
    __ jccb(Assembler::notEqual, L_converted);
    // Slow path: hand the original value to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), dbl_in);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
    __ pop(long_out);
    __ bind(L_converted);
  %}
  ins_pipe(pipe_slow);
%}
9923 
// ConvI2F from an int register via cvtsi2ssl. Selected only when UseXmmI2F
// is off; convXI2F_reg covers the other setting.
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    Register    int_in  = $src$$Register;
    __ cvtsi2ssl(flt_out, int_in);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9935 
// ConvI2F with the LoadI folded in: cvtsi2ssl reads its source from memory.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    __ cvtsi2ssl(flt_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9946 
// ConvI2D from an int register via cvtsi2sdl. Selected only when UseXmmI2D
// is off; convXI2D_reg covers the other setting.
instruct convI2D_reg_reg(regD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dbl_out = $dst$$XMMRegister;
    Register    int_in  = $src$$Register;
    __ cvtsi2sdl(dbl_out, int_in);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9958 
// ConvI2D with the LoadI folded in: cvtsi2sdl reads its source from memory.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dbl_out = $dst$$XMMRegister;
    __ cvtsi2sdl(dbl_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9969 
// ConvI2F variant used when UseXmmI2F is on: the int is first moved into the
// destination XMM register with movdl and then converted in place with
// cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister xmm    = $dst$$XMMRegister;
    Register    int_in = $src$$Register;
    __ movdl(xmm, int_in);
    __ cvtdq2ps(xmm, xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9983 
// ConvI2D variant used when UseXmmI2D is on: the int is first moved into the
// destination XMM register with movdl and then converted in place with
// cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister xmm    = $dst$$XMMRegister;
    Register    int_in = $src$$Register;
    __ movdl(xmm, int_in);
    __ cvtdq2pd(xmm, xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
9997 
// ConvL2F from a long register: a single cvtsi2ssq.
instruct convL2F_reg_reg(regF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    Register    long_in = $src$$Register;
    __ cvtsi2ssq(flt_out, long_in);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10008 
// ConvL2F with the LoadL folded in: cvtsi2ssq reads its source from memory.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    __ cvtsi2ssq(flt_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10019 
// ConvL2D from a long register: a single cvtsi2sdq.
instruct convL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dbl_out = $dst$$XMMRegister;
    Register    long_in = $src$$Register;
    __ cvtsi2sdq(dbl_out, long_in);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10030 
// ConvL2D with the LoadL folded in: cvtsi2sdq reads its source from memory.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dbl_out = $dst$$XMMRegister;
    __ cvtsi2sdq(dbl_out, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10041 
// ConvI2L between registers, emitted as movslq (cost 125).
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register long_out = $dst$$Register;
    Register int_in   = $src$$Register;
    __ movslq(long_out, int_in);
  %}
  ins_pipe(ialu_reg_reg);
%}
10053 
10054 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10055 // %{
10056 //   match(Set dst (ConvI2L src));
10057 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10058 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10059 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10060 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10061 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10062 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10063 
10064 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10065 //   ins_encode(enc_copy(dst, src));
10066 // //   opcode(0x63); // needs REX.W
10067 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10068 //   ins_pipe(ialu_reg_reg);
10069 // %}
10070 
// Zero-extend convert int to long.
// Matches (AndL (ConvI2L src) mask) where mask is an immL_32bits operand and
// emits a 32-bit movl. The movl is skipped entirely when the allocator has
// assigned dst and src the same register encoding.
// The format text no longer ends in "\n\t": this is a single-line format and
// the trailing separator only produced an empty continuation line in the
// printed assembly.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // Same register: nothing to emit.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
10084 
// Zero-extend convert int to long, with the LoadI folded in.
// Matches (AndL (ConvI2L (LoadI src)) mask) where mask is an immL_32bits
// operand and emits a single 32-bit movl load from memory.
// The format text no longer ends in "\n\t": this is a single-line format and
// the trailing separator only produced an empty continuation line in the
// printed assembly.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10096 
// (AndL src mask) with an immL_32bits mask, implemented as a 32-bit movl from
// src to dst. Unlike convI2L_reg_reg_zex, the movl is always emitted.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register masked_out = $dst$$Register;
    Register long_in    = $src$$Register;
    __ movl(masked_out, long_in);
  %}
  ins_pipe(ialu_reg_reg);
%}
10107 
// ConvL2I between registers, emitted as a 32-bit movl.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register int_out = $dst$$Register;
    Register long_in = $src$$Register;
    __ movl(int_out, long_in);
  %}
  ins_pipe(ialu_reg_reg);
%}
10118 
10119 
// MoveF2I from a float stack slot into an int register: a movl load from
// rsp plus the slot displacement (cost 125).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10131 
// MoveI2F from an int stack slot into a float register: a movflt load from
// rsp plus the slot displacement (cost 125).
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10143 
// MoveD2L from a double stack slot into a long register: a movq load from
// rsp plus the slot displacement (cost 125).
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10155 
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is off. Emits movdbl from rsp plus the slot
// displacement; the format labels it movlpd.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10168 
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is on. Emits movdbl from rsp plus the slot
// displacement; the format labels it movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10181 
10182 
// MoveF2I from a float register into an int stack slot: a movflt store to
// rsp plus the slot displacement (cost 95).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10194 
// MoveI2F from an int register into a float stack slot: a movl store to
// rsp plus the slot displacement (cost 100).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
10206 
// MoveD2L from a double register into a long stack slot: a movdbl store to
// rsp plus the slot displacement (cost 95).
// The format tag now names this instruct (MoveD2L_reg_stack); it previously
// carried the tag of the sibling MoveL2D_reg_stack, which made the two
// indistinguishable in printed assembly.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10218 
// MoveL2D from a long register into a double stack slot: a movq store to
// rsp plus the slot displacement (cost 100).
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10230 
// MoveF2I directly between registers: movdl from the XMM source into the
// general-purpose destination (cost 85).
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
10241 
// MoveD2L directly between registers: movdq from the XMM source into the
// general-purpose destination (cost 85).
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
10252 
// MoveI2F directly between registers: movdl from the general-purpose source
// into the XMM destination (cost 100).
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
10263 
// MoveL2D directly between registers: movdq from the general-purpose source
// into the XMM destination (cost 100).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
10274 
10275 
10276 // =======================================================================
10277 // fast clearing of an array
// ClearArray when UseFastStosb is off. The count is pinned to rcx, the base
// to rdi and the zero temp to rax; count and base are consumed and clobbered
// (USE_KILL), rax and the flags are killed. Code generation is delegated to
// clear_mem(base, cnt, zero).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
                  rFlagsReg cr)
%{
  predicate(!UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
            "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
  ins_encode %{
    Register base_reg  = $base$$Register;
    Register count_reg = $cnt$$Register;
    Register zero_reg  = $zero$$Register;
    __ clear_mem(base_reg, count_reg, zero_reg);
  %}
  ins_pipe(pipe_slow);
%}
10292 
// ClearArray when UseFastStosb is on. Operand pinning and effects are the same
// as for rep_stos, and code generation is likewise delegated to
// clear_mem(base, cnt, zero); only the predicate and format text differ.
instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
                        rFlagsReg cr)
%{
  predicate(UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
            "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
            "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
  ins_encode %{
    Register base_reg  = $base$$Register;
    Register count_reg = $cnt$$Register;
    Register zero_reg  = $zero$$Register;
    __ clear_mem(base_reg, count_reg, zero_reg);
  %}
  ins_pipe( pipe_slow );
%}
10307 
// StrComp intrinsic. Operands are pinned: str1 in rdi, cnt1 in rcx, str2 in
// rsi, cnt2 in rdx, result in rax. All four inputs are USE_KILL, tmp1 is an
// XMM temp and the flags are killed. Code generation is delegated to
// MacroAssembler string_compare.
instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                        rax_RegI result, regD tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    s1      = $str1$$Register;
    Register    s2      = $str2$$Register;
    Register    len1    = $cnt1$$Register;
    Register    len2    = $cnt2$$Register;
    Register    res     = $result$$Register;
    XMMRegister vec_tmp = $tmp1$$XMMRegister;
    __ string_compare(s1, s2, len1, len2, res, vec_tmp);
  %}
  ins_pipe( pipe_slow );
%}
10322 
// Fast search of a substring whose length is a compile-time constant.
// Requires UseSSE42Intrinsics. The constant length selects the generator:
// string_indexofC8 for 8 or more elements, string_indexof otherwise. The cnt2
// register (rax) is not an input here; it is only killed.
instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_reg   = $cnt2$$Register;
    Register    res          = $result$$Register;
    XMMRegister vec_tmp      = $vec$$XMMRegister;
    Register    gp_tmp       = $tmp$$Register;
    int needle_len = (int)$int_cnt2$$constant;

    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, needle_reg,
                        needle_len, res, vec_tmp, gp_tmp);
    } else {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8(haystack, needle, haystack_len, needle_reg,
                          needle_len, res, vec_tmp, gp_tmp);
    }
  %}
  ins_pipe( pipe_slow );
%}
10351 
// StrIndexOf intrinsic with a substring length known only at run time (cnt2
// in rax). Requires UseSSE42Intrinsics. Delegates to string_indexof, passing
// -1 in the constant-length position.
instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    Register    res          = $result$$Register;
    XMMRegister vec_tmp      = $vec$$XMMRegister;
    Register    gp_tmp       = $tmp$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), res, vec_tmp, gp_tmp);
  %}
  ins_pipe( pipe_slow );
%}
10368 
// Fast string equals (StrEquals).
// str1 (rdi), str2 (rsi) and cnt (rcx) are USE_KILL; the result is produced in
// rax. Two XMM temps plus rbx are scratch and the flags are killed. Delegates
// to char_arrays_equals with its first argument false.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register    lhs      = $str1$$Register;
    Register    rhs      = $str2$$Register;
    Register    len      = $cnt$$Register;
    Register    res      = $result$$Register;
    Register    gp_tmp   = $tmp3$$Register;
    XMMRegister vec_tmp1 = $tmp1$$XMMRegister;
    XMMRegister vec_tmp2 = $tmp2$$XMMRegister;
    __ char_arrays_equals(false, lhs, rhs, len, res, gp_tmp, vec_tmp1, vec_tmp2);
  %}
  ins_pipe( pipe_slow );
%}
10384 
// Fast array equals (AryEq).
// ary1 (rdi) and ary2 (rsi) are USE_KILL; the result is produced in rax. rcx
// is passed in the position string_equals uses for the count, rbx and two XMM
// registers are scratch, and the flags are killed. Delegates to
// char_arrays_equals with its first argument true.
instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                      regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    lhs      = $ary1$$Register;
    Register    rhs      = $ary2$$Register;
    Register    len_tmp  = $tmp3$$Register;
    Register    res      = $result$$Register;
    Register    gp_tmp   = $tmp4$$Register;
    XMMRegister vec_tmp1 = $tmp1$$XMMRegister;
    XMMRegister vec_tmp2 = $tmp2$$XMMRegister;
    __ char_arrays_equals(true, lhs, rhs, len_tmp, res, gp_tmp, vec_tmp1, vec_tmp2);
  %}
  ins_pipe( pipe_slow );
%}
10401 
// Encode char[] to byte[] in ISO_8859_1 (EncodeISOArray).
// src (rsi), dst (rdi) and len (rdx) are USE_KILL; the result is produced in
// rax. Four XMM temps and rcx are scratch and the flags are killed. Delegates
// to MacroAssembler encode_iso_array.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register    chars    = $src$$Register;
    Register    bytes    = $dst$$Register;
    Register    length   = $len$$Register;
    XMMRegister vec_tmp1 = $tmp1$$XMMRegister;
    XMMRegister vec_tmp2 = $tmp2$$XMMRegister;
    XMMRegister vec_tmp3 = $tmp3$$XMMRegister;
    XMMRegister vec_tmp4 = $tmp4$$XMMRegister;
    Register    gp_tmp   = $tmp5$$Register;
    Register    res      = $result$$Register;
    __ encode_iso_array(chars, bytes, length,
                        vec_tmp1, vec_tmp2, vec_tmp3, vec_tmp4,
                        gp_tmp, res);
  %}
  ins_pipe( pipe_slow );
%}
10417 
10418 
10419 //----------Control Flow Instructions------------------------------------------
10420 // Signed compare Instructions
10421 
10422 // XXX more variants!!
// Signed int compare of two registers: (CmpI op1 op2) sets cr.
// Encoding is built from enc_classes: REX_reg_reg prefix handling, the
// primary opcode 0x3B (OpcP), then the reg_reg operand byte. Printed as cmpl.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
10433 
// Signed int compare of a register with an immediate: (CmpI op1 op2) sets cr.
// Uses opcode 0x81 with /7; OpcSErm and Con8or32 emit the opcode/ModRM and
// the immediate (presumably choosing an 8- or 32-bit immediate form, going
// by the enc_class names; the enc_classes are defined elsewhere in this file).
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x81, 0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
10443 
// Signed int compare of a register with a value loaded from memory:
// (CmpI op1 (LoadI op2)) sets cr, with the load folded into the compare.
// Opcode 0x3B with a reg/mem operand; cost is set to 500.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
10454 
// Int compare against the constant zero: (CmpI src zero) sets cr.
// Implemented as a test of src with itself (opcode 0x85, reg_reg(src, src))
// instead of a compare with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
10464 
// Fuses an AndI with a compare against zero: (CmpI (AndI src con) zero)
// sets cr without materializing the AND result. Opcode 0xF7 with /0,
// followed by a 32-bit immediate (Con32). Printed as testl.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
10474 
// Fuses an AndI with a memory operand and a compare against zero:
// (CmpI (AndI src (LoadI mem)) zero) sets cr. Opcode 0x85 with a reg/mem
// operand. Printed as testl.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
10484 
10485 // Unsigned compare Instructions; really, same as signed except they
10486 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare of two registers: (CmpU op1 op2) sets an rFlagsRegU.
// The encoding is the same opcode 0x3B reg/reg form used by compI_rReg; only
// the flags operand class differs.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
10496 
// Unsigned int compare of a register with an immediate: (CmpU op1 op2) sets
// an rFlagsRegU. Same opcode 0x81 /7 encoding as compI_rReg_imm.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
10506 
// Unsigned int compare of a register with a value loaded from memory:
// (CmpU op1 (LoadI op2)) sets an rFlagsRegU. Opcode 0x3B with a reg/mem
// operand; cost is set to 500.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
10517 
10518 // // // Cisc-spilled version of cmpU_rReg
10519 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10520 // //%{
10521 // //  match(Set cr (CmpU (LoadI op1) op2));
10522 // //
10523 // //  format %{ "CMPu   $op1,$op2" %}
10524 // //  ins_cost(500);
10525 // //  opcode(0x39);  /* Opcode 39 /r */
10526 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10527 // //%}
10528 
// Unsigned int compare against the constant zero: (CmpU src zero) sets an
// rFlagsRegU. Implemented as a test of src with itself (opcode 0x85).
instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl  $src, $src\t# unsigned" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
10538 
// Pointer compare of two registers: (CmpP op1 op2) sets an rFlagsRegU.
// Opcode 0x3B with the wide prefix enc_class (REX_reg_reg_wide). Printed as
// cmpq.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
10548 
// Pointer compare of a register with a pointer loaded from memory:
// (CmpP op1 (LoadP op2)) sets an rFlagsRegU. Opcode 0x3B with the wide prefix
// enc_class and a reg/mem operand; cost is set to 500.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
10559 
10560 // // // Cisc-spilled version of cmpP_rReg
10561 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10562 // //%{
10563 // //  match(Set cr (CmpP (LoadP op1) op2));
10564 // //
10565 // //  format %{ "CMPu   $op1,$op2" %}
10566 // //  ins_cost(500);
10567 // //  opcode(0x39);  /* Opcode 39 /r */
10568 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10569 // //%}
10570 
// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
//
// Same match rule and encoding as compP_rReg_mem, but without the explicit
// ins_cost(500). The predicate restricts it to loads whose bottom type
// carries no relocation (reloc() == relocInfo::none); n->in(2) is the LoadP
// and n->in(2)->in(2) is the load's second input.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
10585 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
//
// Pointer null test on a register: (CmpP src zero) is implemented as a test
// of src with itself (opcode 0x85, wide prefix enc_class). Note the flags
// operand is rFlagsReg here, not the rFlagsRegU used by the other CmpP rules.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
10597 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
//
// Pointer null test directly on memory: (CmpP (LoadP op) zero).
// Selected when compressed oops are off or the narrow-oop base is non-NULL;
// testP_mem_reg0 handles the zero-base compressed-oops case. Encoded as
// opcode 0xF7 with /0 on the memory operand, wide prefix, followed by the
// 32-bit immediate 0xFFFFFFFF. Cost is set to 500.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode(REX_mem_wide(op),
             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
  ins_pipe(ialu_cr_reg_imm);
%}
10612 
// Pointer null test directly on memory for the configuration where compressed
// oops are on and both the narrow-oop base and the narrow-klass base are NULL.
// Instead of an immediate, the memory operand is compared against r12 (the
// format text notes R12_heapbase==0).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Register heapbase_zero = r12;
    __ cmpq(heapbase_zero, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
10624 
// Compressed-pointer compare of two registers: (CmpN op1 op2) sets an
// rFlagsRegU via a 32-bit cmpl.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
10633 
// Compressed-pointer compare of a register with a narrow value loaded from
// memory: (CmpN src (LoadN mem)) sets an rFlagsRegU via cmpl reg, mem.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ cmpl(narrow, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
10644 
// Compressed-pointer compare of a register with a narrow-oop constant:
// (CmpN op1 op2). The constant is passed as a jobject to cmp_narrow_oop.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register narrow  = $op1$$Register;
    jobject  oop_con = (jobject)$op2$$constant;
    __ cmp_narrow_oop(narrow, oop_con);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
10654 
// Compressed-pointer compare of a narrow-oop constant with a narrow value in
// memory: (CmpN src (LoadN mem)). The emitted compare has the memory operand
// first and the constant second (cmp_narrow_oop(Address, jobject)).
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    jobject oop_con = (jobject)$src$$constant;
    __ cmp_narrow_oop($mem$$Address, oop_con);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
10665 
// Compressed-klass compare of a register with a narrow-klass constant:
// (CmpN op1 op2). The constant is passed as a Klass* to cmp_narrow_klass.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register narrow    = $op1$$Register;
    Klass*   klass_con = (Klass*)$op2$$constant;
    __ cmp_narrow_klass(narrow, klass_con);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
10675 
// Compressed-klass compare of a narrow-klass constant with a narrow klass in
// memory: (CmpN src (LoadNKlass mem)). The emitted compare has the memory
// operand first and the constant second (cmp_narrow_klass(Address, Klass*)).
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Klass* klass_con = (Klass*)$src$$constant;
    __ cmp_narrow_klass($mem$$Address, klass_con);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
10686 
// Compressed-pointer null test on a register: (CmpN src zero) is implemented
// as a 32-bit testl of src with itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
10694 
// Compressed-pointer null test directly on memory: (CmpN (LoadN mem) zero).
// Selected only when the narrow-oop base is non-NULL; testN_mem_reg0 handles
// the zero-base case. Cost is set to 500.
//
// The flags must describe the loaded 32-bit value compared with zero. The
// earlier encoding emitted cmpl mem, 0xFFFFFFFF, which sets ZF when the value
// is all ones instead of when it is zero, and did not agree with its own
// "testl" format text. Comparing against an immediate 0 produces the same
// ZF/SF/CF/OF as testl mem, 0xffffffff and keeps using the
// cmpl(Address, imm32) assembler form this rule already depended on.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(Universe::narrow_oop_base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
10707 
// Test a compressed pointer in memory against the zero constant (immN0) by
// comparing it with r12.  Selected only when both narrow_oop_base() and
// narrow_klass_base() are NULL; per the format text, R12 (heap base) then
// holds zero, so no immediate needs to be encoded.
10708 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
10709 %{
10710   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
10711   match(Set cr (CmpN (LoadN mem) zero));
10712 
10713   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
10714   ins_encode %{
10715     __ cmpl(r12, $mem$$Address);
10716   %}
10717   ins_pipe(ialu_cr_reg_mem);
10718 %}
10719 
10720 // Yanked all unsigned pointer compare operations.
10721 // Pointer compares are done with CmpP which is already unsigned.
10722 
// Long compare, register with register: matches CmpL(op1, op2) and is
// formatted as "cmpq op1, op2".  Encoded as a wide REX prefix, the primary
// opcode 0x3B, and a reg/reg operand byte.
10723 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10724 %{
10725   match(Set cr (CmpL op1 op2));
10726 
10727   format %{ "cmpq    $op1, $op2" %}
10728   opcode(0x3B);  /* Opcode 3B /r */
10729   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10730   ins_pipe(ialu_cr_reg_reg);
10731 %}
10732 
// Long compare, register with a constant that fits the immL32 operand:
// matches CmpL(op1, op2), formatted as "cmpq op1, op2".  Uses opcode 81 /7;
// the Con8or32 encoding name suggests the immediate is emitted as 8 or 32
// bits depending on its value (encoding class not visible here -- confirm).
10733 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10734 %{
10735   match(Set cr (CmpL op1 op2));
10736 
10737   format %{ "cmpq    $op1, $op2" %}
10738   opcode(0x81, 0x07); /* Opcode 81 /7 */
10739   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
10740   ins_pipe(ialu_cr_reg_imm);
10741 %}
10742 
// Long compare, register with a long loaded from memory: matches
// CmpL(op1, LoadL op2) so the load is folded into the compare.  Formatted as
// "cmpq op1, op2"; encoded with a wide REX prefix, opcode 0x3B and a reg/mem
// operand.
10743 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10744 %{
10745   match(Set cr (CmpL op1 (LoadL op2)));
10746 
10747   format %{ "cmpq    $op1, $op2" %}
10748   opcode(0x3B); /* Opcode 3B /r */
10749   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10750   ins_pipe(ialu_cr_reg_mem);
10751 %}
10752 
// Compare a long register with the zero constant (immL0): matches
// CmpL(src, zero) and is formatted as "testq src, src" (opcode 0x85 with the
// same register in both operand positions); the zero operand is not encoded.
10753 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10754 %{
10755   match(Set cr (CmpL src zero));
10756 
10757   format %{ "testq   $src, $src" %}
10758   opcode(0x85);
10759   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10760   ins_pipe(ialu_cr_reg_imm);
10761 %}
10762 
// Fold "(src & con) compared with zero" into one instruction: matches
// CmpL(AndL(src, con), zero) with con an immL32 and is formatted as
// "testq src, con".  Encoded with opcode F7 /0 followed by a 32-bit constant.
10763 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
10764 %{
10765   match(Set cr (CmpL (AndL src con) zero));
10766 
10767   format %{ "testq   $src, $con\t# long" %}
10768   opcode(0xF7, 0x00);
10769   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
10770   ins_pipe(ialu_cr_reg_imm);
10771 %}
10772 
// Fold "(src & long-in-memory) compared with zero" into one instruction:
// matches CmpL(AndL(src, LoadL mem), zero) and is formatted as
// "testq src, mem" (opcode 0x85 with a reg/mem operand).
10773 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
10774 %{
10775   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
10776 
10777   format %{ "testq   $src, $mem" %}
10778   opcode(0x85);
10779   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
10780   ins_pipe(ialu_cr_reg_mem);
10781 %}
10782 
10783 // Manifest a CmpL result in an integer register.  Very painful.
10784 // This is the test to avoid.
// Materialise the three-way result of a long compare (CmpL3) in an int
// register.  Per the format text the sequence is: cmpq; preload dst with -1;
// short-jump to done if less; otherwise setne/movzbl leaves 0 (equal) or 1
// (not equal, hence greater).  The flags register is clobbered (KILL flags).
// The actual bytes come from the cmpl3_flag encoding class.
10785 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
10786 %{
10787   match(Set dst (CmpL3 src1 src2));
10788   effect(KILL flags);
10789 
10790   ins_cost(275); // XXX
10791   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
10792             "movl    $dst, -1\n\t"
10793             "jl,s    done\n\t"
10794             "setne   $dst\n\t"
10795             "movzbl  $dst, $dst\n\t"
10796     "done:" %}
10797   ins_encode(cmpl3_flag(src1, src2, dst));
10798   ins_pipe(pipe_slow);
10799 %}
10800 
10801 //----------Max and Min--------------------------------------------------------
10802 // Min Instructions
10803 
// Conditional move "dst = src if greater", used as the second half of the
// minI_rReg expansion.  It has no match rule of its own: the effect clause
// declares dst as both read and written and src/cr as inputs.
// Opcode bytes 0F 4F.
10804 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
10805 %{
10806   effect(USE_DEF dst, USE src, USE cr);
10807 
10808   format %{ "cmovlgt $dst, $src\t# min" %}
10809   opcode(0x0F, 0x4F);
10810   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10811   ins_pipe(pipe_cmov_reg);
10812 %}
10813 
10814 
// Integer minimum, dst = min(dst, src).  Expands into two instructions:
// compare dst with src into a temporary flags register, then conditionally
// move src into dst when dst was greater (cmovI_reg_g).
10815 instruct minI_rReg(rRegI dst, rRegI src)
10816 %{
10817   match(Set dst (MinI dst src));
10818 
10819   ins_cost(200);
10820   expand %{
10821     rFlagsReg cr;
10822     compI_rReg(cr, dst, src);
10823     cmovI_reg_g(dst, src, cr);
10824   %}
10825 %}
10826 
// Conditional move "dst = src if less", used as the second half of the
// maxI_rReg expansion.  It has no match rule of its own: the effect clause
// declares dst as both read and written and src/cr as inputs.
// Opcode bytes 0F 4C.
10827 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
10828 %{
10829   effect(USE_DEF dst, USE src, USE cr);
10830 
10831   format %{ "cmovllt $dst, $src\t# max" %}
10832   opcode(0x0F, 0x4C);
10833   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10834   ins_pipe(pipe_cmov_reg);
10835 %}
10836 
10837 
// Integer maximum, dst = max(dst, src).  Expands into two instructions:
// compare dst with src into a temporary flags register, then conditionally
// move src into dst when dst was less (cmovI_reg_l).
10838 instruct maxI_rReg(rRegI dst, rRegI src)
10839 %{
10840   match(Set dst (MaxI dst src));
10841 
10842   ins_cost(200);
10843   expand %{
10844     rFlagsReg cr;
10845     compI_rReg(cr, dst, src);
10846     cmovI_reg_l(dst, src, cr);
10847   %}
10848 %}
10849 
10850 // ============================================================================
10851 // Branch Instructions
10852 
10853 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for Goto nodes, long form.  The jmp is emitted with
// its second argument false ("always long jump") and the instruction size is
// declared as exactly 5 bytes.  jmpDir_short is the 2-byte replacement.
10854 instruct jmpDir(label labl)
10855 %{
10856   match(Goto);
10857   effect(USE labl);
10858 
10859   ins_cost(300);
10860   format %{ "jmp     $labl" %}
10861   size(5);
10862   ins_encode %{
10863     Label* L = $labl$$label;
10864     __ jmp(*L, false); // Always long jump
10865   %}
10866   ins_pipe(pipe_jmp);
10867 %}
10868 
10869 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If nodes on rFlagsReg, long form.  The condition
// code comes from the cmpOp operand; the jcc is always emitted in its long
// encoding and the size is declared as exactly 6 bytes.
10870 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
10871 %{
10872   match(If cop cr);
10873   effect(USE labl);
10874 
10875   ins_cost(300);
10876   format %{ "j$cop     $labl" %}
10877   size(6);
10878   ins_encode %{
10879     Label* L = $labl$$label;
10880     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10881   %}
10882   ins_pipe(pipe_jcc);
10883 %}
10884 
10885 // Counted-loop-end conditional jump - Label defines a relative address from Jcc+1
// Conditional back-branch for CountedLoopEnd nodes on rFlagsReg, long form
// (always-long jcc, declared size 6 bytes).  Same encoding as jmpCon; only
// the matched ideal node and the "# loop end" format annotation differ.
10886 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
10887 %{
10888   match(CountedLoopEnd cop cr);
10889   effect(USE labl);
10890 
10891   ins_cost(300);
10892   format %{ "j$cop     $labl\t# loop end" %}
10893   size(6);
10894   ins_encode %{
10895     Label* L = $labl$$label;
10896     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10897   %}
10898   ins_pipe(pipe_jcc);
10899 %}
10900 
10901 // Counted-loop-end conditional jump (cmpOpU/rFlagsRegU form) - Label defines a relative address from Jcc+1
// CountedLoopEnd branch taking a cmpOpU condition and rFlagsRegU flags,
// long form (always-long jcc, declared size 6 bytes).
10902 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
10903   match(CountedLoopEnd cop cmp);
10904   effect(USE labl);
10905 
10906   ins_cost(300);
10907   format %{ "j$cop,u   $labl\t# loop end" %}
10908   size(6);
10909   ins_encode %{
10910     Label* L = $labl$$label;
10911     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10912   %}
10913   ins_pipe(pipe_jcc);
10914 %}
10915 
// CountedLoopEnd branch taking a cmpOpUCF condition and rFlagsRegUCF flags,
// long form (always-long jcc, declared size 6 bytes).  Carries a lower cost
// (200) than the other long loop-end branches (300).
10916 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
10917   match(CountedLoopEnd cop cmp);
10918   effect(USE labl);
10919 
10920   ins_cost(200);
10921   format %{ "j$cop,u   $labl\t# loop end" %}
10922   size(6);
10923   ins_encode %{
10924     Label* L = $labl$$label;
10925     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10926   %}
10927   ins_pipe(pipe_jcc);
10928 %}
10929 
10930 // Jump Direct Conditional - using unsigned comparison
// Conditional branch for If nodes taking a cmpOpU condition and rFlagsRegU
// flags, long form (always-long jcc, declared size 6 bytes).
10931 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
10932   match(If cop cmp);
10933   effect(USE labl);
10934 
10935   ins_cost(300);
10936   format %{ "j$cop,u  $labl" %}
10937   size(6);
10938   ins_encode %{
10939     Label* L = $labl$$label;
10940     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10941   %}
10942   ins_pipe(pipe_jcc);
10943 %}
10944 
// Conditional branch for If nodes taking a cmpOpUCF condition and
// rFlagsRegUCF flags, long form (always-long jcc, declared size 6 bytes).
// Carries a lower cost (200) than jmpConU (300).
10945 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
10946   match(If cop cmp);
10947   effect(USE labl);
10948 
10949   ins_cost(200);
10950   format %{ "j$cop,u  $labl" %}
10951   size(6);
10952   ins_encode %{
10953     Label* L = $labl$$label;
10954     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10955   %}
10956   ins_pipe(pipe_jcc);
10957 %}
10958 
// Two-instruction conditional branch for If nodes whose condition operand is
// a cmpOpUCF2, long form.  Only equal and notEqual are handled; any other
// condition code hits ShouldNotReachHere().
//   notEqual: branch to the label on parity OR on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Presumably the parity flag marks an unordered floating-point compare
// result -- confirm against the cmpOpUCF2 operand definition.
// No size() is declared for this long form.
10959 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
10960   match(If cop cmp);
10961   effect(USE labl);
10962 
10963   ins_cost(200);
10964   format %{ $$template
10965     if ($cop$$cmpcode == Assembler::notEqual) {
10966       $$emit$$"jp,u   $labl\n\t"
10967       $$emit$$"j$cop,u   $labl"
10968     } else {
10969       $$emit$$"jp,u   done\n\t"
10970       $$emit$$"j$cop,u   $labl\n\t"
10971       $$emit$$"done:"
10972     }
10973   %}
10974   ins_encode %{
10975     Label* l = $labl$$label;
10976     if ($cop$$cmpcode == Assembler::notEqual) {
10977       __ jcc(Assembler::parity, *l, false);
10978       __ jcc(Assembler::notEqual, *l, false);
10979     } else if ($cop$$cmpcode == Assembler::equal) {
10980       Label done;
        // The skip over the following jcc is local, so a short jccb is used.
10981       __ jccb(Assembler::parity, done);
10982       __ jcc(Assembler::equal, *l, false);
10983       __ bind(done);
10984     } else {
10985        ShouldNotReachHere();
10986     }
10987   %}
10988   ins_pipe(pipe_jcc);
10989 %}
10990 
10991 // ============================================================================
10992 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
10993 // superklass array for an instance of the superklass.  Set a hidden
10994 // internal cache on a hit (cache is checked with exposed code in
10995 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
10996 // encoding ALSO sets flags.
10997 
// Slow-path subtype check that produces its answer in a register.
// Operands are pinned: result in RDI, sub in RSI, super in RAX, and RCX is
// a scratch counter; RCX and the flags are killed.  Per the format text the
// code scans sub's secondary-supers array with "repne scasq" for super, and
// on a hit updates the secondary-super cache and zeroes RDI; on a miss RDI
// is left non-zero.  opcode(0x1) selects the variant of
// enc_PartialSubtypeCheck that performs the final XOR of RDI.
10998 instruct partialSubtypeCheck(rdi_RegP result,
10999                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11000                              rFlagsReg cr)
11001 %{
11002   match(Set result (PartialSubtypeCheck sub super));
11003   effect(KILL rcx, KILL cr);
11004 
11005   ins_cost(1100);  // slightly larger than the next version
11006   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11007             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11008             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11009             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11010             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11011             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11012             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
11013     "miss:\t" %}
11014 
11015   opcode(0x1); // Force a XOR of RDI
11016   ins_encode(enc_PartialSubtypeCheck());
11017   ins_pipe(pipe_slow);
11018 %}
11019 
// Slow-path subtype check whose result is consumed only as flags: matches
// CmpP(PartialSubtypeCheck(sub, super), zero) and defines cr directly.
// Same pinned registers and scan as partialSubtypeCheck, but RDI is only
// killed (not a result), so opcode(0x0) selects the variant of
// enc_PartialSubtypeCheck without the final XOR of RDI.  Slightly cheaper
// (cost 1000) than the register-result version.
11020 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11021                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11022                                      immP0 zero,
11023                                      rdi_RegP result)
11024 %{
11025   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11026   effect(KILL rcx, KILL result);
11027 
11028   ins_cost(1000);
11029   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11030             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11031             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11032             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11033             "jne,s   miss\t\t# Missed: flags nz\n\t"
11034             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11035     "miss:\t" %}
11036 
11037   opcode(0x0); // No need to XOR RDI
11038   ins_encode(enc_PartialSubtypeCheck());
11039   ins_pipe(pipe_slow);
11040 %}
11041 
11042 // ============================================================================
11043 // Branch Instructions -- short offset versions
11044 //
11045 // These instructions are used to replace jumps of a long offset (the default
11046 // match) with jumps of a shorter offset.  These instructions are all tagged
11047 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11048 // match rules in general matching.  Instead, the ADLC generates a conversion
11049 // method in the MachNode which can be used to do in-place replacement of the
11050 // long variant with the shorter variant.  The compiler will determine if a
11051 // branch can be taken by the is_short_branch_offset() predicate in the machine
11052 // specific code section of the file.
11053 
11054 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same Goto match, but emitted with
// jmpb and a declared size of 2 bytes.  ins_short_branch(1) tags it as a
// short-branch variant.
11055 instruct jmpDir_short(label labl) %{
11056   match(Goto);
11057   effect(USE labl);
11058 
11059   ins_cost(300);
11060   format %{ "jmp,s   $labl" %}
11061   size(2);
11062   ins_encode %{
11063     Label* L = $labl$$label;
11064     __ jmpb(*L);
11065   %}
11066   ins_pipe(pipe_jmp);
11067   ins_short_branch(1);
11068 %}
11069 
11070 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same If match on rFlagsReg, emitted
// with jccb and a declared size of 2 bytes; tagged ins_short_branch(1).
11071 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
11072   match(If cop cr);
11073   effect(USE labl);
11074 
11075   ins_cost(300);
11076   format %{ "j$cop,s   $labl" %}
11077   size(2);
11078   ins_encode %{
11079     Label* L = $labl$$label;
11080     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11081   %}
11082   ins_pipe(pipe_jcc);
11083   ins_short_branch(1);
11084 %}
11085 
11086 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same CountedLoopEnd match on
// rFlagsReg, emitted with jccb and a declared size of 2 bytes; tagged
// ins_short_branch(1).
11087 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
11088   match(CountedLoopEnd cop cr);
11089   effect(USE labl);
11090 
11091   ins_cost(300);
11092   format %{ "j$cop,s   $labl\t# loop end" %}
11093   size(2);
11094   ins_encode %{
11095     Label* L = $labl$$label;
11096     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11097   %}
11098   ins_pipe(pipe_jcc);
11099   ins_short_branch(1);
11100 %}
11101 
11102 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: same CountedLoopEnd match with
// cmpOpU/rFlagsRegU operands, emitted with jccb and a declared size of
// 2 bytes; tagged ins_short_branch(1).
11103 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11104   match(CountedLoopEnd cop cmp);
11105   effect(USE labl);
11106 
11107   ins_cost(300);
11108   format %{ "j$cop,us  $labl\t# loop end" %}
11109   size(2);
11110   ins_encode %{
11111     Label* L = $labl$$label;
11112     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11113   %}
11114   ins_pipe(pipe_jcc);
11115   ins_short_branch(1);
11116 %}
11117 
// Short-offset replacement for jmpLoopEndUCF: same CountedLoopEnd match
// with cmpOpUCF/rFlagsRegUCF operands, emitted with jccb and a declared size
// of 2 bytes; tagged ins_short_branch(1).  Note the cost here is 300 while
// the long form uses 200.
11118 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11119   match(CountedLoopEnd cop cmp);
11120   effect(USE labl);
11121 
11122   ins_cost(300);
11123   format %{ "j$cop,us  $labl\t# loop end" %}
11124   size(2);
11125   ins_encode %{
11126     Label* L = $labl$$label;
11127     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11128   %}
11129   ins_pipe(pipe_jcc);
11130   ins_short_branch(1);
11131 %}
11132 
11133 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: same If match with
// cmpOpU/rFlagsRegU operands, emitted with jccb and a declared size of
// 2 bytes; tagged ins_short_branch(1).
11134 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11135   match(If cop cmp);
11136   effect(USE labl);
11137 
11138   ins_cost(300);
11139   format %{ "j$cop,us  $labl" %}
11140   size(2);
11141   ins_encode %{
11142     Label* L = $labl$$label;
11143     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11144   %}
11145   ins_pipe(pipe_jcc);
11146   ins_short_branch(1);
11147 %}
11148 
// Short-offset replacement for jmpConUCF: same If match with
// cmpOpUCF/rFlagsRegUCF operands, emitted with jccb and a declared size of
// 2 bytes; tagged ins_short_branch(1).  Note the cost here is 300 while the
// long form uses 200.
11149 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11150   match(If cop cmp);
11151   effect(USE labl);
11152 
11153   ins_cost(300);
11154   format %{ "j$cop,us  $labl" %}
11155   size(2);
11156   ins_encode %{
11157     Label* L = $labl$$label;
11158     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11159   %}
11160   ins_pipe(pipe_jcc);
11161   ins_short_branch(1);
11162 %}
11163 
// Short-offset replacement for jmpConUCF2: the same two-branch sequence
// built entirely from jccb, with a declared total size of 4 bytes (two
// 2-byte branches).  Only equal and notEqual are handled; any other
// condition code hits ShouldNotReachHere().
//   notEqual: branch to the label on parity OR on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
11164 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11165   match(If cop cmp);
11166   effect(USE labl);
11167 
11168   ins_cost(300);
11169   format %{ $$template
11170     if ($cop$$cmpcode == Assembler::notEqual) {
11171       $$emit$$"jp,u,s   $labl\n\t"
11172       $$emit$$"j$cop,u,s   $labl"
11173     } else {
11174       $$emit$$"jp,u,s   done\n\t"
11175       $$emit$$"j$cop,u,s  $labl\n\t"
11176       $$emit$$"done:"
11177     }
11178   %}
11179   size(4);
11180   ins_encode %{
11181     Label* l = $labl$$label;
11182     if ($cop$$cmpcode == Assembler::notEqual) {
11183       __ jccb(Assembler::parity, *l);
11184       __ jccb(Assembler::notEqual, *l);
11185     } else if ($cop$$cmpcode == Assembler::equal) {
11186       Label done;
11187       __ jccb(Assembler::parity, done);
11188       __ jccb(Assembler::equal, *l);
11189       __ bind(done);
11190     } else {
11191        ShouldNotReachHere();
11192     }
11193   %}
11194   ins_pipe(pipe_jcc);
11195   ins_short_branch(1);
11196 %}
11197 
11198 // ============================================================================
11199 // inlined locking and unlocking
11200 
// Inlined monitor-enter fast path: matches FastLock(object, box) and
// defines the flags register with its outcome.  box is pinned to RBX and is
// both used and destroyed (USE_KILL); tmp (pinned to RAX) and scr are
// scratch registers.  The code itself comes from the Fast_Lock encoding
// class.
11201 instruct cmpFastLock(rFlagsReg cr,
11202                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11203 %{
11204   match(Set cr (FastLock object box));
11205   effect(TEMP tmp, TEMP scr, USE_KILL box);
11206 
11207   ins_cost(300);
11208   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
11209   ins_encode(Fast_Lock(object, box, tmp, scr));
11210   ins_pipe(pipe_slow);
11211 %}
11212 
// Inlined monitor-exit fast path: matches FastUnlock(object, box) and
// defines the flags register with its outcome.  box is pinned to RAX and is
// both used and destroyed (USE_KILL); tmp is a scratch register.  The code
// itself comes from the Fast_Unlock encoding class.
11213 instruct cmpFastUnlock(rFlagsReg cr,
11214                        rRegP object, rax_RegP box, rRegP tmp)
11215 %{
11216   match(Set cr (FastUnlock object box));
11217   effect(TEMP tmp, USE_KILL box);
11218 
11219   ins_cost(300);
11220   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
11221   ins_encode(Fast_Unlock(object, box, tmp));
11222   ins_pipe(pipe_slow);
11223 %}
11224 
11225 
11226 // ============================================================================
11227 // Safepoint Instructions
// Safepoint poll used when the polling page is NOT far
// (Assembler::is_polling_page_far() is false).  Emits a testl of RAX against
// the polling page, addressed through an AddressLiteral carrying a
// poll_type relocation.  Only the flags are clobbered.
11228 instruct safePoint_poll(rFlagsReg cr)
11229 %{
11230   predicate(!Assembler::is_polling_page_far());
11231   match(SafePoint);
11232   effect(KILL cr);
11233 
11234   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11235             "# Safepoint: poll for GC" %}
11236   ins_cost(125);
11237   ins_encode %{
11238     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
11239     __ testl(rax, addr);
11240   %}
11241   ins_pipe(ialu_reg_mem);
11242 %}
11243 
// Safepoint poll used when the polling page IS far
// (Assembler::is_polling_page_far() is true).  The page address arrives in
// the register operand poll; a poll_type relocation is recorded and then
// RAX is tested against [poll + 0].  Only the flags are clobbered.
11244 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11245 %{
11246   predicate(Assembler::is_polling_page_far());
11247   match(SafePoint poll);
11248   effect(KILL cr, USE poll);
11249 
11250   format %{ "testl  rax, [$poll]\t"
11251             "# Safepoint: poll for GC" %}
11252   ins_cost(125);
11253   ins_encode %{
11254     __ relocate(relocInfo::poll_type);
11255     __ testl(rax, Address($poll$$Register, 0));
11256   %}
11257   ins_pipe(ialu_reg_mem);
11258 %}
11259 
11260 // ============================================================================
11261 // Procedure Call/Return Instructions
11262 // Call Java Static Instruction
11263 // Note: If this code changes, the corresponding ret_addr_offset() and
11264 //       compute_padding() functions will have to be adjusted.
// Static Java call for CallStaticJava nodes that are NOT method-handle
// invokes (see CallStaticJavaHandle for those).  Encoding order: clear_avx,
// the call itself (opcode E8), then call_epilog.  The instruction requests
// 4-byte alignment via ins_alignment(4).
11265 instruct CallStaticJavaDirect(method meth) %{
11266   match(CallStaticJava);
11267   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
11268   effect(USE meth);
11269 
11270   ins_cost(300);
11271   format %{ "call,static " %}
11272   opcode(0xE8); /* E8 cd */
11273   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
11274   ins_pipe(pipe_slow);
11275   ins_alignment(4);
11276 %}
11277 
11278 // Call Java Static Instruction (method handle version)
11279 // Note: If this code changes, the corresponding ret_addr_offset() and
11280 //       compute_padding() functions will have to be adjusted.
// Static Java call for CallStaticJava nodes that ARE method-handle invokes.
// Identical to CallStaticJavaDirect except that the call is bracketed by
// preserve_SP / restore_SP, with RBP reserved (rbp_mh_SP_save operand) as
// the save location.
11281 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
11282   match(CallStaticJava);
11283   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
11284   effect(USE meth);
11285   // RBP is saved by all callees (for interpreter stack correction).
11286   // We use it here for a similar purpose, in {preserve,restore}_SP.
11287 
11288   ins_cost(300);
11289   format %{ "call,static/MethodHandle " %}
11290   opcode(0xE8); /* E8 cd */
11291   ins_encode(clear_avx, preserve_SP,
11292              Java_Static_Call(meth),
11293              restore_SP,
11294              call_epilog);
11295   ins_pipe(pipe_slow);
11296   ins_alignment(4);
11297 %}
11298 
11299 // Call Java Dynamic Instruction
11300 // Note: If this code changes, the corresponding ret_addr_offset() and
11301 //       compute_padding() functions will have to be adjusted.
// Dynamic (virtual/interface) Java call for CallDynamicJava nodes.  Per the
// format text, RAX is first loaded with Universe::non_oop_word() and then
// the call is made.  Encoding order: clear_avx, Java_Dynamic_Call,
// call_epilog; 4-byte alignment is requested.
11302 instruct CallDynamicJavaDirect(method meth)
11303 %{
11304   match(CallDynamicJava);
11305   effect(USE meth);
11306 
11307   ins_cost(300);
11308   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11309             "call,dynamic " %}
11310   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
11311   ins_pipe(pipe_slow);
11312   ins_alignment(4);
11313 %}
11314 
11315 // Call Runtime Instruction
// Call into the VM runtime for CallRuntime nodes.  Encoding order:
// clear_avx, then Java_To_Runtime.  No call_epilog and no alignment request,
// unlike the Java call instructions.
11316 instruct CallRuntimeDirect(method meth)
11317 %{
11318   match(CallRuntime);
11319   effect(USE meth);
11320 
11321   ins_cost(300);
11322   format %{ "call,runtime " %}
11323   ins_encode(clear_avx, Java_To_Runtime(meth));
11324   ins_pipe(pipe_slow);
11325 %}
11326 
11327 // Call runtime without safepoint
// Leaf runtime call for CallLeaf nodes.  Same encoding as CallRuntimeDirect
// (clear_avx, then Java_To_Runtime); only the matched node and format text
// differ.
11328 instruct CallLeafDirect(method meth)
11329 %{
11330   match(CallLeaf);
11331   effect(USE meth);
11332 
11333   ins_cost(300);
11334   format %{ "call_leaf,runtime " %}
11335   ins_encode(clear_avx, Java_To_Runtime(meth));
11336   ins_pipe(pipe_slow);
11337 %}
11338 
11339 // Call runtime without safepoint
// Leaf runtime call for CallLeafNoFP nodes.  Unlike CallLeafDirect, the
// encoding is Java_To_Runtime alone: clear_avx is not emitted before the
// call.
11340 instruct CallLeafNoFPDirect(method meth)
11341 %{
11342   match(CallLeafNoFP);
11343   effect(USE meth);
11344 
11345   ins_cost(300);
11346   format %{ "call_leaf_nofp,runtime " %}
11347   ins_encode(Java_To_Runtime(meth));
11348   ins_pipe(pipe_slow);
11349 %}
11350 
11351 // Return Instruction
11352 // Remove the return address & jump to it.
11353 // Notice: We always emit a nop after a ret to make sure there is room
11354 // for safepoint patching
// Method return for Return nodes: the encoding is just the primary opcode
// 0xC3 (ret).
// NOTE(review): the header comment preceding this definition mentions a nop
// emitted after the ret, but only OpcP appears in ins_encode here -- confirm
// whether that remark is still accurate.
11355 instruct Ret()
11356 %{
11357   match(Return);
11358 
11359   format %{ "ret" %}
11360   opcode(0xC3);
11361   ins_encode(OpcP);
11362   ins_pipe(pipe_jmp);
11363 %}
11364 
11365 // Tail Call; Jump from runtime stub to Java code.
11366 // Also known as an 'interprocedural jump'.
11367 // Target of jump will eventually return to caller.
11368 // TailJump below removes the return address.
// Indirect jump for TailCall nodes.  The target lives in any pointer
// register except RBP (no_rbp_RegP); the method oop is pinned to RBX, as the
// format text notes.  Encoded as opcode FF /4 with an optional REX prefix
// for the target register.  The return address is left in place.
11369 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11370 %{
11371   match(TailCall jump_target method_oop);
11372 
11373   ins_cost(300);
11374   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11375   opcode(0xFF, 0x4); /* Opcode FF /4 */
11376   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11377   ins_pipe(pipe_jmp);
11378 %}
11379 
11380 // Tail Jump; remove the return address; jump to target.
11381 // TailCall above leaves the return address around.
// Indirect jump for TailJump nodes.  First pops the return address into RDX
// (raw opcode byte 0x5a), then jumps through jump_target using opcode FF /4.
// The exception oop operand is pinned to RAX; the target may be any pointer
// register except RBP.
11382 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11383 %{
11384   match(TailJump jump_target ex_oop);
11385 
11386   ins_cost(300);
11387   format %{ "popq    rdx\t# pop return address\n\t"
11388             "jmp     $jump_target" %}
11389   opcode(0xFF, 0x4); /* Opcode FF /4 */
11390   ins_encode(Opcode(0x5a), // popq rdx
11391              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11392   ins_pipe(pipe_jmp);
11393 %}
11394 
11395 // Create exception oop: created by stack-crawling runtime code.
11396 // Created exception is now available to this handler, and is setup
11397 // just prior to jumping to this handler.  No code emitted.
// Placeholder for CreateEx nodes: declares that the exception oop is
// defined in RAX.  The encoding is empty and the size is declared as 0, so
// no machine code is produced.
11398 instruct CreateException(rax_RegP ex_oop)
11399 %{
11400   match(Set ex_oop (CreateEx));
11401 
11402   size(0);
11403   // use the following format syntax
11404   format %{ "# exception oop is in rax; no code emitted" %}
11405   ins_encode();
11406   ins_pipe(empty);
11407 %}
11408 
11409 // Rethrow exception:
11410 // The exception oop will come in the first argument position.
11411 // Then JUMP (not call) to the rethrow stub code.
// Rethrow nodes: formatted as a jump to the rethrow stub.  Takes no
// operands; the code comes from the enc_rethrow encoding class.
11412 instruct RethrowException()
11413 %{
11414   match(Rethrow);
11415 
11416   // use the following format syntax
11417   format %{ "jmp     rethrow_stub" %}
11418   ins_encode(enc_rethrow);
11419   ins_pipe(pipe_jmp);
11420 %}
11421 
11422 
11423 // ============================================================================
11424 // This name is KNOWN by the ADLC and cannot be changed.
11425 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
11426 // for this guy.
// ThreadLocal node: the result is pinned to R15, which per the format text
// already holds the thread-local pointer.  The encoding is empty and the
// size is declared as 0, so no machine code is produced.
11427 instruct tlsLoadP(r15_RegP dst) %{
11428   match(Set dst (ThreadLocal));
11429   effect(DEF dst);
11430 
11431   size(0);
11432   format %{ "# TLS is in R15" %}
11433   ins_encode( /*empty encoding*/ );
11434   ins_pipe(ialu_reg_reg);
11435 %}
11436 
11437 
11438 //----------PEEPHOLE RULES-----------------------------------------------------
11439 // These must follow all instruction definitions as they use the names
11440 // defined in the instruction definitions.
11441 //
11442 // peepmatch ( root_instr_name [preceding_instruction]* );
11443 //
11444 // peepconstraint %{
11445 // (instruction_number.operand_name relational_op instruction_number.operand_name
11446 //  [, ...] );
11447 // // instruction numbers are zero-based using left to right order in peepmatch
11448 //
11449 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11450 // // provide an instruction_number.operand_name for each operand that appears
11451 // // in the replacement instruction's match rule
11452 //
11453 // ---------VM FLAGS---------------------------------------------------------
11454 //
11455 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11456 //
11457 // Each peephole rule is given an identifying number starting with zero and
11458 // increasing by one in the order seen by the parser.  An individual peephole
11459 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11460 // on the command-line.
11461 //
11462 // ---------CURRENT LIMITATIONS----------------------------------------------
11463 //
11464 // Only match adjacent instructions in same basic block
11465 // Only equality constraints
11466 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11467 // Only one replacement instruction
11468 //
11469 // ---------EXAMPLE----------------------------------------------------------
11470 //
11471 // // pertinent parts of existing instructions in architecture description
11472 // instruct movI(rRegI dst, rRegI src)
11473 // %{
11474 //   match(Set dst (CopyI src));
11475 // %}
11476 //
11477 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11478 // %{
11479 //   match(Set dst (AddI dst src));
11480 //   effect(KILL cr);
11481 // %}
11482 //
11483 // // Change (inc mov) to lea
11484 // peephole %{
11485 //   // increment preceded by register-register move
11486 //   peepmatch ( incI_rReg movI );
11487 //   // require that the destination register of the increment
11488 //   // match the destination register of the move
11489 //   peepconstraint ( 0.dst == 1.dst );
11490 //   // construct a replacement instruction that sets
11491 //   // the destination to ( move's source register + one )
11492 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11493 // %}
11494 //
11495 
11496 // Implementation no longer uses movX instructions since
11497 // machine-independent system no longer uses CopyX nodes.
11498 //
11499 // peephole
11500 // %{
11501 //   peepmatch (incI_rReg movI);
11502 //   peepconstraint (0.dst == 1.dst);
11503 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11504 // %}
11505 
11506 // peephole
11507 // %{
11508 //   peepmatch (decI_rReg movI);
11509 //   peepconstraint (0.dst == 1.dst);
11510 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11511 // %}
11512 
11513 // peephole
11514 // %{
11515 //   peepmatch (addI_rReg_imm movI);
11516 //   peepconstraint (0.dst == 1.dst);
11517 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11518 // %}
11519 
11520 // peephole
11521 // %{
11522 //   peepmatch (incL_rReg movL);
11523 //   peepconstraint (0.dst == 1.dst);
11524 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11525 // %}
11526 
11527 // peephole
11528 // %{
11529 //   peepmatch (decL_rReg movL);
11530 //   peepconstraint (0.dst == 1.dst);
11531 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11532 // %}
11533 
11534 // peephole
11535 // %{
11536 //   peepmatch (addL_rReg_imm movL);
11537 //   peepconstraint (0.dst == 1.dst);
11538 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11539 // %}
11540 
11541 // peephole
11542 // %{
11543 //   peepmatch (addP_rReg_imm movP);
11544 //   peepconstraint (0.dst == 1.dst);
11545 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11546 // %}
11547 
11548 // // Change load of spilled value to only a spill
11549 // instruct storeI(memory mem, rRegI src)
11550 // %{
11551 //   match(Set mem (StoreI mem src));
11552 // %}
11553 //
11554 // instruct loadI(rRegI dst, memory mem)
11555 // %{
11556 //   match(Set dst (LoadI mem));
11557 // %}
11558 //
11559 
// Peephole: an int load (instruction 0, the root) whose preceding
// instruction is an int store (instruction 1).  When the store's source
// register is the load's destination and both use the same memory operand,
// the pair is replaced by the store alone, i.e. the reload of a just-stored
// value is dropped.
11560 peephole
11561 %{
11562   peepmatch (loadI storeI);
11563   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11564   peepreplace (storeI(1.mem 1.mem 1.src));
11565 %}
11566 
// Peephole: a long load (instruction 0, the root) whose preceding
// instruction is a long store (instruction 1).  When the store's source
// register is the load's destination and both use the same memory operand,
// the pair is replaced by the store alone, i.e. the reload of a just-stored
// value is dropped.
11567 peephole
11568 %{
11569   peepmatch (loadL storeL);
11570   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11571   peepreplace (storeL(1.mem 1.mem 1.src));
11572 %}
11573 
11574 //----------SMARTSPILL RULES---------------------------------------------------
11575 // These must follow all instruction definitions as they use the names
11576 // defined in the instructions definitions.