1 //
   2 // Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  // "reg_def"  name ( register save type, C convention save type,
  //                   ideal register type, encoding, VMReg );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
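  // Previously set RBX, RSI, and RDI as save-on-entry for java code
  // Turn off SOE in java-code due to frequent use of uncommon-traps.
  // Now that allocator is better, turn on RSI and RDI as SOE registers.
  // NOTE(review): the definitions below list RSI and RDI as SOC in the first
  // (Java) column; they are SOE only in the C convention column under _WIN64,
  // so the last sentence above appears to be stale -- confirm.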
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 // Floating Point Registers
 // (no floating point reg_defs follow in this register block)
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Class for all pointer registers (including RSP)
 170 reg_class any_reg(RAX, RAX_H,
 171                   RDX, RDX_H,
 172                   RBP, RBP_H,
 173                   RDI, RDI_H,
 174                   RSI, RSI_H,
 175                   RCX, RCX_H,
 176                   RBX, RBX_H,
 177                   RSP, RSP_H,
 178                   R8,  R8_H,
 179                   R9,  R9_H,
 180                   R10, R10_H,
 181                   R11, R11_H,
 182                   R12, R12_H,
 183                   R13, R13_H,
 184                   R14, R14_H,
 185                   R15, R15_H);
 186 
 // Class for all pointer registers except RSP (R12 and R15 are also left out)
 188 reg_class ptr_reg(RAX, RAX_H,
 189                   RDX, RDX_H,
 190                   RBP, RBP_H,
 191                   RDI, RDI_H,
 192                   RSI, RSI_H,
 193                   RCX, RCX_H,
 194                   RBX, RBX_H,
 195                   R8,  R8_H,
 196                   R9,  R9_H,
 197                   R10, R10_H,
 198                   R11, R11_H,
 199                   R13, R13_H,
 200                   R14, R14_H);
 201 
 202 // Class for all pointer registers except RAX and RSP
 203 reg_class ptr_no_rax_reg(RDX, RDX_H,
 204                          RBP, RBP_H,
 205                          RDI, RDI_H,
 206                          RSI, RSI_H,
 207                          RCX, RCX_H,
 208                          RBX, RBX_H,
 209                          R8,  R8_H,
 210                          R9,  R9_H,
 211                          R10, R10_H,
 212                          R11, R11_H,
 213                          R13, R13_H,
 214                          R14, R14_H);
 215 
 216 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 217                          RAX, RAX_H,
 218                          RDI, RDI_H,
 219                          RSI, RSI_H,
 220                          RCX, RCX_H,
 221                          RBX, RBX_H,
 222                          R8,  R8_H,
 223                          R9,  R9_H,
 224                          R10, R10_H,
 225                          R11, R11_H,
 226                          R13, R13_H,
 227                          R14, R14_H);
 228 
 229 // Class for all pointer registers except RAX, RBX and RSP
 230 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 231                              RBP, RBP_H,
 232                              RDI, RDI_H,
 233                              RSI, RSI_H,
 234                              RCX, RCX_H,
 235                              R8,  R8_H,
 236                              R9,  R9_H,
 237                              R10, R10_H,
 238                              R11, R11_H,
 239                              R13, R13_H,
 240                              R14, R14_H);
 241 
 242 // Singleton class for RAX pointer register
 243 reg_class ptr_rax_reg(RAX, RAX_H);
 244 
 245 // Singleton class for RBX pointer register
 246 reg_class ptr_rbx_reg(RBX, RBX_H);
 247 
 248 // Singleton class for RSI pointer register
 249 reg_class ptr_rsi_reg(RSI, RSI_H);
 250 
 251 // Singleton class for RDI pointer register
 252 reg_class ptr_rdi_reg(RDI, RDI_H);
 253 
 254 // Singleton class for RBP pointer register
 255 reg_class ptr_rbp_reg(RBP, RBP_H);
 256 
 257 // Singleton class for stack pointer
 258 reg_class ptr_rsp_reg(RSP, RSP_H);
 259 
 260 // Singleton class for TLS pointer
 261 reg_class ptr_r15_reg(R15, R15_H);
 262 
 // Class for all long registers (except RSP; R12 and R15 are also left out)
 264 reg_class long_reg(RAX, RAX_H,
 265                    RDX, RDX_H,
 266                    RBP, RBP_H,
 267                    RDI, RDI_H,
 268                    RSI, RSI_H,
 269                    RCX, RCX_H,
 270                    RBX, RBX_H,
 271                    R8,  R8_H,
 272                    R9,  R9_H,
 273                    R10, R10_H,
 274                    R11, R11_H,
 275                    R13, R13_H,
 276                    R14, R14_H);
 277 
 278 // Class for all long registers except RAX, RDX (and RSP)
 279 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 280                               RDI, RDI_H,
 281                               RSI, RSI_H,
 282                               RCX, RCX_H,
 283                               RBX, RBX_H,
 284                               R8,  R8_H,
 285                               R9,  R9_H,
 286                               R10, R10_H,
 287                               R11, R11_H,
 288                               R13, R13_H,
 289                               R14, R14_H);
 290 
 291 // Class for all long registers except RCX (and RSP)
 292 reg_class long_no_rcx_reg(RBP, RBP_H,
 293                           RDI, RDI_H,
 294                           RSI, RSI_H,
 295                           RAX, RAX_H,
 296                           RDX, RDX_H,
 297                           RBX, RBX_H,
 298                           R8,  R8_H,
 299                           R9,  R9_H,
 300                           R10, R10_H,
 301                           R11, R11_H,
 302                           R13, R13_H,
 303                           R14, R14_H);
 304 
 305 // Class for all long registers except RAX (and RSP)
 306 reg_class long_no_rax_reg(RBP, RBP_H,
 307                           RDX, RDX_H,
 308                           RDI, RDI_H,
 309                           RSI, RSI_H,
 310                           RCX, RCX_H,
 311                           RBX, RBX_H,
 312                           R8,  R8_H,
 313                           R9,  R9_H,
 314                           R10, R10_H,
 315                           R11, R11_H,
 316                           R13, R13_H,
 317                           R14, R14_H);
 318 
 319 // Singleton class for RAX long register
 320 reg_class long_rax_reg(RAX, RAX_H);
 321 
 322 // Singleton class for RCX long register
 323 reg_class long_rcx_reg(RCX, RCX_H);
 324 
 325 // Singleton class for RDX long register
 326 reg_class long_rdx_reg(RDX, RDX_H);
 327 
 // Class for all int registers (except RSP; R12 and R15 are also left out)
 329 reg_class int_reg(RAX,
 330                   RDX,
 331                   RBP,
 332                   RDI,
 333                   RSI,
 334                   RCX,
 335                   RBX,
 336                   R8,
 337                   R9,
 338                   R10,
 339                   R11,
 340                   R13,
 341                   R14);
 342 
 343 // Class for all int registers except RCX (and RSP)
 344 reg_class int_no_rcx_reg(RAX,
 345                          RDX,
 346                          RBP,
 347                          RDI,
 348                          RSI,
 349                          RBX,
 350                          R8,
 351                          R9,
 352                          R10,
 353                          R11,
 354                          R13,
 355                          R14);
 356 
 357 // Class for all int registers except RAX, RDX (and RSP)
 358 reg_class int_no_rax_rdx_reg(RBP,
 359                              RDI,
 360                              RSI,
 361                              RCX,
 362                              RBX,
 363                              R8,
 364                              R9,
 365                              R10,
 366                              R11,
 367                              R13,
 368                              R14);
 369 
 370 // Singleton class for RAX int register
 371 reg_class int_rax_reg(RAX);
 372 
 373 // Singleton class for RBX int register
 374 reg_class int_rbx_reg(RBX);
 375 
 376 // Singleton class for RCX int register
 377 reg_class int_rcx_reg(RCX);
 378 
 // Singleton class for RDX int register
 380 reg_class int_rdx_reg(RDX);
 381 
 // Singleton class for RDI int register
 383 reg_class int_rdi_reg(RDI);
 384 
 385 // Singleton class for instruction pointer
 386 // reg_class ip_reg(RIP);
 387 
 388 %}
 389 
 390 //----------SOURCE BLOCK-------------------------------------------------------
 391 // This is a block of C++ code which provides values, functions, and
 392 // definitions necessary in the rest of the architecture description
 393 source %{
 // Relocation "format" selectors. RELOC_DISP32 is what this file passes as
 // the format argument when relocating a 32-bit displacement; RELOC_IMM64 is
 // presumably the counterpart for 64-bit immediates (no use visible here).
 #define   RELOC_IMM64    Assembler::imm_operand
 #define   RELOC_DISP32   Assembler::disp32_operand

 // Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so any function
 // using it needs a MacroAssembler named _masm in scope.
 #define __ _masm.
 398 
 // Size in bytes of the SP-preserving instruction: one REX.W prefix byte,
 // one opcode byte and one reg/reg ModRM byte.
 static int preserve_SP_size() {
   const int rex_w_bytes  = 1;
   const int opcode_bytes = 1;
   const int modrm_bytes  = 1;
   return rex_w_bytes + opcode_bytes + modrm_bytes;
 }
 402 static int clear_avx_size() {
 403   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 404 }
 405 
 406 // !!!!! Special hack to get all types of calls to specify the byte offset
 407 //       from the start of the call to the point where the return address
 408 //       will point.
 409 int MachCallStaticJavaNode::ret_addr_offset()
 410 {
 411   int offset = 5; // 5 bytes from start of call to where return address points
 412   offset += clear_avx_size();
 413   if (_method_handle_invoke)
 414     offset += preserve_SP_size();
 415   return offset;
 416 }
 417 
 418 int MachCallDynamicJavaNode::ret_addr_offset()
 419 {
 420   int offset = 15; // 15 bytes from start of call to where return address points
 421   offset += clear_avx_size();
 422   return offset;
 423 }
 424 
 425 int MachCallRuntimeNode::ret_addr_offset() {
 426   int offset = 13; // movq r10,#addr; callq (r10)
 427   offset += clear_avx_size();
 428   return offset;
 429 }
 430 
 431 // Indicate if the safepoint node needs the polling page as an input,
 432 // it does if the polling page is more than disp32 away.
 433 bool SafePointNode::needs_polling_address_input()
 434 {
 435   return Assembler::is_polling_page_far();
 436 }
 437 
 438 //
 439 // Compute padding required for nodes which need alignment
 440 //
 441 
 442 // The address of the call instruction needs to be 4-byte aligned to
 443 // ensure that it does not span a cache line so that it can be patched.
 444 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 445 {
 446   current_offset += clear_avx_size(); // skip vzeroupper
 447   current_offset += 1; // skip call opcode byte
 448   return round_to(current_offset, alignment_required()) - current_offset;
 449 }
 450 
 451 // The address of the call instruction needs to be 4-byte aligned to
 452 // ensure that it does not span a cache line so that it can be patched.
 453 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 454 {
 455   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 456   current_offset += clear_avx_size(); // skip vzeroupper
 457   current_offset += 1; // skip call opcode byte
 458   return round_to(current_offset, alignment_required()) - current_offset;
 459 }
 460 
 461 // The address of the call instruction needs to be 4-byte aligned to
 462 // ensure that it does not span a cache line so that it can be patched.
 463 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 464 {
 465   current_offset += clear_avx_size(); // skip vzeroupper
 466   current_offset += 11; // skip movq instruction + call opcode byte
 467   return round_to(current_offset, alignment_required()) - current_offset;
 468 }
 469 
 470 // EMIT_RM()
 471 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 472   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 473   cbuf.insts()->emit_int8(c);
 474 }
 475 
 476 // EMIT_CC()
 477 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 478   unsigned char c = (unsigned char) (f1 | f2);
 479   cbuf.insts()->emit_int8(c);
 480 }
 481 
 482 // EMIT_OPCODE()
 483 void emit_opcode(CodeBuffer &cbuf, int code) {
 484   cbuf.insts()->emit_int8((unsigned char) code);
 485 }
 486 
 487 // EMIT_OPCODE() w/ relocation information
 488 void emit_opcode(CodeBuffer &cbuf,
 489                  int code, relocInfo::relocType reloc, int offset, int format)
 490 {
 491   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 492   emit_opcode(cbuf, code);
 493 }
 494 
 495 // EMIT_D8()
 496 void emit_d8(CodeBuffer &cbuf, int d8) {
 497   cbuf.insts()->emit_int8((unsigned char) d8);
 498 }
 499 
 500 // EMIT_D16()
 501 void emit_d16(CodeBuffer &cbuf, int d16) {
 502   cbuf.insts()->emit_int16(d16);
 503 }
 504 
 505 // EMIT_D32()
 506 void emit_d32(CodeBuffer &cbuf, int d32) {
 507   cbuf.insts()->emit_int32(d32);
 508 }
 509 
 510 // EMIT_D64()
 511 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 512   cbuf.insts()->emit_int64(d64);
 513 }
 514 
 515 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 516 void emit_d32_reloc(CodeBuffer& cbuf,
 517                     int d32,
 518                     relocInfo::relocType reloc,
 519                     int format)
 520 {
 521   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 522   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 523   cbuf.insts()->emit_int32(d32);
 524 }
 525 
 526 // emit 32 bit value and construct relocation entry from RelocationHolder
 527 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 528 #ifdef ASSERT
 529   if (rspec.reloc()->type() == relocInfo::oop_type &&
 530       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 531     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 532     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 533   }
 534 #endif
 535   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 536   cbuf.insts()->emit_int32(d32);
 537 }
 538 
 539 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 540   address next_ip = cbuf.insts_end() + 4;
 541   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 542                  external_word_Relocation::spec(addr),
 543                  RELOC_DISP32);
 544 }
 545 
 546 
 547 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 548 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 549   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 550   cbuf.insts()->emit_int64(d64);
 551 }
 552 
 553 // emit 64 bit value and construct relocation entry from RelocationHolder
 554 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 555 #ifdef ASSERT
 556   if (rspec.reloc()->type() == relocInfo::oop_type &&
 557       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 558     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 559     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 560            "cannot embed scavengable oops in code");
 561   }
 562 #endif
 563   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 564   cbuf.insts()->emit_int64(d64);
 565 }
 566 
 567 // Access stack slot for load or store
 568 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 569 {
 570   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 571   if (-0x80 <= disp && disp < 0x80) {
 572     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 573     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 574     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 575   } else {
 576     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 577     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 578     emit_d32(cbuf, disp);     // Displacement // R/M byte
 579   }
 580 }
 581 
 582    // rRegI ereg, memory mem) %{    // emit_reg_mem
 583 void encode_RegMem(CodeBuffer &cbuf,
 584                    int reg,
 585                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 586 {
 587   assert(disp_reloc == relocInfo::none, "cannot have disp");
 588   int regenc = reg & 7;
 589   int baseenc = base & 7;
 590   int indexenc = index & 7;
 591 
 592   // There is no index & no scale, use form without SIB byte
 593   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 594     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 595     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 596       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 597     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 598       // If 8-bit displacement, mode 0x1
 599       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 600       emit_d8(cbuf, disp);
 601     } else {
 602       // If 32-bit displacement
 603       if (base == -1) { // Special flag for absolute address
 604         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 605         if (disp_reloc != relocInfo::none) {
 606           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 607         } else {
 608           emit_d32(cbuf, disp);
 609         }
 610       } else {
 611         // Normal base + offset
 612         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 613         if (disp_reloc != relocInfo::none) {
 614           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 615         } else {
 616           emit_d32(cbuf, disp);
 617         }
 618       }
 619     }
 620   } else {
 621     // Else, encode with the SIB byte
 622     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 623     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 624       // If no displacement
 625       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 626       emit_rm(cbuf, scale, indexenc, baseenc);
 627     } else {
 628       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 629         // If 8-bit displacement, mode 0x1
 630         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 631         emit_rm(cbuf, scale, indexenc, baseenc);
 632         emit_d8(cbuf, disp);
 633       } else {
 634         // If 32-bit displacement
 635         if (base == 0x04 ) {
 636           emit_rm(cbuf, 0x2, regenc, 0x4);
 637           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 638         } else {
 639           emit_rm(cbuf, 0x2, regenc, 0x4);
 640           emit_rm(cbuf, scale, indexenc, baseenc); // *
 641         }
 642         if (disp_reloc != relocInfo::none) {
 643           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 644         } else {
 645           emit_d32(cbuf, disp);
 646         }
 647       }
 648     }
 649   }
 650 }
 651 
 652 // This could be in MacroAssembler but it's fairly C2 specific
 653 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 654   Label exit;
 655   __ jccb(Assembler::noParity, exit);
 656   __ pushf();
 657   //
 658   // comiss/ucomiss instructions set ZF,PF,CF flags and
 659   // zero OF,AF,SF for NaN values.
 660   // Fixup flags by zeroing ZF,PF so that compare of NaN
 661   // values returns 'less than' result (CF is set).
 662   // Leave the rest of flags unchanged.
 663   //
 664   //    7 6 5 4 3 2 1 0
 665   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 666   //    0 0 1 0 1 0 1 1   (0x2B)
 667   //
 668   __ andq(Address(rsp, 0), 0xffffff2b);
 669   __ popf();
 670   __ bind(exit);
 671 }
 672 
 673 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 674   Label done;
 675   __ movl(dst, -1);
 676   __ jcc(Assembler::parity, done);
 677   __ jcc(Assembler::below, done);
 678   __ setb(Assembler::notEqual, dst);
 679   __ movzbl(dst, dst);
 680   __ bind(done);
 681 }
 682 
 683 
 684 //=============================================================================
 // The constant base node's output register mask is bound to the empty mask
 // on this platform.
 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 686 
 687 int Compile::ConstantTable::calculate_table_base_offset() const {
 688   return 0;  // absolute addressing, no offset
 689 }
 690 
 691 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 692   // Empty encoding
 693 }
 694 
 695 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 696   return 0;
 697 }
 698 
 699 #ifndef PRODUCT
 700 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 701   st->print("# MachConstantBaseNode (empty encoding)");
 702 }
 703 #endif
 704 
 705 
 706 //=============================================================================
 707 #ifndef PRODUCT
 708 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 709   Compile* C = ra_->C;
 710 
 711   int framesize = C->frame_slots() << LogBytesPerInt;
 712   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 713   // Remove wordSize for return addr which is already pushed.
 714   framesize -= wordSize;
 715 
 716   if (C->need_stack_bang(framesize)) {
 717     framesize -= wordSize;
 718     st->print("# stack bang");
 719     st->print("\n\t");
 720     st->print("pushq   rbp\t# Save rbp");
 721     if (framesize) {
 722       st->print("\n\t");
 723       st->print("subq    rsp, #%d\t# Create frame",framesize);
 724     }
 725   } else {
 726     st->print("subq    rsp, #%d\t# Create frame",framesize);
 727     st->print("\n\t");
 728     framesize -= wordSize;
 729     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 730   }
 731 
 732   if (VerifyStackAtCalls) {
 733     st->print("\n\t");
 734     framesize -= wordSize;
 735     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 736 #ifdef ASSERT
 737     st->print("\n\t");
 738     st->print("# stack alignment check");
 739 #endif
 740   }
 741   st->cr();
 742 }
 743 #endif
 744 
 745 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 746   Compile* C = ra_->C;
 747   MacroAssembler _masm(&cbuf);
 748 
 749   int framesize = C->frame_slots() << LogBytesPerInt;
 750 
 751   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 752 
 753   C->set_frame_complete(cbuf.insts_size());
 754 
 755   if (C->has_mach_constant_base_node()) {
 756     // NOTE: We set the table base offset here because users might be
 757     // emitted before MachConstantBaseNode.
 758     Compile::ConstantTable& constant_table = C->constant_table();
 759     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 760   }
 761 }
 762 
 763 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 764 {
 765   return MachNode::size(ra_); // too many variables; just compute it
 766                               // the hard way
 767 }
 768 
 769 int MachPrologNode::reloc() const
 770 {
 771   return 0; // a large enough number
 772 }
 773 
 774 //=============================================================================
 775 #ifndef PRODUCT
 776 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 777 {
 778   Compile* C = ra_->C;
 779   if (C->max_vector_size() > 16) {
 780     st->print("vzeroupper");
 781     st->cr(); st->print("\t");
 782   }
 783 
 784   int framesize = C->frame_slots() << LogBytesPerInt;
 785   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 786   // Remove word for return adr already pushed
 787   // and RBP
 788   framesize -= 2*wordSize;
 789 
 790   if (framesize) {
 791     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 792     st->print("\t");
 793   }
 794 
 795   st->print_cr("popq   rbp");
 796   if (do_polling() && C->is_method_compilation()) {
 797     st->print("\t");
 798     if (Assembler::is_polling_page_far()) {
 799       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 800                    "testl  rax, [rscratch1]\t"
 801                    "# Safepoint: poll for GC");
 802     } else {
 803       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 804                    "# Safepoint: poll for GC");
 805     }
 806   }
 807 }
 808 #endif
 809 
 810 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 811 {
       // Emits the method epilog into cbuf, in order: optional vzeroupper,
       // "addq rsp, #framesize" (skipped when the adjusted size is 0),
       // "popq rbp", and an optional return safepoint poll.
 812   Compile* C = ra_->C;
 813   if (C->max_vector_size() > 16) {
 814     // Clear upper bits of YMM registers when current compiled code uses
 815     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 816     MacroAssembler _masm(&cbuf);
 817     __ vzeroupper();
 818   }
 819 
       // frame_slots() counts 32-bit slots; the shift converts to bytes.
 820   int framesize = C->frame_slots() << LogBytesPerInt;
 821   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 822   // Remove word for return adr already pushed
 823   // and RBP
 824   framesize -= 2*wordSize;
 825 
 826   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 827 
 828   if (framesize) {
 829     emit_opcode(cbuf, Assembler::REX_W);
         // Opcode 0x83 takes a sign-extended 8-bit immediate, so it is used
         // only for sizes below 0x80; larger sizes use 0x81 with a 32-bit
         // immediate.
 830     if (framesize < 0x80) {
 831       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 832       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 833       emit_d8(cbuf, framesize);
 834     } else {
 835       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 836       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 837       emit_d32(cbuf, framesize);
 838     }
 839   }
 840 
 841   // popq rbp
 842   emit_opcode(cbuf, 0x58 | RBP_enc);
 843 
       // Return safepoint poll: a testl against the polling page, tagged with
       // poll_return_type relocation. When the page is far, its address is
       // first loaded into rscratch1 and the relocation is attached explicitly.
 844   if (do_polling() && C->is_method_compilation()) {
 845     MacroAssembler _masm(&cbuf);
 846     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 847     if (Assembler::is_polling_page_far()) {
 848       __ lea(rscratch1, polling_page);
 849       __ relocate(relocInfo::poll_return_type);
 850       __ testl(rax, Address(rscratch1, 0));
 851     } else {
 852       __ testl(rax, polling_page);
 853     }
 854   }
 855 }
 856 
 857 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 858 {
       // The epilog length depends on vector use, frame size and polling
       // mode (see emit()), so the generic MachNode::size() is used.
 859   return MachNode::size(ra_); // too many variables; just compute it
 860                               // the hard way
 861 }
 862 
 863 int MachEpilogNode::reloc() const
 864 {
       // Relocation-entry count reported for the epilog.
       // NOTE(review): presumably sized for the far-polling path in emit(),
       // which uses a relocated AddressLiteral plus an explicit relocate()
       // call -- confirm against the relocation accounting in the caller.
 865   return 2; // a large enough number
 866 }
 867 
 868 const Pipeline* MachEpilogNode::pipeline() const
 869 {
       // The epilog has no pipeline description of its own; it reports the
       // generic MachNode pipeline class.
 870   return MachNode::pipeline_class();
 871 }
 872 
 873 int MachEpilogNode::safepoint_offset() const
 874 {
       // Always reports offset 0 on this platform.
       // NOTE(review): the consumer of this value is not visible in this
       // part of the file; confirm what the offset is measured from.
 875   return 0;
 876 }
 877 
 878 //=============================================================================
 879 
 880 enum RC {
       // Register class of one half of a spill copy, as computed by rc_class().
 881   rc_bad,    // not a valid OptoReg
 882   rc_int,    // general purpose register
 883   rc_float,  // XMM register
 884   rc_stack   // stack slot
 885 };
 886 
 887 static enum RC rc_class(OptoReg::Name reg)
 888 {
       // Classifies reg as invalid, stack slot, general purpose register or
       // XMM register. The checks are ordered: validity first, then stack,
       // then the VMReg kind.
 889   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 890 
 891   if (OptoReg::is_stack(reg)) return rc_stack;
 892 
 893   VMReg r = OptoReg::as_VMReg(reg);
 894 
 895   if (r->is_Register()) return rc_int;
 896 
       // Anything that is neither a stack slot nor a GPR must be an XMM register.
 897   assert(r->is_XMMRegister(), "must be");
 898   return rc_float;
 899 }
 900 
 901 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
     // They are only declared here so that the vector paths of
     // MachSpillCopyNode::implementation() below can call them.
     // vec_mov_helper is used for the register-to-register vector copy.
 902 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 903                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 904 
     // vec_spill_helper is used for vector copies between a register and a
     // stack slot; is_load is passed as true for stack -> register.
 905 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 906                             int stack_offset, int reg, uint ireg, outputStream* st);
 907 
 908 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 909                                       int dst_offset, uint ireg, outputStream* st) {
       // Copies a vector value of kind ireg (Op_VecS/D/X/Y) from the stack slot
       // at [rsp + src_offset] to the one at [rsp + dst_offset].
       // With a non-NULL cbuf the instructions are emitted; otherwise, in
       // non-PRODUCT builds only, the same sequence is printed to st.
 910   if (cbuf) {
 911     MacroAssembler _masm(cbuf);
 912     switch (ireg) {
 913     case Op_VecS:
           // 32-bit copy through rax; rax is saved to and restored from [rsp - 8].
 914       __ movq(Address(rsp, -8), rax);
 915       __ movl(rax, Address(rsp, src_offset));
 916       __ movl(Address(rsp, dst_offset), rax);
 917       __ movq(rax, Address(rsp, -8));
 918       break;
 919     case Op_VecD:
           // 64-bit copy with a memory push/pop pair; no register is needed.
 920       __ pushq(Address(rsp, src_offset));
 921       __ popq (Address(rsp, dst_offset));
 922       break;
 923     case Op_VecX:
           // 128-bit copy as two 64-bit push/pop pairs (low half, then high half).
 924       __ pushq(Address(rsp, src_offset));
 925       __ popq (Address(rsp, dst_offset));
 926       __ pushq(Address(rsp, src_offset+8));
 927       __ popq (Address(rsp, dst_offset+8));
 928       break;
 929     case Op_VecY:
           // 256-bit copy through xmm0; xmm0 is saved to and restored from [rsp - 32].
 930       __ vmovdqu(Address(rsp, -32), xmm0);
 931       __ vmovdqu(xmm0, Address(rsp, src_offset));
 932       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 933       __ vmovdqu(xmm0, Address(rsp, -32));
 934       break;
 935     default:
 936       ShouldNotReachHere();
 937     }
 938 #ifndef PRODUCT
 939   } else {
         // Listing-only path: mirrors the instruction sequences emitted above.
 940     switch (ireg) {
 941     case Op_VecS:
 942       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 943                 "movl    rax, [rsp + #%d]\n\t"
 944                 "movl    [rsp + #%d], rax\n\t"
 945                 "movq    rax, [rsp - #8]",
 946                 src_offset, dst_offset);
 947       break;
 948     case Op_VecD:
 949       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 950                 "popq    [rsp + #%d]",
 951                 src_offset, dst_offset);
 952       break;
 953      case Op_VecX:
 954       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 955                 "popq    [rsp + #%d]\n\t"
 956                 "pushq   [rsp + #%d]\n\t"
 957                 "popq    [rsp + #%d]",
 958                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 959       break;
 960     case Op_VecY:
 961       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 962                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 963                 "vmovdqu [rsp + #%d], xmm0\n\t"
 964                 "vmovdqu xmm0, [rsp - #32]",
 965                 src_offset, dst_offset);
 966       break;
 967     default:
 968       ShouldNotReachHere();
 969     }
 970 #endif
 971   }
 972 }
 973 
 974 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 975                                        PhaseRegAlloc* ra_,
 976                                        bool do_size,
 977                                        outputStream* st) const {
       // Shared worker for emit() and format(): moves the value of in(1) to the
       // location allocated for this node.
       //   cbuf    - when non-NULL, machine code is emitted into it
       //   ra_     - register allocator, queried for the source and destination
       //             locations and for stack offsets
       //   do_size - not read in this function; the vector helpers are always
       //             called with a literal false
       //   st      - when cbuf is NULL (non-PRODUCT builds only), the
       //             instruction is printed here instead of being emitted
       // Every return statement in this function returns 0.
       //
       // The non-vector part dispatches on (source class, destination class)
       // from rc_class(). Within each pair, a move is treated as 64-bit when
       // both the source and the destination are an aligned (first, second)
       // pair, i.e. first is even and second == first + 1; otherwise it is
       // treated as 32-bit.
 978   assert(cbuf != NULL || st  != NULL, "sanity");
 979   // Get registers to move
 980   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 981   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 982   OptoReg::Name dst_second = ra_->get_reg_second(this);
 983   OptoReg::Name dst_first = ra_->get_reg_first(this);
 984 
 985   enum RC src_second_rc = rc_class(src_second);
 986   enum RC src_first_rc = rc_class(src_first);
 987   enum RC dst_second_rc = rc_class(dst_second);
 988   enum RC dst_first_rc = rc_class(dst_first);
 989 
 990   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 991          "must move at least 1 register" );
 992 
 993   if (src_first == dst_first && src_second == dst_second) {
 994     // Self copy, no move
 995     return 0;
 996   }
       // Vector values are handled entirely by the vec_* helpers and never
       // involve a general purpose register.
 997   if (bottom_type()->isa_vect() != NULL) {
 998     uint ireg = ideal_reg();
 999     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1000     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
1001     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1002       // mem -> mem
1003       int src_offset = ra_->reg2offset(src_first);
1004       int dst_offset = ra_->reg2offset(dst_first);
1005       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1006     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1007       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1008     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1009       int stack_offset = ra_->reg2offset(dst_first);
1010       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1011     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1012       int stack_offset = ra_->reg2offset(src_first);
1013       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1014     } else {
1015       ShouldNotReachHere();
1016     }
1017     return 0;
1018   }
1019   if (src_first_rc == rc_stack) {
1020     // mem ->
1021     if (dst_first_rc == rc_stack) {
1022       // mem -> mem
1023       assert(src_second != dst_first, "overlap");
1024       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1025           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1026         // 64-bit
1027         int src_offset = ra_->reg2offset(src_first);
1028         int dst_offset = ra_->reg2offset(dst_first);
1029         if (cbuf) {
1030           MacroAssembler _masm(cbuf);
1031           __ pushq(Address(rsp, src_offset));
1032           __ popq (Address(rsp, dst_offset));
1033 #ifndef PRODUCT
1034         } else {
1035           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                     "popq    [rsp + #%d]",
1037                      src_offset, dst_offset);
1038 #endif
1039         }
1040       } else {
1041         // 32-bit
1042         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1043         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1044         // No pushl/popl, so:
             // copy through rax, which is saved to and restored from [rsp - 8].
1045         int src_offset = ra_->reg2offset(src_first);
1046         int dst_offset = ra_->reg2offset(dst_first);
1047         if (cbuf) {
1048           MacroAssembler _masm(cbuf);
1049           __ movq(Address(rsp, -8), rax);
1050           __ movl(rax, Address(rsp, src_offset));
1051           __ movl(Address(rsp, dst_offset), rax);
1052           __ movq(rax, Address(rsp, -8));
1053 #ifndef PRODUCT
1054         } else {
1055           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1056                     "movl    rax, [rsp + #%d]\n\t"
1057                     "movl    [rsp + #%d], rax\n\t"
1058                     "movq    rax, [rsp - #8]",
1059                      src_offset, dst_offset);
1060 #endif
1061         }
1062       }
1063       return 0;
1064     } else if (dst_first_rc == rc_int) {
1065       // mem -> gpr
1066       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1067           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1068         // 64-bit
1069         int offset = ra_->reg2offset(src_first);
1070         if (cbuf) {
1071           MacroAssembler _masm(cbuf);
1072           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1073 #ifndef PRODUCT
1074         } else {
1075           st->print("movq    %s, [rsp + #%d]\t# spill",
1076                      Matcher::regName[dst_first],
1077                      offset);
1078 #endif
1079         }
1080       } else {
1081         // 32-bit
1082         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1083         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1084         int offset = ra_->reg2offset(src_first);
1085         if (cbuf) {
1086           MacroAssembler _masm(cbuf);
1087           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1088 #ifndef PRODUCT
1089         } else {
1090           st->print("movl    %s, [rsp + #%d]\t# spill",
1091                      Matcher::regName[dst_first],
1092                      offset);
1093 #endif
1094         }
1095       }
1096       return 0;
1097     } else if (dst_first_rc == rc_float) {
1098       // mem-> xmm
1099       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1100           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1101         // 64-bit
1102         int offset = ra_->reg2offset(src_first);
1103         if (cbuf) {
1104           MacroAssembler _masm(cbuf);
1105           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1106 #ifndef PRODUCT
1107         } else {
               // The mnemonic printed depends on UseXmmLoadAndClearUpper.
1108           st->print("%s  %s, [rsp + #%d]\t# spill",
1109                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1110                      Matcher::regName[dst_first],
1111                      offset);
1112 #endif
1113         }
1114       } else {
1115         // 32-bit
1116         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1117         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1118         int offset = ra_->reg2offset(src_first);
1119         if (cbuf) {
1120           MacroAssembler _masm(cbuf);
1121           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1122 #ifndef PRODUCT
1123         } else {
1124           st->print("movss   %s, [rsp + #%d]\t# spill",
1125                      Matcher::regName[dst_first],
1126                      offset);
1127 #endif
1128         }
1129       }
1130       return 0;
1131     }
1132   } else if (src_first_rc == rc_int) {
1133     // gpr ->
1134     if (dst_first_rc == rc_stack) {
1135       // gpr -> mem
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int offset = ra_->reg2offset(dst_first);
1140         if (cbuf) {
1141           MacroAssembler _masm(cbuf);
1142           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1143 #ifndef PRODUCT
1144         } else {
1145           st->print("movq    [rsp + #%d], %s\t# spill",
1146                      offset,
1147                      Matcher::regName[src_first]);
1148 #endif
1149         }
1150       } else {
1151         // 32-bit
1152         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1153         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1154         int offset = ra_->reg2offset(dst_first);
1155         if (cbuf) {
1156           MacroAssembler _masm(cbuf);
1157           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1158 #ifndef PRODUCT
1159         } else {
1160           st->print("movl    [rsp + #%d], %s\t# spill",
1161                      offset,
1162                      Matcher::regName[src_first]);
1163 #endif
1164         }
1165       }
1166       return 0;
1167     } else if (dst_first_rc == rc_int) {
1168       // gpr -> gpr
1169       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1170           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1171         // 64-bit
1172         if (cbuf) {
1173           MacroAssembler _masm(cbuf);
1174           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1175                   as_Register(Matcher::_regEncode[src_first]));
1176 #ifndef PRODUCT
1177         } else {
1178           st->print("movq    %s, %s\t# spill",
1179                      Matcher::regName[dst_first],
1180                      Matcher::regName[src_first]);
1181 #endif
1182         }
1183         return 0;
1184       } else {
1185         // 32-bit
1186         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1187         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1188         if (cbuf) {
1189           MacroAssembler _masm(cbuf);
1190           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1191                   as_Register(Matcher::_regEncode[src_first]));
1192 #ifndef PRODUCT
1193         } else {
1194           st->print("movl    %s, %s\t# spill",
1195                      Matcher::regName[dst_first],
1196                      Matcher::regName[src_first]);
1197 #endif
1198         }
1199         return 0;
1200       }
1201     } else if (dst_first_rc == rc_float) {
1202       // gpr -> xmm
1203       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1204           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1205         // 64-bit
1206         if (cbuf) {
1207           MacroAssembler _masm(cbuf);
1208           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1209 #ifndef PRODUCT
1210         } else {
1211           st->print("movdq   %s, %s\t# spill",
1212                      Matcher::regName[dst_first],
1213                      Matcher::regName[src_first]);
1214 #endif
1215         }
1216       } else {
1217         // 32-bit
1218         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1219         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1220         if (cbuf) {
1221           MacroAssembler _masm(cbuf);
1222           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1223 #ifndef PRODUCT
1224         } else {
1225           st->print("movdl   %s, %s\t# spill",
1226                      Matcher::regName[dst_first],
1227                      Matcher::regName[src_first]);
1228 #endif
1229         }
1230       }
1231       return 0;
1232     }
1233   } else if (src_first_rc == rc_float) {
1234     // xmm ->
1235     if (dst_first_rc == rc_stack) {
1236       // xmm -> mem
1237       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1238           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1239         // 64-bit
1240         int offset = ra_->reg2offset(dst_first);
1241         if (cbuf) {
1242           MacroAssembler _masm(cbuf);
1243           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1244 #ifndef PRODUCT
1245         } else {
1246           st->print("movsd   [rsp + #%d], %s\t# spill",
1247                      offset,
1248                      Matcher::regName[src_first]);
1249 #endif
1250         }
1251       } else {
1252         // 32-bit
1253         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1254         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1255         int offset = ra_->reg2offset(dst_first);
1256         if (cbuf) {
1257           MacroAssembler _masm(cbuf);
1258           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1259 #ifndef PRODUCT
1260         } else {
1261           st->print("movss   [rsp + #%d], %s\t# spill",
1262                      offset,
1263                      Matcher::regName[src_first]);
1264 #endif
1265         }
1266       }
1267       return 0;
1268     } else if (dst_first_rc == rc_int) {
1269       // xmm -> gpr
1270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1272         // 64-bit
1273         if (cbuf) {
1274           MacroAssembler _masm(cbuf);
1275           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1276 #ifndef PRODUCT
1277         } else {
1278           st->print("movdq   %s, %s\t# spill",
1279                      Matcher::regName[dst_first],
1280                      Matcher::regName[src_first]);
1281 #endif
1282         }
1283       } else {
1284         // 32-bit
1285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1287         if (cbuf) {
1288           MacroAssembler _masm(cbuf);
1289           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1290 #ifndef PRODUCT
1291         } else {
1292           st->print("movdl   %s, %s\t# spill",
1293                      Matcher::regName[dst_first],
1294                      Matcher::regName[src_first]);
1295 #endif
1296         }
1297       }
1298       return 0;
1299     } else if (dst_first_rc == rc_float) {
1300       // xmm -> xmm
1301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1303         // 64-bit
1304         if (cbuf) {
1305           MacroAssembler _masm(cbuf);
1306           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1307 #ifndef PRODUCT
1308         } else {
               // The mnemonic printed depends on UseXmmRegToRegMoveAll.
1309           st->print("%s  %s, %s\t# spill",
1310                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1311                      Matcher::regName[dst_first],
1312                      Matcher::regName[src_first]);
1313 #endif
1314         }
1315       } else {
1316         // 32-bit
1317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1319         if (cbuf) {
1320           MacroAssembler _masm(cbuf);
1321           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1322 #ifndef PRODUCT
1323         } else {
1324           st->print("%s  %s, %s\t# spill",
1325                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1326                      Matcher::regName[dst_first],
1327                      Matcher::regName[src_first]);
1328 #endif
1329         }
1330       }
1331       return 0;
1332     }
1333   }
1334 
       // Reached only when the (source, destination) class pair matched none
       // of the cases above, e.g. when one side classified as rc_bad.
1335   assert(0," foo ");
1336   Unimplemented();
1337   return 0;
1338 }
1339 
1340 #ifndef PRODUCT
1341 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
       // Listing only: a NULL code buffer makes implementation() print to st.
1342   implementation(NULL, ra_, false, st);
1343 }
1344 #endif
1345 
1346 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
       // Code generation only: no output stream is supplied.
1347   implementation(&cbuf, ra_, false, NULL);
1348 }
1349 
1350 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
       // The size is not derived from implementation(); the generic
       // MachNode::size() is used instead.
1351   return MachNode::size(ra_);
1352 }
1353 
1354 //=============================================================================
1355 #ifndef PRODUCT
1356 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1357 {
       // Non-PRODUCT listing of the instruction produced by BoxLockNode::emit():
       // a leaq of the box's stack slot address into the destination register.
       // The stack offset comes from the first element of the input register
       // mask; the register name comes from the allocator.
1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1359   int reg = ra_->get_reg_first(this);
1360   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1361             Matcher::regName[reg], offset);
1362 }
1363 #endif
1364 
1365 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1366 {
       // Hand-encodes "leaq reg, [rsp + offset]". The two branches differ only
       // in the displacement width: mod field 0x2 with a 32-bit displacement
       // for offsets of 0x80 and above, mod field 0x1 with an 8-bit
       // displacement otherwise.
1367   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1368   int reg = ra_->get_encode(this);
1369   if (offset >= 0x80) {
         // Register encodings 8 and above need the REX.R bit; only the low
         // three bits of the encoding go into the reg field.
1370     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1371     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1372     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1373     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1374     emit_d32(cbuf, offset);
1375   } else {
1376     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1377     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1378     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1379     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1380     emit_d8(cbuf, offset);
1381   }
1382 }
1383 
1384 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1385 {
       // Uses the same 0x80 threshold as BoxLockNode::emit(): 5 bytes for the
       // 8-bit displacement form, 8 bytes for the 32-bit displacement form.
       // Both counts include the REX prefix.
1386   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1387   return (offset < 0x80) ? 5 : 8; // REX
1388 }
1389 
1390 //=============================================================================
1391 #ifndef PRODUCT
1392 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1393 {
       // Non-PRODUCT listing of the inline cache check: the receiver's klass
       // (loaded from j_rarg0) is compared with rax, followed by a jump to the
       // ic-miss stub on mismatch and alignment nops.
       // With compressed klass pointers the klass is first loaded into
       // rscratch1; the decode step is listed only for a non-zero klass shift.
1394   if (UseCompressedKlassPointers) {
1395     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1396     if (Universe::narrow_klass_shift() != 0) {
1397       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1398     }
1399     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1400   } else {
1401     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1402                  "# Inline cache check");
1403   }
1404   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1405   st->print_cr("\tnop\t# nops to align entry point");
1406 }
1407 #endif
1408 
1409 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1410 {
       // Emits the inline cache check: compare the klass of the receiver in
       // j_rarg0 with rax and jump to the ic-miss stub when they differ, then
       // pad with nops according to the alignment rule below.
1411   MacroAssembler masm(&cbuf);
       // Remember the starting size so the length of the check can be measured.
1412   uint insts_size = cbuf.insts_size();
1413   if (UseCompressedKlassPointers) {
1414     masm.load_klass(rscratch1, j_rarg0);
1415     masm.cmpptr(rax, rscratch1);
1416   } else {
1417     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1418   }
1419 
1420   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1421 
1422   /* WARNING these NOPs are critical so that verified entry point is properly
1423      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
       // Pad the emitted length up to a multiple of 4; with OptoBreakpoint one
       // byte fewer is padded so that an int3 fits.
1424   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1425   if (OptoBreakpoint) {
1426     // Leave space for int3
1427     nops_cnt -= 1;
1428   }
1429   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1430   if (nops_cnt > 0)
1431     masm.nop(nops_cnt);
1432 }
1433 
1434 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1435 {
       // The length depends on the klass-pointer mode and on the padding
       // computed in emit(), so the generic MachNode::size() is used.
1436   return MachNode::size(ra_); // too many variables; just compute it
1437                               // the hard way
1438 }
1439 
1440 
1441 //=============================================================================
1442 uint size_exception_handler()
1443 {
       // Byte budget for the stub produced by emit_exception_handler(), which
       // consists of a single jump.
1444   // NativeCall instruction size is the same as NativeJump.
1445   // Note that this value is also credited (in output.cpp) to
1446   // the size of the code section.
1447   return NativeJump::instruction_size;
1448 }
1449 
1450 // Emit exception handler code.
1451 int emit_exception_handler(CodeBuffer& cbuf)
1452 {
       // Emits a stub that jumps to the entry point of
       // OptoRuntime::exception_blob(). Returns the offset at which the stub
       // starts, or 0 when the stub could not be started.
1453 
1454   // Note that the code buffer's insts_mark is always relative to insts.
1455   // That's why we must use the macroassembler to generate a handler.
1456   MacroAssembler _masm(&cbuf);
1457   address base =
1458   __ start_a_stub(size_exception_handler());
1459   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1460   int offset = __ offset();
1461   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
       // The stub must fit in the budget announced by size_exception_handler().
1462   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1463   __ end_a_stub();
1464   return offset;
1465 }
1466 
1467 uint size_deopt_handler()
1468 {
       // Byte budget for the stub produced by emit_deopt_handler(), which
       // emits a call, a subptr and a jump.
1469   // three 5 byte instructions
1470   return 15;
1471 }
1472 
1473 // Emit deopt handler code.
1474 int emit_deopt_handler(CodeBuffer& cbuf)
1475 {
       // Emits a stub that pushes its own start address on the stack and then
       // jumps to the unpack entry of SharedRuntime::deopt_blob(). Returns the
       // offset at which the stub starts, or 0 when the stub could not be
       // started.
1476 
1477   // Note that the code buffer's insts_mark is always relative to insts.
1478   // That's why we must use the macroassembler to generate a handler.
1479   MacroAssembler _masm(&cbuf);
1480   address base =
1481   __ start_a_stub(size_deopt_handler());
1482   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1483   int offset = __ offset();
1484   address the_pc = (address) __ pc();
1485   Label next;
1486   // push a "the_pc" on the stack without destroying any registers
1487   // as they all may be live.
1488 
1489   // push address of "next"
1490   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1491   __ bind(next);
1492   // adjust it so it matches "the_pc"
       // (the pushed return address is "next"; subtracting the bytes emitted
       // so far moves it back to the start of the stub)
1493   __ subptr(Address(rsp, 0), __ offset() - offset);
1494   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
       // The stub must fit in the budget announced by size_deopt_handler().
1495   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1496   __ end_a_stub();
1497   return offset;
1498 }
1499 
1500 int Matcher::regnum_to_fpu_offset(int regnum)
1501 {
       // Converts a register number to an index relative to the first
       // floating-point register by subtracting 32.
       // NOTE(review): 32 presumably equals the number of register slots in
       // the first chunk -- confirm against the register definitions.
1502   return regnum - 32; // The FP registers are in the second chunk
1503 }
1504 
1505 // This query exists for UltraSparc; on this platform it always answers true,
     // which just means we have fast l2f (long to float) conversion.
1506 const bool Matcher::convL2FSupported(void) {
1507   return true;
1508 }
1509 
1510 // Is this branch offset short enough that a short branch can be used?
1511 //
1512 // NOTE: If the platform does not provide any short branch variants, then
1513 //       this method should return false for offset 0.
1514 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
       // rule    - matcher rule of the branch; only jmpConUCF2_rule is special
       // br_size - size in bytes of the branch instruction
       // offset  - distance from the address of the branch to its target
       // Returns true when the displacement, re-based to the end of the
       // branch, fits the signed 8-bit range (narrowed for jmpConUCF2).
1515   // The passed offset is relative to address of the branch.
1516   // On 86 a branch displacement is calculated relative to address
1517   // of a next instruction.
1518   offset -= br_size;
1519 
1520   // the short version of jmpConUCF2 contains multiple branches,
1521   // making the reach slightly less
1522   if (rule == jmpConUCF2_rule)
1523     return (-126 <= offset && offset <= 125);
1524   return (-128 <= offset && offset <= 127);
1525 }
1526 
1527 const bool Matcher::isSimpleConstant64(jlong value) {
       // The value argument is currently ignored: every 64-bit constant is
       // reported as simple. The commented-out line below is the narrower
       // "fits in 32 bits" test that is not used.
1528   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1529   //return value == (int) value;  // Cf. storeImmL and immL32.
1530 
1531   // Probably always true, even if a temp register is required.
1532   return true;
1533 }
1534 
1535 // The count parameter (rcx) to rep stosq for the ClearArray node is in
     // words, not bytes.
1536 const bool Matcher::init_array_count_is_in_bytes = false;
1537 
1538 // Threshold size for cleararray, set to the size of 8 longs.
1539 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1540 
1541 // No additional cost for CMOVL.
1542 const int Matcher::long_cmove_cost() { return 0; }
1543 
1544 // No CMOVF/CMOVD with SSE2, so float/double conditional moves are charged
     // ConditionalMoveLimit.
1545 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1546 
1547 // Should the Matcher clone shifts on addressing modes, expecting them
1548 // to be subsumed into complex addressing expressions or compute them
1549 // into registers?  True for Intel but false for most RISCs
1550 const bool Matcher::clone_shift_expressions = true;
1551 
1552 // Do we need to mask the count passed to shift instructions or does
1553 // the cpu only look at the lower 5/6 bits anyway?
     // Here: no masking is needed.
1554 const bool Matcher::need_masked_shift_count = false;
1555 
1556 bool Matcher::narrow_oop_use_complex_address() {
       // Answers true when the object alignment shift is at most 3.
       // NOTE(review): presumably because 3 (scale factor 8) is the largest
       // shift an x86 addressing mode can fold in -- confirm.
       // Only meaningful with UseCompressedOops, as the assert enforces.
1557   assert(UseCompressedOops, "only for compressed oops code");
1558   return (LogMinObjAlignmentInBytes <= 3);
1559 }
1560 
1561 bool Matcher::narrow_klass_use_complex_address() {
       // Klass counterpart of narrow_oop_use_complex_address(): answers true
       // when the klass alignment shift is at most 3.
       // Only meaningful with UseCompressedKlassPointers, as the assert enforces.
1562   assert(UseCompressedKlassPointers, "only for compressed klass code");
1563   return (LogKlassAlignmentInBytes <= 3);
1564 }
1565 
1566 // Is it better to copy float constants, or load them directly from
1567 // memory?  Intel can load a float constant from a direct address,
1568 // requiring no extra registers.  Most RISCs will have to materialize
1569 // an address into a register first, so they would do better to copy
1570 // the constant from stack.
1571 const bool Matcher::rematerialize_float_constants = true; // XXX
1572 
1573 // If CPU can load and store mis-aligned doubles directly then no
1574 // fixup is needed.  Else we split the double into 2 integer pieces
1575 // and move it piece-by-piece.  Only happens when passing doubles into
1576 // C code as the Java calling convention forces doubles to be aligned.
1577 const bool Matcher::misaligned_doubles_ok = true;
1578 
1579 // No-op on amd64
1580 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1581 
1582 // Advertise here if the CPU requires explicit rounding operations to
1583 // implement the UseStrictFP mode.
1584 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1585 
1586 // Are floats converted to double when stored to stack during deoptimization?
1587 // On x64 it is stored without conversion so we can use normal access.
1588 bool Matcher::float_in_double() { return false; }
1589 
1590 // Do ints take an entire long register or just half?
     // Here: an entire long register.
1591 const bool Matcher::int_in_long = true;
1592 
1593 // Return whether or not this register is ever used as an argument.
1594 // This function is used on startup to build the trampoline stubs in
1595 // generateOptoStub.  Registers not mentioned will be killed by the VM
1596 // call in the trampoline, and arguments in those registers will not be
1597 // available to the callee.
     // Both halves of each listed register are accepted: the _num and _H_num
     // entries for RDI, RSI, RDX, RCX, R8, R9 and R12, and the _num and b_num
     // entries for XMM0 through XMM7.
1598 bool Matcher::can_be_java_arg(int reg)
1599 {
1600   return
1601     reg ==  RDI_num || reg == RDI_H_num ||
1602     reg ==  RSI_num || reg == RSI_H_num ||
1603     reg ==  RDX_num || reg == RDX_H_num ||
1604     reg ==  RCX_num || reg == RCX_H_num ||
1605     reg ==   R8_num || reg ==  R8_H_num ||
1606     reg ==   R9_num || reg ==  R9_H_num ||
1607     reg ==  R12_num || reg == R12_H_num ||
1608     reg == XMM0_num || reg == XMM0b_num ||
1609     reg == XMM1_num || reg == XMM1b_num ||
1610     reg == XMM2_num || reg == XMM2b_num ||
1611     reg == XMM3_num || reg == XMM3b_num ||
1612     reg == XMM4_num || reg == XMM4b_num ||
1613     reg == XMM5_num || reg == XMM5b_num ||
1614     reg == XMM6_num || reg == XMM6b_num ||
1615     reg == XMM7_num || reg == XMM7b_num;
1616 }
1617 
1618 bool Matcher::is_spillable_arg(int reg)
1619 {
1620   return can_be_java_arg(reg);
1621 }
1622 
1623 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1624   // In 64 bit mode a code which use multiply when
1625   // devisor is constant is faster than hardware
1626   // DIV instruction (it uses MulHiL).
1627   return false;
1628 }
1629 
1630 // Register for DIVI projection of divmodI
1631 RegMask Matcher::divI_proj_mask() {
1632   return INT_RAX_REG_mask();
1633 }
1634 
1635 // Register for MODI projection of divmodI
1636 RegMask Matcher::modI_proj_mask() {
1637   return INT_RDX_REG_mask();
1638 }
1639 
1640 // Register for DIVL projection of divmodL
1641 RegMask Matcher::divL_proj_mask() {
1642   return LONG_RAX_REG_mask();
1643 }
1644 
1645 // Register for MODL projection of divmodL
1646 RegMask Matcher::modL_proj_mask() {
1647   return LONG_RDX_REG_mask();
1648 }
1649 
1650 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1651   return PTR_RBP_REG_mask();
1652 }
1653 
1654 static Address build_address(int b, int i, int s, int d) {
1655   Register index = as_Register(i);
1656   Address::ScaleFactor scale = (Address::ScaleFactor)s;
1657   if (index == rsp) {
1658     index = noreg;
1659     scale = Address::no_scale;
1660   }
1661   Address addr(as_Register(b), index, scale, d);
1662   return addr;
1663 }
1664 
1665 %}
1666 
1667 //----------ENCODING BLOCK-----------------------------------------------------
1668 // This block specifies the encoding classes used by the compiler to
1669 // output byte streams.  Encoding classes are parameterized macros
1670 // used by Machine Instruction Nodes in order to generate the bit
1671 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
1674 // COND_INTER.  REG_INTER causes an operand to generate a function
1675 // which returns its register number when queried.  CONST_INTER causes
1676 // an operand to generate a function which returns the value of the
1677 // constant when queried.  MEMORY_INTER causes an operand to generate
1678 // four functions which return the Base Register, the Index Register,
1679 // the Scale Value, and the Offset Value of the operand when queried.
1680 // COND_INTER causes an operand to generate six functions which return
1681 // the encoding code (ie - encoding bits for the instruction)
1682 // associated with each basic boolean condition for a conditional
1683 // instruction.
1684 //
1685 // Instructions specify two basic values for encoding.  Again, a
1686 // function is available to check if the constant displacement is an
1687 // oop. They use the ins_encode keyword to specify their encoding
1688 // classes (which must be a sequence of enc_class names, and their
1689 // parameters, specified in the encoding block), and they use the
1690 // opcode keyword to specify, in order, their primary, secondary, and
1691 // tertiary opcode.  Only the opcode sections which a particular
1692 // instruction needs for encoding need to be specified.
1693 encode %{
1694   // Build emit functions for each basic byte or larger field in the
1695   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1696   // from C++ code in the enc_class source block.  Emit functions will
1697   // live in the main source block for now.  In future, we can
1698   // generalize this by adding a syntax that specifies the sizes of
1699   // fields in an order, so that the adlc can build the emit functions
1700   // automagically
1701 
1702   // Emit primary opcode
1703   enc_class OpcP
1704   %{
1705     emit_opcode(cbuf, $primary);
1706   %}
1707 
1708   // Emit secondary opcode
1709   enc_class OpcS
1710   %{
1711     emit_opcode(cbuf, $secondary);
1712   %}
1713 
1714   // Emit tertiary opcode
1715   enc_class OpcT
1716   %{
1717     emit_opcode(cbuf, $tertiary);
1718   %}
1719 
1720   // Emit opcode directly
1721   enc_class Opcode(immI d8)
1722   %{
1723     emit_opcode(cbuf, $d8$$constant);
1724   %}
1725 
1726   // Emit size prefix
1727   enc_class SizePrefix
1728   %{
1729     emit_opcode(cbuf, 0x66);
1730   %}
1731 
1732   enc_class reg(rRegI reg)
1733   %{
1734     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1735   %}
1736 
1737   enc_class reg_reg(rRegI dst, rRegI src)
1738   %{
1739     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1740   %}
1741 
1742   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1743   %{
1744     emit_opcode(cbuf, $opcode$$constant);
1745     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1746   %}
1747 
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    //
    // The two branch displacements below are hand-counted byte distances.
    // Each grows by one when the divisor encoding is >= 8, because the
    // instructions that name the divisor then carry a REX_B prefix.

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // Skips xor (2 bytes) + cmp (3 or 4 bytes) + je (2 bytes).
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // Skips cltd (1 byte) + the idiv emitted by the caller (2 or 3 bytes).
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
1811 
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    //
    // Unlike the 32-bit variant, every instruction here always carries a
    // REX prefix, so the hand-counted branch displacements are fixed.

    // mov    $0x8000000000000000,%rdx
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    // Skips xor (2 bytes) + cmp (4 bytes) + je (2 bytes).
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    // Skips cqto (2 bytes) + the idiv emitted by the caller (3 bytes).
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
1882 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1884   enc_class OpcSE(immI imm)
1885   %{
1886     // Emit primary opcode and set sign-extend bit
1887     // Check for 8-bit immediate, and set sign extend bit in opcode
1888     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1889       emit_opcode(cbuf, $primary | 0x02);
1890     } else {
1891       // 32-bit immediate
1892       emit_opcode(cbuf, $primary);
1893     }
1894   %}
1895 
1896   enc_class OpcSErm(rRegI dst, immI imm)
1897   %{
1898     // OpcSEr/m
1899     int dstenc = $dst$$reg;
1900     if (dstenc >= 8) {
1901       emit_opcode(cbuf, Assembler::REX_B);
1902       dstenc -= 8;
1903     }
1904     // Emit primary opcode and set sign-extend bit
1905     // Check for 8-bit immediate, and set sign extend bit in opcode
1906     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1907       emit_opcode(cbuf, $primary | 0x02);
1908     } else {
1909       // 32-bit immediate
1910       emit_opcode(cbuf, $primary);
1911     }
1912     // Emit r/m byte with secondary opcode, after primary opcode.
1913     emit_rm(cbuf, 0x3, $secondary, dstenc);
1914   %}
1915 
1916   enc_class OpcSErm_wide(rRegL dst, immI imm)
1917   %{
1918     // OpcSEr/m
1919     int dstenc = $dst$$reg;
1920     if (dstenc < 8) {
1921       emit_opcode(cbuf, Assembler::REX_W);
1922     } else {
1923       emit_opcode(cbuf, Assembler::REX_WB);
1924       dstenc -= 8;
1925     }
1926     // Emit primary opcode and set sign-extend bit
1927     // Check for 8-bit immediate, and set sign extend bit in opcode
1928     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1929       emit_opcode(cbuf, $primary | 0x02);
1930     } else {
1931       // 32-bit immediate
1932       emit_opcode(cbuf, $primary);
1933     }
1934     // Emit r/m byte with secondary opcode, after primary opcode.
1935     emit_rm(cbuf, 0x3, $secondary, dstenc);
1936   %}
1937 
1938   enc_class Con8or32(immI imm)
1939   %{
1940     // Check for 8-bit immediate, and set sign extend bit in opcode
1941     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1942       $$$emit8$imm$$constant;
1943     } else {
1944       // 32-bit immediate
1945       $$$emit32$imm$$constant;
1946     }
1947   %}
1948 
1949   enc_class opc2_reg(rRegI dst)
1950   %{
1951     // BSWAP
1952     emit_cc(cbuf, $secondary, $dst$$reg);
1953   %}
1954 
1955   enc_class opc3_reg(rRegI dst)
1956   %{
1957     // BSWAP
1958     emit_cc(cbuf, $tertiary, $dst$$reg);
1959   %}
1960 
1961   enc_class reg_opc(rRegI div)
1962   %{
1963     // INC, DEC, IDIV, IMOD, JMP indirect, ...
1964     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
1965   %}
1966 
1967   enc_class enc_cmov(cmpOp cop)
1968   %{
1969     // CMOV
1970     $$$emit8$primary;
1971     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1972   %}
1973 
1974   enc_class enc_PartialSubtypeCheck()
1975   %{
1976     Register Rrdi = as_Register(RDI_enc); // result register
1977     Register Rrax = as_Register(RAX_enc); // super class
1978     Register Rrcx = as_Register(RCX_enc); // killed
1979     Register Rrsi = as_Register(RSI_enc); // sub class
1980     Label miss;
1981     const bool set_cond_codes = true;
1982 
1983     MacroAssembler _masm(&cbuf);
1984     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
1985                                      NULL, &miss,
1986                                      /*set_cond_codes:*/ true);
1987     if ($primary) {
1988       __ xorptr(Rrdi, Rrdi);
1989     }
1990     __ bind(miss);
1991   %}
1992 
1993   enc_class clear_avx %{
1994     debug_only(int off0 = cbuf.insts_size());
1995     if (ra_->C->max_vector_size() > 16) {
1996       // Clear upper bits of YMM registers when current compiled code uses
1997       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1998       MacroAssembler _masm(&cbuf);
1999       __ vzeroupper();
2000     }
2001     debug_only(int off1 = cbuf.insts_size());
2002     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2003   %}
2004 
2005   enc_class Java_To_Runtime(method meth) %{
2006     // No relocation needed
2007     MacroAssembler _masm(&cbuf);
2008     __ mov64(r10, (int64_t) $meth$$method);
2009     __ call(r10);
2010   %}
2011 
2012   enc_class Java_To_Interpreter(method meth)
2013   %{
2014     // CALL Java_To_Interpreter
2015     // This is the instruction starting address for relocation info.
2016     cbuf.set_insts_mark();
2017     $$$emit8$primary;
2018     // CALL directly to the runtime
2019     emit_d32_reloc(cbuf,
2020                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2021                    runtime_call_Relocation::spec(),
2022                    RELOC_DISP32);
2023   %}
2024 
2025   enc_class Java_Static_Call(method meth)
2026   %{
2027     // JAVA STATIC CALL
2028     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2029     // determine who we intended to call.
2030     cbuf.set_insts_mark();
2031     $$$emit8$primary;
2032 
2033     if (!_method) {
2034       emit_d32_reloc(cbuf,
2035                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2036                      runtime_call_Relocation::spec(),
2037                      RELOC_DISP32);
2038     } else if (_optimized_virtual) {
2039       emit_d32_reloc(cbuf,
2040                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2041                      opt_virtual_call_Relocation::spec(),
2042                      RELOC_DISP32);
2043     } else {
2044       emit_d32_reloc(cbuf,
2045                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2046                      static_call_Relocation::spec(),
2047                      RELOC_DISP32);
2048     }
2049     if (_method) {
2050       // Emit stub for static call.
2051       CompiledStaticCall::emit_to_interp_stub(cbuf);
2052     }
2053   %}
2054 
2055   enc_class Java_Dynamic_Call(method meth) %{
2056     MacroAssembler _masm(&cbuf);
2057     __ ic_call((address)$meth$$method);
2058   %}
2059 
2060   enc_class Java_Compiled_Call(method meth)
2061   %{
2062     // JAVA COMPILED CALL
2063     int disp = in_bytes(Method:: from_compiled_offset());
2064 
2065     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2066     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2067 
2068     // callq *disp(%rax)
2069     cbuf.set_insts_mark();
2070     $$$emit8$primary;
2071     if (disp < 0x80) {
2072       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2073       emit_d8(cbuf, disp); // Displacement
2074     } else {
2075       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2076       emit_d32(cbuf, disp); // Displacement
2077     }
2078   %}
2079 
2080   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2081   %{
2082     // SAL, SAR, SHR
2083     int dstenc = $dst$$reg;
2084     if (dstenc >= 8) {
2085       emit_opcode(cbuf, Assembler::REX_B);
2086       dstenc -= 8;
2087     }
2088     $$$emit8$primary;
2089     emit_rm(cbuf, 0x3, $secondary, dstenc);
2090     $$$emit8$shift$$constant;
2091   %}
2092 
2093   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2094   %{
2095     // SAL, SAR, SHR
2096     int dstenc = $dst$$reg;
2097     if (dstenc < 8) {
2098       emit_opcode(cbuf, Assembler::REX_W);
2099     } else {
2100       emit_opcode(cbuf, Assembler::REX_WB);
2101       dstenc -= 8;
2102     }
2103     $$$emit8$primary;
2104     emit_rm(cbuf, 0x3, $secondary, dstenc);
2105     $$$emit8$shift$$constant;
2106   %}
2107 
2108   enc_class load_immI(rRegI dst, immI src)
2109   %{
2110     int dstenc = $dst$$reg;
2111     if (dstenc >= 8) {
2112       emit_opcode(cbuf, Assembler::REX_B);
2113       dstenc -= 8;
2114     }
2115     emit_opcode(cbuf, 0xB8 | dstenc);
2116     $$$emit32$src$$constant;
2117   %}
2118 
2119   enc_class load_immL(rRegL dst, immL src)
2120   %{
2121     int dstenc = $dst$$reg;
2122     if (dstenc < 8) {
2123       emit_opcode(cbuf, Assembler::REX_W);
2124     } else {
2125       emit_opcode(cbuf, Assembler::REX_WB);
2126       dstenc -= 8;
2127     }
2128     emit_opcode(cbuf, 0xB8 | dstenc);
2129     emit_d64(cbuf, $src$$constant);
2130   %}
2131 
2132   enc_class load_immUL32(rRegL dst, immUL32 src)
2133   %{
2134     // same as load_immI, but this time we care about zeroes in the high word
2135     int dstenc = $dst$$reg;
2136     if (dstenc >= 8) {
2137       emit_opcode(cbuf, Assembler::REX_B);
2138       dstenc -= 8;
2139     }
2140     emit_opcode(cbuf, 0xB8 | dstenc);
2141     $$$emit32$src$$constant;
2142   %}
2143 
2144   enc_class load_immL32(rRegL dst, immL32 src)
2145   %{
2146     int dstenc = $dst$$reg;
2147     if (dstenc < 8) {
2148       emit_opcode(cbuf, Assembler::REX_W);
2149     } else {
2150       emit_opcode(cbuf, Assembler::REX_WB);
2151       dstenc -= 8;
2152     }
2153     emit_opcode(cbuf, 0xC7);
2154     emit_rm(cbuf, 0x03, 0x00, dstenc);
2155     $$$emit32$src$$constant;
2156   %}
2157 
2158   enc_class load_immP31(rRegP dst, immP32 src)
2159   %{
2160     // same as load_immI, but this time we care about zeroes in the high word
2161     int dstenc = $dst$$reg;
2162     if (dstenc >= 8) {
2163       emit_opcode(cbuf, Assembler::REX_B);
2164       dstenc -= 8;
2165     }
2166     emit_opcode(cbuf, 0xB8 | dstenc);
2167     $$$emit32$src$$constant;
2168   %}
2169 
2170   enc_class load_immP(rRegP dst, immP src)
2171   %{
2172     int dstenc = $dst$$reg;
2173     if (dstenc < 8) {
2174       emit_opcode(cbuf, Assembler::REX_W);
2175     } else {
2176       emit_opcode(cbuf, Assembler::REX_WB);
2177       dstenc -= 8;
2178     }
2179     emit_opcode(cbuf, 0xB8 | dstenc);
2180     // This next line should be generated from ADLC
2181     if ($src->constant_reloc() != relocInfo::none) {
2182       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2183     } else {
2184       emit_d64(cbuf, $src$$constant);
2185     }
2186   %}
2187 
  enc_class Con32(immI src)
  %{
    // Output immediate
    // Writes the operand's constant as a 32-bit value via the ADLC
    // emit32 form.
    $$$emit32$src$$constant;
  %}
2193 
2194   enc_class Con64(immL src)
2195   %{
2196     // Output immediate
2197     emit_d64($src$$constant);
2198   %}
2199 
2200   enc_class Con32F_as_bits(immF src)
2201   %{
2202     // Output Float immediate bits
2203     jfloat jf = $src$$constant;
2204     jint jf_as_bits = jint_cast(jf);
2205     emit_d32(cbuf, jf_as_bits);
2206   %}
2207 
  enc_class Con16(immI src)
  %{
    // Output immediate
    // Writes the operand's constant as a 16-bit value via the ADLC
    // emit16 form.
    $$$emit16$src$$constant;
  %}
2213 
2214   // How is this different from Con32??? XXX
2215   enc_class Con_d32(immI src)
2216   %{
2217     emit_d32(cbuf,$src$$constant);
2218   %}
2219 
2220   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2221     // Output immediate memory reference
2222     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2223     emit_d32(cbuf, 0x00);
2224   %}
2225 
2226   enc_class lock_prefix()
2227   %{
2228     if (os::is_MP()) {
2229       emit_opcode(cbuf, 0xF0); // lock
2230     }
2231   %}
2232 
2233   enc_class REX_mem(memory mem)
2234   %{
2235     if ($mem$$base >= 8) {
2236       if ($mem$$index < 8) {
2237         emit_opcode(cbuf, Assembler::REX_B);
2238       } else {
2239         emit_opcode(cbuf, Assembler::REX_XB);
2240       }
2241     } else {
2242       if ($mem$$index >= 8) {
2243         emit_opcode(cbuf, Assembler::REX_X);
2244       }
2245     }
2246   %}
2247 
2248   enc_class REX_mem_wide(memory mem)
2249   %{
2250     if ($mem$$base >= 8) {
2251       if ($mem$$index < 8) {
2252         emit_opcode(cbuf, Assembler::REX_WB);
2253       } else {
2254         emit_opcode(cbuf, Assembler::REX_WXB);
2255       }
2256     } else {
2257       if ($mem$$index < 8) {
2258         emit_opcode(cbuf, Assembler::REX_W);
2259       } else {
2260         emit_opcode(cbuf, Assembler::REX_WX);
2261       }
2262     }
2263   %}
2264 
2265   // for byte regs
2266   enc_class REX_breg(rRegI reg)
2267   %{
2268     if ($reg$$reg >= 4) {
2269       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2270     }
2271   %}
2272 
2273   // for byte regs
2274   enc_class REX_reg_breg(rRegI dst, rRegI src)
2275   %{
2276     if ($dst$$reg < 8) {
2277       if ($src$$reg >= 4) {
2278         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2279       }
2280     } else {
2281       if ($src$$reg < 8) {
2282         emit_opcode(cbuf, Assembler::REX_R);
2283       } else {
2284         emit_opcode(cbuf, Assembler::REX_RB);
2285       }
2286     }
2287   %}
2288 
2289   // for byte regs
2290   enc_class REX_breg_mem(rRegI reg, memory mem)
2291   %{
2292     if ($reg$$reg < 8) {
2293       if ($mem$$base < 8) {
2294         if ($mem$$index >= 8) {
2295           emit_opcode(cbuf, Assembler::REX_X);
2296         } else if ($reg$$reg >= 4) {
2297           emit_opcode(cbuf, Assembler::REX);
2298         }
2299       } else {
2300         if ($mem$$index < 8) {
2301           emit_opcode(cbuf, Assembler::REX_B);
2302         } else {
2303           emit_opcode(cbuf, Assembler::REX_XB);
2304         }
2305       }
2306     } else {
2307       if ($mem$$base < 8) {
2308         if ($mem$$index < 8) {
2309           emit_opcode(cbuf, Assembler::REX_R);
2310         } else {
2311           emit_opcode(cbuf, Assembler::REX_RX);
2312         }
2313       } else {
2314         if ($mem$$index < 8) {
2315           emit_opcode(cbuf, Assembler::REX_RB);
2316         } else {
2317           emit_opcode(cbuf, Assembler::REX_RXB);
2318         }
2319       }
2320     }
2321   %}
2322 
2323   enc_class REX_reg(rRegI reg)
2324   %{
2325     if ($reg$$reg >= 8) {
2326       emit_opcode(cbuf, Assembler::REX_B);
2327     }
2328   %}
2329 
2330   enc_class REX_reg_wide(rRegI reg)
2331   %{
2332     if ($reg$$reg < 8) {
2333       emit_opcode(cbuf, Assembler::REX_W);
2334     } else {
2335       emit_opcode(cbuf, Assembler::REX_WB);
2336     }
2337   %}
2338 
2339   enc_class REX_reg_reg(rRegI dst, rRegI src)
2340   %{
2341     if ($dst$$reg < 8) {
2342       if ($src$$reg >= 8) {
2343         emit_opcode(cbuf, Assembler::REX_B);
2344       }
2345     } else {
2346       if ($src$$reg < 8) {
2347         emit_opcode(cbuf, Assembler::REX_R);
2348       } else {
2349         emit_opcode(cbuf, Assembler::REX_RB);
2350       }
2351     }
2352   %}
2353 
2354   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2355   %{
2356     if ($dst$$reg < 8) {
2357       if ($src$$reg < 8) {
2358         emit_opcode(cbuf, Assembler::REX_W);
2359       } else {
2360         emit_opcode(cbuf, Assembler::REX_WB);
2361       }
2362     } else {
2363       if ($src$$reg < 8) {
2364         emit_opcode(cbuf, Assembler::REX_WR);
2365       } else {
2366         emit_opcode(cbuf, Assembler::REX_WRB);
2367       }
2368     }
2369   %}
2370 
2371   enc_class REX_reg_mem(rRegI reg, memory mem)
2372   %{
2373     if ($reg$$reg < 8) {
2374       if ($mem$$base < 8) {
2375         if ($mem$$index >= 8) {
2376           emit_opcode(cbuf, Assembler::REX_X);
2377         }
2378       } else {
2379         if ($mem$$index < 8) {
2380           emit_opcode(cbuf, Assembler::REX_B);
2381         } else {
2382           emit_opcode(cbuf, Assembler::REX_XB);
2383         }
2384       }
2385     } else {
2386       if ($mem$$base < 8) {
2387         if ($mem$$index < 8) {
2388           emit_opcode(cbuf, Assembler::REX_R);
2389         } else {
2390           emit_opcode(cbuf, Assembler::REX_RX);
2391         }
2392       } else {
2393         if ($mem$$index < 8) {
2394           emit_opcode(cbuf, Assembler::REX_RB);
2395         } else {
2396           emit_opcode(cbuf, Assembler::REX_RXB);
2397         }
2398       }
2399     }
2400   %}
2401 
2402   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2403   %{
2404     if ($reg$$reg < 8) {
2405       if ($mem$$base < 8) {
2406         if ($mem$$index < 8) {
2407           emit_opcode(cbuf, Assembler::REX_W);
2408         } else {
2409           emit_opcode(cbuf, Assembler::REX_WX);
2410         }
2411       } else {
2412         if ($mem$$index < 8) {
2413           emit_opcode(cbuf, Assembler::REX_WB);
2414         } else {
2415           emit_opcode(cbuf, Assembler::REX_WXB);
2416         }
2417       }
2418     } else {
2419       if ($mem$$base < 8) {
2420         if ($mem$$index < 8) {
2421           emit_opcode(cbuf, Assembler::REX_WR);
2422         } else {
2423           emit_opcode(cbuf, Assembler::REX_WRX);
2424         }
2425       } else {
2426         if ($mem$$index < 8) {
2427           emit_opcode(cbuf, Assembler::REX_WRB);
2428         } else {
2429           emit_opcode(cbuf, Assembler::REX_WRXB);
2430         }
2431       }
2432     }
2433   %}
2434 
2435   enc_class reg_mem(rRegI ereg, memory mem)
2436   %{
2437     // High registers handle in encode_RegMem
2438     int reg = $ereg$$reg;
2439     int base = $mem$$base;
2440     int index = $mem$$index;
2441     int scale = $mem$$scale;
2442     int disp = $mem$$disp;
2443     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2444 
2445     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2446   %}
2447 
2448   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2449   %{
2450     int rm_byte_opcode = $rm_opcode$$constant;
2451 
2452     // High registers handle in encode_RegMem
2453     int base = $mem$$base;
2454     int index = $mem$$index;
2455     int scale = $mem$$scale;
2456     int displace = $mem$$disp;
2457 
2458     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2459                                             // working with static
2460                                             // globals
2461     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2462                   disp_reloc);
2463   %}
2464 
2465   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2466   %{
2467     int reg_encoding = $dst$$reg;
2468     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2469     int index        = 0x04;            // 0x04 indicates no index
2470     int scale        = 0x00;            // 0x00 indicates no scale
2471     int displace     = $src1$$constant; // 0x00 indicates no displacement
2472     relocInfo::relocType disp_reloc = relocInfo::none;
2473     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2474                   disp_reloc);
2475   %}
2476 
2477   enc_class neg_reg(rRegI dst)
2478   %{
2479     int dstenc = $dst$$reg;
2480     if (dstenc >= 8) {
2481       emit_opcode(cbuf, Assembler::REX_B);
2482       dstenc -= 8;
2483     }
2484     // NEG $dst
2485     emit_opcode(cbuf, 0xF7);
2486     emit_rm(cbuf, 0x3, 0x03, dstenc);
2487   %}
2488 
2489   enc_class neg_reg_wide(rRegI dst)
2490   %{
2491     int dstenc = $dst$$reg;
2492     if (dstenc < 8) {
2493       emit_opcode(cbuf, Assembler::REX_W);
2494     } else {
2495       emit_opcode(cbuf, Assembler::REX_WB);
2496       dstenc -= 8;
2497     }
2498     // NEG $dst
2499     emit_opcode(cbuf, 0xF7);
2500     emit_rm(cbuf, 0x3, 0x03, dstenc);
2501   %}
2502 
2503   enc_class setLT_reg(rRegI dst)
2504   %{
2505     int dstenc = $dst$$reg;
2506     if (dstenc >= 8) {
2507       emit_opcode(cbuf, Assembler::REX_B);
2508       dstenc -= 8;
2509     } else if (dstenc >= 4) {
2510       emit_opcode(cbuf, Assembler::REX);
2511     }
2512     // SETLT $dst
2513     emit_opcode(cbuf, 0x0F);
2514     emit_opcode(cbuf, 0x9C);
2515     emit_rm(cbuf, 0x3, 0x0, dstenc);
2516   %}
2517 
2518   enc_class setNZ_reg(rRegI dst)
2519   %{
2520     int dstenc = $dst$$reg;
2521     if (dstenc >= 8) {
2522       emit_opcode(cbuf, Assembler::REX_B);
2523       dstenc -= 8;
2524     } else if (dstenc >= 4) {
2525       emit_opcode(cbuf, Assembler::REX);
2526     }
2527     // SETNZ $dst
2528     emit_opcode(cbuf, 0x0F);
2529     emit_opcode(cbuf, 0x95);
2530     emit_rm(cbuf, 0x3, 0x0, dstenc);
2531   %}
2532 
2533 
  // Compare the longs and set -1, 0, or 1 into dst
  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  %{
    // Emits four instructions by hand: a 64-bit compare, a load of -1
    // into dst, a short jl over the rest, then setne + movzbl on dst.
    // The jl displacement is a hand-counted byte distance, so the
    // sequence after it must keep exactly this length.
    int src1enc = $src1$$reg;
    int src2enc = $src2$$reg;
    int dstenc = $dst$$reg;

    // cmpq $src1, $src2
    // A W-flavoured prefix is always emitted; src1 selects the R part and
    // src2 the B part.
    if (src1enc < 8) {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
    emit_opcode(cbuf, 0x3B);
    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jl,s done
    // Skips setne + movzbl: 3 bytes each, plus one prefix byte each when
    // the dst encoding is 4 or above.
    emit_opcode(cbuf, 0x7C);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // dst appears in both register fields, hence REX_RB for encodings 8
    // and above.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2585 
2586   enc_class Push_ResultXD(regD dst) %{
2587     MacroAssembler _masm(&cbuf);
2588     __ fstp_d(Address(rsp, 0));
2589     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2590     __ addptr(rsp, 8);
2591   %}
2592 
2593   enc_class Push_SrcXD(regD src) %{
2594     MacroAssembler _masm(&cbuf);
2595     __ subptr(rsp, 8);
2596     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2597     __ fld_d(Address(rsp, 0));
2598   %}
2599 
2600 
2601   // obj: object to lock
2602   // box: box address (header location) -- killed
2603   // tmp: rax -- killed
2604   // scr: rbx -- killed
2605   //
2606   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2607   // from i486.ad.  See that file for comments.
2608   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2609   // use the shorter encoding.  (Movl clears the high-order 32-bits).
       //
       // Fast_Lock emits one of three code sequences, selected at code-generation
       // time by the EmitSync flag:
       //   EmitSync & 1 : store unused_mark into the box, then compare rsp with NULL_WORD.
       //   EmitSync & 2 : optional biased-locking entry, then CAS the box address into
       //                  the object's mark word; on failure do the recursive-lock test.
       //   otherwise    : triage on the mark word's 0x02 bit; the non-inflated path is
       //                  like the EmitSync & 2 path, the inflated path tries to CAS
       //                  r15_thread into the monitor's owner field.
       // NOTE(review): the outcome appears to be reported through the flags left by the
       // last compare/CAS/and/test; presumably ZF=1 means success as spelled out in
       // Fast_Unlock -- confirm against the instruct that uses this encoding.
2610 
2611 
2612   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2613   %{
2614     Register objReg = as_Register((int)$obj$$reg);
2615     Register boxReg = as_Register((int)$box$$reg);
2616     Register tmpReg = as_Register($tmp$$reg);
2617     Register scrReg = as_Register($scr$$reg);
2618     MacroAssembler masm(&cbuf);
2619 
2620     // Verify uniqueness of register assignments -- necessary but not sufficient
         // (boxReg is only checked against objReg, not against tmpReg or scrReg).
2621     assert (objReg != boxReg && objReg != tmpReg &&
2622             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2623 
         // When counters are supplied, count every pass through this sequence.
2624     if (_counters != NULL) {
2625       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2626     }
2627     if (EmitSync & 1) {
2628         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2629         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
             // NOTE(review): presumably rsp is never zero, so this compare leaves ZF=0 -- confirm.
2630         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
2631     } else
2632     if (EmitSync & 2) {
2633         Label DONE_LABEL;
2634         if (UseBiasedLocking) {
2635            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2636           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2637         }
2638         // QQQ was movl...
             // tmpReg = mark word | 1; keep a copy in the box before attempting the CAS.
2639         masm.movptr(tmpReg, 0x1);
2640         masm.orptr(tmpReg, Address(objReg, 0));
2641         masm.movptr(Address(boxReg, 0), tmpReg);
2642         if (os::is_MP()) {
2643           masm.lock();
2644         }
             // tmpReg is rax (see the rax_RegI operand), the implicit comparand of cmpxchg.
2645         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2646         masm.jcc(Assembler::equal, DONE_LABEL);
2647 
2648         // Recursive locking
             // tmpReg = (observed mark - rsp) & (7 - page size), stored into the box.
             // Assuming vm_page_size() is a power of two, the result is zero only when the
             // mark lies within one page at or above rsp and its low three bits are clear.
2649         masm.subptr(tmpReg, rsp);
2650         masm.andptr(tmpReg, 7 - os::vm_page_size());
2651         masm.movptr(Address(boxReg, 0), tmpReg);
2652 
2653         masm.bind(DONE_LABEL);
2654         masm.nop(); // avoid branch to branch
2655     } else {
             // NOTE(review): Egress is declared but never bound or referenced in this block.
2656         Label DONE_LABEL, IsInflated, Egress;
2657 
2658         masm.movptr(tmpReg, Address(objReg, 0)) ;
2659         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2660         masm.jcc   (Assembler::notZero, IsInflated) ;
2661 
2662         // it's stack-locked, biased or neutral
2663         // TODO: optimize markword triage order to reduce the number of
2664         // conditional branches in the most common cases.
2665         // Beware -- there's a subtle invariant that fetch of the markword
2666         // at [FETCH], below, will never observe a biased encoding (*101b).
2667         // If this invariant is not held we'll suffer exclusion (safety) failure.
2668 
2669         if (UseBiasedLocking && !UseOptoBiasInlining) {
2670           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2671           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2672         }
2673 
2674         // was q will it destroy high?
             // tmpReg = mark word | 1; keep a copy in the box, then CAS the box address
             // into the object's mark word (comparand is tmpReg, i.e. rax).
2675         masm.orl   (tmpReg, 1) ;
2676         masm.movptr(Address(boxReg, 0), tmpReg) ;
2677         if (os::is_MP()) { masm.lock(); }
2678         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2679         if (_counters != NULL) {
2680            masm.cond_inc32(Assembler::equal,
2681                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2682         }
2683         masm.jcc   (Assembler::equal, DONE_LABEL);
2684 
2685         // Recursive locking
             // Same computation as in the EmitSync & 2 variant: the box receives
             // (observed mark - rsp) & (7 - page size).
2686         masm.subptr(tmpReg, rsp);
2687         masm.andptr(tmpReg, 7 - os::vm_page_size());
2688         masm.movptr(Address(boxReg, 0), tmpReg);
2689         if (_counters != NULL) {
2690            masm.cond_inc32(Assembler::equal,
2691                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2692         }
2693         masm.jmp   (DONE_LABEL) ;
2694 
2695         masm.bind  (IsInflated) ;
2696         // It's inflated
2697 
2698         // TODO: someday avoid the ST-before-CAS penalty by
2699         // relocating (deferring) the following ST.
2700         // We should also think about trying a CAS without having
2701         // fetched _owner.  If the CAS is successful we may
2702         // avoid an RTO->RTS upgrade on the $line.
2703         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2704         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2705 
             // boxReg now holds the mark word loaded above, which still has the 0x02 bit set;
             // presumably the "-2" in the displacements below cancels that tag bit.
2706         masm.mov    (boxReg, tmpReg) ;
2707         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
             // A non-zero owner leaves ZF=0 and exits through DONE_LABEL.
2708         masm.testptr(tmpReg, tmpReg) ;
2709         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
2710 
2711         // It's inflated and appears unlocked
             // tmpReg (rax) is zero here, so the CAS installs r15_thread only if the owner
             // field is still zero.
2712         if (os::is_MP()) { masm.lock(); }
2713         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2714         // Intentional fall-through into DONE_LABEL ...
2715 
2716         masm.bind  (DONE_LABEL) ;
2717         masm.nop   () ;                 // avoid jmp to jmp
2718     }
2719   %}
2720 
2721   // obj: object to unlock
2722   // box: box address (displaced header location), killed
2723   // RBX: killed tmp; cannot be obj nor box
       //
       // Fast_Unlock emits one of three code sequences, selected at code-generation
       // time by the EmitSync flag:
       //   EmitSync & 4 : only compare rsp with 0.
       //   EmitSync & 8 : optional biased-locking exit, then restore the displaced header
       //                  with a CAS unless the box holds 0 (recursive unlock).
       //   otherwise    : handles recursive, inflated and stack-locked cases separately.
       // The comments on LGoSlowPath/LSuccess below state the flag convention:
       // ZF=1 indicates success, ZF=0 indicates failure.
2724   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2725   %{
2726 
2727     Register objReg = as_Register($obj$$reg);
2728     Register boxReg = as_Register($box$$reg);
2729     Register tmpReg = as_Register($tmp$$reg);
2730     MacroAssembler masm(&cbuf);
2731 
2732     if (EmitSync & 4) {
            // NOTE(review): presumably rsp is never zero, so this leaves ZF=0 -- confirm.
2733        masm.cmpptr(rsp, 0) ;
2734     } else
2735     if (EmitSync & 8) {
2736        Label DONE_LABEL;
2737        if (UseBiasedLocking) {
2738          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2739        }
2740 
2741        // Check whether the displaced header is 0
2742        //(=> recursive unlock)
2743        masm.movptr(tmpReg, Address(boxReg, 0));
2744        masm.testptr(tmpReg, tmpReg);
2745        masm.jcc(Assembler::zero, DONE_LABEL);
2746 
2747        // If not recursive lock, reset the header to displaced header
2748        if (os::is_MP()) {
2749          masm.lock();
2750        }
2751        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2752        masm.bind(DONE_LABEL);
2753        masm.nop(); // avoid branch to branch
2754     } else {
2755        Label DONE_LABEL, Stacked, CheckSucc ;
2756 
2757        if (UseBiasedLocking && !UseOptoBiasInlining) {
2758          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2759        }
2760 
            // tmpReg = mark word.  A zero displaced header in the box means a recursive
            // unlock: exit with ZF=1 from the compare.  Otherwise the 0x02 bit of the
            // mark word separates the inflated case from the stack-locked one.
2761        masm.movptr(tmpReg, Address(objReg, 0)) ;
2762        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
2763        masm.jcc   (Assembler::zero, DONE_LABEL) ;
2764        masm.testl (tmpReg, 0x02) ;
2765        masm.jcc   (Assembler::zero, Stacked) ;
2766 
2767        // It's inflated
            // tmpReg still has the 0x02 bit set; presumably the "-2" in each displacement
            // cancels that tag bit.
            // boxReg = (owner ^ r15_thread) | recursions; non-zero exits with ZF=0.
2768        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2769        masm.xorptr(boxReg, r15_thread) ;
2770        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2771        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
            // boxReg = cxq | EntryList; non-zero goes to CheckSucc.  Otherwise clear the
            // owner field and exit; ZF is still 1 from the orptr.
2772        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2773        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2774        masm.jcc   (Assembler::notZero, CheckSucc) ;
2775        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2776        masm.jmp   (DONE_LABEL) ;
2777 
            // The successor check is emitted only when EmitSync bit 65536 is clear;
            // when the bit is set, CheckSucc is bound just before DONE_LABEL instead.
2778        if ((EmitSync & 65536) == 0) {
2779          Label LSuccess, LGoSlowPath ;
2780          masm.bind  (CheckSucc) ;
2781          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2782          masm.jcc   (Assembler::zero, LGoSlowPath) ;
2783 
2784          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
2785          // the explicit ST;MEMBAR combination, but masm doesn't currently support
2786          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
2787          // are all faster when the write buffer is populated.
2788          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2789          if (os::is_MP()) {
2790             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
2791          }
              // Re-check the successor field after the owner field has been cleared.
2792          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2793          masm.jcc   (Assembler::notZero, LSuccess) ;
2794 
              // Successor field is zero: try to CAS r15_thread back into the owner field
              // (comparand rax = 0).  A failed CAS branches to LSuccess.
2795          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
2796          if (os::is_MP()) { masm.lock(); }
2797          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
2798          masm.jcc   (Assembler::notEqual, LSuccess) ;
2799          // Intentional fall-through into slow-path
2800 
2801          masm.bind  (LGoSlowPath) ;
2802          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
2803          masm.jmp   (DONE_LABEL) ;
2804 
2805          masm.bind  (LSuccess) ;
2806          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
2807          masm.jmp   (DONE_LABEL) ;
2808        }
2809 
            // Stack-locked: CAS the displaced header (tmpReg) back into the mark word,
            // expecting the mark word to equal the box address held in rax.
2810        masm.bind  (Stacked) ;
2811        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
2812        if (os::is_MP()) { masm.lock(); }
2813        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2814 
2815        if (EmitSync & 65536) {
2816           masm.bind (CheckSucc) ;
2817        }
2818        masm.bind(DONE_LABEL);
2819        if (EmitSync & 32768) {
2820           masm.nop();                      // avoid branch to branch
2821        }
2822     }
2823   %}
2824 
2825 
2826   enc_class enc_rethrow()
2827   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub(),
         // tagged with a runtime-call relocation.
2828     cbuf.set_insts_mark();
2829     emit_opcode(cbuf, 0xE9); // jmp entry
         // insts_end() is read after the opcode byte has been emitted; the extra -4
         // makes the displacement relative to the end of the 4-byte operand.
2830     emit_d32_reloc(cbuf,
2831                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
2832                    runtime_call_Relocation::spec(),
2833                    RELOC_DISP32);
2834   %}
2835 
2836 %}
2837 
2838 
2839 
2840 //----------FRAME--------------------------------------------------------------
2841 // Definition of frame structure and management information.
2842 //
2843 //  S T A C K   L A Y O U T    Allocators stack-slot number
2844 //                             |   (to get allocators register number
2845 //  G  Owned by    |        |  v    add OptoReg::stack0())
2846 //  r   CALLER     |        |
2847 //  o     |        +--------+      pad to even-align allocators stack-slot
2848 //  w     V        |  pad0  |        numbers; owned by CALLER
2849 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2850 //  h     ^        |   in   |  5
2851 //        |        |  args  |  4   Holes in incoming args owned by SELF
2852 //  |     |        |        |  3
2853 //  |     |        +--------+
2854 //  V     |        | old out|      Empty on Intel, window on Sparc
2855 //        |    old |preserve|      Must be even aligned.
2856 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2857 //        |        |   in   |  3   area for Intel ret address
2858 //     Owned by    |preserve|      Empty on Sparc.
2859 //       SELF      +--------+
2860 //        |        |  pad2  |  2   pad to align old SP
2861 //        |        +--------+  1
2862 //        |        | locks  |  0
2863 //        |        +--------+----> OptoReg::stack0(), even aligned
2864 //        |        |  pad1  | 11   pad to align new SP
2865 //        |        +--------+
2866 //        |        |        | 10
2867 //        |        | spills |  9   spills
2868 //        V        |        |  8   (pad0 slot for callee)
2869 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2870 //        ^        |  out   |  7
2871 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2872 //     Owned by    +--------+
2873 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2874 //        |    new |preserve|      Must be even-aligned.
2875 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2876 //        |        |        |
2877 //
2878 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2879 //         known from SELF's arguments and the Java calling convention.
2880 //         Region 6-7 is determined per call site.
2881 // Note 2: If the calling convention leaves holes in the incoming argument
2882 //         area, those holes are owned by SELF.  Holes in the outgoing area
2883 //         are owned by the CALLEE.  Holes should not be necessary in the
2884 //         incoming area, as the Java calling convention is completely under
2885 //         the control of the AD file.  Doubles can be sorted and packed to
2886 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2887 //         varargs C calling conventions.
2888 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2889 //         even aligned with pad0 as needed.
2890 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2891 //         region 6-11 is even aligned; it may be padded out more so that
2892 //         the region from SP to FP meets the minimum stack alignment.
2893 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2894 //         alignment.  Region 11, pad1, may be dynamically extended so that
2895 //         SP meets the minimum alignment.
2896 
2897 frame
2898 %{
       // Frame layout and calling-convention parameters for this port: stack
       // direction, dedicated registers, preserved slots, return-address location,
       // argument placement and return-value registers.
2899   // What direction does stack grow in (assumed to be same for C & Java)
2900   stack_direction(TOWARDS_LOW);
2901 
2902   // These three registers define part of the calling convention
2903   // between compiled code and the interpreter.
       // NOTE(review): only two registers (RAX and RBX) are named below.
2904   inline_cache_reg(RAX);                // Inline Cache Register
2905   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2906                                         // calling interpreter
2907 
2908   // Optional: name the operand used by cisc-spilling to access
2909   // [stack_pointer + offset]
2910   cisc_spilling_operand_name(indOffset32);
2911 
2912   // Number of stack slots consumed by locking an object
2913   sync_stack_slots(2);
2914 
2915   // Compiled code's Frame Pointer
2916   frame_pointer(RSP);
2917 
2918   // Interpreter stores its frame pointer in a register which is
2919   // stored to the stack by I2CAdaptors.
2920   // I2CAdaptors convert from interpreted java to compiled java.
2921   interpreter_frame_pointer(RBP);
2922 
2923   // Stack alignment requirement
2924   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2925 
2926   // Number of stack slots between incoming argument block and the start of
2927   // a new frame.  The PROLOG must add this many slots to the stack.  The
2928   // EPILOG must remove this many slots.  amd64 needs two slots for
2929   // return address.
       // NOTE(review): the value below is 4 slots (plus 2 more when VerifyStackAtCalls
       // is set), not the 2 mentioned above; presumably the other two slots hold the
       // saved frame pointer -- confirm against the prolog.
2930   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
2931 
2932   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2933   // for calls to C.  Supports the var-args backing area for register parms.
2934   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
2935 
2936   // The after-PROLOG location of the return address.  Location of
2937   // return address specifies a type (REG or STACK) and a number
2938   // representing the register number (i.e. - use a register name) or
2939   // stack slot.
2940   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2941   // Otherwise, it is above the locks and verification slot and alignment word
       // Computed as: (in_preserve slots + fixed slots), rounded up to the stack
       // alignment expressed in slots, minus 2.
2942   return_addr(STACK - 2 +
2943               round_to((Compile::current()->in_preserve_stack_slots() +
2944                         Compile::current()->fixed_slots()),
2945                        stack_alignment_in_slots()));
2946 
2947   // Body of function which returns an integer array locating
2948   // arguments either in registers or in stack slots.  Passed an array
2949   // of ideal registers called "sig" and a "length" count.  Stack-slot
2950   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2951   // arguments for a CALLEE.  Incoming stack arguments are
2952   // automatically biased by the preserve_stack_slots field above.
2953 
2954   calling_convention
2955   %{
2956     // No difference between ingoing/outgoing just pass false
2957     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2958   %}
2959 
2960   c_calling_convention
2961   %{
2962     // This is obviously always outgoing
         // The return value of c_calling_convention() is deliberately discarded.
2963     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
2964   %}
2965 
2966   // Location of compiled Java return values.  Same as C for now.
       // Looks up the (hi, lo) register pair for ideal_reg in two parallel tables
       // indexed by ideal register opcode; the first two entries are placeholders.
2967   return_value
2968   %{
         // NOTE(review): the tables carry an Op_RegN entry in the slot before Op_RegI,
         // but this assert rejects anything below Op_RegI -- confirm whether Op_RegN
         // return values are expected to reach this code.
2969     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2970            "only return normal values");
2971 
         // Low half: RAX for N/I/P/L, XMM0 for F/D.
2972     static const int lo[Op_RegL + 1] = {
2973       0,
2974       0,
2975       RAX_num,  // Op_RegN
2976       RAX_num,  // Op_RegI
2977       RAX_num,  // Op_RegP
2978       XMM0_num, // Op_RegF
2979       XMM0_num, // Op_RegD
2980       RAX_num   // Op_RegL
2981     };
         // High half: present only for P, D and L; OptoReg::Bad for N, I and F.
2982     static const int hi[Op_RegL + 1] = {
2983       0,
2984       0,
2985       OptoReg::Bad, // Op_RegN
2986       OptoReg::Bad, // Op_RegI
2987       RAX_H_num,    // Op_RegP
2988       OptoReg::Bad, // Op_RegF
2989       XMM0b_num,    // Op_RegD
2990       RAX_H_num     // Op_RegL
2991     };
2992     // Excluded flags and vector registers.
2993     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
2994     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
2995   %}
2996 %}
2997 
2998 //----------ATTRIBUTES---------------------------------------------------------
2999 //----------Operand Attributes-------------------------------------------------
3000 op_attrib op_cost(0);        // Required cost attribute
     // Operands below set their own value with op_cost(N); the 0 here is presumably
     // the default for operands that do not (e.g. immI2) -- confirm in the ADLC docs.
3001 
3002 //----------Instruction Attributes---------------------------------------------
3003 ins_attrib ins_cost(100);       // Required cost attribute
3004 ins_attrib ins_size(8);         // Required size attribute (in bits)
3005 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3006                                 // a non-matching short branch variant
3007                                 // of some long branch?
3008 ins_attrib ins_alignment(1);    // Required alignment attribute (must
3009                                 // be a power of 2) specifies the
3010                                 // alignment that some part of the
3011                                 // instruction (not necessarily the
3012                                 // start) requires.  If > 1, a
3013                                 // compute_padding() function must be
3014                                 // provided for the instruction
3015 
3016 //----------OPERANDS-----------------------------------------------------------
3017 // Operand definitions must precede instruction definitions for correct parsing
3018 // in the ADLC because operands constitute user defined types which are used in
3019 // instruction definitions.
3020 
3021 //----------Simple Operands----------------------------------------------------
3022 // Immediate Operands
3023 // Integer Immediate
     // Matches any ConI node (no predicate); op_cost 10.
3024 operand immI()
3025 %{
3026   match(ConI);
3027 
3028   op_cost(10);
3029   format %{ %}
3030   interface(CONST_INTER);
3031 %}
3032 
3033 // Constant for test vs zero
     // Matches a ConI whose value is exactly 0; op_cost 0.
3034 operand immI0()
3035 %{
3036   predicate(n->get_int() == 0);
3037   match(ConI);
3038 
3039   op_cost(0);
3040   format %{ %}
3041   interface(CONST_INTER);
3042 %}
3043 
3044 // Constant for increment
     // Matches a ConI whose value is exactly 1; op_cost 0.
3045 operand immI1()
3046 %{
3047   predicate(n->get_int() == 1);
3048   match(ConI);
3049 
3050   op_cost(0);
3051   format %{ %}
3052   interface(CONST_INTER);
3053 %}
3054 
3055 // Constant for decrement
     // Matches a ConI whose value is exactly -1; op_cost 0.
3056 operand immI_M1()
3057 %{
3058   predicate(n->get_int() == -1);
3059   match(ConI);
3060 
3061   op_cost(0);
3062   format %{ %}
3063   interface(CONST_INTER);
3064 %}
3065 
3066 // Valid scale values for addressing modes
     // Matches a ConI in the inclusive range 0..3; no explicit op_cost.
3067 operand immI2()
3068 %{
3069   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3070   match(ConI);
3071 
3072   format %{ %}
3073   interface(CONST_INTER);
3074 %}
3075 
3076 operand immI8()
3077 %{
       // Integer constant that fits in a signed byte: -128 <= value <= 127; op_cost 5.
3078   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
3079   match(ConI);
3080 
3081   op_cost(5);
3082   format %{ %}
3083   interface(CONST_INTER);
3084 %}
3085 
3086 operand immI16()
3087 %{
       // Integer constant that fits in a signed 16-bit value: -32768..32767; op_cost 10.
3088   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3089   match(ConI);
3090 
3091   op_cost(10);
3092   format %{ %}
3093   interface(CONST_INTER);
3094 %}
3095 
3096 // Constant for long shifts
     // Matches a ConI whose value is exactly 32; op_cost 0.
3097 operand immI_32()
3098 %{
3099   predicate( n->get_int() == 32 );
3100   match(ConI);
3101 
3102   op_cost(0);
3103   format %{ %}
3104   interface(CONST_INTER);
3105 %}
3106 
3107 // Constant for long shifts
     // Matches a ConI whose value is exactly 64; op_cost 0.
3108 operand immI_64()
3109 %{
3110   predicate( n->get_int() == 64 );
3111   match(ConI);
3112 
3113   op_cost(0);
3114   format %{ %}
3115   interface(CONST_INTER);
3116 %}
3117 
3118 // Pointer Immediate
     // Matches any ConP node (no predicate); op_cost 10.
3119 operand immP()
3120 %{
3121   match(ConP);
3122 
3123   op_cost(10);
3124   format %{ %}
3125   interface(CONST_INTER);
3126 %}
3127 
3128 // NULL Pointer Immediate
     // Matches a ConP whose pointer value is 0; op_cost 5.
3129 operand immP0()
3130 %{
3131   predicate(n->get_ptr() == 0);
3132   match(ConP);
3133 
3134   op_cost(5);
3135   format %{ %}
3136   interface(CONST_INTER);
3137 %}
3138 
3139 // Narrow Pointer Immediate
     // Matches any ConN node (no predicate); op_cost 10.
3140 operand immN() %{
3141   match(ConN);
3142 
3143   op_cost(10);
3144   format %{ %}
3145   interface(CONST_INTER);
3146 %}
3147 
3148 operand immNKlass() %{
       // Matches any ConNKlass node (no predicate); op_cost 10.
3149   match(ConNKlass);
3150 
3151   op_cost(10);
3152   format %{ %}
3153   interface(CONST_INTER);
3154 %}
3155 
3156 // NULL Pointer Immediate
     // Narrow form: matches a ConN whose get_narrowcon() value is 0; op_cost 5.
3157 operand immN0() %{
3158   predicate(n->get_narrowcon() == 0);
3159   match(ConN);
3160 
3161   op_cost(5);
3162   format %{ %}
3163   interface(CONST_INTER);
3164 %}
3165 
3166 operand immP31()
3167 %{
       // Pointer constant that carries no relocation and has no bits set at or above
       // bit 31, i.e. its value lies in 0 .. 2^31-1; op_cost 5.
3168   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3169             && (n->get_ptr() >> 31) == 0);
3170   match(ConP);
3171 
3172   op_cost(5);
3173   format %{ %}
3174   interface(CONST_INTER);
3175 %}
3176 
3177 
3178 // Long Immediate
     // Matches any ConL node (no predicate); op_cost 20.
3179 operand immL()
3180 %{
3181   match(ConL);
3182 
3183   op_cost(20);
3184   format %{ %}
3185   interface(CONST_INTER);
3186 %}
3187 
3188 // Long Immediate 8-bit
     // Matches a ConL in the signed-byte range -128..127; op_cost 5.
3189 operand immL8()
3190 %{
3191   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3192   match(ConL);
3193 
3194   op_cost(5);
3195   format %{ %}
3196   interface(CONST_INTER);
3197 %}
3198 
3199 // Long Immediate 32-bit unsigned
     // Matches a ConL that is unchanged by truncation to unsigned int; op_cost 10.
3200 operand immUL32()
3201 %{
3202   predicate(n->get_long() == (unsigned int) (n->get_long()));
3203   match(ConL);
3204 
3205   op_cost(10);
3206   format %{ %}
3207   interface(CONST_INTER);
3208 %}
3209 
3210 // Long Immediate 32-bit signed
     // Matches a ConL that is unchanged by truncation to int; op_cost 15.
3211 operand immL32()
3212 %{
3213   predicate(n->get_long() == (int) (n->get_long()));
3214   match(ConL);
3215 
3216   op_cost(15);
3217   format %{ %}
3218   interface(CONST_INTER);
3219 %}
3220 
3221 // Long Immediate zero
     // Matches a ConL whose value is exactly 0; op_cost 10.
3222 operand immL0()
3223 %{
3224   predicate(n->get_long() == 0L);
3225   match(ConL);
3226 
3227   op_cost(10);
3228   format %{ %}
3229   interface(CONST_INTER);
3230 %}
3231 
3232 // Constant for increment
     // Matches a ConL whose value is exactly 1; no explicit op_cost.
3233 operand immL1()
3234 %{
3235   predicate(n->get_long() == 1);
3236   match(ConL);
3237 
3238   format %{ %}
3239   interface(CONST_INTER);
3240 %}
3241 
3242 // Constant for decrement
     // Matches a ConL whose value is exactly -1; no explicit op_cost.
3243 operand immL_M1()
3244 %{
3245   predicate(n->get_long() == -1);
3246   match(ConL);
3247 
3248   format %{ %}
3249   interface(CONST_INTER);
3250 %}
3251 
3252 // Long Immediate: the value 10
     // Matches a ConL whose value is exactly 10; no explicit op_cost.
3253 operand immL10()
3254 %{
3255   predicate(n->get_long() == 10);
3256   match(ConL);
3257 
3258   format %{ %}
3259   interface(CONST_INTER);
3260 %}
3261 
3262 // Long immediate from 0 to 127.
3263 // Used for a shorter form of long mul by 10.
     // Matches a ConL with 0 <= value <= 127; op_cost 10.
3264 operand immL_127()
3265 %{
3266   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3267   match(ConL);
3268 
3269   op_cost(10);
3270   format %{ %}
3271   interface(CONST_INTER);
3272 %}
3273 
3274 // Long Immediate: low 32-bit mask
     // Matches a ConL whose value is exactly 0xFFFFFFFF; op_cost 20.
3275 operand immL_32bits()
3276 %{
3277   predicate(n->get_long() == 0xFFFFFFFFL);
3278   match(ConL);
3279   op_cost(20);
3280 
3281   format %{ %}
3282   interface(CONST_INTER);
3283 %}
3284 
3285 // Float Immediate zero
     // Matches a ConF for which jint_cast() of the value equals 0; op_cost 5.
     // NOTE(review): presumably jint_cast() reinterprets the float's bits, in which
     // case only +0.0f matches and -0.0f does not -- confirm.
3286 operand immF0()
3287 %{
3288   predicate(jint_cast(n->getf()) == 0);
3289   match(ConF);
3290 
3291   op_cost(5);
3292   format %{ %}
3293   interface(CONST_INTER);
3294 %}
3295 
3296 // Float Immediate
     // Matches any ConF node (no predicate); op_cost 15.
3297 operand immF()
3298 %{
3299   match(ConF);
3300 
3301   op_cost(15);
3302   format %{ %}
3303   interface(CONST_INTER);
3304 %}
3305 
3306 // Double Immediate zero
     // Matches a ConD for which jlong_cast() of the value equals 0; op_cost 5.
     // NOTE(review): presumably jlong_cast() reinterprets the double's bits, in which
     // case only +0.0 matches and -0.0 does not -- confirm.
3307 operand immD0()
3308 %{
3309   predicate(jlong_cast(n->getd()) == 0);
3310   match(ConD);
3311 
3312   op_cost(5);
3313   format %{ %}
3314   interface(CONST_INTER);
3315 %}
3316 
3317 // Double Immediate
     // Matches any ConD node (no predicate); op_cost 15.
3318 operand immD()
3319 %{
3320   match(ConD);
3321 
3322   op_cost(15);
3323   format %{ %}
3324   interface(CONST_INTER);
3325 %}
3326 
3327 // Immediates for special shifts (sign extend)
3328 
3329 // Constant 16 (one of the special shift immediates)
     // Matches a ConI whose value is exactly 16; no explicit op_cost.
3330 operand immI_16()
3331 %{
3332   predicate(n->get_int() == 16);
3333   match(ConI);
3334 
3335   format %{ %}
3336   interface(CONST_INTER);
3337 %}
3338 
3339 operand immI_24()
3340 %{
       // Matches a ConI whose value is exactly 24; no explicit op_cost.
3341   predicate(n->get_int() == 24);
3342   match(ConI);
3343 
3344   format %{ %}
3345   interface(CONST_INTER);
3346 %}
3347 
3348 // Constant for byte-wide masking
     // Matches a ConI whose value is exactly 255; no explicit op_cost.
3349 operand immI_255()
3350 %{
3351   predicate(n->get_int() == 255);
3352   match(ConI);
3353 
3354   format %{ %}
3355   interface(CONST_INTER);
3356 %}
3357 
3358 // Constant for short-wide masking
     // Matches a ConI whose value is exactly 65535; no explicit op_cost.
3359 operand immI_65535()
3360 %{
3361   predicate(n->get_int() == 65535);
3362   match(ConI);
3363 
3364   format %{ %}
3365   interface(CONST_INTER);
3366 %}
3367 
3368 // Constant for byte-wide masking
     // Long form: matches a ConL whose value is exactly 255; no explicit op_cost.
3369 operand immL_255()
3370 %{
3371   predicate(n->get_long() == 255);
3372   match(ConL);
3373 
3374   format %{ %}
3375   interface(CONST_INTER);
3376 %}
3377 
3378 // Constant for short-wide masking
     // Long form: matches a ConL whose value is exactly 65535; no explicit op_cost.
3379 operand immL_65535()
3380 %{
3381   predicate(n->get_long() == 65535);
3382   match(ConL);
3383 
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 // Register Operands
3389 // Integer Register
     // Allocated from register class int_reg.  Besides RegI it also matches the
     // single-register integer operands listed below.
3390 operand rRegI()
3391 %{
3392   constraint(ALLOC_IN_RC(int_reg));
3393   match(RegI);
3394 
3395   match(rax_RegI);
3396   match(rbx_RegI);
3397   match(rcx_RegI);
3398   match(rdx_RegI);
3399   match(rdi_RegI);
3400 
3401   format %{ %}
3402   interface(REG_INTER);
3403 %}
3404 
3405 // Special Registers
     // Integer value allocated from register class int_rax_reg; printed as "RAX".
3406 operand rax_RegI()
3407 %{
3408   constraint(ALLOC_IN_RC(int_rax_reg));
3409   match(RegI);
3410   match(rRegI);
3411 
3412   format %{ "RAX" %}
3413   interface(REG_INTER);
3414 %}
3415 
3416 // Special Registers
     // Integer value allocated from register class int_rbx_reg; printed as "RBX".
3417 operand rbx_RegI()
3418 %{
3419   constraint(ALLOC_IN_RC(int_rbx_reg));
3420   match(RegI);
3421   match(rRegI);
3422 
3423   format %{ "RBX" %}
3424   interface(REG_INTER);
3425 %}
3426 
3427 operand rcx_RegI()
3428 %{
       // Integer value allocated from register class int_rcx_reg; printed as "RCX".
3429   constraint(ALLOC_IN_RC(int_rcx_reg));
3430   match(RegI);
3431   match(rRegI);
3432 
3433   format %{ "RCX" %}
3434   interface(REG_INTER);
3435 %}
3436 
3437 operand rdx_RegI()
3438 %{
       // Integer value allocated from register class int_rdx_reg; printed as "RDX".
3439   constraint(ALLOC_IN_RC(int_rdx_reg));
3440   match(RegI);
3441   match(rRegI);
3442 
3443   format %{ "RDX" %}
3444   interface(REG_INTER);
3445 %}
3446 
3447 operand rdi_RegI()
3448 %{
       // Integer value allocated from register class int_rdi_reg; printed as "RDI".
3449   constraint(ALLOC_IN_RC(int_rdi_reg));
3450   match(RegI);
3451   match(rRegI);
3452 
3453   format %{ "RDI" %}
3454   interface(REG_INTER);
3455 %}
3456 
3457 operand no_rcx_RegI()
3458 %{
       // Integer value allocated from register class int_no_rcx_reg.  Matches the
       // rax/rbx/rdx/rdi single-register operands, but not rcx_RegI.
3459   constraint(ALLOC_IN_RC(int_no_rcx_reg));
3460   match(RegI);
3461   match(rax_RegI);
3462   match(rbx_RegI);
3463   match(rdx_RegI);
3464   match(rdi_RegI);
3465 
3466   format %{ %}
3467   interface(REG_INTER);
3468 %}
3469 
3470 operand no_rax_rdx_RegI()
3471 %{
       // Integer value allocated from register class int_no_rax_rdx_reg.  Matches the
       // rbx/rcx/rdi single-register operands, but not rax_RegI or rdx_RegI.
3472   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
3473   match(RegI);
3474   match(rbx_RegI);
3475   match(rcx_RegI);
3476   match(rdi_RegI);
3477 
3478   format %{ %}
3479   interface(REG_INTER);
3480 %}
3481 
3482 // Pointer Register
     // Allocated from register class any_reg.  Matches RegP, every single-register
     // pointer operand listed below, and rRegP.
3483 operand any_RegP()
3484 %{
3485   constraint(ALLOC_IN_RC(any_reg));
3486   match(RegP);
3487   match(rax_RegP);
3488   match(rbx_RegP);
3489   match(rdi_RegP);
3490   match(rsi_RegP);
3491   match(rbp_RegP);
3492   match(r15_RegP);
3493   match(rRegP);
3494 
3495   format %{ %}
3496   interface(REG_INTER);
3497 %}
3498 
3499 operand rRegP()
3500 %{
       // General pointer register, allocated from register class ptr_reg.  Matches
       // RegP and the single-register pointer operands listed below.
3501   constraint(ALLOC_IN_RC(ptr_reg));
3502   match(RegP);
3503   match(rax_RegP);
3504   match(rbx_RegP);
3505   match(rdi_RegP);
3506   match(rsi_RegP);
3507   match(rbp_RegP);
3508   match(r15_RegP);  // See Q&A below about r15_RegP.
3509 
3510   format %{ %}
3511   interface(REG_INTER);
3512 %}
3513 
3514 operand rRegN() %{
       // Narrow-pointer register (RegN); allocated from int_reg, the same class
       // that rRegI uses.
3515   constraint(ALLOC_IN_RC(int_reg));
3516   match(RegN);
3517 
3518   format %{ %}
3519   interface(REG_INTER);
3520 %}
3521 
3522 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3523 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3524 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3525 // The output of an instruction is controlled by the allocator, which respects
3526 // register class masks, not match rules.  Unless an instruction mentions
3527 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3528 // by the allocator as an output.
3529 
3530 operand no_rax_RegP()
3531 %{
       // Pointer register allocated from register class ptr_no_rax_reg.  Matches the
       // rbx/rsi/rdi single-register pointer operands.
3532   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
3533   match(RegP);
3534   match(rbx_RegP);
3535   match(rsi_RegP);
3536   match(rdi_RegP);
3537 
3538   format %{ %}
3539   interface(REG_INTER);
3540 %}
3541 
3542 operand no_rbp_RegP()
3543 %{
       // Pointer register allocated from register class ptr_no_rbp_reg.  Matches the
       // rbx/rsi/rdi single-register pointer operands.
3544   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
3545   match(RegP);
3546   match(rbx_RegP);
3547   match(rsi_RegP);
3548   match(rdi_RegP);
3549 
3550   format %{ %}
3551   interface(REG_INTER);
3552 %}
3553 
3554 operand no_rax_rbx_RegP()
3555 %{
       // Pointer register allocated from register class ptr_no_rax_rbx_reg.  Matches
       // the rsi/rdi single-register pointer operands.
3556   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
3557   match(RegP);
3558   match(rsi_RegP);
3559   match(rdi_RegP);
3560 
3561   format %{ %}
3562   interface(REG_INTER);
3563 %}
3564 
3565 // Special Registers
3566 // Return a pointer value
     // Pointer register allocated from register class ptr_rax_reg.
3567 operand rax_RegP()
3568 %{
3569   constraint(ALLOC_IN_RC(ptr_rax_reg));
3570   match(RegP);
3571   match(rRegP);
3572 
3573   format %{ %}
3574   interface(REG_INTER);
3575 %}
3576 
3577 // Special Registers
3578 // Return a compressed pointer value
     // Narrow-pointer register allocated from int_rax_reg, the class rax_RegI uses.
3579 operand rax_RegN()
3580 %{
3581   constraint(ALLOC_IN_RC(int_rax_reg));
3582   match(RegN);
3583   match(rRegN);
3584 
3585   format %{ %}
3586   interface(REG_INTER);
3587 %}
3588 
3589 // Used in AtomicAdd
     // Pointer register allocated from register class ptr_rbx_reg.
3590 operand rbx_RegP()
3591 %{
3592   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3593   match(RegP);
3594   match(rRegP);
3595 
3596   format %{ %}
3597   interface(REG_INTER);
3598 %}
3599 
3600 operand rsi_RegP()
3601 %{
       // Pointer register allocated from register class ptr_rsi_reg.
3602   constraint(ALLOC_IN_RC(ptr_rsi_reg));
3603   match(RegP);
3604   match(rRegP);
3605 
3606   format %{ %}
3607   interface(REG_INTER);
3608 %}
3609 
3610 // Used in rep stosq
     // Pointer register allocated from register class ptr_rdi_reg.
3611 operand rdi_RegP()
3612 %{
3613   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3614   match(RegP);
3615   match(rRegP);
3616 
3617   format %{ %}
3618   interface(REG_INTER);
3619 %}
3620 
3621 operand rbp_RegP()
3622 %{
       // Pointer register allocated from register class ptr_rbp_reg.
3623   constraint(ALLOC_IN_RC(ptr_rbp_reg));
3624   match(RegP);
3625   match(rRegP);
3626 
3627   format %{ %}
3628   interface(REG_INTER);
3629 %}
3630 
3631 operand r15_RegP()
3632 %{
       // Pointer register allocated from register class ptr_r15_reg.  The Q&A comment
       // near rRegP describes r15 as the read-only TLS register.
3633   constraint(ALLOC_IN_RC(ptr_r15_reg));
3634   match(RegP);
3635   match(rRegP);
3636 
3637   format %{ %}
3638   interface(REG_INTER);
3639 %}
3640 
3641 operand rRegL()
3642 %{
       // General long register operand allocated from the long_reg register
       // class.  Matches the ideal RegL type and also accepts the more
       // specific rax_RegL and rdx_RegL operands.
3643   constraint(ALLOC_IN_RC(long_reg));
3644   match(RegL);
3645   match(rax_RegL);
3646   match(rdx_RegL);
3647 
3648   format %{ %}
3649   interface(REG_INTER);
3650 %}
3651 
3652 // Special Registers
     // Long operand allocated from the long_no_rax_rdx_reg register class;
     // matches the ideal RegL type and the general rRegL operand.
3653 operand no_rax_rdx_RegL()
3654 %{
3655   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3656   match(RegL);
3657   match(rRegL);
3658 
3659   format %{ %}
3660   interface(REG_INTER);
3661 %}
3662 
3663 operand no_rax_RegL()
3664 %{
       // Long operand that matches the ideal RegL type, the general rRegL
       // operand and rdx_RegL.
       // NOTE(review): the register class used here is long_no_rax_rdx_reg
       // (the same one as no_rax_rdx_RegL), although the operand name only
       // excludes RAX and rdx_RegL is accepted as a match -- confirm whether
       // a class that still contains RDX was intended.
3665   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3666   match(RegL);
3667   match(rRegL);
3668   match(rdx_RegL);
3669 
3670   format %{ %}
3671   interface(REG_INTER);
3672 %}
3673 
3674 operand no_rcx_RegL()
3675 %{
       // Long operand allocated from the long_no_rcx_reg register class;
       // matches the ideal RegL type and the general rRegL operand.
3676   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3677   match(RegL);
3678   match(rRegL);
3679 
3680   format %{ %}
3681   interface(REG_INTER);
3682 %}
3683 
3684 operand rax_RegL()
3685 %{
       // Long operand allocated from the long_rax_reg register class;
       // matches the ideal RegL type and the general rRegL operand.
3686   constraint(ALLOC_IN_RC(long_rax_reg));
3687   match(RegL);
3688   match(rRegL);
3689 
       // Unlike the sibling fixed-register long operands, this one has a
       // non-empty format string.
3690   format %{ "RAX" %}
3691   interface(REG_INTER);
3692 %}
3693 
3694 operand rcx_RegL()
3695 %{
       // Long operand allocated from the long_rcx_reg register class;
       // matches the ideal RegL type and the general rRegL operand.
3696   constraint(ALLOC_IN_RC(long_rcx_reg));
3697   match(RegL);
3698   match(rRegL);
3699 
3700   format %{ %}
3701   interface(REG_INTER);
3702 %}
3703 
3704 operand rdx_RegL()
3705 %{
       // Long operand allocated from the long_rdx_reg register class;
       // matches the ideal RegL type and the general rRegL operand.
3706   constraint(ALLOC_IN_RC(long_rdx_reg));
3707   match(RegL);
3708   match(rRegL);
3709 
3710   format %{ %}
3711   interface(REG_INTER);
3712 %}
3713 
3714 // Flags register, used as output of compare instructions
     // Allocated from the int_flags register class; matches ideal RegFlags.
3715 operand rFlagsReg()
3716 %{
3717   constraint(ALLOC_IN_RC(int_flags));
3718   match(RegFlags);
3719 
3720   format %{ "RFLAGS" %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Flags register, used as output of FLOATING POINT compare instructions
     // Same register class (int_flags) and ideal type (RegFlags) as rFlagsReg;
     // it is a distinct operand type so instructions can tell the two kinds
     // of flag results apart.
     // NOTE(review): the "U" suffix suggests unsigned compares use it as
     // well -- confirm against the instructions that reference it.
3725 operand rFlagsRegU()
3726 %{
3727   constraint(ALLOC_IN_RC(int_flags));
3728   match(RegFlags);
3729 
3730   format %{ "RFLAGS_U" %}
3731   interface(REG_INTER);
3732 %}
3733 
     // Flags operand in the int_flags register class with ideal type RegFlags,
     // printed as "RFLAGS_U_CF".
3734 operand rFlagsRegUCF() %{
3735   constraint(ALLOC_IN_RC(int_flags));
3736   match(RegFlags);
       // The predicate is the constant false.
       // NOTE(review): presumably this keeps the matcher from selecting the
       // operand on its own, so that only instructions naming it explicitly
       // use it -- confirm.
3737   predicate(false);
3738 
3739   format %{ "RFLAGS_U_CF" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 // Float register operands
     // Allocated from the float_reg register class; matches ideal RegF.
3744 operand regF()
3745 %{
3746   constraint(ALLOC_IN_RC(float_reg));
3747   match(RegF);
3748 
3749   format %{ %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 // Double register operands
     // Allocated from the double_reg register class; matches ideal RegD.
3754 operand regD()
3755 %{
3756   constraint(ALLOC_IN_RC(double_reg));
3757   match(RegD);
3758 
3759   format %{ %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 //----------Memory Operands----------------------------------------------------
3764 // Direct Memory Operand
3765 // operand direct(immP addr)
3766 // %{
3767 //   match(addr);
3768 
3769 //   format %{ "[$addr]" %}
3770 //   interface(MEMORY_INTER) %{
3771 //     base(0xFFFFFFFF);
3772 //     index(0x4);
3773 //     scale(0x0);
3774 //     disp($addr);
3775 //   %}
3776 // %}
3777 
3778 // Indirect Memory Operand
     // Address form [reg]: a pointer register used directly as the base, with
     // no index, no scale and zero displacement.
3779 operand indirect(any_RegP reg)
3780 %{
3781   constraint(ALLOC_IN_RC(ptr_reg));
3782   match(reg);
3783 
3784   format %{ "[$reg]" %}
3785   interface(MEMORY_INTER) %{
3786     base($reg);
3787     index(0x4);   // 0x4 = no index
3788     scale(0x0);
3789     disp(0x0);
3790   %}
3791 %}
3792 
3793 // Indirect Memory Plus Short Offset Operand
     // Address form [reg + off] where off is an immL8 constant; matches an
     // AddP of the base register and the offset.
3794 operand indOffset8(any_RegP reg, immL8 off)
3795 %{
3796   constraint(ALLOC_IN_RC(ptr_reg));
3797   match(AddP reg off);
3798 
3799   format %{ "[$reg + $off (8-bit)]" %}
3800   interface(MEMORY_INTER) %{
3801     base($reg);
3802     index(0x4);   // 0x4 = no index
3803     scale(0x0);
3804     disp($off);
3805   %}
3806 %}
3807 
3808 // Indirect Memory Plus Long Offset Operand
     // Address form [reg + off] where off is an immL32 constant; matches an
     // AddP of the base register and the offset.
3809 operand indOffset32(any_RegP reg, immL32 off)
3810 %{
3811   constraint(ALLOC_IN_RC(ptr_reg));
3812   match(AddP reg off);
3813 
3814   format %{ "[$reg + $off (32-bit)]" %}
3815   interface(MEMORY_INTER) %{
3816     base($reg);
3817     index(0x4);   // 0x4 = no index
3818     scale(0x0);
3819     disp($off);
3820   %}
3821 %}
3822 
3823 // Indirect Memory Plus Index Register Plus Offset Operand
     // Address form [reg + lreg + off]: base register, unscaled long index
     // register and an immL32 displacement.
3824 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3825 %{
3826   constraint(ALLOC_IN_RC(ptr_reg));
3827   match(AddP (AddP reg lreg) off);
3828 
3829   op_cost(10);
3830   format %{"[$reg + $off + $lreg]" %}
3831   interface(MEMORY_INTER) %{
3832     base($reg);
3833     index($lreg);
3834     scale(0x0);
3835     disp($off);
3836   %}
3837 %}
3838 
3839 // Indirect Memory Plus Index Register Operand
     // Address form [reg + lreg]: base register plus an unscaled long index
     // register, with zero displacement.
3840 operand indIndex(any_RegP reg, rRegL lreg)
3841 %{
3842   constraint(ALLOC_IN_RC(ptr_reg));
3843   match(AddP reg lreg);
3844 
3845   op_cost(10);
3846   format %{"[$reg + $lreg]" %}
3847   interface(MEMORY_INTER) %{
3848     base($reg);
3849     index($lreg);
3850     scale(0x0);
3851     disp(0x0);
3852   %}
3853 %}
3854 
3855 // Indirect Memory Times Scale Plus Index Register
     // Address form [reg + (lreg << scale)]: the left shift of the index by
     // the immI2 constant is folded into the scale field; zero displacement.
3856 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3857 %{
3858   constraint(ALLOC_IN_RC(ptr_reg));
3859   match(AddP reg (LShiftL lreg scale));
3860 
3861   op_cost(10);
3862   format %{"[$reg + $lreg << $scale]" %}
3863   interface(MEMORY_INTER) %{
3864     base($reg);
3865     index($lreg);
3866     scale($scale);
3867     disp(0x0);
3868   %}
3869 %}
3870 
3871 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
     // Address form [reg + (lreg << scale) + off]: base register, shifted
     // long index register and an immL32 displacement.
3872 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3873 %{
3874   constraint(ALLOC_IN_RC(ptr_reg));
3875   match(AddP (AddP reg (LShiftL lreg scale)) off);
3876 
3877   op_cost(10);
3878   format %{"[$reg + $off + $lreg << $scale]" %}
3879   interface(MEMORY_INTER) %{
3880     base($reg);
3881     index($lreg);
3882     scale($scale);
3883     disp($off);
3884   %}
3885 %}
3886 
3887 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
     // Address form [reg + (idx << scale) + off] where idx is an int register
     // that reaches the address through ConvI2L.
3888 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3889 %{
3890   constraint(ALLOC_IN_RC(ptr_reg));
       // Only applies when the long type of the node at in(2)->in(3)->in(1)
       // has a non-negative lower bound (_lo >= 0).
       // NOTE(review): that node is presumably the ConvI2L of the match rule
       // below -- confirm the input numbering.
3891   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3892   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3893 
3894   op_cost(10);
3895   format %{"[$reg + $off + $idx << $scale]" %}
3896   interface(MEMORY_INTER) %{
3897     base($reg);
3898     index($idx);
3899     scale($scale);
3900     disp($off);
3901   %}
3902 %}
3903 
3904 // Indirect Narrow Oop Plus Offset Operand
3905 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3906 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
     // Folds the DecodeN of a narrow register into the address itself:
     // R12 is the base, the narrow register is the index scaled by 8, and
     // off is the displacement.
3907 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
       // Only used with compressed oops when the narrow oop shift equals
       // Address::times_8, matching the fixed scale(0x3) below.
3908   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3909   constraint(ALLOC_IN_RC(ptr_reg));
3910   match(AddP (DecodeN reg) off);
3911 
3912   op_cost(10);
3913   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3914   interface(MEMORY_INTER) %{
3915     base(0xc); // R12
3916     index($reg);
3917     scale(0x3);
3918     disp($off);
3919   %}
3920 %}
3921 
3922 // Indirect Memory Operand (narrow oop base)
     // Address form [reg] where reg is a narrow register consumed through
     // DecodeN; the narrow register is used directly as the base.
3923 operand indirectNarrow(rRegN reg)
3924 %{
       // Only applies when the narrow oop shift is zero.
3925   predicate(Universe::narrow_oop_shift() == 0);
3926   constraint(ALLOC_IN_RC(ptr_reg));
3927   match(DecodeN reg);
3928 
3929   format %{ "[$reg]" %}
3930   interface(MEMORY_INTER) %{
3931     base($reg);
3932     index(0x4);   // 0x4 = no index
3933     scale(0x0);
3934     disp(0x0);
3935   %}
3936 %}
3937 
3938 // Indirect Memory Plus Short Offset Operand (narrow oop base)
     // Address form [reg + off] with an immL8 offset, where the base is a
     // narrow register consumed through DecodeN.
3939 operand indOffset8Narrow(rRegN reg, immL8 off)
3940 %{
       // Only applies when the narrow oop shift is zero.
3941   predicate(Universe::narrow_oop_shift() == 0);
3942   constraint(ALLOC_IN_RC(ptr_reg));
3943   match(AddP (DecodeN reg) off);
3944 
3945   format %{ "[$reg + $off (8-bit)]" %}
3946   interface(MEMORY_INTER) %{
3947     base($reg);
3948     index(0x4);   // 0x4 = no index
3949     scale(0x0);
3950     disp($off);
3951   %}
3952 %}
3953 
3954 // Indirect Memory Plus Long Offset Operand (narrow oop base)
     // Address form [reg + off] with an immL32 offset, where the base is a
     // narrow register consumed through DecodeN.
3955 operand indOffset32Narrow(rRegN reg, immL32 off)
3956 %{
       // Only applies when the narrow oop shift is zero.
3957   predicate(Universe::narrow_oop_shift() == 0);
3958   constraint(ALLOC_IN_RC(ptr_reg));
3959   match(AddP (DecodeN reg) off);
3960 
3961   format %{ "[$reg + $off (32-bit)]" %}
3962   interface(MEMORY_INTER) %{
3963     base($reg);
3964     index(0x4);   // 0x4 = no index
3965     scale(0x0);
3966     disp($off);
3967   %}
3968 %}
3969 
3970 // Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base)
     // Address form [reg + lreg + off], where the base is a narrow register
     // consumed through DecodeN.
3971 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3972 %{
       // Only applies when the narrow oop shift is zero.
3973   predicate(Universe::narrow_oop_shift() == 0);
3974   constraint(ALLOC_IN_RC(ptr_reg));
3975   match(AddP (AddP (DecodeN reg) lreg) off);
3976 
3977   op_cost(10);
3978   format %{"[$reg + $off + $lreg]" %}
3979   interface(MEMORY_INTER) %{
3980     base($reg);
3981     index($lreg);
3982     scale(0x0);
3983     disp($off);
3984   %}
3985 %}
3986 
3987 // Indirect Memory Plus Index Register Operand (narrow oop base)
     // Address form [reg + lreg] with zero displacement, where the base is a
     // narrow register consumed through DecodeN.
3988 operand indIndexNarrow(rRegN reg, rRegL lreg)
3989 %{
       // Only applies when the narrow oop shift is zero.
3990   predicate(Universe::narrow_oop_shift() == 0);
3991   constraint(ALLOC_IN_RC(ptr_reg));
3992   match(AddP (DecodeN reg) lreg);
3993 
3994   op_cost(10);
3995   format %{"[$reg + $lreg]" %}
3996   interface(MEMORY_INTER) %{
3997     base($reg);
3998     index($lreg);
3999     scale(0x0);
4000     disp(0x0);
4001   %}
4002 %}
4003 
4004 // Indirect Memory Times Scale Plus Index Register (narrow oop base)
     // Address form [reg + (lreg << scale)] with zero displacement, where the
     // base is a narrow register consumed through DecodeN.
4005 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4006 %{
       // Only applies when the narrow oop shift is zero.
4007   predicate(Universe::narrow_oop_shift() == 0);
4008   constraint(ALLOC_IN_RC(ptr_reg));
4009   match(AddP (DecodeN reg) (LShiftL lreg scale));
4010 
4011   op_cost(10);
4012   format %{"[$reg + $lreg << $scale]" %}
4013   interface(MEMORY_INTER) %{
4014     base($reg);
4015     index($lreg);
4016     scale($scale);
4017     disp(0x0);
4018   %}
4019 %}
4020 
4021 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow oop base)
     // Address form [reg + (lreg << scale) + off], where the base is a narrow
     // register consumed through DecodeN.
4022 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4023 %{
       // Only applies when the narrow oop shift is zero.
4024   predicate(Universe::narrow_oop_shift() == 0);
4025   constraint(ALLOC_IN_RC(ptr_reg));
4026   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4027 
4028   op_cost(10);
4029   format %{"[$reg + $off + $lreg << $scale]" %}
4030   interface(MEMORY_INTER) %{
4031     base($reg);
4032     index($lreg);
4033     scale($scale);
4034     disp($off);
4035   %}
4036 %}
4037 
4038 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow oop base)
     // Address form [reg + (idx << scale) + off], where the base is a narrow
     // register consumed through DecodeN and idx is an int register that
     // reaches the address through ConvI2L.
4039 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4040 %{
4041   constraint(ALLOC_IN_RC(ptr_reg));
       // Requires a zero narrow oop shift and a non-negative lower bound
       // (_lo >= 0) on the long type of the node at in(2)->in(3)->in(1).
       // NOTE(review): that node is presumably the ConvI2L -- confirm.
4042   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4043   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4044 
4045   op_cost(10);
4046   format %{"[$reg + $off + $idx << $scale]" %}
4047   interface(MEMORY_INTER) %{
4048     base($reg);
4049     index($idx);
4050     scale($scale);
4051     disp($off);
4052   %}
4053 %}
4054 
4055 operand indirectNarrowKlass(rRegN reg)
4056 %{
       // Address form [reg] where reg is a narrow register consumed through
       // DecodeNKlass and used directly as the base.
       // Only applies when the narrow klass shift is zero.
4057   predicate(Universe::narrow_klass_shift() == 0);
4058   constraint(ALLOC_IN_RC(ptr_reg));
4059   match(DecodeNKlass reg);
4060 
4061   format %{ "[$reg]" %}
4062   interface(MEMORY_INTER) %{
4063     base($reg);
4064     index(0x4);   // 0x4 = no index
4065     scale(0x0);
4066     disp(0x0);
4067   %}
4068 %}
4069 
4070 operand indOffset8NarrowKlass(rRegN reg, immL8 off)
4071 %{
       // Address form [reg + off] with an immL8 offset, where the base is a
       // narrow register consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4072   predicate(Universe::narrow_klass_shift() == 0);
4073   constraint(ALLOC_IN_RC(ptr_reg));
4074   match(AddP (DecodeNKlass reg) off);
4075 
4076   format %{ "[$reg + $off (8-bit)]" %}
4077   interface(MEMORY_INTER) %{
4078     base($reg);
4079     index(0x4);   // 0x4 = no index
4080     scale(0x0);
4081     disp($off);
4082   %}
4083 %}
4084 
4085 operand indOffset32NarrowKlass(rRegN reg, immL32 off)
4086 %{
       // Address form [reg + off] with an immL32 offset, where the base is a
       // narrow register consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4087   predicate(Universe::narrow_klass_shift() == 0);
4088   constraint(ALLOC_IN_RC(ptr_reg));
4089   match(AddP (DecodeNKlass reg) off);
4090 
4091   format %{ "[$reg + $off (32-bit)]" %}
4092   interface(MEMORY_INTER) %{
4093     base($reg);
4094     index(0x4);   // 0x4 = no index
4095     scale(0x0);
4096     disp($off);
4097   %}
4098 %}
4099 
4100 operand indIndexOffsetNarrowKlass(rRegN reg, rRegL lreg, immL32 off)
4101 %{
       // Address form [reg + lreg + off], where the base is a narrow register
       // consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4102   predicate(Universe::narrow_klass_shift() == 0);
4103   constraint(ALLOC_IN_RC(ptr_reg));
4104   match(AddP (AddP (DecodeNKlass reg) lreg) off);
4105 
4106   op_cost(10);
4107   format %{"[$reg + $off + $lreg]" %}
4108   interface(MEMORY_INTER) %{
4109     base($reg);
4110     index($lreg);
4111     scale(0x0);
4112     disp($off);
4113   %}
4114 %}
4115 
4116 operand indIndexNarrowKlass(rRegN reg, rRegL lreg)
4117 %{
       // Address form [reg + lreg] with zero displacement, where the base is
       // a narrow register consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4118   predicate(Universe::narrow_klass_shift() == 0);
4119   constraint(ALLOC_IN_RC(ptr_reg));
4120   match(AddP (DecodeNKlass reg) lreg);
4121 
4122   op_cost(10);
4123   format %{"[$reg + $lreg]" %}
4124   interface(MEMORY_INTER) %{
4125     base($reg);
4126     index($lreg);
4127     scale(0x0);
4128     disp(0x0);
4129   %}
4130 %}
4131 
4132 operand indIndexScaleNarrowKlass(rRegN reg, rRegL lreg, immI2 scale)
4133 %{
       // Address form [reg + (lreg << scale)] with zero displacement, where
       // the base is a narrow register consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4134   predicate(Universe::narrow_klass_shift() == 0);
4135   constraint(ALLOC_IN_RC(ptr_reg));
4136   match(AddP (DecodeNKlass reg) (LShiftL lreg scale));
4137 
4138   op_cost(10);
4139   format %{"[$reg + $lreg << $scale]" %}
4140   interface(MEMORY_INTER) %{
4141     base($reg);
4142     index($lreg);
4143     scale($scale);
4144     disp(0x0);
4145   %}
4146 %}
4147 
4148 operand indIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4149 %{
       // Address form [reg + (lreg << scale) + off], where the base is a
       // narrow register consumed through DecodeNKlass.
       // Only applies when the narrow klass shift is zero.
4150   predicate(Universe::narrow_klass_shift() == 0);
4151   constraint(ALLOC_IN_RC(ptr_reg));
4152   match(AddP (AddP (DecodeNKlass reg) (LShiftL lreg scale)) off);
4153 
4154   op_cost(10);
4155   format %{"[$reg + $off + $lreg << $scale]" %}
4156   interface(MEMORY_INTER) %{
4157     base($reg);
4158     index($lreg);
4159     scale($scale);
4160     disp($off);
4161   %}
4162 %}
4163 
     // Folds the DecodeNKlass of a narrow register into the address itself:
     // R12 is the base, the narrow register is the index scaled by 8, and
     // off is the displacement.
4164 operand indCompressedKlassOffset(rRegN reg, immL32 off) %{
       // Only used with compressed klass pointers when the narrow klass shift
       // equals Address::times_8, matching the fixed scale(0x3) below.
4165   predicate(UseCompressedKlassPointers && (Universe::narrow_klass_shift() == Address::times_8));
4166   constraint(ALLOC_IN_RC(ptr_reg));
4167   match(AddP (DecodeNKlass reg) off);
4168 
4169   op_cost(10);
4170   format %{"[R12 + $reg << 3 + $off] (compressed klass addressing)" %}
4171   interface(MEMORY_INTER) %{
4172     base(0xc); // R12
4173     index($reg);
4174     scale(0x3);
4175     disp($off);
4176   %}
4177 %}
4178 
4179 operand indPosIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4180 %{
       // Address form [reg + (idx << scale) + off], where the base is a
       // narrow register consumed through DecodeNKlass and idx is an int
       // register that reaches the address through ConvI2L.
4181   constraint(ALLOC_IN_RC(ptr_reg));
       // Requires a zero narrow klass shift and a non-negative lower bound
       // (_lo >= 0) on the long type of the node at in(2)->in(3)->in(1).
       // NOTE(review): that node is presumably the ConvI2L -- confirm.
4182   predicate(Universe::narrow_klass_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4183   match(AddP (AddP (DecodeNKlass reg) (LShiftL (ConvI2L idx) scale)) off);
4184 
4185   op_cost(10);
4186   format %{"[$reg + $off + $idx << $scale]" %}
4187   interface(MEMORY_INTER) %{
4188     base($reg);
4189     index($idx);
4190     scale($scale);
4191     disp($off);
4192   %}
4193 %}
4194 
4195 //----------Special Memory Operands--------------------------------------------
4196 // Stack Slot Operand - This operand is used for loading and storing temporary
4197 //                      values on the stack where a match requires a value to
4198 //                      flow through memory.
4199 operand stackSlotP(sRegP reg)
4200 %{
       // Memory view of a pointer stack slot (sRegP): addressed relative to
       // RSP with the slot's stack offset as the displacement.
4201   constraint(ALLOC_IN_RC(stack_slots));
4202   // No match rule because this operand is only generated in matching
4203 
4204   format %{ "[$reg]" %}
4205   interface(MEMORY_INTER) %{
4206     base(0x4);   // RSP
4207     index(0x4);  // No Index
4208     scale(0x0);  // No Scale
4209     disp($reg);  // Stack Offset
4210   %}
4211 %}
4212 
4213 operand stackSlotI(sRegI reg)
4214 %{
       // Memory view of an int stack slot (sRegI): addressed relative to RSP
       // with the slot's stack offset as the displacement.
4215   constraint(ALLOC_IN_RC(stack_slots));
4216   // No match rule because this operand is only generated in matching
4217 
4218   format %{ "[$reg]" %}
4219   interface(MEMORY_INTER) %{
4220     base(0x4);   // RSP
4221     index(0x4);  // No Index
4222     scale(0x0);  // No Scale
4223     disp($reg);  // Stack Offset
4224   %}
4225 %}
4226 
4227 operand stackSlotF(sRegF reg)
4228 %{
       // Memory view of a float stack slot (sRegF): addressed relative to RSP
       // with the slot's stack offset as the displacement.
4229   constraint(ALLOC_IN_RC(stack_slots));
4230   // No match rule because this operand is only generated in matching
4231 
4232   format %{ "[$reg]" %}
4233   interface(MEMORY_INTER) %{
4234     base(0x4);   // RSP
4235     index(0x4);  // No Index
4236     scale(0x0);  // No Scale
4237     disp($reg);  // Stack Offset
4238   %}
4239 %}
4240 
4241 operand stackSlotD(sRegD reg)
4242 %{
       // Memory view of a double stack slot (sRegD): addressed relative to
       // RSP with the slot's stack offset as the displacement.
4243   constraint(ALLOC_IN_RC(stack_slots));
4244   // No match rule because this operand is only generated in matching
4245 
4246   format %{ "[$reg]" %}
4247   interface(MEMORY_INTER) %{
4248     base(0x4);   // RSP
4249     index(0x4);  // No Index
4250     scale(0x0);  // No Scale
4251     disp($reg);  // Stack Offset
4252   %}
4253 %}
4254 operand stackSlotL(sRegL reg)
4255 %{
       // Memory view of a long stack slot (sRegL): addressed relative to RSP
       // with the slot's stack offset as the displacement.
4256   constraint(ALLOC_IN_RC(stack_slots));
4257   // No match rule because this operand is only generated in matching
4258 
4259   format %{ "[$reg]" %}
4260   interface(MEMORY_INTER) %{
4261     base(0x4);   // RSP
4262     index(0x4);  // No Index
4263     scale(0x0);  // No Scale
4264     disp($reg);  // Stack Offset
4265   %}
4266 %}
4267 
4268 //----------Conditional Branch Operands----------------------------------------
4269 // Comparison Op  - This is the operation of the comparison, and is limited to
4270 //                  the following set of codes:
4271 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4272 //
4273 // Other attributes of the comparison, such as unsignedness, are specified
4274 // by the comparison instruction that sets a condition code flags register.
4275 // That result is represented by a flags operand whose subtype is appropriate
4276 // to the unsignedness (etc.) of the comparison.
4277 //
4278 // Later, the instruction which matches both the Comparison Op (a Bool) and
4279 // the flags (produced by the Cmp) specifies the coding of the comparison op
4280 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4281 
4282 // Comparison Code
     // Matches a Bool node and maps each of the six comparison kinds to a
     // condition-code value and the mnemonic suffix used when printing.
     // The suffixes here (l, ge, le, g) are the signed forms.
4283 operand cmpOp()
4284 %{
4285   match(Bool);
4286 
4287   format %{ "" %}
4288   interface(COND_INTER) %{
4289     equal(0x4, "e");
4290     not_equal(0x5, "ne");
4291     less(0xC, "l");
4292     greater_equal(0xD, "ge");
4293     less_equal(0xE, "le");
4294     greater(0xF, "g");
4295   %}
4296 %}
4297 
4298 // Comparison Code, unsigned compare.  Used by FP also, with
4299 // C2 (unordered) turned into GT or LT already.  The other bits
4300 // C0 and C3 are turned into Carry & Zero flags.
     // Matches a Bool node; the ordering tests use the below/above suffixes
     // (b, nb, be, nbe) instead of the signed ones.
4301 operand cmpOpU()
4302 %{
4303   match(Bool);
4304 
4305   format %{ "" %}
4306   interface(COND_INTER) %{
4307     equal(0x4, "e");
4308     not_equal(0x5, "ne");
4309     less(0x2, "b");
4310     greater_equal(0x3, "nb");
4311     less_equal(0x6, "be");
4312     greater(0x7, "nbe");
4313   %}
4314 %}
4315 
4316 
4317 // Floating comparisons that don't require any fixup for the unordered case
     // Matches a Bool node only when its test is lt, ge, le or gt.  The
     // condition table is the same as cmpOpU's.
4318 operand cmpOpUCF() %{
4319   match(Bool);
4320   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4321             n->as_Bool()->_test._test == BoolTest::ge ||
4322             n->as_Bool()->_test._test == BoolTest::le ||
4323             n->as_Bool()->_test._test == BoolTest::gt);
4324   format %{ "" %}
4325   interface(COND_INTER) %{
4326     equal(0x4, "e");
4327     not_equal(0x5, "ne");
4328     less(0x2, "b");
4329     greater_equal(0x3, "nb");
4330     less_equal(0x6, "be");
4331     greater(0x7, "nbe");
4332   %}
4333 %}
4334 
4335 
4336 // Floating comparisons that can be fixed up with extra conditional jumps
     // Matches a Bool node only when its test is ne or eq.  The condition
     // table is the same as cmpOpU's.
4337 operand cmpOpUCF2() %{
4338   match(Bool);
4339   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4340             n->as_Bool()->_test._test == BoolTest::eq);
4341   format %{ "" %}
4342   interface(COND_INTER) %{
4343     equal(0x4, "e");
4344     not_equal(0x5, "ne");
4345     less(0x2, "b");
4346     greater_equal(0x3, "nb");
4347     less_equal(0x6, "be");
4348     greater(0x7, "nbe");
4349   %}
4350 %}
4351 
4352 
4353 //----------OPERAND CLASSES----------------------------------------------------
4354 // Operand Classes are groups of operands that are used to simplify
4355 // instruction definitions by not requiring the AD writer to specify separate
4356 // instructions for every form of operand when the instruction accepts
4357 // multiple operand types with the same basic encoding and format.  The classic
4358 // case of this is memory operands.
4359 
     // "memory" groups every addressing-mode operand: the plain pointer-based
     // forms, the compressed-oop forms (indCompressedOopOffset and *Narrow)
     // and the compressed-klass forms (indCompressedKlassOffset and
     // *NarrowKlass).  The stackSlot* operands are not part of this class.
4360 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4361                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4362                indCompressedOopOffset,
4363                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4364                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4365                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow,
4366                indCompressedKlassOffset,
4367                indirectNarrowKlass, indOffset8NarrowKlass, indOffset32NarrowKlass,
4368                indIndexOffsetNarrowKlass, indIndexNarrowKlass, indIndexScaleNarrowKlass,
4369                indIndexScaleOffsetNarrowKlass, indPosIndexScaleOffsetNarrowKlass);
4370 
4371 //----------PIPELINE-----------------------------------------------------------
4372 // Rules which define the behavior of the target architectures pipeline.
4373 pipeline %{
4374 
4375 //----------ATTRIBUTES---------------------------------------------------------
4376 attributes %{
4377   variable_size_instructions;        // Instructions are variable size
4378   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4379   instruction_unit_size = 1;         // Instruction sizes are in units of 1 byte
4380   instruction_fetch_unit_size = 16;  // The processor fetches one line
4381   instruction_fetch_units = 1;       // of 16 bytes
4382 
4383   // List of nop instructions
4384   nops( MachNop );
4385 %}
4386 
4387 //----------RESOURCES----------------------------------------------------------
4388 // Resources are the functional units available to the machine
4389 
4390 // Generic P2/P3 pipeline
4391 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4392 // 3 instructions decoded per cycle.
4393 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4394 // 3 ALU op, only ALU0 handles mul instructions.
     // DECODE, MEM and ALU are combined resources: each names the union of
     // its individual units, so a pipe class can ask for "any" of them.
     // NOTE(review): the text above says 2 load/store ops per cycle, but three
     // memory units (MS0, MS1, MS2) are declared -- confirm which is intended.
4395 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4396            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4397            BR, FPU,
4398            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4399 
4400 //----------PIPELINE DESCRIPTION-----------------------------------------------
4401 // Pipeline Description specifies the stages in the machine's pipeline
4402 
4403 // Generic P2/P3 pipeline
     // Six stages, S0 through S5; the pipe classes refer to them by name.
4404 pipe_desc(S0, S1, S2, S3, S4, S5);
4405 
4406 //----------PIPELINE CLASSES---------------------------------------------------
4407 // Pipeline Classes describe the stages in which input and output are
4408 // referenced by the hardware pipeline.
4409 
4410 // Naming convention: ialu or fpu
4411 // Then: _reg
4412 // Then: _reg if there is a 2nd register
4413 // Then: _long if it's a pair of instructions implementing a long
4414 // Then: _fat if it requires the big decoder
4415 //   Or: _mem if it requires the big decoder and a memory unit.
4416 
4417 // Integer ALU reg operation
     // One instruction that reads dst in S3 and writes it in S4.
4418 pipe_class ialu_reg(rRegI dst)
4419 %{
4420     single_instruction;
4421     dst    : S4(write);
4422     dst    : S3(read);
4423     DECODE : S0;        // any decoder
4424     ALU    : S3;        // any alu
4425 %}
4426 
4427 // Long ALU reg operation
     // Two-instruction sequence that reads dst in S3 and writes it in S4.
4428 pipe_class ialu_reg_long(rRegL dst)
4429 %{
4430     instruction_count(2);
4431     dst    : S4(write);
4432     dst    : S3(read);
4433     DECODE : S0(2);     // any 2 decoders
4434     ALU    : S3(2);     // both alus
4435 %}
4436 
4437 // Integer ALU reg operation using big decoder
     // Same timing as ialu_reg, but decoding is restricted to D0.
4438 pipe_class ialu_reg_fat(rRegI dst)
4439 %{
4440     single_instruction;
4441     dst    : S4(write);
4442     dst    : S3(read);
4443     D0     : S0;        // big decoder only
4444     ALU    : S3;        // any alu
4445 %}
4446 
4447 // Long ALU reg operation using big decoder
     // Same timing as ialu_reg_long, but decoding is restricted to D0.
4448 pipe_class ialu_reg_long_fat(rRegL dst)
4449 %{
4450     instruction_count(2);
4451     dst    : S4(write);
4452     dst    : S3(read);
4453     D0     : S0(2);     // big decoder only; twice
4454     ALU    : S3(2);     // any 2 alus
4455 %}
4456 
4457 // Integer ALU reg-reg operation
     // One instruction that reads src in S3 and writes dst in S4.
4458 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4459 %{
4460     single_instruction;
4461     dst    : S4(write);
4462     src    : S3(read);
4463     DECODE : S0;        // any decoder
4464     ALU    : S3;        // any alu
4465 %}
4466 
4467 // Long ALU reg-reg operation
     // Two-instruction sequence that reads src in S3 and writes dst in S4.
4468 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4469 %{
4470     instruction_count(2);
4471     dst    : S4(write);
4472     src    : S3(read);
4473     DECODE : S0(2);     // any 2 decoders
4474     ALU    : S3(2);     // both alus
4475 %}
4476 
4477 // Integer ALU reg-reg operation using big decoder
     // NOTE(review): despite the reg_reg name, src is declared with the
     // "memory" operand class and no MEM resource is claimed -- confirm this
     // is intentional.
4478 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4479 %{
4480     single_instruction;
4481     dst    : S4(write);
4482     src    : S3(read);
4483     D0     : S0;        // big decoder only
4484     ALU    : S3;        // any alu
4485 %}
4486 
4487 // Long ALU reg-reg operation using big decoder
     // Same timing as ialu_reg_reg_long, but decoding is restricted to D0.
4488 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4489 %{
4490     instruction_count(2);
4491     dst    : S4(write);
4492     src    : S3(read);
4493     D0     : S0(2);     // big decoder only; twice
4494     ALU    : S3(2);     // both alus
4495 %}
4496 
4497 // Integer ALU reg-mem operation
     // The memory operand is read in S3 (using a memory unit), the ALU is
     // used one stage later in S4, and dst is written in S5.
4498 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4499 %{
4500     single_instruction;
4501     dst    : S5(write);
4502     mem    : S3(read);
4503     D0     : S0;        // big decoder only
4504     ALU    : S4;        // any alu
4505     MEM    : S3;        // any mem
4506 %}
4507 
4508 // Integer mem operation (prefetch)
     // Only touches memory: mem is read in S3 and no ALU resource is claimed.
4509 pipe_class ialu_mem(memory mem)
4510 %{
4511     single_instruction;
4512     mem    : S3(read);
4513     D0     : S0;        // big decoder only
4514     MEM    : S3;        // any mem
4515 %}
4516 
4517 // Integer Store to Memory
     // The address operand mem is read in S3 and the stored register src is
     // read in S5.
4518 pipe_class ialu_mem_reg(memory mem, rRegI src)
4519 %{
4520     single_instruction;
4521     mem    : S3(read);
4522     src    : S5(read);
4523     D0     : S0;        // big decoder only
4524     ALU    : S4;        // any alu
4525     MEM    : S3;        // any mem
4526 %}
4527 
4528 // // Long Store to Memory
4529 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4530 // %{
4531 //     instruction_count(2);
4532 //     mem    : S3(read);
4533 //     src    : S5(read);
4534 //     D0     : S0(2);          // big decoder only; twice
4535 //     ALU    : S4(2);     // any 2 alus
4536 //     MEM    : S3(2);  // Both mems
4537 // %}
4538 
4539 // Integer Store to Memory (no register source)
     // Same shape as ialu_mem_reg without the src register.
     // NOTE(review): the _imm suffix suggests the stored value is an
     // immediate -- confirm against the instructions using this class.
4540 pipe_class ialu_mem_imm(memory mem)
4541 %{
4542     single_instruction;
4543     mem    : S3(read);
4544     D0     : S0;        // big decoder only
4545     ALU    : S4;        // any alu
4546     MEM    : S3;        // any mem
4547 %}
4548 
4549 // Integer ALU0 reg-reg operation
     // Like ialu_reg_reg, but restricted to decoder D0 and to ALU0.
4550 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4551 %{
4552     single_instruction;
4553     dst    : S4(write);
4554     src    : S3(read);
4555     D0     : S0;        // Big decoder only
4556     ALU0   : S3;        // only alu0
4557 %}
4558 
4559 // Integer ALU0 reg-mem operation
     // Like ialu_reg_mem, but the ALU stage is restricted to ALU0.
4560 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4561 %{
4562     single_instruction;
4563     dst    : S5(write);
4564     mem    : S3(read);
4565     D0     : S0;        // big decoder only
4566     ALU0   : S4;        // ALU0 only
4567     MEM    : S3;        // any mem
4568 %}
4569 
4570 // Integer ALU reg-reg operation that writes the flags register
     // Both sources are read in S3; the flags operand cr is written in S4.
4571 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4572 %{
4573     single_instruction;
4574     cr     : S4(write);
4575     src1   : S3(read);
4576     src2   : S3(read);
4577     DECODE : S0;        // any decoder
4578     ALU    : S3;        // any alu
4579 %}
4580 
4581 // Integer ALU reg-imm operation that writes the flags register
     // The single register source is read in S3; cr is written in S4.
4582 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4583 %{
4584     single_instruction;
4585     cr     : S4(write);
4586     src1   : S3(read);
4587     DECODE : S0;        // any decoder
4588     ALU    : S3;        // any alu
4589 %}
4590 
4591 // Integer ALU reg-mem operation that writes the flags register
     // The register and memory sources are read in S3, the ALU is used in
     // S4, and cr is written in S4.
4592 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4593 %{
4594     single_instruction;
4595     cr     : S4(write);
4596     src1   : S3(read);
4597     src2   : S3(read);
4598     D0     : S0;        // big decoder only
4599     ALU    : S4;        // any alu
4600     MEM    : S3;        // any mem
4601 %}
4602 
4603 // Four-instruction sequence reading p, q and y
     // NOTE(review): the name suggests a "compare less-than" idiom; no
     // operand is marked as written and no ALU resource is claimed --
     // confirm against the instructions using this class.
4604 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4605 %{
4606     instruction_count(4);
4607     y      : S4(read);
4608     q      : S3(read);
4609     p      : S3(read);
4610     DECODE : S0(4);     // any decoder
4611 %}
4612 
4613 // Conditional move reg-reg
     // Reads src and the flags operand cr in S3 and writes dst in S4; only a
     // decoder resource is claimed.
4614 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4615 %{
4616     single_instruction;
4617     dst    : S4(write);
4618     src    : S3(read);
4619     cr     : S3(read);
4620     DECODE : S0;        // any decoder
4621 %}
4622 
4623 // Conditional move reg-mem
     // Reads the memory source and the flags operand cr in S3 (using a
     // memory unit) and writes dst in S4.
4624 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4625 %{
4626     single_instruction;
4627     dst    : S4(write);
4628     src    : S3(read);
4629     cr     : S3(read);
4630     DECODE : S0;        // any decoder
4631     MEM    : S3;        // any mem
4632 %}
4633 
4634 // Conditional move reg-reg long
     // NOTE(review): declared single_instruction yet claims a decoder count
     // of 2 -- confirm which is intended.
4635 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4636 %{
4637     single_instruction;
4638     dst    : S4(write);
4639     src    : S3(read);
4640     cr     : S3(read);
4641     DECODE : S0(2);     // any 2 decoders
4642 %}
4643 
4644 // XXX
4645 // // Conditional move double reg-reg
4646 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4647 // %{
4648 //     single_instruction;
4649 //     dst    : S4(write);
4650 //     src    : S3(read);
4651 //     cr     : S3(read);
4652 //     DECODE : S0;     // any decoder
4653 // %}
4654 
4655 // Float reg operation
     // Two-instruction sequence that reads dst in S3 and uses the FPU in S3;
     // no operand is marked as written.
4656 pipe_class fpu_reg(regD dst)
4657 %{
4658     instruction_count(2);
4659     dst    : S3(read);
4660     DECODE : S0(2);     // any 2 decoders
4661     FPU    : S3;
4662 %}
4663 
4664 // Float reg-reg operation
     // Two-instruction sequence that reads src in S3 and writes dst in S4.
4665 pipe_class fpu_reg_reg(regD dst, regD src)
4666 %{
4667     instruction_count(2);
4668     dst    : S4(write);
4669     src    : S3(read);
4670     DECODE : S0(2);     // any 2 decoders
4671     FPU    : S3;
4672 %}
4673 
4674 // Float reg-reg-reg operation
     // Three-instruction sequence that reads src1 and src2 in S3 and writes
     // dst in S4.
4675 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4676 %{
4677     instruction_count(3);
4678     dst    : S4(write);
4679     src1   : S3(read);
4680     src2   : S3(read);
4681     DECODE : S0(3);     // any 3 decoders
4682     FPU    : S3(2);
4683 %}
4684 
4685 // Float reg-reg-reg-reg operation
     // Four-instruction sequence that reads src1, src2 and src3 in S3 and
     // writes dst in S4.
4686 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4687 %{
4688     instruction_count(4);
4689     dst    : S4(write);
4690     src1   : S3(read);
4691     src2   : S3(read);
4692     src3   : S3(read);
4693     DECODE : S0(4);     // any decoder; count of 4 matches instruction_count
4694     FPU    : S3(2);
4695 %}
4696 
4697 // Float reg-mem-reg-reg operation
     // Four-instruction sequence: src1 comes from memory, src2 and src3 from
     // registers, all read in S3; dst is written in S4.  One instruction is
     // decoded by D0 in S0 and the other three by any decoder in S1.
4698 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4699 %{
4700     instruction_count(4);
4701     dst    : S4(write);
4702     src1   : S3(read);
4703     src2   : S3(read);
4704     src3   : S3(read);
4705     DECODE : S1(3);     // any 3 decoders
4706     D0     : S0;        // Big decoder only
4707     FPU    : S3(2);
4708     MEM    : S3;        // any mem
4709 %}
4710 
4711 // Float reg-mem operation
     // Two-instruction sequence: mem is read in S3, the FPU is used in S4 and
     // dst is written in S5.
4712 pipe_class fpu_reg_mem(regD dst, memory mem)
4713 %{
4714     instruction_count(2);
4715     dst    : S5(write);
4716     mem    : S3(read);
4717     D0     : S0;        // big decoder only
4718     DECODE : S1;        // any decoder for FPU POP
4719     FPU    : S4;
4720     MEM    : S3;        // any mem
4721 %}
4722 
4723 // Float reg-reg-mem operation: three instructions, one register and one memory source
4724 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4725 %{
4726     instruction_count(3);
4727     dst    : S5(write);
4728     src1   : S3(read);
4729     mem    : S3(read);
4730     D0     : S0;        // big decoder only
4731     DECODE : S1(2);     // any decoder for FPU POP
4732     FPU    : S4;
4733     MEM    : S3;        // any mem
4734 %}
4735 
4736 // Float mem-reg operation: two instructions; both operands are declared as reads
4737 pipe_class fpu_mem_reg(memory mem, regD src)
4738 %{
4739     instruction_count(2);
4740     src    : S5(read);
4741     mem    : S3(read);
4742     DECODE : S0;        // any decoder for FPU PUSH
4743     D0     : S1;        // big decoder only
4744     FPU    : S4;
4745     MEM    : S3;        // any mem
4746 %}
4747 
4748 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4749 %{
         // Float mem-reg-reg operation: three instructions, all operands read in S3.
4750     instruction_count(3);
4751     src1   : S3(read);
4752     src2   : S3(read);
4753     mem    : S3(read);
4754     DECODE : S0(2);     // any decoder for FPU PUSH
4755     D0     : S1;        // big decoder only
4756     FPU    : S4;
4757     MEM    : S3;        // any mem
4758 %}
4759 
4760 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4761 %{
         // Float mem-reg-mem operation: three instructions with two memory operands
         // (mem and src2), hence MEM : S3(2).
4762     instruction_count(3);
4763     src1   : S3(read);
4764     src2   : S3(read);
4765     mem    : S4(read);
4766     DECODE : S0;        // any decoder for FPU PUSH
4767     D0     : S0(2);     // big decoder only
4768     FPU    : S4;
4769     MEM    : S3(2);     // any mem
4770 %}
4771 
4772 pipe_class fpu_mem_mem(memory dst, memory src1)
4773 %{
         // Float mem-mem operation: two instructions; no FPU resource is declared here,
         // only the big decoder and the memory unit.
4774     instruction_count(2);
4775     src1   : S3(read);
4776     dst    : S4(read);
4777     D0     : S0(2);     // big decoder only
4778     MEM    : S3(2);     // any mem
4779 %}
4780 
4781 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4782 %{
         // Float mem-mem-mem operation: three instructions, three memory operands.
4783     instruction_count(3);
4784     src1   : S3(read);
4785     src2   : S3(read);
4786     dst    : S4(read);
4787     D0     : S0(3);     // big decoder only
4788     FPU    : S4;
4789     MEM    : S3(3);     // any mem
4790 %}
4791 
4792 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4793 %{
         // Float mem-reg-constant operation: three instructions; the constant has no
         // operand in the signature, but MEM is declared as S3(2).
4794     instruction_count(3);
4795     src1   : S4(read);
4796     mem    : S4(read);
4797     DECODE : S0;        // any decoder for FPU PUSH
4798     D0     : S0(2);     // big decoder only
4799     FPU    : S4;
4800     MEM    : S3(2);     // any mem
4801 %}
4802 
4803 // Float load constant: two instructions, dst written in S5
4804 pipe_class fpu_reg_con(regD dst)
4805 %{
4806     instruction_count(2);
4807     dst    : S5(write);
4808     D0     : S0;        // big decoder only for the load
4809     DECODE : S1;        // any decoder for FPU POP
4810     FPU    : S4;
4811     MEM    : S3;        // any mem
4812 %}
4813 
4814 // Float load constant combined with a register source: three instructions
4815 pipe_class fpu_reg_reg_con(regD dst, regD src)
4816 %{
4817     instruction_count(3);
4818     dst    : S5(write);
4819     src    : S3(read);
4820     D0     : S0;        // big decoder only for the load
4821     DECODE : S1(2);     // any decoder for FPU POP
4822     FPU    : S4;
4823     MEM    : S3;        // any mem
4824 %}
4825 
4826 // Unconditional branch: a single instruction that uses only the BR resource in S3
4827 pipe_class pipe_jmp(label labl)
4828 %{
4829     single_instruction;
4830     BR   : S3;
4831 %}
4832 
4833 // Conditional branch: a single instruction that reads the flags before branching
4834 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4835 %{
4836     single_instruction;
4837     cr    : S1(read);   // flags are read in S1, ahead of the BR resource in S3
4838     BR    : S3;
4839 %}
4840 
4841 // Allocation idiom: one serializing instruction with a fixed latency of 6
4842 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4843 %{
4844     instruction_count(1); force_serialization;
4845     fixed_latency(6);
4846     heap_ptr : S3(read);
4847     DECODE   : S0(3);
4848     D0       : S2;
4849     MEM      : S3;
4850     ALU      : S3(2);
4851     dst      : S5(write);
4852     BR       : S5;
4853 %}
4854 
4855 // Generic big/slow expanded idiom: modelled as 10 instructions spanning multiple
     // bundles, serializing, with a fixed latency of 100
4856 pipe_class pipe_slow()
4857 %{
4858     instruction_count(10); multiple_bundles; force_serialization;
4859     fixed_latency(100);
4860     D0  : S0(2);
4861     MEM : S3(2);
4862 %}
4863 
4864 // The real do-nothing guy: zero instructions, no operands, no resources
4865 pipe_class empty()
4866 %{
4867     instruction_count(0);
4868 %}
4869 
4870 // Define the class for the Nop node: MachNop uses the 'empty' pipeline class
4871 define
4872 %{
4873    MachNop = empty;
4874 %}
4875 
4876 %}
4877 
4878 //----------INSTRUCTIONS-------------------------------------------------------
4879 //
4880 // match      -- States which machine-independent subtree may be replaced
4881 //               by this instruction.
4882 // ins_cost   -- The estimated cost of this instruction is used by instruction
4883 //               selection to identify a minimum cost tree of machine
4884 //               instructions that matches a tree of machine-independent
4885 //               instructions.
4886 // format     -- A string providing the disassembly for this instruction.
4887 //               The value of an instruction's operand may be inserted
4888 //               by referring to it with a '$' prefix.
4889 // opcode     -- Three instruction opcodes may be provided.  These are referred
4890 //               to within an encode class as $primary, $secondary, and $tertiary
4891 //               respectively.  The primary opcode is commonly used to
4892 //               indicate the type of machine instruction, while secondary
4893 //               and tertiary are often used for prefix options or addressing
4894 //               modes.
4895 // ins_encode -- A list of encode classes with parameters. The encode class
4896 //               name must have been defined in an 'enc_class' specification
4897 //               in the encode section of the architecture description.
4898 
4899 
4900 //----------Load/Store/Move Instructions---------------------------------------
4901 //----------Load Instructions--------------------------------------------------
4902 
4903 // Load Byte (8 bit signed): matches LoadB into an int register via movsbl
4904 instruct loadB(rRegI dst, memory mem)
4905 %{
4906   match(Set dst (LoadB mem));
4907 
4908   ins_cost(125);
4909   format %{ "movsbl  $dst, $mem\t# byte" %}
4910 
4911   ins_encode %{
4912     __ movsbl($dst$$Register, $mem$$Address);
4913   %}
4914 
4915   ins_pipe(ialu_reg_mem);
4916 %}
4917 
4918 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
4919 instruct loadB2L(rRegL dst, memory mem)
4920 %{
4921   match(Set dst (ConvI2L (LoadB mem)));
4922 
4923   ins_cost(125);
4924   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4925 
4926   ins_encode %{
4927     __ movsbq($dst$$Register, $mem$$Address);
4928   %}
4929 
4930   ins_pipe(ialu_reg_mem);
4931 %}
4932 
4933 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB into an int register via movzbl
4934 instruct loadUB(rRegI dst, memory mem)
4935 %{
4936   match(Set dst (LoadUB mem));
4937 
4938   ins_cost(125);
4939   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4940 
4941   ins_encode %{
4942     __ movzbl($dst$$Register, $mem$$Address);
4943   %}
4944 
4945   ins_pipe(ialu_reg_mem);
4946 %}
4947 
4948 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB)
     // into one movzbq
4949 instruct loadUB2L(rRegL dst, memory mem)
4950 %{
4951   match(Set dst (ConvI2L (LoadUB mem)));
4952 
4953   ins_cost(125);
4954   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4955 
4956   ins_encode %{
4957     __ movzbq($dst$$Register, $mem$$Address);
4958   %}
4959 
4960   ins_pipe(ialu_reg_mem);
4961 %}
4962 
4963 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register.
     // Emits two instructions (movzbq then andl); the flags register is declared
     // killed because of the andl. No ins_cost is given for this pattern.
4964 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
4965   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
4966   effect(KILL cr);
4967 
4968   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
4969             "andl    $dst, $mask" %}
4970   ins_encode %{
4971     Register Rdst = $dst$$Register;
4972     __ movzbq(Rdst, $mem$$Address);
4973     __ andl(Rdst, $mask$$constant);
4974   %}
4975   ins_pipe(ialu_reg_mem);
4976 %}
4977 
4978 // Load Short (16 bit signed): matches LoadS into an int register via movswl
4979 instruct loadS(rRegI dst, memory mem)
4980 %{
4981   match(Set dst (LoadS mem));
4982 
4983   ins_cost(125);
4984   format %{ "movswl $dst, $mem\t# short" %}
4985 
4986   ins_encode %{
4987     __ movswl($dst$$Register, $mem$$Address);
4988   %}
4989 
4990   ins_pipe(ialu_reg_mem);
4991 %}
4992 
4993 // Load Short (16 bit signed) to Byte (8 bit signed): the (x << 24) >> 24 idiom on a
     // loaded short is replaced by a single byte load with movsbl
4994 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4995   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
4996 
4997   ins_cost(125);
4998   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4999   ins_encode %{
5000     __ movsbl($dst$$Register, $mem$$Address);
5001   %}
5002   ins_pipe(ialu_reg_mem);
5003 %}
5004 
5005 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
5006 instruct loadS2L(rRegL dst, memory mem)
5007 %{
5008   match(Set dst (ConvI2L (LoadS mem)));
5009 
5010   ins_cost(125);
5011   format %{ "movswq $dst, $mem\t# short -> long" %}
5012 
5013   ins_encode %{
5014     __ movswq($dst$$Register, $mem$$Address);
5015   %}
5016 
5017   ins_pipe(ialu_reg_mem);
5018 %}
5019 
5020 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS into an int register
     // via movzwl
5021 instruct loadUS(rRegI dst, memory mem)
5022 %{
5023   match(Set dst (LoadUS mem));
5024 
5025   ins_cost(125);
5026   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5027 
5028   ins_encode %{
5029     __ movzwl($dst$$Register, $mem$$Address);
5030   %}
5031 
5032   ins_pipe(ialu_reg_mem);
5033 %}
5034 
5035 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the
     // (x << 24) >> 24 idiom on a loaded char is replaced by a single movsbl
5036 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5037   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5038 
5039   ins_cost(125);
5040   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5041   ins_encode %{
5042     __ movsbl($dst$$Register, $mem$$Address);
5043   %}
5044   ins_pipe(ialu_reg_mem);
5045 %}
5046 
5047 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds
     // ConvI2L(LoadUS) into one movzwq
5048 instruct loadUS2L(rRegL dst, memory mem)
5049 %{
5050   match(Set dst (ConvI2L (LoadUS mem)));
5051 
5052   ins_cost(125);
5053   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5054 
5055   ins_encode %{
5056     __ movzwq($dst$$Register, $mem$$Address);
5057   %}
5058 
5059   ins_pipe(ialu_reg_mem);
5060 %}
5061 
5062 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register:
     // the load, mask and conversion collapse into a single byte load (movzbq).
     // No ins_cost is given for this pattern.
5063 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5064   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5065 
5066   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5067   ins_encode %{
5068     __ movzbq($dst$$Register, $mem$$Address);
5069   %}
5070   ins_pipe(ialu_reg_mem);
5071 %}
5072 
5073 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register.
     // Emits two instructions (movzwq then andl); the flags register is declared
     // killed because of the andl. No ins_cost is given for this pattern.
5074 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5075   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5076   effect(KILL cr);
5077 
5078   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5079             "andl    $dst, $mask" %}
5080   ins_encode %{
5081     Register Rdst = $dst$$Register;
5082     __ movzwq(Rdst, $mem$$Address);
5083     __ andl(Rdst, $mask$$constant);
5084   %}
5085   ins_pipe(ialu_reg_mem);
5086 %}
5087 
5088 // Load Integer: matches LoadI into an int register via movl
5089 instruct loadI(rRegI dst, memory mem)
5090 %{
5091   match(Set dst (LoadI mem));
5092 
5093   ins_cost(125);
5094   format %{ "movl    $dst, $mem\t# int" %}
5095 
5096   ins_encode %{
5097     __ movl($dst$$Register, $mem$$Address);
5098   %}
5099 
5100   ins_pipe(ialu_reg_mem);
5101 %}
5102 
5103 // Load Integer (32 bit signed) to Byte (8 bit signed): the (x << 24) >> 24 idiom on
     // a loaded int is replaced by a single byte load with movsbl
5104 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5105   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5106 
5107   ins_cost(125);
5108   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5109   ins_encode %{
5110     __ movsbl($dst$$Register, $mem$$Address);
5111   %}
5112   ins_pipe(ialu_reg_mem);
5113 %}
5114 
5115 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): LoadI masked with
     // the immI_255 constant is replaced by a single movzbl
5116 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5117   match(Set dst (AndI (LoadI mem) mask));
5118 
5119   ins_cost(125);
5120   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5121   ins_encode %{
5122     __ movzbl($dst$$Register, $mem$$Address);
5123   %}
5124   ins_pipe(ialu_reg_mem);
5125 %}
5126 
5127 // Load Integer (32 bit signed) to Short (16 bit signed): the (x << 16) >> 16 idiom on
     // a loaded int is replaced by a single movswl
5128 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5129   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5130 
5131   ins_cost(125);
5132   format %{ "movswl  $dst, $mem\t# int -> short" %}
5133   ins_encode %{
5134     __ movswl($dst$$Register, $mem$$Address);
5135   %}
5136   ins_pipe(ialu_reg_mem);
5137 %}
5138 
5139 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): LoadI masked
     // with the immI_65535 constant is replaced by a single movzwl
5140 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5141   match(Set dst (AndI (LoadI mem) mask));
5142 
5143   ins_cost(125);
5144   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5145   ins_encode %{
5146     __ movzwl($dst$$Register, $mem$$Address);
5147   %}
5148   ins_pipe(ialu_reg_mem);
5149 %}
5150 
5151 // Load Integer into Long Register: folds ConvI2L(LoadI) into one movslq
5152 instruct loadI2L(rRegL dst, memory mem)
5153 %{
5154   match(Set dst (ConvI2L (LoadI mem)));
5155 
5156   ins_cost(125);
5157   format %{ "movslq  $dst, $mem\t# int -> long" %}
5158 
5159   ins_encode %{
5160     __ movslq($dst$$Register, $mem$$Address);
5161   %}
5162 
5163   ins_pipe(ialu_reg_mem);
5164 %}
5165 
5166 // Load Integer with mask 0xFF into Long Register: the load, mask and conversion
     // collapse into a single byte load (movzbq). No ins_cost is given.
5167 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5168   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5169 
5170   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5171   ins_encode %{
5172     __ movzbq($dst$$Register, $mem$$Address);
5173   %}
5174   ins_pipe(ialu_reg_mem);
5175 %}
5176 
5177 // Load Integer with mask 0xFFFF into Long Register: the load, mask and conversion
     // collapse into a single 16-bit load (movzwq). No ins_cost is given.
5178 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5179   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5180 
5181   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5182   ins_encode %{
5183     __ movzwq($dst$$Register, $mem$$Address);
5184   %}
5185   ins_pipe(ialu_reg_mem);
5186 %}
5187 
5188 // Load Integer with a 32-bit mask into Long Register.
     //
     // Matches ConvI2L(AndI(LoadI mem, mask)) for an arbitrary int mask. movl and
     // andl are 32-bit operations, so they leave the upper half of the 64-bit
     // destination zeroed. That equals the ConvI2L result only when the masked
     // int is non-negative. If the mask has bit 31 set, the masked value can be
     // negative and must be sign-extended, so a movslq is emitted for negative
     // masks. Non-negative masks keep the original two-instruction sequence.
     // The flags register is declared killed because of the andl.
5189 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5190   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5191   effect(KILL cr);
5192 
5193   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
5194             "andl    $dst, $mask\n\t"
                 "movslq  $dst, $dst\t# only when mask < 0" %}
5195   ins_encode %{
5196     Register Rdst = $dst$$Register;
5197     __ movl(Rdst, $mem$$Address);
5198     __ andl(Rdst, $mask$$constant);
         if ($mask$$constant < 0) {
           // Bit 31 may survive the mask: sign-extend to honour ConvI2L.
           __ movslq(Rdst, Rdst);
         }
5199   %}
5200   ins_pipe(ialu_reg_mem);
5201 %}
5202 
5203 // Load Unsigned Integer into Long Register: ConvI2L(LoadI) masked with the
     // immL_32bits constant is replaced by a plain 32-bit movl
5204 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
5205 %{
5206   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5207 
5208   ins_cost(125);
5209   format %{ "movl    $dst, $mem\t# uint -> long" %}
5210 
5211   ins_encode %{
5212     __ movl($dst$$Register, $mem$$Address);
5213   %}
5214 
5215   ins_pipe(ialu_reg_mem);
5216 %}
5217 
5218 // Load Long: matches LoadL into a long register via movq
5219 instruct loadL(rRegL dst, memory mem)
5220 %{
5221   match(Set dst (LoadL mem));
5222 
5223   ins_cost(125);
5224   format %{ "movq    $dst, $mem\t# long" %}
5225 
5226   ins_encode %{
5227     __ movq($dst$$Register, $mem$$Address);
5228   %}
5229 
5230   ins_pipe(ialu_reg_mem); // XXX
5231 %}
5232 
5233 // Load Range: matches LoadRange into an int register; formatted as movl.
     // Encoded from raw pieces (REX prefix, opcode 0x8B, reg/mem operand bytes) rather
     // than a MacroAssembler call; OpcP presumably emits the primary opcode - its
     // definition is in the encode section, outside this view.
5234 instruct loadRange(rRegI dst, memory mem)
5235 %{
5236   match(Set dst (LoadRange mem));
5237 
5238   ins_cost(125); // XXX
5239   format %{ "movl    $dst, $mem\t# range" %}
5240   opcode(0x8B);
5241   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5242   ins_pipe(ialu_reg_mem);
5243 %}
5244 
5245 // Load Pointer: matches LoadP into a pointer register; formatted as movq.
     // Uses the "wide" REX encode class together with opcode 0x8B.
5246 instruct loadP(rRegP dst, memory mem)
5247 %{
5248   match(Set dst (LoadP mem));
5249 
5250   ins_cost(125); // XXX
5251   format %{ "movq    $dst, $mem\t# ptr" %}
5252   opcode(0x8B);
5253   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5254   ins_pipe(ialu_reg_mem); // XXX
5255 %}
5256 
5257 // Load Compressed Pointer: matches LoadN into a narrow-oop register with a 32-bit movl
5258 instruct loadN(rRegN dst, memory mem)
5259 %{
5260    match(Set dst (LoadN mem));
5261 
5262    ins_cost(125); // XXX
5263    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5264    ins_encode %{
5265      __ movl($dst$$Register, $mem$$Address);
5266    %}
5267    ins_pipe(ialu_reg_mem); // XXX
5268 %}
5269 
5270 
5271 // Load Klass Pointer: matches LoadKlass into a pointer register; formatted as movq.
     // Same encoding pieces as loadP (wide REX, opcode 0x8B).
5272 instruct loadKlass(rRegP dst, memory mem)
5273 %{
5274   match(Set dst (LoadKlass mem));
5275 
5276   ins_cost(125); // XXX
5277   format %{ "movq    $dst, $mem\t# class" %}
5278   opcode(0x8B);
5279   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5280   ins_pipe(ialu_reg_mem); // XXX
5281 %}
5282 
5283 // Load narrow Klass Pointer: matches LoadNKlass into a narrow register with a
     // 32-bit movl
5284 instruct loadNKlass(rRegN dst, memory mem)
5285 %{
5286   match(Set dst (LoadNKlass mem));
5287 
5288   ins_cost(125); // XXX
5289   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5290   ins_encode %{
5291     __ movl($dst$$Register, $mem$$Address);
5292   %}
5293   ins_pipe(ialu_reg_mem); // XXX
5294 %}
5295 
5296 // Load Float: matches LoadF into an XMM register through the movflt helper
     // (formatted as movss); costed slightly higher (145) than the integer loads
5297 instruct loadF(regF dst, memory mem)
5298 %{
5299   match(Set dst (LoadF mem));
5300 
5301   ins_cost(145); // XXX
5302   format %{ "movss   $dst, $mem\t# float" %}
5303   ins_encode %{
5304     __ movflt($dst$$XMMRegister, $mem$$Address);
5305   %}
5306   ins_pipe(pipe_slow); // XXX
5307 %}
5308 
5309 // Load Double, variant selected when UseXmmLoadAndClearUpper is false.
     // Shares its match rule and its movdbl encoding with loadD; only the predicate
     // and the printed mnemonic (movlpd) differ.
5310 instruct loadD_partial(regD dst, memory mem)
5311 %{
5312   predicate(!UseXmmLoadAndClearUpper);
5313   match(Set dst (LoadD mem));
5314 
5315   ins_cost(145); // XXX
5316   format %{ "movlpd  $dst, $mem\t# double" %}
5317   ins_encode %{
5318     __ movdbl($dst$$XMMRegister, $mem$$Address);
5319   %}
5320   ins_pipe(pipe_slow); // XXX
5321 %}
5322 
5323 instruct loadD(regD dst, memory mem)
5324 %{
       // Load Double, variant selected when UseXmmLoadAndClearUpper is true.
       // Same match rule and movdbl encoding as loadD_partial; printed as movsd.
5325   predicate(UseXmmLoadAndClearUpper);
5326   match(Set dst (LoadD mem));
5327 
5328   ins_cost(145); // XXX
5329   format %{ "movsd   $dst, $mem\t# double" %}
5330   ins_encode %{
5331     __ movdbl($dst$$XMMRegister, $mem$$Address);
5332   %}
5333   ins_pipe(pipe_slow); // XXX
5334 %}
5335 
5336 // Load Effective Address: materializes an indOffset8 address operand into a pointer
     // register with leaq (opcode 0x8D, wide REX); cheaper (110) than a memory load (125)
5337 instruct leaP8(rRegP dst, indOffset8 mem)
5338 %{
5339   match(Set dst mem);
5340 
5341   ins_cost(110); // XXX
5342   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5343   opcode(0x8D);
5344   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5345   ins_pipe(ialu_reg_reg_fat);
5346 %}
5347 
5348 instruct leaP32(rRegP dst, indOffset32 mem)
5349 %{
       // Load Effective Address of an indOffset32 operand via leaq (opcode 0x8D).
5350   match(Set dst mem);
5351 
5352   ins_cost(110);
5353   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5354   opcode(0x8D);
5355   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5356   ins_pipe(ialu_reg_reg_fat);
5357 %}
5358 
5359 // instruct leaPIdx(rRegP dst, indIndex mem)
5360 // %{
5361 //   match(Set dst mem);
5362 
5363 //   ins_cost(110);
5364 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5365 //   opcode(0x8D);
5366 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5367 //   ins_pipe(ialu_reg_reg_fat);
5368 // %}
5369 
5370 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5371 %{
       // Load Effective Address of an indIndexOffset operand via leaq (opcode 0x8D).
5372   match(Set dst mem);
5373 
5374   ins_cost(110);
5375   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5376   opcode(0x8D);
5377   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5378   ins_pipe(ialu_reg_reg_fat);
5379 %}
5380 
5381 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5382 %{
       // Load Effective Address of an indIndexScale operand via leaq (opcode 0x8D).
5383   match(Set dst mem);
5384 
5385   ins_cost(110);
5386   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5387   opcode(0x8D);
5388   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5389   ins_pipe(ialu_reg_reg_fat);
5390 %}
5391 
5392 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5393 %{
       // Load Effective Address of an indIndexScaleOffset operand via leaq (opcode 0x8D).
5394   match(Set dst mem);
5395 
5396   ins_cost(110);
5397   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5398   opcode(0x8D);
5399   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5400   ins_pipe(ialu_reg_reg_fat);
5401 %}
5402 
5403 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5404 %{
       // Load Effective Address of an indPosIndexScaleOffset operand via leaq (opcode 0x8D).
5405   match(Set dst mem);
5406 
5407   ins_cost(110);
5408   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5409   opcode(0x8D);
5410   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5411   ins_pipe(ialu_reg_reg_fat);
5412 %}
5413 
5414 // Load Effective Address which uses Narrow (32-bits) oop.
     // Only selected when compressed oops are enabled and the narrow-oop shift is
     // non-zero; the *Narrow variants below cover the shift == 0 case.
5415 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5416 %{
5417   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5418   match(Set dst mem);
5419 
5420   ins_cost(110);
5421   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5422   opcode(0x8D);
5423   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5424   ins_pipe(ialu_reg_reg_fat);
5425 %}
5426 
5427 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5428 %{
       // Load Effective Address of an indOffset8Narrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5429   predicate(Universe::narrow_oop_shift() == 0);
5430   match(Set dst mem);
5431 
5432   ins_cost(110); // XXX
5433   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5434   opcode(0x8D);
5435   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5436   ins_pipe(ialu_reg_reg_fat);
5437 %}
5438 
5439 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5440 %{
       // Load Effective Address of an indOffset32Narrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5441   predicate(Universe::narrow_oop_shift() == 0);
5442   match(Set dst mem);
5443 
5444   ins_cost(110);
5445   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5446   opcode(0x8D);
5447   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5448   ins_pipe(ialu_reg_reg_fat);
5449 %}
5450 
5451 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5452 %{
       // Load Effective Address of an indIndexOffsetNarrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5453   predicate(Universe::narrow_oop_shift() == 0);
5454   match(Set dst mem);
5455 
5456   ins_cost(110);
5457   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5458   opcode(0x8D);
5459   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5460   ins_pipe(ialu_reg_reg_fat);
5461 %}
5462 
5463 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5464 %{
       // Load Effective Address of an indIndexScaleNarrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5465   predicate(Universe::narrow_oop_shift() == 0);
5466   match(Set dst mem);
5467 
5468   ins_cost(110);
5469   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5470   opcode(0x8D);
5471   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5472   ins_pipe(ialu_reg_reg_fat);
5473 %}
5474 
5475 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5476 %{
       // Load Effective Address of an indIndexScaleOffsetNarrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5477   predicate(Universe::narrow_oop_shift() == 0);
5478   match(Set dst mem);
5479 
5480   ins_cost(110);
5481   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5482   opcode(0x8D);
5483   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5484   ins_pipe(ialu_reg_reg_fat);
5485 %}
5486 
5487 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5488 %{
       // Load Effective Address of an indPosIndexScaleOffsetNarrow operand via leaq;
       // only selected when the narrow-oop shift is zero.
5489   predicate(Universe::narrow_oop_shift() == 0);
5490   match(Set dst mem);
5491 
5492   ins_cost(110);
5493   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5494   opcode(0x8D);
5495   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5496   ins_pipe(ialu_reg_reg_fat);
5497 %}
5498 
5499 instruct loadConI(rRegI dst, immI src)
5500 %{
       // Load an arbitrary int constant into an int register (formatted as movl).
       // No ins_cost is given; encoding is delegated to the load_immI encode class.
5501   match(Set dst src);
5502 
5503   format %{ "movl    $dst, $src\t# int" %}
5504   ins_encode(load_immI(dst, src));
5505   ins_pipe(ialu_reg_fat); // XXX
5506 %}
5507 
5508 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5509 %{
       // Load int zero by xor-ing the register with itself (opcode 0x33).
       // Cost 50; the xor is why the flags register is declared killed.
5510   match(Set dst src);
5511   effect(KILL cr);
5512 
5513   ins_cost(50);
5514   format %{ "xorl    $dst, $dst\t# int" %}
5515   opcode(0x33); /* + rd */
5516   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5517   ins_pipe(ialu_reg);
5518 %}
5519 
5520 instruct loadConL(rRegL dst, immL src)
5521 %{
       // Load an arbitrary long constant (formatted as movq). At cost 150 it is the most
       // expensive of the long-constant patterns here (loadConL0 50, loadConUL32 60,
       // loadConL32 70).
5522   match(Set dst src);
5523 
5524   ins_cost(150);
5525   format %{ "movq    $dst, $src\t# long" %}
5526   ins_encode(load_immL(dst, src));
5527   ins_pipe(ialu_reg);
5528 %}
5529 
5530 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5531 %{
       // Load long zero with a 32-bit xor of the register with itself (opcode 0x33,
       // non-wide REX encode class). Cost 50; flags are declared killed.
5532   match(Set dst src);
5533   effect(KILL cr);
5534 
5535   ins_cost(50);
5536   format %{ "xorl    $dst, $dst\t# long" %}
5537   opcode(0x33); /* + rd */
5538   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5539   ins_pipe(ialu_reg); // XXX
5540 %}
5541 
5542 instruct loadConUL32(rRegL dst, immUL32 src)
5543 %{
       // Load a long constant that fits the immUL32 operand (formatted as a 32-bit movl).
       // Cost 60; encoding is delegated to the load_immUL32 encode class.
5544   match(Set dst src);
5545 
5546   ins_cost(60);
5547   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5548   ins_encode(load_immUL32(dst, src));
5549   ins_pipe(ialu_reg);
5550 %}
5551 
5552 instruct loadConL32(rRegL dst, immL32 src)
5553 %{
       // Load a long constant that fits the immL32 operand (formatted as movq).
       // Cost 70; encoding is delegated to the load_immL32 encode class.
5554   match(Set dst src);
5555 
5556   ins_cost(70);
5557   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5558   ins_encode(load_immL32(dst, src));
5559   ins_pipe(ialu_reg);
5560 %}
5561 
5562 instruct loadConP(rRegP dst, immP con) %{
       // Load an arbitrary pointer constant (formatted as movq). No ins_cost is given;
       // encoding is delegated to the load_immP encode class.
5563   match(Set dst con);
5564 
5565   format %{ "movq    $dst, $con\t# ptr" %}
5566   ins_encode(load_immP(dst, con));
5567   ins_pipe(ialu_reg_fat); // XXX
5568 %}
5569 
5570 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5571 %{
       // Load the null pointer with a 32-bit xor of the register with itself
       // (opcode 0x33). Cost 50; flags are declared killed.
5572   match(Set dst src);
5573   effect(KILL cr);
5574 
5575   ins_cost(50);
5576   format %{ "xorl    $dst, $dst\t# ptr" %}
5577   opcode(0x33); /* + rd */
5578   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5579   ins_pipe(ialu_reg);
5580 %}
5581 
5582 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5583 %{
       // Load a pointer constant that fits the immP31 operand (formatted as a 32-bit
       // movl). Cost 60; encoding is delegated to the load_immP31 encode class.
       // NOTE(review): flags are declared killed although the format shows only a
       // movl - confirm against load_immP31, which is outside this view.
5584   match(Set dst src);
5585   effect(KILL cr);
5586 
5587   ins_cost(60);
5588   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5589   ins_encode(load_immP31(dst, src));
5590   ins_pipe(ialu_reg);
5591 %}
5592 
5593 instruct loadConF(regF dst, immF con) %{
       // Load a float constant from the constant table: movflt reads it from the
       // address produced by $constantaddress($con). Cost 125.
5594   match(Set dst con);
5595   ins_cost(125);
5596   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5597   ins_encode %{
5598     __ movflt($dst$$XMMRegister, $constantaddress($con));
5599   %}
5600   ins_pipe(pipe_slow);
5601 %}
5602 
5603 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL pointer by xor-ing the register with itself.
       // The xor is why the flags register is declared killed. The format string
       // names $dst for both operands so the printed disassembly matches the
       // emitted xorq(dst, dst), like the other loadCon*0 patterns.
5604   match(Set dst src);
5605   effect(KILL cr);
5606   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5607   ins_encode %{
5608     __ xorq($dst$$Register, $dst$$Register);
5609   %}
5610   ins_pipe(ialu_reg);
5611 %}
5612 
5613 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-null compressed oop constant via set_narrow_oop. Cost 125.
5614   match(Set dst src);
5615 
5616   ins_cost(125);
5617   format %{ "movl    $dst, $src\t# compressed ptr" %}
5618   ins_encode %{
5619     address con = (address)$src$$constant;
5620     if (con == NULL) {
           // A NULL constant is treated as unreachable in this pattern.
5621       ShouldNotReachHere();
5622     } else {
5623       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5624     }
5625   %}
5626   ins_pipe(ialu_reg_fat); // XXX
5627 %}
5628 
5629 instruct loadConNKlass(rRegN dst, immNKlass src) %{
       // Load a non-null compressed klass constant via set_narrow_klass. Cost 125.
5630   match(Set dst src);
5631 
5632   ins_cost(125);
5633   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5634   ins_encode %{
5635     address con = (address)$src$$constant;
5636     if (con == NULL) {
           // A NULL constant is treated as unreachable in this pattern.
5637       ShouldNotReachHere();
5638     } else {
5639       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5640     }
5641   %}
5642   ins_pipe(ialu_reg_fat); // XXX
5643 %}
5644 
5645 instruct loadConF0(regF dst, immF0 src)
5646 %{
       // Load float 0.0 by xor-ing the XMM register with itself (xorps). At cost 100 it
       // is cheaper than the constant-table load in loadConF (125).
5647   match(Set dst src);
5648   ins_cost(100);
5649 
5650   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5651   ins_encode %{
5652     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5653   %}
5654   ins_pipe(pipe_slow);
5655 %}
5656 
5657 // Load a double constant from the constant table via movdbl. A single format
     // (movsd) is used since predicate() can not be used here.
5658 instruct loadConD(regD dst, immD con) %{
5659   match(Set dst con);
5660   ins_cost(125);
5661   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5662   ins_encode %{
5663     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5664   %}
5665   ins_pipe(pipe_slow);
5666 %}
5667 
5668 instruct loadConD0(regD dst, immD0 src)
5669 %{
       // Load double 0.0 by xor-ing the XMM register with itself (xorpd). At cost 100 it
       // is cheaper than the constant-table load in loadConD (125).
5670   match(Set dst src);
5671   ins_cost(100);
5672 
5673   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5674   ins_encode %{
5675     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5676   %}
5677   ins_pipe(pipe_slow);
5678 %}
5679 
5680 instruct loadSSI(rRegI dst, stackSlotI src)
5681 %{
       // Load an int from a stack slot operand into an int register (formatted as movl,
       // opcode 0x8B, non-wide REX encode class).
5682   match(Set dst src);
5683 
5684   ins_cost(125);
5685   format %{ "movl    $dst, $src\t# int stk" %}
5686   opcode(0x8B);
5687   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5688   ins_pipe(ialu_reg_mem);
5689 %}
5690 
5691 instruct loadSSL(rRegL dst, stackSlotL src)
5692 %{
       // Load a long from a stack slot operand into a long register (formatted as movq,
       // opcode 0x8B, wide REX encode class).
5693   match(Set dst src);
5694 
5695   ins_cost(125);
5696   format %{ "movq    $dst, $src\t# long stk" %}
5697   opcode(0x8B);
5698   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5699   ins_pipe(ialu_reg_mem);
5700 %}
5701 
5702 instruct loadSSP(rRegP dst, stackSlotP src)
5703 %{
       // Load a pointer from a stack slot operand into a pointer register (formatted as
       // movq, opcode 0x8B, wide REX encode class).
5704   match(Set dst src);
5705 
5706   ins_cost(125);
5707   format %{ "movq    $dst, $src\t# ptr stk" %}
5708   opcode(0x8B);
5709   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5710   ins_pipe(ialu_reg_mem);
5711 %}
5712 
5713 instruct loadSSF(regF dst, stackSlotF src)
5714 %{
       // Load a float from a stack slot into an XMM register; the slot is addressed
       // as rsp plus the operand's displacement.
5715   match(Set dst src);
5716 
5717   ins_cost(125);
5718   format %{ "movss   $dst, $src\t# float stk" %}
5719   ins_encode %{
5720     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5721   %}
5722   ins_pipe(pipe_slow); // XXX
5723 %}
5724 
5725 // Load a double from a stack slot (rsp plus the operand's displacement) via movdbl.
     // A single format (movsd) is used since predicate() can not be used here.
5726 instruct loadSSD(regD dst, stackSlotD src)
5727 %{
5728   match(Set dst src);
5729 
5730   ins_cost(125);
5731   format %{ "movsd   $dst, $src\t# double stk" %}
5732   ins_encode  %{
5733     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5734   %}
5735   ins_pipe(pipe_slow); // XXX
5736 %}
5737 
5738 // Prefetch instructions.
5739 // Must be safe to execute with invalid address (cannot fault).
5740 
5741 instruct prefetchr( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 3; emits prefetchr.
5742   predicate(ReadPrefetchInstr==3);
5743   match(PrefetchRead mem);
5744   ins_cost(125);
5745 
5746   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5747   ins_encode %{
5748     __ prefetchr($mem$$Address);
5749   %}
5750   ins_pipe(ialu_mem);
5751 %}
5752 
5753 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 0; emits prefetchnta.
5754   predicate(ReadPrefetchInstr==0);
5755   match(PrefetchRead mem);
5756   ins_cost(125);
5757 
5758   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5759   ins_encode %{
5760     __ prefetchnta($mem$$Address);
5761   %}
5762   ins_pipe(ialu_mem);
5763 %}
5764 
5765 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 1; emits prefetcht0.
5766   predicate(ReadPrefetchInstr==1);
5767   match(PrefetchRead mem);
5768   ins_cost(125);
5769 
5770   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5771   ins_encode %{
5772     __ prefetcht0($mem$$Address);
5773   %}
5774   ins_pipe(ialu_mem);
5775 %}
5776 
5777 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 2; emits prefetcht2.
5778   predicate(ReadPrefetchInstr==2);
5779   match(PrefetchRead mem);
5780   ins_cost(125);
5781 
5782   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5783   ins_encode %{
5784     __ prefetcht2($mem$$Address);
5785   %}
5786   ins_pipe(ialu_mem);
5787 %}
5788 
5789 instruct prefetchwNTA( memory mem ) %{
       // The PrefetchWrite pattern in this section; it has no predicate, so it is
       // not gated on any prefetch flag. Emits prefetchnta.
5790   match(PrefetchWrite mem);
5791   ins_cost(125);
5792 
5793   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5794   ins_encode %{
5795     __ prefetchnta($mem$$Address);
5796   %}
5797   ins_pipe(ialu_mem);
5798 %}
5799 
5800 // Prefetch instructions for allocation.
5801 
5802 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 3;
       // emits prefetchw.
5803   predicate(AllocatePrefetchInstr==3);
5804   match(PrefetchAllocation mem);
5805   ins_cost(125);
5806 
5807   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5808   ins_encode %{
5809     __ prefetchw($mem$$Address);
5810   %}
5811   ins_pipe(ialu_mem);
5812 %}
5813 
// Allocation-prefetch variant selected when AllocatePrefetchInstr == 0: matches
// PrefetchAllocation and emits prefetchnta for the operand address.
5814 instruct prefetchAllocNTA( memory mem ) %{
5815   predicate(AllocatePrefetchInstr==0);
5816   match(PrefetchAllocation mem);
5817   ins_cost(125);
5818 
5819   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5820   ins_encode %{
5821     __ prefetchnta($mem$$Address);
5822   %}
5823   ins_pipe(ialu_mem);
5824 %}
5825 
// Allocation-prefetch variant selected when AllocatePrefetchInstr == 1: matches
// PrefetchAllocation and emits prefetcht0 for the operand address.
5826 instruct prefetchAllocT0( memory mem ) %{
5827   predicate(AllocatePrefetchInstr==1);
5828   match(PrefetchAllocation mem);
5829   ins_cost(125);
5830 
5831   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5832   ins_encode %{
5833     __ prefetcht0($mem$$Address);
5834   %}
5835   ins_pipe(ialu_mem);
5836 %}
5837 
// Allocation-prefetch variant selected when AllocatePrefetchInstr == 2: matches
// PrefetchAllocation and emits prefetcht2 for the operand address.
5838 instruct prefetchAllocT2( memory mem ) %{
5839   predicate(AllocatePrefetchInstr==2);
5840   match(PrefetchAllocation mem);
5841   ins_cost(125);
5842 
5843   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5844   ins_encode %{
5845     __ prefetcht2($mem$$Address);
5846   %}
5847   ins_pipe(ialu_mem);
5848 %}
5849 
5850 //----------Store Instructions-------------------------------------------------
5851 
5852 // Store Byte
// Matches StoreB with an rRegI source and a memory destination. Encoded as
// primary opcode 0x88 preceded by the REX_breg_mem prefix encoding and followed
// by the reg_mem operand bytes; the format string prints it as movb.
5853 instruct storeB(memory mem, rRegI src)
5854 %{
5855   match(Set mem (StoreB mem src));
5856 
5857   ins_cost(125); // XXX
5858   format %{ "movb    $mem, $src\t# byte" %}
5859   opcode(0x88);
5860   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5861   ins_pipe(ialu_mem_reg);
5862 %}
5863 
5864 // Store Char/Short
// Matches StoreC with an rRegI source. Uses primary opcode 0x89 like the int
// store, but emits the SizePrefix encoding first; the format string prints movw.
5865 instruct storeC(memory mem, rRegI src)
5866 %{
5867   match(Set mem (StoreC mem src));
5868 
5869   ins_cost(125); // XXX
5870   format %{ "movw    $mem, $src\t# char/short" %}
5871   opcode(0x89);
5872   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5873   ins_pipe(ialu_mem_reg);
5874 %}
5875 
5876 // Store Integer
// Matches StoreI with an rRegI source. Encoded as primary opcode 0x89 with the
// (non-wide) REX_reg_mem prefix encoding; the format string prints movl.
5877 instruct storeI(memory mem, rRegI src)
5878 %{
5879   match(Set mem (StoreI mem src));
5880 
5881   ins_cost(125); // XXX
5882   format %{ "movl    $mem, $src\t# int" %}
5883   opcode(0x89);
5884   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5885   ins_pipe(ialu_mem_reg);
5886 %}
5887 
5888 // Store Long
// Matches StoreL with an rRegL source. Same primary opcode 0x89 as storeI, but
// with the REX_reg_mem_wide prefix encoding; the format string prints movq.
5889 instruct storeL(memory mem, rRegL src)
5890 %{
5891   match(Set mem (StoreL mem src));
5892 
5893   ins_cost(125); // XXX
5894   format %{ "movq    $mem, $src\t# long" %}
5895   opcode(0x89);
5896   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5897   ins_pipe(ialu_mem_reg); // XXX
5898 %}
5899 
5900 // Store Pointer
// Matches StoreP with a pointer source in the any_RegP operand class. Encoding
// is identical to storeL: opcode 0x89 with the REX_reg_mem_wide prefix encoding.
5901 instruct storeP(memory mem, any_RegP src)
5902 %{
5903   match(Set mem (StoreP mem src));
5904 
5905   ins_cost(125); // XXX
5906   format %{ "movq    $mem, $src\t# ptr" %}
5907   opcode(0x89);
5908   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5909   ins_pipe(ialu_mem_reg);
5910 %}
5911 
// Stores a zero pointer constant (immP0) by writing R12 with movq instead of
// encoding an immediate. Only eligible when compressed oops are enabled and both
// the narrow oop base and the narrow klass base are NULL; the format string
// describes that state as R12_heapbase==0. Cost 125 undercuts storeImmP (150).
5912 instruct storeImmP0(memory mem, immP0 zero)
5913 %{
5914   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5915   match(Set mem (StoreP mem zero));
5916 
5917   ins_cost(125); // XXX
5918   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5919   ins_encode %{
5920     __ movq($mem$$Address, r12);
5921   %}
5922   ins_pipe(ialu_mem_reg);
5923 %}
5924 
5925 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matches StoreP of an immP31 constant. Encoded as opcode 0xC7 /0 with the
// REX_mem_wide prefix encoding and the constant emitted through Con32.
5926 instruct storeImmP(memory mem, immP31 src)
5927 %{
5928   match(Set mem (StoreP mem src));
5929 
5930   ins_cost(150); // XXX
5931   format %{ "movq    $mem, $src\t# ptr" %}
5932   opcode(0xC7); /* C7 /0 */
5933   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5934   ins_pipe(ialu_mem_imm);
5935 %}
5936 
5937 // Store Compressed Pointer
// Matches StoreN with an rRegN source and emits a movl of the source register
// to the memory operand.
5938 instruct storeN(memory mem, rRegN src)
5939 %{
5940   match(Set mem (StoreN mem src));
5941 
5942   ins_cost(125); // XXX
5943   format %{ "movl    $mem, $src\t# compressed ptr" %}
5944   ins_encode %{
5945     __ movl($mem$$Address, $src$$Register);
5946   %}
5947   ins_pipe(ialu_mem_reg);
5948 %}
5949 
// Matches StoreNKlass with an rRegN source; emits the same movl register store
// as storeN, differing only in the matched ideal node and the format comment.
5950 instruct storeNKlass(memory mem, rRegN src)
5951 %{
5952   match(Set mem (StoreNKlass mem src));
5953 
5954   ins_cost(125); // XXX
5955   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5956   ins_encode %{
5957     __ movl($mem$$Address, $src$$Register);
5958   %}
5959   ins_pipe(ialu_mem_reg);
5960 %}
5961 
// Stores a zero compressed-pointer constant (immN0) by writing R12 with movl.
// Eligible when both the narrow oop base and the narrow klass base are NULL.
// NOTE(review): unlike the other R12-based zero stores in this section, the
// predicate does not test UseCompressedOops; presumably StoreN nodes only exist
// when compressed oops are enabled - confirm.
5962 instruct storeImmN0(memory mem, immN0 zero)
5963 %{
5964   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5965   match(Set mem (StoreN mem zero));
5966 
5967   ins_cost(125); // XXX
5968   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5969   ins_encode %{
5970     __ movl($mem$$Address, r12);
5971   %}
5972   ins_pipe(ialu_mem_reg);
5973 %}
5974 
// Matches StoreN of an immN constant. A NULL constant is stored as a plain
// 32-bit zero immediate; any other constant is handed to set_narrow_oop as a
// jobject together with the destination address.
5975 instruct storeImmN(memory mem, immN src)
5976 %{
5977   match(Set mem (StoreN mem src));
5978 
5979   ins_cost(150); // XXX
5980   format %{ "movl    $mem, $src\t# compressed ptr" %}
5981   ins_encode %{
5982     address con = (address)$src$$constant;
5983     if (con == NULL) {
5984       __ movl($mem$$Address, (int32_t)0);
5985     } else {
5986       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5987     }
5988   %}
5989   ins_pipe(ialu_mem_imm);
5990 %}
5991 
// Matches StoreNKlass of an immNKlass constant and passes the constant, cast to
// Klass*, to set_narrow_klass along with the destination address. Unlike
// storeImmN there is no separate NULL-constant path.
5992 instruct storeImmNKlass(memory mem, immNKlass src)
5993 %{
5994   match(Set mem (StoreNKlass mem src));
5995 
5996   ins_cost(150); // XXX
5997   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5998   ins_encode %{
5999     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6000   %}
6001   ins_pipe(ialu_mem_imm);
6002 %}
6003 
6004 // Store Integer Immediate
// Stores an int zero (immI0) by writing R12 with movl instead of an immediate.
// Only eligible when compressed oops are enabled and both the narrow oop base
// and narrow klass base are NULL (format string: R12_heapbase==0).
// Cost 125 undercuts the immediate form storeImmI (150).
6005 instruct storeImmI0(memory mem, immI0 zero)
6006 %{
6007   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6008   match(Set mem (StoreI mem zero));
6009 
6010   ins_cost(125); // XXX
6011   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6012   ins_encode %{
6013     __ movl($mem$$Address, r12);
6014   %}
6015   ins_pipe(ialu_mem_reg);
6016 %}
6017 
// Matches StoreI of an immI constant. Encoded as opcode 0xC7 /0 with the
// REX_mem prefix encoding and the constant emitted through Con32.
6018 instruct storeImmI(memory mem, immI src)
6019 %{
6020   match(Set mem (StoreI mem src));
6021 
6022   ins_cost(150);
6023   format %{ "movl    $mem, $src\t# int" %}
6024   opcode(0xC7); /* C7 /0 */
6025   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6026   ins_pipe(ialu_mem_imm);
6027 %}
6028 
6029 // Store Long Immediate
// Stores a long zero (immL0) by writing R12 with movq instead of an immediate.
// Only eligible when compressed oops are enabled and both the narrow oop base
// and narrow klass base are NULL (format string: R12_heapbase==0).
6030 instruct storeImmL0(memory mem, immL0 zero)
6031 %{
6032   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6033   match(Set mem (StoreL mem zero));
6034 
6035   ins_cost(125); // XXX
6036   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6037   ins_encode %{
6038     __ movq($mem$$Address, r12);
6039   %}
6040   ins_pipe(ialu_mem_reg);
6041 %}
6042 
// Matches StoreL of an immL32 constant. Encoded as opcode 0xC7 /0 with the
// REX_mem_wide prefix encoding; only a 32-bit constant is emitted (Con32).
6043 instruct storeImmL(memory mem, immL32 src)
6044 %{
6045   match(Set mem (StoreL mem src));
6046 
6047   ins_cost(150);
6048   format %{ "movq    $mem, $src\t# long" %}
6049   opcode(0xC7); /* C7 /0 */
6050   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6051   ins_pipe(ialu_mem_imm);
6052 %}
6053 
6054 // Store Short/Char Immediate
// Stores a short/char zero (immI0 under StoreC) by writing R12 with movw.
// Only eligible when compressed oops are enabled and both the narrow oop base
// and narrow klass base are NULL (format string: R12_heapbase==0).
6055 instruct storeImmC0(memory mem, immI0 zero)
6056 %{
6057   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6058   match(Set mem (StoreC mem zero));
6059 
6060   ins_cost(125); // XXX
6061   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6062   ins_encode %{
6063     __ movw($mem$$Address, r12);
6064   %}
6065   ins_pipe(ialu_mem_reg);
6066 %}
6067 
// Matches StoreC of an immI16 constant, but only when UseStoreImmI16 is set.
// Reuses the 32-bit immediate-store opcode 0xC7 /0 with the SizePrefix encoding
// emitted first and a 16-bit constant (Con16) as the immediate.
6068 instruct storeImmI16(memory mem, immI16 src)
6069 %{
6070   predicate(UseStoreImmI16);
6071   match(Set mem (StoreC mem src));
6072 
6073   ins_cost(150);
6074   format %{ "movw    $mem, $src\t# short/char" %}
6075   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6076   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6077   ins_pipe(ialu_mem_imm);
6078 %}
6079 
6080 // Store Byte Immediate
// Stores a byte zero (immI0 under StoreB) by writing R12 with movb instead of
// encoding an immediate. Only eligible when compressed oops are enabled and both
// the narrow oop base and narrow klass base are NULL (format string:
// R12_heapbase==0). The format comment is tagged "byte" to match the movb that
// is emitted and the wording used by storeB/storeImmB.
6081 instruct storeImmB0(memory mem, immI0 zero)
6082 %{
6083   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6084   match(Set mem (StoreB mem zero));
6085 
6086   ins_cost(125); // XXX
6087   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6088   ins_encode %{
6089     __ movb($mem$$Address, r12);
6090   %}
6091   ins_pipe(ialu_mem_reg);
6092 %}
6093 
// Matches StoreB of an immI8 constant. Encoded as opcode 0xC6 /0 with the
// REX_mem prefix encoding and the constant emitted through Con8or32.
6094 instruct storeImmB(memory mem, immI8 src)
6095 %{
6096   match(Set mem (StoreB mem src));
6097 
6098   ins_cost(150); // XXX
6099   format %{ "movb    $mem, $src\t# byte" %}
6100   opcode(0xC6); /* C6 /0 */
6101   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6102   ins_pipe(ialu_mem_imm);
6103 %}
6104 
6105 // Store CMS card-mark Immediate
// Card-mark store of zero (StoreCM with immI0) done by writing R12 with movb.
// Only eligible when compressed oops are enabled and both the narrow oop base
// and narrow klass base are NULL (format string: R12_heapbase==0).
// Cost 125 undercuts the immediate form storeImmCM0 (150).
6106 instruct storeImmCM0_reg(memory mem, immI0 zero)
6107 %{
6108   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6109   match(Set mem (StoreCM mem zero));
6110 
6111   ins_cost(125); // XXX
6112   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6113   ins_encode %{
6114     __ movb($mem$$Address, r12);
6115   %}
6116   ins_pipe(ialu_mem_reg);
6117 %}
6118 
// Card-mark store of zero (StoreCM with immI0) using an immediate: opcode
// 0xC6 /0 with the REX_mem prefix encoding and the constant emitted through
// Con8or32. Carries no predicate, so it is available in every configuration.
6119 instruct storeImmCM0(memory mem, immI0 src)
6120 %{
6121   match(Set mem (StoreCM mem src));
6122 
6123   ins_cost(150); // XXX
6124   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6125   opcode(0xC6); /* C6 /0 */
6126   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6127   ins_pipe(ialu_mem_imm);
6128 %}
6129 
6130 // Store Float
// Matches StoreF with a regF source and emits movflt from the XMM register to
// the memory operand; the format string prints it as movss.
6131 instruct storeF(memory mem, regF src)
6132 %{
6133   match(Set mem (StoreF mem src));
6134 
6135   ins_cost(95); // XXX
6136   format %{ "movss   $mem, $src\t# float" %}
6137   ins_encode %{
6138     __ movflt($mem$$Address, $src$$XMMRegister);
6139   %}
6140   ins_pipe(pipe_slow); // XXX
6141 %}
6142 
6143 // Store immediate Float value (it is faster than store from XMM register)
// Stores float 0.0 (immF0) as a 32-bit integer store of R12 (movl). Only
// eligible when compressed oops are enabled and both the narrow oop base and
// narrow klass base are NULL (format string: R12_heapbase==0). Cost 25 is the
// lowest of the StoreF rules in this section (storeF_imm 50, storeF 95).
6144 instruct storeF0(memory mem, immF0 zero)
6145 %{
6146   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6147   match(Set mem (StoreF mem zero));
6148 
6149   ins_cost(25); // XXX
6150   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6151   ins_encode %{
6152     __ movl($mem$$Address, r12);
6153   %}
6154   ins_pipe(ialu_mem_reg);
6155 %}
6156 
// Matches StoreF of an immF constant and stores it with the integer
// immediate-store opcode 0xC7 /0; the immediate is produced by Con32F_as_bits.
// Cost 50 places it below the XMM register store storeF (95).
6157 instruct storeF_imm(memory mem, immF src)
6158 %{
6159   match(Set mem (StoreF mem src));
6160 
6161   ins_cost(50);
6162   format %{ "movl    $mem, $src\t# float" %}
6163   opcode(0xC7); /* C7 /0 */
6164   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6165   ins_pipe(ialu_mem_imm);
6166 %}
6167 
6168 // Store Double
// Matches StoreD with a regD source and emits movdbl from the XMM register to
// the memory operand; the format string prints it as movsd.
6169 instruct storeD(memory mem, regD src)
6170 %{
6171   match(Set mem (StoreD mem src));
6172 
6173   ins_cost(95); // XXX
6174   format %{ "movsd   $mem, $src\t# double" %}
6175   ins_encode %{
6176     __ movdbl($mem$$Address, $src$$XMMRegister);
6177   %}
6178   ins_pipe(pipe_slow); // XXX
6179 %}
6180 
6181 // Store immediate double 0.0 (it is faster than store from XMM register)
// Stores double 0.0 (immD0) with the integer immediate-store opcode 0xC7 /0 and
// the REX_mem_wide prefix encoding; the immediate is produced by Con32F_as_bits.
//
// The predicate is the exact complement of storeD0's (compressed oops on, narrow
// oop base NULL and narrow klass base NULL). Including the narrow-klass-base
// term keeps an immediate zero store available when the oop base is NULL but the
// klass base is not; without it neither zero-store rule would apply there and
// only the XMM register form storeD would match.
6182 instruct storeD0_imm(memory mem, immD0 src)
6183 %{
6184   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL) || (Universe::narrow_klass_base() != NULL));
6185   match(Set mem (StoreD mem src));
6186 
6187   ins_cost(50);
6188   format %{ "movq    $mem, $src\t# double 0." %}
6189   opcode(0xC7); /* C7 /0 */
6190   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6191   ins_pipe(ialu_mem_imm);
6192 %}
6193 
// Stores double 0.0 (immD0) as a 64-bit integer store of R12 (movq). Only
// eligible when compressed oops are enabled and both the narrow oop base and
// narrow klass base are NULL (format string: R12_heapbase==0). Cost 25 is below
// both storeD0_imm (50) and storeD (95).
6194 instruct storeD0(memory mem, immD0 zero)
6195 %{
6196   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6197   match(Set mem (StoreD mem zero));
6198 
6199   ins_cost(25); // XXX
6200   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6201   ins_encode %{
6202     __ movq($mem$$Address, r12);
6203   %}
6204   ins_pipe(ialu_mem_reg);
6205 %}
6206 
// Copies an int register into an int stack slot: matches a bare "Set dst src"
// with a stackSlotI destination and uses the same opcode 0x89 / REX_reg_mem
// encoding as storeI, with the stack slot in the memory-operand position.
6207 instruct storeSSI(stackSlotI dst, rRegI src)
6208 %{
6209   match(Set dst src);
6210 
6211   ins_cost(100);
6212   format %{ "movl    $dst, $src\t# int stk" %}
6213   opcode(0x89);
6214   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6215   ins_pipe( ialu_mem_reg );
6216 %}
6217 
// Copies a long register into a long stack slot: matches "Set dst src" with a
// stackSlotL destination; opcode 0x89 with the REX_reg_mem_wide prefix encoding.
6218 instruct storeSSL(stackSlotL dst, rRegL src)
6219 %{
6220   match(Set dst src);
6221 
6222   ins_cost(100);
6223   format %{ "movq    $dst, $src\t# long stk" %}
6224   opcode(0x89);
6225   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6226   ins_pipe(ialu_mem_reg);
6227 %}
6228 
// Copies a pointer register into a pointer stack slot: matches "Set dst src"
// with a stackSlotP destination; same wide 0x89 encoding as storeSSL.
6229 instruct storeSSP(stackSlotP dst, rRegP src)
6230 %{
6231   match(Set dst src);
6232 
6233   ins_cost(100);
6234   format %{ "movq    $dst, $src\t# ptr stk" %}
6235   opcode(0x89);
6236   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6237   ins_pipe(ialu_mem_reg);
6238 %}
6239 
// Copies a float XMM register into a float stack slot: matches "Set dst src"
// with a stackSlotF destination and emits movflt to rsp plus the slot's
// displacement.
6240 instruct storeSSF(stackSlotF dst, regF src)
6241 %{
6242   match(Set dst src);
6243 
6244   ins_cost(95); // XXX
6245   format %{ "movss   $dst, $src\t# float stk" %}
6246   ins_encode %{
6247     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6248   %}
6249   ins_pipe(pipe_slow); // XXX
6250 %}
6251 
// Copies a double XMM register into a double stack slot: matches "Set dst src"
// with a stackSlotD destination and emits movdbl to rsp plus the slot's
// displacement.
6252 instruct storeSSD(stackSlotD dst, regD src)
6253 %{
6254   match(Set dst src);
6255 
6256   ins_cost(95); // XXX
6257   format %{ "movsd   $dst, $src\t# double stk" %}
6258   ins_encode %{
6259     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6260   %}
6261   ins_pipe(pipe_slow); // XXX
6262 %}
6263 
6264 //----------BSWAP Instructions-------------------------------------------------
// Matches ReverseBytesI in place (dst is both input and output). Encoded as the
// two-byte opcode 0x0F, 0xC8 with the REX_reg prefix encoding; the register is
// folded in by opc2_reg. The format string prints bswapl.
6265 instruct bytes_reverse_int(rRegI dst) %{
6266   match(Set dst (ReverseBytesI dst));
6267 
6268   format %{ "bswapl  $dst" %}
6269   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6270   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6271   ins_pipe( ialu_reg );
6272 %}
6273 
// Matches ReverseBytesL in place. Same 0x0F, 0xC8 opcode pair as the int form
// but with the REX_reg_wide prefix encoding; the format string prints bswapq.
6274 instruct bytes_reverse_long(rRegL dst) %{
6275   match(Set dst (ReverseBytesL dst));
6276 
6277   format %{ "bswapq  $dst" %}
6278   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6279   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6280   ins_pipe( ialu_reg);
6281 %}
6282 
// Matches ReverseBytesUS in place: a 32-bit bswapl followed by a logical right
// shift by 16, so the swapped halfword ends up in the low 16 bits with the upper
// bits zero. Declares the flags register as killed.
6283 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
6284   match(Set dst (ReverseBytesUS dst));
6285   effect(KILL cr);
6286 
6287   format %{ "bswapl  $dst\n\t"
6288             "shrl    $dst,16\n\t" %}
6289   ins_encode %{
6290     __ bswapl($dst$$Register);
6291     __ shrl($dst$$Register, 16);
6292   %}
6293   ins_pipe( ialu_reg );
6294 %}
6295 
// Matches ReverseBytesS in place: a 32-bit bswapl followed by an arithmetic
// right shift by 16, so the swapped halfword ends up in the low 16 bits with its
// sign propagated into the upper bits. Declares the flags register as killed.
// The format string names the shift "sarl" to match the sarl that is emitted and
// the "shrl" spelling used by bytes_reverse_unsigned_short.
6296 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
6297   match(Set dst (ReverseBytesS dst));
6298   effect(KILL cr);
6299 
6300   format %{ "bswapl  $dst\n\t"
6301             "sarl    $dst,16\n\t" %}
6302   ins_encode %{
6303     __ bswapl($dst$$Register);
6304     __ sarl($dst$$Register, 16);
6305   %}
6306   ins_pipe( ialu_reg );
6307 %}
6308 
6309 //---------- Zeros Count Instructions ------------------------------------------
6310 
// Matches CountLeadingZerosI when UseCountLeadingZerosInstruction is set and
// emits a single lzcntl. Declares the flags register as killed.
6311 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6312   predicate(UseCountLeadingZerosInstruction);
6313   match(Set dst (CountLeadingZerosI src));
6314   effect(KILL cr);
6315 
6316   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6317   ins_encode %{
6318     __ lzcntl($dst$$Register, $src$$Register);
6319   %}
6320   ins_pipe(ialu_reg);
6321 %}
6322 
// Fallback for CountLeadingZerosI when UseCountLeadingZerosInstruction is off.
// Computes (BitsPerInt - 1) - bsr(src), substituting -1 for the bsr result when
// the zero flag is set after bsrl, so that case yields 1 + 31 = 32.
// Declares the flags register as killed.
6323 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
6324   predicate(!UseCountLeadingZerosInstruction);
6325   match(Set dst (CountLeadingZerosI src));
6326   effect(KILL cr);
6327 
6328   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6329             "jnz     skip\n\t"
6330             "movl    $dst, -1\n"
6331       "skip:\n\t"
6332             "negl    $dst\n\t"
6333             "addl    $dst, 31" %}
6334   ins_encode %{
6335     Register Rdst = $dst$$Register;
6336     Register Rsrc = $src$$Register;
6337     Label skip;
6338     __ bsrl(Rdst, Rsrc);
    // Zero flag set here means bsrl found no set bit; force the index to -1.
6339     __ jccb(Assembler::notZero, skip);
6340     __ movl(Rdst, -1);
6341     __ bind(skip);
    // dst = -index + (BitsPerInt - 1)
6342     __ negl(Rdst);
6343     __ addl(Rdst, BitsPerInt - 1);
6344   %}
6345   ins_pipe(ialu_reg);
6346 %}
6347 
// Matches CountLeadingZerosL (long source, int result) when
// UseCountLeadingZerosInstruction is set and emits a single lzcntq.
// Declares the flags register as killed.
6348 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6349   predicate(UseCountLeadingZerosInstruction);
6350   match(Set dst (CountLeadingZerosL src));
6351   effect(KILL cr);
6352 
6353   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6354   ins_encode %{
6355     __ lzcntq($dst$$Register, $src$$Register);
6356   %}
6357   ins_pipe(ialu_reg);
6358 %}
6359 
// Fallback for CountLeadingZerosL when UseCountLeadingZerosInstruction is off.
// Computes (BitsPerLong - 1) - bsr(src) using a 64-bit bsrq followed by 32-bit
// fix-up arithmetic; when the zero flag is set after bsrq the index is replaced
// by -1, so that case yields 1 + 63 = 64. Declares the flags register as killed.
6360 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
6361   predicate(!UseCountLeadingZerosInstruction);
6362   match(Set dst (CountLeadingZerosL src));
6363   effect(KILL cr);
6364 
6365   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6366             "jnz     skip\n\t"
6367             "movl    $dst, -1\n"
6368       "skip:\n\t"
6369             "negl    $dst\n\t"
6370             "addl    $dst, 63" %}
6371   ins_encode %{
6372     Register Rdst = $dst$$Register;
6373     Register Rsrc = $src$$Register;
6374     Label skip;
6375     __ bsrq(Rdst, Rsrc);
    // Zero flag set here means bsrq found no set bit; force the index to -1.
6376     __ jccb(Assembler::notZero, skip);
6377     __ movl(Rdst, -1);
6378     __ bind(skip);
    // dst = -index + (BitsPerLong - 1)
6379     __ negl(Rdst);
6380     __ addl(Rdst, BitsPerLong - 1);
6381   %}
6382   ins_pipe(ialu_reg);
6383 %}
6384 
// Matches CountTrailingZerosI (no predicate). Emits bsfl and keeps its result
// unless the zero flag is set afterwards, in which case dst is loaded with
// BitsPerInt (printed as 32 in the format string). Declares the flags register
// as killed.
6385 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6386   match(Set dst (CountTrailingZerosI src));
6387   effect(KILL cr);
6388 
6389   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6390             "jnz     done\n\t"
6391             "movl    $dst, 32\n"
6392       "done:" %}
6393   ins_encode %{
6394     Register Rdst = $dst$$Register;
6395     Label done;
6396     __ bsfl(Rdst, $src$$Register);
6397     __ jccb(Assembler::notZero, done);
6398     __ movl(Rdst, BitsPerInt);
6399     __ bind(done);
6400   %}
6401   ins_pipe(ialu_reg);
6402 %}
6403 
// Matches CountTrailingZerosL (long source, int result; no predicate). Emits
// bsfq and keeps its result unless the zero flag is set afterwards, in which
// case dst is loaded with BitsPerLong (printed as 64 in the format string).
// Declares the flags register as killed.
6404 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6405   match(Set dst (CountTrailingZerosL src));
6406   effect(KILL cr);
6407 
6408   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6409             "jnz     done\n\t"
6410             "movl    $dst, 64\n"
6411       "done:" %}
6412   ins_encode %{
6413     Register Rdst = $dst$$Register;
6414     Label done;
6415     __ bsfq(Rdst, $src$$Register);
6416     __ jccb(Assembler::notZero, done);
6417     __ movl(Rdst, BitsPerLong);
6418     __ bind(done);
6419   %}
6420   ins_pipe(ialu_reg);
6421 %}
6422 
6423 
6424 //---------- Population Count Instructions -------------------------------------
6425 
// Matches PopCountI on a register source when UsePopCountInstruction is set and
// emits popcntl. Declares the flags register as killed.
6426 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
6427   predicate(UsePopCountInstruction);
6428   match(Set dst (PopCountI src));
6429   effect(KILL cr);
6430 
6431   format %{ "popcnt  $dst, $src" %}
6432   ins_encode %{
6433     __ popcntl($dst$$Register, $src$$Register);
6434   %}
6435   ins_pipe(ialu_reg);
6436 %}
6437 
// Matches PopCountI applied directly to a LoadI, folding the load into popcntl
// with a memory operand. Requires UsePopCountInstruction; declares the flags
// register as killed.
6438 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6439   predicate(UsePopCountInstruction);
6440   match(Set dst (PopCountI (LoadI mem)));
6441   effect(KILL cr);
6442 
6443   format %{ "popcnt  $dst, $mem" %}
6444   ins_encode %{
6445     __ popcntl($dst$$Register, $mem$$Address);
6446   %}
6447   ins_pipe(ialu_reg);
6448 %}
6449 
6450 // Note: Long.bitCount(long) returns an int.
// Matches PopCountL on a long register source with an int-class destination
// and emits popcntq. Requires UsePopCountInstruction; declares the flags
// register as killed.
6451 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
6452   predicate(UsePopCountInstruction);
6453   match(Set dst (PopCountL src));
6454   effect(KILL cr);
6455 
6456   format %{ "popcnt  $dst, $src" %}
6457   ins_encode %{
6458     __ popcntq($dst$$Register, $src$$Register);
6459   %}
6460   ins_pipe(ialu_reg);
6461 %}
6462 
6463 // Note: Long.bitCount(long) returns an int.
// Matches PopCountL applied directly to a LoadL, folding the load into popcntq
// with a memory operand; the destination is an int-class register. Requires
// UsePopCountInstruction; declares the flags register as killed.
6464 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6465   predicate(UsePopCountInstruction);
6466   match(Set dst (PopCountL (LoadL mem)));
6467   effect(KILL cr);
6468 
6469   format %{ "popcnt  $dst, $mem" %}
6470   ins_encode %{
6471     __ popcntq($dst$$Register, $mem$$Address);
6472   %}
6473   ins_pipe(ialu_reg);
6474 %}
6475 
6476 
6477 //----------MemBar Instructions-----------------------------------------------
6478 // Memory barrier flavors
6479 
// Matches MemBarAcquire and emits nothing: zero cost, declared size 0, empty
// encoding. Only the format string appears in printed output.
6480 instruct membar_acquire()
6481 %{
6482   match(MemBarAcquire);
6483   ins_cost(0);
6484 
6485   size(0);
6486   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6487   ins_encode();
6488   ins_pipe(empty);
6489 %}
6490 
// Matches MemBarAcquireLock and emits nothing (zero cost, size 0, empty
// encoding). The format string gives the rationale: a CMPXCHG in the preceding
// FastLock is relied on instead.
6491 instruct membar_acquire_lock()
6492 %{
6493   match(MemBarAcquireLock);
6494   ins_cost(0);
6495 
6496   size(0);
6497   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6498   ins_encode();
6499   ins_pipe(empty);
6500 %}
6501 
// Matches MemBarRelease and emits nothing: zero cost, declared size 0, empty
// encoding.
6502 instruct membar_release()
6503 %{
6504   match(MemBarRelease);
6505   ins_cost(0);
6506 
6507   size(0);
6508   format %{ "MEMBAR-release ! (empty encoding)" %}
6509   ins_encode();
6510   ins_pipe(empty);
6511 %}
6512 
// Matches MemBarReleaseLock and emits nothing (zero cost, size 0, empty
// encoding). The format string gives the rationale: a FastUnlock follows.
6513 instruct membar_release_lock()
6514 %{
6515   match(MemBarReleaseLock);
6516   ins_cost(0);
6517 
6518   size(0);
6519   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6520   ins_encode();
6521   ins_pipe(empty);
6522 %}
6523 
// Matches MemBarVolatile and calls membar(Assembler::StoreLoad). The format
// template prints a "lock addl [rsp + #0], 0" when os::is_MP() is true and an
// empty-encoding note otherwise, so the MP check is presumably mirrored inside
// membar() itself - confirm there. Declares the flags register as killed; the
// cost of 400 leaves the zero-cost unnecessary_membar_volatile rule cheaper
// whenever its predicate holds.
6524 instruct membar_volatile(rFlagsReg cr) %{
6525   match(MemBarVolatile);
6526   effect(KILL cr);
6527   ins_cost(400);
6528 
6529   format %{
6530     $$template
6531     if (os::is_MP()) {
6532       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6533     } else {
6534       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6535     }
6536   %}
6537   ins_encode %{
6538     __ membar(Assembler::StoreLoad);
6539   %}
6540   ins_pipe(pipe_slow);
6541 %}
6542 
// Zero-cost, zero-size alternative for MemBarVolatile, eligible only when
// Matcher::post_store_load_barrier(n) returns true for the node; emits nothing.
6543 instruct unnecessary_membar_volatile()
6544 %{
6545   match(MemBarVolatile);
6546   predicate(Matcher::post_store_load_barrier(n));
6547   ins_cost(0);
6548 
6549   size(0);
6550   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6551   ins_encode();
6552   ins_pipe(empty);
6553 %}
6554 
// Matches MemBarStoreStore and emits nothing: zero cost, declared size 0, empty
// encoding.
6555 instruct membar_storestore() %{
6556   match(MemBarStoreStore);
6557   ins_cost(0);
6558 
6559   size(0);
6560   format %{ "MEMBAR-storestore (empty encoding)" %}
6561   ins_encode( );
6562   ins_pipe(empty);
6563 %}
6564 
6565 //----------Move Instructions--------------------------------------------------
6566 
// Matches CastX2P (long register to pointer register). Emits a movptr only when
// the allocator assigned different registers to dst and src; otherwise no code
// is generated.
6567 instruct castX2P(rRegP dst, rRegL src)
6568 %{
6569   match(Set dst (CastX2P src));
6570 
6571   format %{ "movq    $dst, $src\t# long->ptr" %}
6572   ins_encode %{
6573     if ($dst$$reg != $src$$reg) {
6574       __ movptr($dst$$Register, $src$$Register);
6575     }
6576   %}
6577   ins_pipe(ialu_reg_reg); // XXX
6578 %}
6579 
// Matches CastP2X (pointer register to long register). Emits a movptr only when
// the allocator assigned different registers to dst and src; otherwise no code
// is generated.
6580 instruct castP2X(rRegL dst, rRegP src)
6581 %{
6582   match(Set dst (CastP2X src));
6583 
6584   format %{ "movq    $dst, $src\t# ptr -> long" %}
6585   ins_encode %{
6586     if ($dst$$reg != $src$$reg) {
6587       __ movptr($dst$$Register, $src$$Register);
6588     }
6589   %}
6590   ins_pipe(ialu_reg_reg); // XXX
6591 %}
6592 
6593 // Convert oop into int for vectors alignment masking
// Matches the two-node pattern ConvL2I(CastP2X src) and emits a single 32-bit
// movl from the pointer register. Unlike castP2X, the move is emitted even when
// dst and src are the same register.
6594 instruct convP2I(rRegI dst, rRegP src)
6595 %{
6596   match(Set dst (ConvL2I (CastP2X src)));
6597 
6598   format %{ "movl    $dst, $src\t# ptr -> int" %}
6599   ins_encode %{
6600     __ movl($dst$$Register, $src$$Register);
6601   %}
6602   ins_pipe(ialu_reg_reg); // XXX
6603 %}
6604 
6605 // Convert compressed oop into int for vectors alignment masking
6606 // in case of 32bit oops (heap < 4Gb).
// Matches ConvL2I(CastP2X(DecodeN src)) and emits a single movl straight from
// the compressed-pointer register, with no decode step. Only eligible when
// Universe::narrow_oop_shift() is 0.
6607 instruct convN2I(rRegI dst, rRegN src)
6608 %{
6609   predicate(Universe::narrow_oop_shift() == 0);
6610   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6611 
6612   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6613   ins_encode %{
6614     __ movl($dst$$Register, $src$$Register);
6615   %}
6616   ins_pipe(ialu_reg_reg); // XXX
6617 %}
6618 
6619 // Convert oop pointer into compressed form
// Matches EncodeP when the node's type is not TypePtr::NotNull. Copies src into
// dst if they are different registers, then calls the one-register form of
// encode_heap_oop on dst. Declares the flags register as killed.
6620 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
6621   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6622   match(Set dst (EncodeP src));
6623   effect(KILL cr);
6624   format %{ "encode_heap_oop $dst,$src" %}
6625   ins_encode %{
6626     Register s = $src$$Register;
6627     Register d = $dst$$Register;
6628     if (s != d) {
6629       __ movq(d, s);
6630     }
6631     __ encode_heap_oop(d);
6632   %}
6633   ins_pipe(ialu_reg_long);
6634 %}
6635 
// Matches EncodeP when the node's type is TypePtr::NotNull (the complement of
// encodeHeapOop's predicate) and calls the two-register form of
// encode_heap_oop_not_null. Declares the flags register as killed.
6636 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6637   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6638   match(Set dst (EncodeP src));
6639   effect(KILL cr);
6640   format %{ "encode_heap_oop_not_null $dst,$src" %}
6641   ins_encode %{
6642     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6643   %}
6644   ins_pipe(ialu_reg_long);
6645 %}
6646 
// Matches DecodeN when the node's pointer type is neither TypePtr::NotNull nor
// TypePtr::Constant. Copies src into dst if they are different registers, then
// calls the one-register form of decode_heap_oop on dst. Declares the flags
// register as killed.
6647 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
6648   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6649             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6650   match(Set dst (DecodeN src));
6651   effect(KILL cr);
6652   format %{ "decode_heap_oop $dst,$src" %}
6653   ins_encode %{
6654     Register s = $src$$Register;
6655     Register d = $dst$$Register;
6656     if (s != d) {
6657       __ movq(d, s);
6658     }
6659     __ decode_heap_oop(d);
6660   %}
6661   ins_pipe(ialu_reg_long);
6662 %}
6663 
// Matches DecodeN when the node's pointer type is TypePtr::NotNull or
// TypePtr::Constant (the complement of decodeHeapOop's predicate). Uses the
// two-register decode_heap_oop_not_null when dst and src differ and the
// in-place one-register form when they are the same. Declares the flags
// register as killed.
6664 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6665   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6666             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6667   match(Set dst (DecodeN src));
6668   effect(KILL cr);
6669   format %{ "decode_heap_oop_not_null $dst,$src" %}
6670   ins_encode %{
6671     Register s = $src$$Register;
6672     Register d = $dst$$Register;
6673     if (s != d) {
6674       __ decode_heap_oop_not_null(d, s);
6675     } else {
6676       __ decode_heap_oop_not_null(d);
6677     }
6678   %}
6679   ins_pipe(ialu_reg_long);
6680 %}
6681 
// Matches EncodePKlass (no predicate) and calls the two-register form of
// encode_klass_not_null. Declares the flags register as killed.
// The format string names the klass routine that is actually emitted, so
// printed output is distinguishable from encodeHeapOop_not_null.
6682 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6683   match(Set dst (EncodePKlass src));
6684   effect(KILL cr);
6685   format %{ "encode_klass_not_null $dst,$src" %}
6686   ins_encode %{
6687     __ encode_klass_not_null($dst$$Register, $src$$Register);
6688   %}
6689   ins_pipe(ialu_reg_long);
6690 %}
6691 
// Matches DecodeNKlass (no predicate). Uses the two-register
// decode_klass_not_null when dst and src differ and the in-place one-register
// form when they are the same. Declares the flags register as killed.
// The format string names the klass routine that is actually emitted, so
// printed output is distinguishable from decodeHeapOop_not_null.
6692 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6693   match(Set dst (DecodeNKlass src));
6694   effect(KILL cr);
6695   format %{ "decode_klass_not_null $dst,$src" %}
6696   ins_encode %{
6697     Register s = $src$$Register;
6698     Register d = $dst$$Register;
6699     if (s != d) {
6700       __ decode_klass_not_null(d, s);
6701     } else {
6702       __ decode_klass_not_null(d);
6703     }
6704   %}
6705   ins_pipe(ialu_reg_long);
6706 %}
6707 
6708 
6709 //----------Conditional Move---------------------------------------------------
6710 // Jump
6711 // dummy instruction for generating temp registers
// Jump-table dispatch for Jump(LShiftL switch_val shift): loads the constant
// table address into the TEMP register dest, then jumps indirectly through
// [dest + switch_val << shift]. The predicate is the literal false, so this
// pattern is never eligible as written.
6712 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6713   match(Jump (LShiftL switch_val shift));
6714   ins_cost(350);
6715   predicate(false);
6716   effect(TEMP dest);
6717 
6718   format %{ "leaq    $dest, [$constantaddress]\n\t"
6719             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6720   ins_encode %{
6721     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6722     // to do that and the compiler is using that register as one it can allocate.
6723     // So we build it all by hand.
6724     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6725     // ArrayAddress dispatch(table, index);
6726     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6727     __ lea($dest$$Register, $constantaddress);
6728     __ jmp(dispatch);
6729   %}
6730   ins_pipe(pipe_jmp);
6731 %}
6732 
// Jump-table dispatch for Jump(AddL(LShiftL switch_val shift) offset): loads the
// constant table address into the TEMP register dest, then jumps indirectly
// through [dest + switch_val << shift + offset]. The immL32 offset is narrowed
// to int for the Address displacement.
6733 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
6734   match(Jump (AddL (LShiftL switch_val shift) offset));
6735   ins_cost(350);
6736   effect(TEMP dest);
6737 
6738   format %{ "leaq    $dest, [$constantaddress]\n\t"
6739             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6740   ins_encode %{
6741     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6742     // to do that and the compiler is using that register as one it can allocate.
6743     // So we build it all by hand.
6744     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6745     // ArrayAddress dispatch(table, index);
6746     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6747     __ lea($dest$$Register, $constantaddress);
6748     __ jmp(dispatch);
6749   %}
6750   ins_pipe(pipe_jmp);
6751 %}
6752 
// Jump-table dispatch for a plain Jump on switch_val: loads the constant table
// address into the TEMP register dest, then jumps indirectly through
// [dest + switch_val] with scale factor times_1.
6753 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
6754   match(Jump switch_val);
6755   ins_cost(350);
6756   effect(TEMP dest);
6757 
6758   format %{ "leaq    $dest, [$constantaddress]\n\t"
6759             "jmp     [$dest + $switch_val]\n\t" %}
6760   ins_encode %{
6761     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6762     // to do that and the compiler is using that register as one it can allocate.
6763     // So we build it all by hand.
6764     // Address index(noreg, switch_reg, Address::times_1);
6765     // ArrayAddress dispatch(table, index);
6766     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6767     __ lea($dest$$Register, $constantaddress);
6768     __ jmp(dispatch);
6769   %}
6770   ins_pipe(pipe_jmp);
6771 %}
6772 
6773 // Conditional move
// Int conditional move, register source, signed condition (cmpOp / rFlagsReg).
// dst appears on both sides of the match, so it is both the fall-through value
// and the result. Encoded from the opcode pair 0x0F, 0x40 via enc_cmov(cop)
// (presumably folding the condition code into the second byte - defined
// elsewhere) with the REX_reg_reg prefix encoding.
6774 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6775 %{
6776   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6777 
6778   ins_cost(200); // XXX
6779   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6780   opcode(0x0F, 0x40);
6781   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6782   ins_pipe(pipe_cmov_reg);
6783 %}
6784 
// Int conditional move, register source, unsigned condition (cmpOpU /
// rFlagsRegU). Encoding is the same as cmovI_reg; only the operand classes and
// the format comment differ.
6785 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
6786   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6787 
6788   ins_cost(200); // XXX
6789   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6790   opcode(0x0F, 0x40);
6791   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6792   ins_pipe(pipe_cmov_reg);
6793 %}
6794 
// Int conditional move for the cmpOpUCF / rFlagsRegUCF operand classes. Has no
// encoding of its own: it expands to cmovI_regU with the same operands.
6795 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
6796   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6797   ins_cost(200);
6798   expand %{
6799     cmovI_regU(cop, cr, dst, src);
6800   %}
6801 %}
6802 
6803 // Conditional move
// Int conditional move with the source folded from a LoadI, signed condition.
// Same 0x0F, 0x40 opcode pair as cmovI_reg but with the REX_reg_mem / reg_mem
// encodings; cost 250 versus 200 for the register form.
6804 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
6805   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6806 
6807   ins_cost(250); // XXX
6808   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6809   opcode(0x0F, 0x40);
6810   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6811   ins_pipe(pipe_cmov_mem);
6812 %}
6813 
6814 // Conditional move
// Int conditional move with the source folded from a LoadI, unsigned condition
// (cmpOpU / rFlagsRegU). Encoding is the same as cmovI_mem.
6815 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6816 %{
6817   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6818 
6819   ins_cost(250); // XXX
6820   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6821   opcode(0x0F, 0x40);
6822   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6823   ins_pipe(pipe_cmov_mem);
6824 %}
6825 
6826 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovI_memU and therefore carries no encoding of its own.
6827   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6828   ins_cost(250);
6829   expand %{
6830     cmovI_memU(cop, cr, dst, src);
6831   %}
6832 %}
6833 
6834 // Conditional move
     // Compressed pointer (rRegN), register source, signed flags.  Uses the same
     // 32-bit cmovl encoding as the int form.
6835 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6836 %{
6837   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6838 
6839   ins_cost(200); // XXX
6840   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6841   opcode(0x0F, 0x40);
6842   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6843   ins_pipe(pipe_cmov_reg);
6844 %}
6845 
6846 // Conditional move
     // Compressed pointer (rRegN), register source, unsigned flags/condition
     // operands (rFlagsRegU/cmpOpU).  Encoding is identical to cmovN_reg.
6847 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6848 %{
6849   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6850 
6851   ins_cost(200); // XXX
6852   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6853   opcode(0x0F, 0x40);
6854   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6855   ins_pipe(pipe_cmov_reg);
6856 %}
6857 
6858 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovN_regU and therefore carries no encoding of its own.
6859   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6860   ins_cost(200);
6861   expand %{
6862     cmovN_regU(cop, cr, dst, src);
6863   %}
6864 %}
6865 
6866 // Conditional move
     // Full-width pointer, register source, signed flags.  Formatted as cmovq and
     // encoded with REX_reg_reg_wide (presumably the REX.W form -- encoding class
     // defined elsewhere, confirm) instead of the plain REX_reg_reg used for int.
6867 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6868 %{
6869   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6870 
6871   ins_cost(200); // XXX
6872   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6873   opcode(0x0F, 0x40);
6874   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6875   ins_pipe(pipe_cmov_reg);  // XXX
6876 %}
6877 
6878 // Conditional move
     // Full-width pointer, register source, unsigned flags/condition operands
     // (rFlagsRegU/cmpOpU).  Encoding is identical to cmovP_reg.
6879 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6880 %{
6881   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6882 
6883   ins_cost(200); // XXX
6884   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6885   opcode(0x0F, 0x40);
6886   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6887   ins_pipe(pipe_cmov_reg); // XXX
6888 %}
6889 
6890 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovP_regU and therefore carries no encoding of its own.
6891   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6892   ins_cost(200);
6893   expand %{
6894     cmovP_regU(cop, cr, dst, src);
6895   %}
6896 %}
6897 
6898 // DISABLED: Requires the ADLC to emit a bottom_type call that
6899 // correctly meets the two pointer arguments; one is an incoming
6900 // register but the other is a memory operand.  ALSO appears to
6901 // be buggy with implicit null checks.
6902 //
6903 //// Conditional move
6904 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6905 //%{
6906 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6907 //  ins_cost(250);
6908 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6909 //  opcode(0x0F,0x40);
6910 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6911 //  ins_pipe( pipe_cmov_mem );
6912 //%}
6913 //
6914 //// Conditional move
6915 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6916 //%{
6917 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6918 //  ins_cost(250);
6919 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6920 //  opcode(0x0F,0x40);
6921 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6922 //  ins_pipe( pipe_cmov_mem );
6923 //%}
6924 
6925 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6926 %{
       // Conditional move of a 64-bit long, register source, signed flags.
       // dst is both an input and the result of the CMoveL.
6927   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6928 
6929   ins_cost(200); // XXX
6930   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6931   opcode(0x0F, 0x40);
6932   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6933   ins_pipe(pipe_cmov_reg);  // XXX
6934 %}
6935 
6936 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6937 %{
       // Conditional move of a 64-bit long with the LoadL folded into the memory
       // operand (signed flags).
       // NOTE(review): cost is 200, the same as the register form, whereas the
       // int memory forms use 250 -- confirm this is intended.
6938   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6939 
6940   ins_cost(200); // XXX
6941   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6942   opcode(0x0F, 0x40);
6943   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6944   ins_pipe(pipe_cmov_mem);  // XXX
6945 %}
6946 
6947 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6948 %{
       // 64-bit long conditional move, register source, unsigned flags/condition
       // operands (rFlagsRegU/cmpOpU).  Encoding is identical to cmovL_reg.
6949   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6950 
6951   ins_cost(200); // XXX
6952   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6953   opcode(0x0F, 0x40);
6954   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6955   ins_pipe(pipe_cmov_reg); // XXX
6956 %}
6957 
6958 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovL_regU and therefore carries no encoding of its own.
6959   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6960   ins_cost(200);
6961   expand %{
6962     cmovL_regU(cop, cr, dst, src);
6963   %}
6964 %}
6965 
6966 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6967 %{
       // 64-bit long conditional move, memory source (LoadL folded in), unsigned
       // flags/condition operands.  Encoding is identical to cmovL_mem.
6968   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6969 
6970   ins_cost(200); // XXX
6971   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6972   opcode(0x0F, 0x40);
6973   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6974   ins_pipe(pipe_cmov_mem); // XXX
6975 %}
6976 
6977 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovL_memU and therefore carries no encoding of its own.
6978   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6979   ins_cost(200);
6980   expand %{
6981     cmovL_memU(cop, cr, dst, src);
6982   %}
6983 %}
6984 
6985 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6986 %{
       // Float conditional move on signed flags.  No cmov is encoded here; instead
       // a short branch on the inverted condition jumps over a register move.
6987   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6988 
6989   ins_cost(200); // XXX
6990   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6991             "movss     $dst, $src\n"
6992     "skip:" %}
6993   ins_encode %{
6994     // Flipping the low bit of the condition code inverts the CMOV condition.
6995     const Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode ^ 1);
6996     Label Ldone;
6997     __ jccb(skip_cc, Ldone);
6998     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6999     __ bind(Ldone);
       %}
7000   ins_pipe(pipe_slow);
7001 %}
7002 
7003 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7004 // %{
7005 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7006 
7007 //   ins_cost(200); // XXX
7008 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7009 //             "movss     $dst, $src\n"
7010 //     "skip:" %}
7011 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7012 //   ins_pipe(pipe_slow);
7013 // %}
7014 
7015 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7016 %{
       // Float conditional move on unsigned flags/condition operands.  Emitted as
       // a short branch on the inverted condition around a register move.
7017   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7018 
7019   ins_cost(200); // XXX
7020   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7021             "movss     $dst, $src\n"
7022     "skip:" %}
7023   ins_encode %{
7024     // Flipping the low bit of the condition code inverts the CMOV condition.
7025     const Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode ^ 1);
7026     Label Ldone;
7027     __ jccb(skip_cc, Ldone);
7028     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7029     __ bind(Ldone);
       %}
7030   ins_pipe(pipe_slow);
7031 %}
7032 
7033 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovF_regU and therefore carries no encoding of its own.
7034   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7035   ins_cost(200);
7036   expand %{
7037     cmovF_regU(cop, cr, dst, src);
7038   %}
7039 %}
7040 
7041 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7042 %{
       // Double conditional move on signed flags.  Emitted as a short branch on
       // the inverted condition around a register move.
7043   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7044 
7045   ins_cost(200); // XXX
7046   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7047             "movsd     $dst, $src\n"
7048     "skip:" %}
7049   ins_encode %{
7050     // Flipping the low bit of the condition code inverts the CMOV condition.
7051     const Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode ^ 1);
7052     Label Ldone;
7053     __ jccb(skip_cc, Ldone);
7054     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7055     __ bind(Ldone);
       %}
7056   ins_pipe(pipe_slow);
7057 %}
7058 
7059 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7060 %{
       // Double conditional move on unsigned flags/condition operands.  Emitted as
       // a short branch on the inverted condition around a register move.
7061   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7062 
7063   ins_cost(200); // XXX
7064   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7065             "movsd     $dst, $src\n"
7066     "skip:" %}
7067   ins_encode %{
7068     // Flipping the low bit of the condition code inverts the CMOV condition.
7069     const Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode ^ 1);
7070     Label Ldone;
7071     __ jccb(skip_cc, Ldone);
7072     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7073     __ bind(Ldone);
       %}
7074   ins_pipe(pipe_slow);
7075 %}
7076 
7077 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
       // Variant for the cmpOpUCF/rFlagsRegUCF operand pair; expands into
       // cmovD_regU and therefore carries no encoding of its own.
7078   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7079   ins_cost(200);
7080   expand %{
7081     cmovD_regU(cop, cr, dst, src);
7082   %}
7083 %}
7084 
7085 //----------Arithmetic Instructions--------------------------------------------
7086 //----------Addition Instructions----------------------------------------------
7087 
7088 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7089 %{
       // 32-bit add, register += register (opcode 0x03).  The flags register is
       // declared KILLed because the add overwrites the condition codes.
7090   match(Set dst (AddI dst src));
7091   effect(KILL cr);
7092 
7093   format %{ "addl    $dst, $src\t# int" %}
7094   opcode(0x03);
7095   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7096   ins_pipe(ialu_reg_reg);
7097 %}
7098 
7099 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7100 %{
       // 32-bit add, register += immediate (opcode 0x81 /0).  OpcSErm/Con8or32
       // presumably pick the sign-extended 8-bit immediate form when the constant
       // fits -- encoding classes are defined elsewhere, confirm.
7101   match(Set dst (AddI dst src));
7102   effect(KILL cr);
7103 
7104   format %{ "addl    $dst, $src\t# int" %}
7105   opcode(0x81, 0x00); /* /0 id */
7106   ins_encode(OpcSErm(dst, src), Con8or32(src));
7107   ins_pipe( ialu_reg );
7108 %}
7109 
7110 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7111 %{
       // 32-bit add, register += memory: the LoadI is folded into the add's
       // memory operand.  Kills the flags.
7112   match(Set dst (AddI dst (LoadI src)));
7113   effect(KILL cr);
7114 
7115   ins_cost(125); // XXX
7116   format %{ "addl    $dst, $src\t# int" %}
7117   opcode(0x03);
7118   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7119   ins_pipe(ialu_reg_mem);
7120 %}
7121 
7122 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7123 %{
       // 32-bit read-modify-write add: matches a load/add/store of the same
       // memory operand and emits a single "add [mem], reg" (opcode 0x01).
       // Note the encoding passes (src, dst): the register goes in the reg field.
7124   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7125   effect(KILL cr);
7126 
7127   ins_cost(150); // XXX
7128   format %{ "addl    $dst, $src\t# int" %}
7129   opcode(0x01); /* Opcode 01 /r */
7130   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7131   ins_pipe(ialu_mem_reg);
7132 %}
7133 
7134 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7135 %{
       // 32-bit read-modify-write add of an immediate to memory (opcode 0x81 /0).
       // The /0 opcode extension is passed explicitly to RM_opc_mem.
7136   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7137   effect(KILL cr);
7138 
7139   ins_cost(125); // XXX
7140   format %{ "addl    $dst, $src\t# int" %}
7141   opcode(0x81); /* Opcode 81 /0 id */
7142   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7143   ins_pipe(ialu_mem_imm);
7144 %}
7145 
7146 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7147 %{
       // 32-bit increment of a register (opcode 0xFF /0).  Only eligible when the
       // UseIncDec flag is set; matches an AddI whose constant is an immI1
       // operand (presumably the constant 1 -- operand defined elsewhere).
7148   predicate(UseIncDec);
7149   match(Set dst (AddI dst src));
7150   effect(KILL cr);
7151 
7152   format %{ "incl    $dst\t# int" %}
7153   opcode(0xFF, 0x00); // FF /0
7154   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7155   ins_pipe(ialu_reg);
7156 %}
7157 
7158 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7159 %{
       // 32-bit increment of a memory location (opcode 0xFF /0), matched from a
       // load/add/store of the same memory operand.  Gated on UseIncDec.
7160   predicate(UseIncDec);
7161   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7162   effect(KILL cr);
7163 
7164   ins_cost(125); // XXX
7165   format %{ "incl    $dst\t# int" %}
7166   opcode(0xFF); /* Opcode FF /0 */
7167   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7168   ins_pipe(ialu_mem_imm);
7169 %}
7170 
7171 // XXX why does that use AddI
     // (The rule expresses the decrement as an AddI whose constant operand is
     // immI_M1 -- presumably the constant -1; operand defined elsewhere.)
     // 32-bit decrement of a register (opcode 0xFF /1), gated on UseIncDec.
7172 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7173 %{
7174   predicate(UseIncDec);
7175   match(Set dst (AddI dst src));
7176   effect(KILL cr);
7177 
7178   format %{ "decl    $dst\t# int" %}
7179   opcode(0xFF, 0x01); // FF /1
7180   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7181   ins_pipe(ialu_reg);
7182 %}
7183 
7184 // XXX why does that use AddI
     // (The rule expresses the decrement as an AddI whose constant operand is
     // immI_M1 -- presumably the constant -1; operand defined elsewhere.)
     // 32-bit decrement of a memory location (opcode 0xFF /1), gated on UseIncDec.
7185 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7186 %{
7187   predicate(UseIncDec);
7188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7189   effect(KILL cr);
7190 
7191   ins_cost(125); // XXX
7192   format %{ "decl    $dst\t# int" %}
7193   opcode(0xFF); /* Opcode FF /1 */
7194   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7195   ins_pipe(ialu_mem_imm);
7196 %}
7197 
7198 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7199 %{
       // Three-operand 32-bit add of register + immediate via lea (opcode 0x8D):
       // dst need not equal src0, and no flags effect is declared.  A 0x67 byte
       // (address-size prefix, "addr32" in the format) is emitted first.
       // Costed at 110.
7200   match(Set dst (AddI src0 src1));
7201 
7202   ins_cost(110);
7203   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7204   opcode(0x8D); /* 0x8D /r */
7205   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7206   ins_pipe(ialu_reg_reg);
7207 %}
7208 
7209 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7210 %{
       // 64-bit add, register += register (opcode 0x03 with the wide REX
       // encoding class).  Kills the flags.
7211   match(Set dst (AddL dst src));
7212   effect(KILL cr);
7213 
7214   format %{ "addq    $dst, $src\t# long" %}
7215   opcode(0x03);
7216   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7217   ins_pipe(ialu_reg_reg);
7218 %}
7219 
7220 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7221 %{
       // 64-bit add, register += immediate (opcode 0x81 /0).  The constant is an
       // immL32 operand (presumably a long that fits in 32 signed bits --
       // operand defined elsewhere).  Kills the flags.
7222   match(Set dst (AddL dst src));
7223   effect(KILL cr);
7224 
7225   format %{ "addq    $dst, $src\t# long" %}
7226   opcode(0x81, 0x00); /* /0 id */
7227   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7228   ins_pipe( ialu_reg );
7229 %}
7230 
7231 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7232 %{
       // 64-bit add, register += memory: the LoadL is folded into the add's
       // memory operand.  Kills the flags.
7233   match(Set dst (AddL dst (LoadL src)));
7234   effect(KILL cr);
7235 
7236   ins_cost(125); // XXX
7237   format %{ "addq    $dst, $src\t# long" %}
7238   opcode(0x03);
7239   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7240   ins_pipe(ialu_reg_mem);
7241 %}
7242 
7243 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7244 %{
       // 64-bit read-modify-write add: matches a load/add/store of the same
       // memory operand and emits a single "add [mem], reg" (opcode 0x01).
       // Note the encoding passes (src, dst): the register goes in the reg field.
7245   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7246   effect(KILL cr);
7247 
7248   ins_cost(150); // XXX
7249   format %{ "addq    $dst, $src\t# long" %}
7250   opcode(0x01); /* Opcode 01 /r */
7251   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7252   ins_pipe(ialu_mem_reg);
7253 %}
7254 
7255 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7256 %{
       // 64-bit read-modify-write add of an immediate to memory (opcode 0x81 /0).
       // The /0 opcode extension is passed explicitly to RM_opc_mem.
7257   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7258   effect(KILL cr);
7259 
7260   ins_cost(125); // XXX
7261   format %{ "addq    $dst, $src\t# long" %}
7262   opcode(0x81); /* Opcode 81 /0 id */
7263   ins_encode(REX_mem_wide(dst),
7264              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7265   ins_pipe(ialu_mem_imm);
7266 %}
7267 
7268 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7269 %{
       // 64-bit increment of a register (opcode 0xFF /0, wide REX form).  Only
       // eligible when UseIncDec is set; matches an AddL whose constant is an
       // immL1 operand.  dst is a long register (rRegL) because it is both the
       // input and the result of the AddL, in line with decL_rReg.
7270   predicate(UseIncDec);
7271   match(Set dst (AddL dst src));
7272   effect(KILL cr);
7273 
7274   format %{ "incq    $dst\t# long" %}
7275   opcode(0xFF, 0x00); // FF /0
7276   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7277   ins_pipe(ialu_reg);
7278 %}
7279 
7280 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7281 %{
       // 64-bit increment of a memory location (opcode 0xFF /0), matched from a
       // load/add/store of the same memory operand.  Gated on UseIncDec.
7282   predicate(UseIncDec);
7283   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7284   effect(KILL cr);
7285 
7286   ins_cost(125); // XXX
7287   format %{ "incq    $dst\t# long" %}
7288   opcode(0xFF); /* Opcode FF /0 */
7289   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7290   ins_pipe(ialu_mem_imm);
7291 %}
7292 
7293 // XXX why does that use AddL
     // (The rule expresses the decrement as an AddL whose constant operand is
     // immL_M1 -- presumably the constant -1; operand defined elsewhere.)
     // 64-bit decrement of a register (opcode 0xFF /1), gated on UseIncDec.
7294 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7295 %{
7296   predicate(UseIncDec);
7297   match(Set dst (AddL dst src));
7298   effect(KILL cr);
7299 
7300   format %{ "decq    $dst\t# long" %}
7301   opcode(0xFF, 0x01); // FF /1
7302   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7303   ins_pipe(ialu_reg);
7304 %}
7305 
7306 // XXX why does that use AddL
     // (The rule expresses the decrement as an AddL whose constant operand is
     // immL_M1 -- presumably the constant -1; operand defined elsewhere.)
     // 64-bit decrement of a memory location (opcode 0xFF /1), gated on UseIncDec.
7307 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7308 %{
7309   predicate(UseIncDec);
7310   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7311   effect(KILL cr);
7312 
7313   ins_cost(125); // XXX
7314   format %{ "decq    $dst\t# long" %}
7315   opcode(0xFF); /* Opcode FF /1 */
7316   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7317   ins_pipe(ialu_mem_imm);
7318 %}
7319 
7320 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7321 %{
       // Three-operand 64-bit add of register + immediate via lea (opcode 0x8D):
       // dst need not equal src0, and no flags effect is declared.  Costed at 110.
7322   match(Set dst (AddL src0 src1));
7323 
7324   ins_cost(110);
7325   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7326   opcode(0x8D); /* 0x8D /r */
7327   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7328   ins_pipe(ialu_reg_reg);
7329 %}
7330 
7331 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7332 %{
       // Pointer += long offset held in a register: 64-bit add (opcode 0x03).
       // Kills the flags.
7333   match(Set dst (AddP dst src));
7334   effect(KILL cr);
7335 
7336   format %{ "addq    $dst, $src\t# ptr" %}
7337   opcode(0x03);
7338   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7339   ins_pipe(ialu_reg_reg);
7340 %}
7341 
7342 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7343 %{
       // Pointer += immediate offset (immL32 operand): 64-bit add, opcode
       // 0x81 /0.  Kills the flags.
7344   match(Set dst (AddP dst src));
7345   effect(KILL cr);
7346 
7347   format %{ "addq    $dst, $src\t# ptr" %}
7348   opcode(0x81, 0x00); /* /0 id */
7349   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7350   ins_pipe( ialu_reg );
7351 %}
7352 
7353 // XXX addP mem ops ????
7354 
7355 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7356 %{
       // Three-operand pointer + immediate via lea (opcode 0x8D): dst need not
       // equal src0, and no flags effect is declared.  Costed at 110.
7357   match(Set dst (AddP src0 src1));
7358 
7359   ins_cost(110);
7360   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7361   opcode(0x8D); /* 0x8D /r */
7362   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7363   ins_pipe(ialu_reg_reg);
7364 %}
7365 
7366 instruct checkCastPP(rRegP dst)
7367 %{
       // CheckCastPP is matched in place on dst and emits no machine code:
       // size(0) with an empty encoding.
7368   match(Set dst (CheckCastPP dst));
7369 
7370   size(0);
7371   format %{ "# checkcastPP of $dst" %}
7372   ins_encode(/* empty encoding */);
7373   ins_pipe(empty);
7374 %}
7375 
7376 instruct castPP(rRegP dst)
7377 %{
       // CastPP is matched in place on dst and emits no machine code:
       // size(0) with an empty encoding.
7378   match(Set dst (CastPP dst));
7379 
7380   size(0);
7381   format %{ "# castPP of $dst" %}
7382   ins_encode(/* empty encoding */);
7383   ins_pipe(empty);
7384 %}
7385 
7386 instruct castII(rRegI dst)
7387 %{
       // CastII is matched in place on dst and emits no machine code:
       // size(0), empty encoding, and an explicit cost of 0.
7388   match(Set dst (CastII dst));
7389 
7390   size(0);
7391   format %{ "# castII of $dst" %}
7392   ins_encode(/* empty encoding */);
7393   ins_cost(0);
7394   ins_pipe(empty);
7395 %}
7396 
7397 // LoadP-locked same as a regular LoadP when used with compare-swap
     // Emitted as an ordinary 64-bit load (opcode 0x8B, "movq"); no lock prefix
     // appears in the encoding.
7398 instruct loadPLocked(rRegP dst, memory mem)
7399 %{
7400   match(Set dst (LoadPLocked mem));
7401 
7402   ins_cost(125); // XXX
7403   format %{ "movq    $dst, $mem\t# ptr locked" %}
7404   opcode(0x8B);
7405   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7406   ins_pipe(ialu_reg_mem); // XXX
7407 %}
7408 
7409 // Conditional-store of the updated heap-top.
7410 // Used during allocation of the shared heap.
7411 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
     // The result of the rule is the flags register itself (Set cr ...).  oldval
     // is pinned to rax (rax_RegP).  The encoding is lock_prefix followed by the
     // 64-bit cmpxchg (0x0F 0xB1).
     // NOTE(review): storeIConditional and storeLConditional declare
     // effect(KILL oldval); this rule does not -- confirm that is intentional.
7412 
7413 instruct storePConditional(memory heap_top_ptr,
7414                            rax_RegP oldval, rRegP newval,
7415                            rFlagsReg cr)
7416 %{
7417   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7418 
7419   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7420             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7421   opcode(0x0F, 0xB1);
7422   ins_encode(lock_prefix,
7423              REX_reg_mem_wide(newval, heap_top_ptr),
7424              OpcP, OpcS,
7425              reg_mem(newval, heap_top_ptr));
7426   ins_pipe(pipe_cmpxchg);
7427 %}
7428 
7429 // Conditional-store of an int value.
7430 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
     // The result of the rule is the flags register (Set cr ...).  oldval is
     // pinned to rax (rax_RegI) and declared KILLed.  The encoding is lock_prefix
     // followed by the 32-bit cmpxchg (0x0F 0xB1).
7431 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7432 %{
7433   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7434   effect(KILL oldval);
7435 
7436   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7437   opcode(0x0F, 0xB1);
7438   ins_encode(lock_prefix,
7439              REX_reg_mem(newval, mem),
7440              OpcP, OpcS,
7441              reg_mem(newval, mem));
7442   ins_pipe(pipe_cmpxchg);
7443 %}
7444 
7445 // Conditional-store of a long value.
7446 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
     // The result of the rule is the flags register (Set cr ...).  oldval is
     // pinned to rax (rax_RegL) and declared KILLed.  The encoding is lock_prefix
     // followed by the 64-bit cmpxchg (0x0F 0xB1, wide REX form).
7447 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7448 %{
7449   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7450   effect(KILL oldval);
7451 
7452   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7453   opcode(0x0F, 0xB1);
7454   ins_encode(lock_prefix,
7455              REX_reg_mem_wide(newval, mem),
7456              OpcP, OpcS,
7457              reg_mem(newval, mem));
7458   ins_pipe(pipe_cmpxchg);
7459 %}
7460 
7461 
7462 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
     // Pointer compare-and-swap producing an int result in res.  Emits three
     // instructions: locked 64-bit cmpxchg (0x0F 0xB1), then sete res (0x0F 0x94)
     // and movzbl res,res (0x0F 0xB6) to turn the flag into a 0/1 value.
     // oldval is pinned to rax and KILLed along with the flags.
     // Only eligible when VM_Version::supports_cx8() holds.
7463 instruct compareAndSwapP(rRegI res,
7464                          memory mem_ptr,
7465                          rax_RegP oldval, rRegP newval,
7466                          rFlagsReg cr)
7467 %{
7468   predicate(VM_Version::supports_cx8());
7469   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7470   effect(KILL cr, KILL oldval);
7471 
7472   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7473             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7474             "sete    $res\n\t"
7475             "movzbl  $res, $res" %}
7476   opcode(0x0F, 0xB1);
7477   ins_encode(lock_prefix,
7478              REX_reg_mem_wide(newval, mem_ptr),
7479              OpcP, OpcS,
7480              reg_mem(newval, mem_ptr),
7481              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7482              REX_reg_breg(res, res), // movzbl
7483              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
7487 instruct compareAndSwapL(rRegI res,
7488                          memory mem_ptr,
7489                          rax_RegL oldval, rRegL newval,
7490                          rFlagsReg cr)
7491 %{
       // Long compare-and-swap producing an int result in res: locked 64-bit
       // cmpxchg, then sete/movzbl to turn the flag into a 0/1 value.  oldval is
       // pinned to rax and KILLed along with the flags.  Only eligible when
       // VM_Version::supports_cx8() holds.
7492   predicate(VM_Version::supports_cx8());
7493   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7494   effect(KILL cr, KILL oldval);
7495 
7496   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7497             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7498             "sete    $res\n\t"
7499             "movzbl  $res, $res" %}
7500   opcode(0x0F, 0xB1);
7501   ins_encode(lock_prefix,
7502              REX_reg_mem_wide(newval, mem_ptr),
7503              OpcP, OpcS,
7504              reg_mem(newval, mem_ptr),
7505              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7506              REX_reg_breg(res, res), // movzbl
7507              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7508   ins_pipe( pipe_cmpxchg );
7509 %}
7510 
7511 instruct compareAndSwapI(rRegI res,
7512                          memory mem_ptr,
7513                          rax_RegI oldval, rRegI newval,
7514                          rFlagsReg cr)
7515 %{
       // Int compare-and-swap producing an int result in res: locked 32-bit
       // cmpxchg (non-wide REX form), then sete/movzbl to turn the flag into a
       // 0/1 value.  oldval is pinned to rax and KILLed along with the flags.
       // Unlike the P and L forms there is no predicate.
7516   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7517   effect(KILL cr, KILL oldval);
7518 
7519   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7520             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7521             "sete    $res\n\t"
7522             "movzbl  $res, $res" %}
7523   opcode(0x0F, 0xB1);
7524   ins_encode(lock_prefix,
7525              REX_reg_mem(newval, mem_ptr),
7526              OpcP, OpcS,
7527              reg_mem(newval, mem_ptr),
7528              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7529              REX_reg_breg(res, res), // movzbl
7530              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7531   ins_pipe( pipe_cmpxchg );
7532 %}
7533 
7534 
7535 instruct compareAndSwapN(rRegI res,
7536                           memory mem_ptr,
7537                           rax_RegN oldval, rRegN newval,
7538                           rFlagsReg cr) %{
       // Compressed-pointer compare-and-swap producing an int result in res:
       // locked 32-bit cmpxchg, then sete/movzbl to turn the flag into a 0/1
       // value.  oldval is pinned to rax and KILLed along with the flags.
7539   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7540   effect(KILL cr, KILL oldval);
7541 
7542   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7543             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7544             "sete    $res\n\t"
7545             "movzbl  $res, $res" %}
7546   opcode(0x0F, 0xB1);
7547   ins_encode(lock_prefix,
7548              REX_reg_mem(newval, mem_ptr),
7549              OpcP, OpcS,
7550              reg_mem(newval, mem_ptr),
7551              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7552              REX_reg_breg(res, res), // movzbl
7553              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7554   ins_pipe( pipe_cmpxchg );
7555 %}
7556 
7557 instruct xaddI_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
       // GetAndAddI whose fetched value is unused (see predicate): emitted as a
       // plain add of an immediate to memory rather than an xadd.  The lock
       // prefix is emitted only when os::is_MP() is true.
7558   predicate(n->as_LoadStore()->result_not_used());
7559   match(Set dummy (GetAndAddI mem add));
7560   effect(KILL cr);
7561   format %{ "ADDL  [$mem],$add" %}
7562   ins_encode %{
7563     if (os::is_MP()) {
           __ lock();
         }
7564     __ addl($mem$$Address, $add$$constant);
7565   %}
7566   ins_pipe(pipe_cmpxchg);
7567 %}
7568 
7569 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
       // GetAndAddI: newval supplies the addend and receives the rule's result.
       // Emitted as xaddl, with a lock prefix only when os::is_MP() is true.
7570   match(Set newval (GetAndAddI mem newval));
7571   effect(KILL cr);
7572   format %{ "XADDL  [$mem],$newval" %}
7573   ins_encode %{
7574     if (os::is_MP()) {
           __ lock();
         }
7575     const Register addend = $newval$$Register;
         __ xaddl($mem$$Address, addend);
7576   %}
7577   ins_pipe(pipe_cmpxchg);
7578 %}
7579 
7580 instruct xaddL_no_res(memory mem, Universe dummy, immL add, rFlagsReg cr) %{
       // GetAndAddL whose fetched value is unused (see predicate): emitted as a
       // plain 64-bit add of an immediate to memory rather than an xadd.  The
       // lock prefix is emitted only when os::is_MP() is true.
7581   predicate(n->as_LoadStore()->result_not_used());
7582   match(Set dummy (GetAndAddL mem add));
7583   effect(KILL cr);
7584   format %{ "ADDQ  [$mem],$add" %}
7585   ins_encode %{
7586     if (os::is_MP()) {
           __ lock();
         }
7587     __ addq($mem$$Address, $add$$constant);
7588   %}
7589   ins_pipe(pipe_cmpxchg);
7590 %}
7591 
7592 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
       // GetAndAddL: newval supplies the addend and receives the rule's result.
       // Emitted as xaddq, with a lock prefix only when os::is_MP() is true.
7593   match(Set newval (GetAndAddL mem newval));
7594   effect(KILL cr);
7595   format %{ "XADDQ  [$mem],$newval" %}
7596   ins_encode %{
7597     if (os::is_MP()) {
           __ lock();
         }
7598     const Register addend = $newval$$Register;
         __ xaddq($mem$$Address, addend);
7599   %}
7600   ins_pipe(pipe_cmpxchg);
7601 %}
7602 
7603 instruct xchgI(memory mem, rRegI newval) %{
       // GetAndSetI: exchanges newval with the int at mem; newval receives the
       // rule's result.  No flags effect is declared and no lock prefix is emitted.
7604   match(Set newval (GetAndSetI mem newval));
7605   format %{ "XCHGL  $newval,[$mem]" %}
7606   ins_encode %{
7607     const Register value = $newval$$Register;
         __ xchgl(value, $mem$$Address);
7608   %}
7609   ins_pipe(pipe_cmpxchg);
7610 %}
7611 
7612 instruct xchgL( memory mem, rRegL newval) %{
       // GetAndSetL: exchanges newval with the long at mem; newval receives the
       // rule's result.  The encoding is the 64-bit xchgq, so the disassembly
       // format names XCHGQ to agree with it.
7613   match(Set newval (GetAndSetL mem newval));
7614   format %{ "XCHGQ  $newval,[$mem]" %}
7615   ins_encode %{
7616     __ xchgq($newval$$Register, $mem$$Address);
7617   %}
7618   ins_pipe( pipe_cmpxchg );
7619 %}
7620 
7621 instruct xchgP(memory mem, rRegP newval) %{
       // GetAndSetP: exchanges newval with the pointer at mem (64-bit xchgq);
       // newval receives the rule's result.  No flags effect is declared and no
       // lock prefix is emitted.
7622   match(Set newval (GetAndSetP mem newval));
7623   format %{ "XCHGQ  $newval,[$mem]" %}
7624   ins_encode %{
7625     const Register value = $newval$$Register;
         __ xchgq(value, $mem$$Address);
7626   %}
7627   ins_pipe(pipe_cmpxchg);
7628 %}
7629 
7630 instruct xchgN( memory mem, rRegN newval) %{
       // GetAndSetN: exchanges newval with the compressed pointer at mem using
       // the 32-bit xchgl; newval receives the rule's result.  The format string
       // brackets the memory operand the same way the other xchg rules do.
7631   match(Set newval (GetAndSetN mem newval));
7632   format %{ "XCHGL  $newval,[$mem]" %}
7633   ins_encode %{
7634     __ xchgl($newval$$Register, $mem$$Address);
7635   %}
7636   ins_pipe( pipe_cmpxchg );
7637 %}
7638 
7639 //----------Subtraction Instructions-------------------------------------------
7640 
7641 // Integer Subtraction Instructions
     // 32-bit subtract, register -= register (opcode 0x2B).  Kills the flags.
7642 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7643 %{
7644   match(Set dst (SubI dst src));
7645   effect(KILL cr);
7646 
7647   format %{ "subl    $dst, $src\t# int" %}
7648   opcode(0x2B);
7649   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7650   ins_pipe(ialu_reg_reg);
7651 %}
7652 
7653 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7654 %{
       // 32-bit subtract, register -= immediate (opcode 0x81 /5).  Kills the flags.
7655   match(Set dst (SubI dst src));
7656   effect(KILL cr);
7657 
7658   format %{ "subl    $dst, $src\t# int" %}
7659   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7660   ins_encode(OpcSErm(dst, src), Con8or32(src));
7661   ins_pipe(ialu_reg);
7662 %}
7663 
7664 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7665 %{
       // 32-bit subtract, register -= memory: the LoadI is folded into the
       // subtract's memory operand.  Kills the flags.
7666   match(Set dst (SubI dst (LoadI src)));
7667   effect(KILL cr);
7668 
7669   ins_cost(125);
7670   format %{ "subl    $dst, $src\t# int" %}
7671   opcode(0x2B);
7672   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7673   ins_pipe(ialu_reg_mem);
7674 %}
7675 
7676 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7677 %{
       // 32-bit read-modify-write subtract: matches a load/sub/store of the same
       // memory operand and emits a single "sub [mem], reg" (opcode 0x29).
       // Note the encoding passes (src, dst): the register goes in the reg field.
7678   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7679   effect(KILL cr);
7680 
7681   ins_cost(150);
7682   format %{ "subl    $dst, $src\t# int" %}
7683   opcode(0x29); /* Opcode 29 /r */
7684   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7685   ins_pipe(ialu_mem_reg);
7686 %}
7687 
7688 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7689 %{
       // 32-bit read-modify-write subtract of an immediate from memory
       // (opcode 0x81 /5).  The /5 extension is passed explicitly to RM_opc_mem.
7690   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7691   effect(KILL cr);
7692 
7693   ins_cost(125); // XXX
7694   format %{ "subl    $dst, $src\t# int" %}
7695   opcode(0x81); /* Opcode 81 /5 id */
7696   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7697   ins_pipe(ialu_mem_imm);
7698 %}
7699 
7700 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7701 %{
       // 64-bit subtract, register -= register (opcode 0x2B with the wide REX
       // encoding class).  Kills the flags.
7702   match(Set dst (SubL dst src));
7703   effect(KILL cr);
7704 
7705   format %{ "subq    $dst, $src\t# long" %}
7706   opcode(0x2B);
7707   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7708   ins_pipe(ialu_reg_reg);
7709 %}
7710 
7711 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7712 %{
       // 64-bit subtract, register -= immediate (opcode 0x81 /5, wide form).
       // dst is a long register (rRegL) because it is both the input and the
       // result of the SubL, in line with addL_rReg_imm.  Kills the flags.
7713   match(Set dst (SubL dst src));
7714   effect(KILL cr);
7715 
7716   format %{ "subq    $dst, $src\t# long" %}
7717   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7718   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7719   ins_pipe(ialu_reg);
7720 %}
7721 
7722 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7723 %{
       // 64-bit subtract, register -= memory: the LoadL is folded into the
       // subtract's memory operand.  Kills the flags.
7724   match(Set dst (SubL dst (LoadL src)));
7725   effect(KILL cr);
7726 
7727   ins_cost(125);
7728   format %{ "subq    $dst, $src\t# long" %}
7729   opcode(0x2B);
7730   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7731   ins_pipe(ialu_reg_mem);
7732 %}
7733 
7734 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7735 %{
       // 64-bit read-modify-write subtract: matches a load/sub/store of the same
       // memory operand and emits a single "sub [mem], reg" (opcode 0x29).
       // Note the encoding passes (src, dst): the register goes in the reg field.
7736   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7737   effect(KILL cr);
7738 
7739   ins_cost(150);
7740   format %{ "subq    $dst, $src\t# long" %}
7741   opcode(0x29); /* Opcode 29 /r */
7742   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7743   ins_pipe(ialu_mem_reg);
7744 %}
7745 
7746 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7747 %{
       // 64-bit read-modify-write subtract of an immediate from memory
       // (opcode 0x81 /5).  The /5 extension is passed explicitly to RM_opc_mem.
7748   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7749   effect(KILL cr);
7750 
7751   ins_cost(125); // XXX
7752   format %{ "subq    $dst, $src\t# long" %}
7753   opcode(0x81); /* Opcode 81 /5 id */
7754   ins_encode(REX_mem_wide(dst),
7755              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7756   ins_pipe(ialu_mem_imm);
7757 %}
7758 
7759 // Subtract from a pointer
7760 // XXX hmpf???
     // Matches ptr + (0 - int) and emits a single 64-bit subtract (opcode 0x2B,
     // wide REX form) of src from dst.
     // NOTE(review): src is an int register (rRegI) but the subtract is 64-bit
     // wide -- confirm how the upper half of src is expected to be defined.
7761 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7762 %{
7763   match(Set dst (AddP dst (SubI zero src)));
7764   effect(KILL cr);
7765 
7766   format %{ "subq    $dst, $src\t# ptr - int" %}
7767   opcode(0x2B);
7768   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7769   ins_pipe(ialu_reg_reg);
7770 %}
7771 
7772 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7773 %{
       // 32-bit negate of a register: matches (0 - dst) via the immI0 operand and
       // emits neg (opcode 0xF7 /3).  Kills the flags.
7774   match(Set dst (SubI zero dst));
7775   effect(KILL cr);
7776 
7777   format %{ "negl    $dst\t# int" %}
7778   opcode(0xF7, 0x03);  // Opcode F7 /3
7779   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7780   ins_pipe(ialu_reg);
7781 %}
7782 
7783 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7784 %{
       // Int negate in memory: matches [dst] = 0 - LoadI(dst) and emits a
       // single negl on the memory operand.
7785   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7786   effect(KILL cr);  // the flags register is declared killed
7787 
7788   format %{ "negl    $dst\t# int" %}
7789   opcode(0xF7, 0x03);  // Opcode F7 /3
7790   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x03) is the /3 extension
7791   ins_pipe(ialu_reg);  // NOTE(review): register pipe class on a memory operation; other memory read-modify-write forms here use ialu_mem_imm -- confirm intent
7792 %}
7793 
7794 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7795 %{
       // Long negate in a register: matches dst = 0L - dst and emits a single negq.
       // The zero operand only takes part in matching; it is not encoded.
7796   match(Set dst (SubL zero dst));
7797   effect(KILL cr);  // the flags register is declared killed
7798 
7799   format %{ "negq    $dst\t# long" %}
7800   opcode(0xF7, 0x03);  // Opcode F7 /3
7801   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7802   ins_pipe(ialu_reg);
7803 %}
7804 
7805 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7806 %{
       // Long negate in memory: matches [dst] = 0L - LoadL(dst) and emits a
       // single negq on the memory operand.
7807   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7808   effect(KILL cr);  // the flags register is declared killed
7809 
7810   format %{ "negq    $dst\t# long" %}
7811   opcode(0xF7, 0x03);  // Opcode F7 /3
7812   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x03) is the /3 extension
7813   ins_pipe(ialu_reg);  // NOTE(review): register pipe class on a memory operation; other memory read-modify-write forms here use ialu_mem_imm -- confirm intent
7814 %}
7815 
7816 
7817 //----------Multiplication/Division Instructions-------------------------------
7818 // Integer Multiplication Instructions
7819 // Multiply Register
7820 
7821 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7822 %{
       // Int multiply, register by register: matches dst = dst * src.
7823   match(Set dst (MulI dst src));
7824   effect(KILL cr);  // the flags register is declared killed
7825 
7826   ins_cost(300);
7827   format %{ "imull   $dst, $src\t# int" %}
7828   opcode(0x0F, 0xAF);  // two opcode bytes, emitted by OpcP then OpcS
7829   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7830   ins_pipe(ialu_reg_reg_alu0);
7831 %}
7832 
7833 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7834 %{
       // Int multiply by a constant, three-operand form: matches dst = src * imm,
       // with dst and src declared as separate operands.
       // OpcSE and Con8or32 are encoders defined elsewhere in this file;
       // presumably they pick between the imm8 and imm32 forms -- confirm there.
7835   match(Set dst (MulI src imm));
7836   effect(KILL cr);  // the flags register is declared killed
7837 
7838   ins_cost(300);
7839   format %{ "imull   $dst, $src, $imm\t# int" %}
7840   opcode(0x69); /* 69 /r id */
7841   ins_encode(REX_reg_reg(dst, src),
7842              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7843   ins_pipe(ialu_reg_reg_alu0);
7844 %}
7845 
7846 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7847 %{
       // Int multiply of a register by a memory operand:
       // matches dst = dst * LoadI(src), folding the load into the imull.
7848   match(Set dst (MulI dst (LoadI src)));
7849   effect(KILL cr);  // the flags register is declared killed
7850 
7851   ins_cost(350);
7852   format %{ "imull   $dst, $src\t# int" %}
7853   opcode(0x0F, 0xAF);  // two opcode bytes, emitted by OpcP then OpcS
7854   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7855   ins_pipe(ialu_reg_mem_alu0);
7856 %}
7857 
7858 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7859 %{
       // Int multiply of a memory operand by a constant, three-operand form:
       // matches dst = LoadI(src) * imm.
       // OpcSE and Con8or32 are encoders defined elsewhere in this file;
       // presumably they pick between the imm8 and imm32 forms -- confirm there.
7860   match(Set dst (MulI (LoadI src) imm));
7861   effect(KILL cr);  // the flags register is declared killed
7862 
7863   ins_cost(300);
7864   format %{ "imull   $dst, $src, $imm\t# int" %}
7865   opcode(0x69); /* 69 /r id */
7866   ins_encode(REX_reg_mem(dst, src),
7867              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7868   ins_pipe(ialu_reg_mem_alu0);
7869 %}
7870 
7871 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7872 %{
       // Long multiply, register by register: matches dst = dst * src.
7873   match(Set dst (MulL dst src));
7874   effect(KILL cr);  // the flags register is declared killed
7875 
7876   ins_cost(300);
7877   format %{ "imulq   $dst, $src\t# long" %}
7878   opcode(0x0F, 0xAF);  // two opcode bytes, emitted by OpcP then OpcS
7879   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7880   ins_pipe(ialu_reg_reg_alu0);
7881 %}
7882 
7883 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7884 %{
       // Long multiply by a constant, three-operand form: matches dst = src * imm,
       // where imm is an immL32 operand.
       // OpcSE and Con8or32 are encoders defined elsewhere in this file;
       // presumably they pick between the imm8 and imm32 forms -- confirm there.
7885   match(Set dst (MulL src imm));
7886   effect(KILL cr);  // the flags register is declared killed
7887 
7888   ins_cost(300);
7889   format %{ "imulq   $dst, $src, $imm\t# long" %}
7890   opcode(0x69); /* 69 /r id */
7891   ins_encode(REX_reg_reg_wide(dst, src),
7892              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7893   ins_pipe(ialu_reg_reg_alu0);
7894 %}
7895 
7896 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7897 %{
       // Long multiply of a register by a memory operand:
       // matches dst = dst * LoadL(src), folding the load into the imulq.
7898   match(Set dst (MulL dst (LoadL src)));
7899   effect(KILL cr);  // the flags register is declared killed
7900 
7901   ins_cost(350);
7902   format %{ "imulq   $dst, $src\t# long" %}
7903   opcode(0x0F, 0xAF);  // two opcode bytes, emitted by OpcP then OpcS
7904   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7905   ins_pipe(ialu_reg_mem_alu0);
7906 %}
7907 
7908 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7909 %{
       // Long multiply of a memory operand by a constant, three-operand form:
       // matches dst = LoadL(src) * imm, where imm is an immL32 operand.
       // OpcSE and Con8or32 are encoders defined elsewhere in this file;
       // presumably they pick between the imm8 and imm32 forms -- confirm there.
7910   match(Set dst (MulL (LoadL src) imm));
7911   effect(KILL cr);  // the flags register is declared killed
7912 
7913   ins_cost(300);
7914   format %{ "imulq   $dst, $src, $imm\t# long" %}
7915   opcode(0x69); /* 69 /r id */
7916   ins_encode(REX_reg_mem_wide(dst, src),
7917              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7918   ins_pipe(ialu_reg_mem_alu0);
7919 %}
7920 
7921 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7922 %{
       // High half of a long multiply: matches dst = MulHiL(src, rax).
       // Uses the one-operand imulq, whose product is written to RDX:RAX
       // (see the format string): the result operand is an rdx_RegL, one input
       // is an rax_RegL, and the other input is a no_rax_RegL.
7923   match(Set dst (MulHiL src rax));
7924   effect(USE_KILL rax, KILL cr);  // rax is consumed as an input and then overwritten
7925 
7926   ins_cost(300);
7927   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
7928   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7929   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only $src is encoded; rax/rdx are implicit
7930   ins_pipe(ialu_reg_reg_alu0);
7931 %}
7932 
7933 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7934                    rFlagsReg cr)
7935 %{
       // Int division: matches rax = rax / div; rdx and the flags are declared killed.
       // Per the format string, the emitted sequence first tests for
       // rax == 0x80000000 with div == -1 and, in that case, skips the idivl
       // (rax is left unchanged and rdx is zeroed). idivl would raise a divide
       // error on that quotient overflow.
7936   match(Set rax (DivI rax div));
7937   effect(KILL rdx, KILL cr);
7938 
7939   ins_cost(30*100+10*100); // XXX
7940   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7941             "jne,s   normal\n\t"
7942             "xorl    rdx, rdx\n\t"
7943             "cmpl    $div, -1\n\t"
7944             "je,s    done\n"
7945     "normal: cdql\n\t"
7946             "idivl   $div\n"
7947     "done:"        %}
7948   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7949   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc is defined elsewhere; presumably it emits the guard and cdql, followed here by F7 /7
7950   ins_pipe(ialu_reg_reg_alu0);
7951 %}
7952 
7953 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7954                    rFlagsReg cr)
7955 %{
       // Long division: matches rax = rax / div; rdx and the flags are declared killed.
       // Per the format string, the emitted sequence first tests for
       // rax == 0x8000000000000000 with div == -1 and, in that case, skips the
       // idivq (rax is left unchanged and rdx is zeroed). idivq would raise a
       // divide error on that quotient overflow.
7956   match(Set rax (DivL rax div));
7957   effect(KILL rdx, KILL cr);
7958 
7959   ins_cost(30*100+10*100); // XXX
7960   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7961             "cmpq    rax, rdx\n\t"
7962             "jne,s   normal\n\t"
7963             "xorl    rdx, rdx\n\t"
7964             "cmpq    $div, -1\n\t"
7965             "je,s    done\n"
7966     "normal: cdqq\n\t"
7967             "idivq   $div\n"
7968     "done:"        %}
7969   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7970   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc is defined elsewhere; presumably it emits the guard and cdqq, followed here by F7 /7
7971   ins_pipe(ialu_reg_reg_alu0);
7972 %}
7973 
7974 // Integer DIVMOD with Register, both quotient and mod results.
     // Matches a DivModI node; unlike divI_rReg, rdx is not declared killed here,
     // only the flags are. The format string shows the same guarded idivl sequence
     // (special case for rax == 0x80000000 with div == -1).
7975 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7976                              rFlagsReg cr)
7977 %{
7978   match(DivModI rax div);
7979   effect(KILL cr);
7980 
7981   ins_cost(30*100+10*100); // XXX
7982   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7983             "jne,s   normal\n\t"
7984             "xorl    rdx, rdx\n\t"
7985             "cmpl    $div, -1\n\t"
7986             "je,s    done\n"
7987     "normal: cdql\n\t"
7988             "idivl   $div\n"
7989     "done:"        %}
7990   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7991   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc is defined elsewhere; presumably it emits the guard and cdql
7992   ins_pipe(pipe_slow);
7993 %}
7994 
7995 // Long DIVMOD with Register, both quotient and mod results.
     // Matches a DivModL node; unlike divL_rReg, rdx is not declared killed here,
     // only the flags are. The format string shows the same guarded idivq sequence
     // (special case for rax == 0x8000000000000000 with div == -1).
7996 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7997                              rFlagsReg cr)
7998 %{
7999   match(DivModL rax div);
8000   effect(KILL cr);
8001 
8002   ins_cost(30*100+10*100); // XXX
8003   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8004             "cmpq    rax, rdx\n\t"
8005             "jne,s   normal\n\t"
8006             "xorl    rdx, rdx\n\t"
8007             "cmpq    $div, -1\n\t"
8008             "je,s    done\n"
8009     "normal: cdqq\n\t"
8010             "idivq   $div\n"
8011     "done:"        %}
8012   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8013   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc is defined elsewhere; presumably it emits the guard and cdqq
8014   ins_pipe(pipe_slow);
8015 %}
8016 
8017 //----------- DivL-By-Constant-Expansions--------------------------------------
8018 // DivI cases are handled by the compiler
8019 
8020 // Magic constant, reciprocal of 10.
     // Expand-only helper (no match rule): loads the 64-bit constant
     // 0x6666666666666667 into dst. Used as the first step of divL_10.
8021 instruct loadConL_0x6666666666666667(rRegL dst)
8022 %{
8023   effect(DEF dst);  // dst is purely defined; there are no inputs
8024 
       // The printed constant must match the value encoded by load_immL below
       // (16 hex digits).
8025   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8026   ins_encode(load_immL(dst, 0x6666666666666667));
8027   ins_pipe(ialu_reg);
8028 %}
8029 
8030 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8031 %{
       // Expand-only helper (no match rule), used by divL_10: one-operand imulq
       // of $src by rax; per the format string the product goes to rdx:rax,
       // and dst is an rdx_RegL.
8032   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8033 
8034   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8035   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8036   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only $src is encoded; rax/rdx are implicit
8037   ins_pipe(ialu_reg_reg_alu0);
8038 %}
8039 
8040 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8041 %{
       // Expand-only helper (no match rule), used by divL_10:
       // arithmetic shift right of dst by the fixed count 63.
8042   effect(USE_DEF dst, KILL cr);
8043 
8044   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8045   opcode(0xC1, 0x7); /* C1 /7 ib */
8046   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // 0x3F == 63
8047   ins_pipe(ialu_reg);
8048 %}
8049 
8050 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8051 %{
       // Expand-only helper (no match rule), used by divL_10:
       // arithmetic shift right of dst by the fixed count 2.
8052   effect(USE_DEF dst, KILL cr);
8053 
8054   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8055   opcode(0xC1, 0x7); /* C1 /7 ib */
8056   ins_encode(reg_opc_imm_wide(dst, 0x2));
8057   ins_pipe(ialu_reg);
8058 %}
8059 
8060 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8061 %{
       // Long division by the constant 10 without an idivq: matches
       // dst = src / div (div is an immL10 operand) and expands to
       //   dst = (high half of src * 0x6666666666666667) >> 2, minus (src >> 63)
       // using the helper instructs named in the expand block.
8062   match(Set dst (DivL src div));
8063 
8064   ins_cost((5+8)*100);
8065   expand %{
8066     rax_RegL rax;                     // Killed temp
8067     rFlagsReg cr;                     // Killed
8068     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8069     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8070     sarL_rReg_63(src, cr);            // sarq  src, 63
8071     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8072     subL_rReg(dst, src, cr);          // subq  rdx, src
8073   %}
8074 %}
8075 
8076 //-----------------------------------------------------------------------------
8077 
8078 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8079                    rFlagsReg cr)
8080 %{
       // Int remainder: matches rdx = rax % div; rax and the flags are declared killed.
       // Per the format string, the sequence is the same guarded idivl used for
       // division: when rax == 0x80000000 and div == -1 the idivl is skipped and
       // rdx has already been zeroed, so the remainder is 0.
8081   match(Set rdx (ModI rax div));
8082   effect(KILL rax, KILL cr);
8083 
8084   ins_cost(300); // XXX
8085   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8086             "jne,s   normal\n\t"
8087             "xorl    rdx, rdx\n\t"
8088             "cmpl    $div, -1\n\t"
8089             "je,s    done\n"
8090     "normal: cdql\n\t"
8091             "idivl   $div\n"
8092     "done:"        %}
8093   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8094   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc is defined elsewhere; presumably it emits the guard and cdql
8095   ins_pipe(ialu_reg_reg_alu0);
8096 %}
8097 
8098 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8099                    rFlagsReg cr)
8100 %{
       // Long remainder: matches rdx = rax % div; rax and the flags are declared killed.
       // Per the format string, the sequence is the same guarded idivq used for
       // division: when rax == 0x8000000000000000 and div == -1 the idivq is
       // skipped and rdx has already been zeroed, so the remainder is 0.
8101   match(Set rdx (ModL rax div));
8102   effect(KILL rax, KILL cr);
8103 
8104   ins_cost(300); // XXX
8105   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8106             "cmpq    rax, rdx\n\t"
8107             "jne,s   normal\n\t"
8108             "xorl    rdx, rdx\n\t"
8109             "cmpq    $div, -1\n\t"
8110             "je,s    done\n"
8111     "normal: cdqq\n\t"
8112             "idivq   $div\n"
8113     "done:"        %}
8114   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8115   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc is defined elsewhere; presumably it emits the guard and cdqq
8116   ins_pipe(ialu_reg_reg_alu0);
8117 %}
8118 
8119 // Integer Shift Instructions
8120 // Shift Left by one (int, register): matches dst = dst << 1.
8121 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8122 %{
8123   match(Set dst (LShiftI dst shift));
8124   effect(KILL cr);  // the flags register is declared killed
8125 
8126   format %{ "sall    $dst, $shift" %}
8127   opcode(0xD1, 0x4); /* D1 /4 */
8128   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form takes no count operand
8129   ins_pipe(ialu_reg);
8130 %}
8131 
8132 // Shift Left by one (int, memory): matches [dst] = LoadI(dst) << 1.
8133 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8134 %{
8135   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8136   effect(KILL cr);  // the flags register is declared killed
8137 
8138   format %{ "sall    $dst, $shift\t" %}
8139   opcode(0xD1, 0x4); /* D1 /4 */
8140   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 extension; $shift is not encoded
8141   ins_pipe(ialu_mem_imm);
8142 %}
8143 
8144 // Shift Left by 8-bit immediate (int, register): matches dst = dst << shift.
8145 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8146 %{
8147   match(Set dst (LShiftI dst shift));
8148   effect(KILL cr);  // the flags register is declared killed
8149 
8150   format %{ "sall    $dst, $shift" %}
8151   opcode(0xC1, 0x4); /* C1 /4 ib */
8152   ins_encode(reg_opc_imm(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
8153   ins_pipe(ialu_reg);
8154 %}
8155 
8156 // Shift Left by 8-bit immediate (int, memory): matches [dst] = LoadI(dst) << shift.
8157 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8158 %{
8159   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8160   effect(KILL cr);  // the flags register is declared killed
8161 
8162   format %{ "sall    $dst, $shift" %}
8163   opcode(0xC1, 0x4); /* C1 /4 ib */
8164   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x4) is the /4 extension; the count follows as the immediate
8165   ins_pipe(ialu_mem_imm);
8166 %}
8167 
8168 // Shift Left by variable (int, register): matches dst = dst << shift,
     // with the count operand constrained to rcx (rcx_RegI).
8169 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8170 %{
8171   match(Set dst (LShiftI dst shift));
8172   effect(KILL cr);  // the flags register is declared killed
8173 
8174   format %{ "sall    $dst, $shift" %}
8175   opcode(0xD3, 0x4); /* D3 /4 */
8176   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8177   ins_pipe(ialu_reg_reg);
8178 %}
8179 
8180 // Shift Left by variable (int, memory): matches [dst] = LoadI(dst) << shift,
     // with the count operand constrained to rcx (rcx_RegI).
8181 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8182 %{
8183   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8184   effect(KILL cr);  // the flags register is declared killed
8185 
8186   format %{ "sall    $dst, $shift" %}
8187   opcode(0xD3, 0x4); /* D3 /4 */
8188   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 extension; the count comes from CL
8189   ins_pipe(ialu_mem_reg);
8190 %}
8191 
8192 // Arithmetic shift right by one (int, register): matches dst = dst >> 1.
8193 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8194 %{
8195   match(Set dst (RShiftI dst shift));
8196   effect(KILL cr);  // the flags register is declared killed
8197 
8198   format %{ "sarl    $dst, $shift" %}
8199   opcode(0xD1, 0x7); /* D1 /7 */
8200   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form takes no count operand
8201   ins_pipe(ialu_reg);
8202 %}
8203 
8204 // Arithmetic shift right by one (int, memory): matches [dst] = LoadI(dst) >> 1.
8205 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8206 %{
8207   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8208   effect(KILL cr);  // the flags register is declared killed
8209 
8210   format %{ "sarl    $dst, $shift" %}
8211   opcode(0xD1, 0x7); /* D1 /7 */
8212   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 extension; $shift is not encoded
8213   ins_pipe(ialu_mem_imm);
8214 %}
8215 
8216 // Arithmetic Shift Right by 8-bit immediate (int, register): matches dst = dst >> shift.
8217 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8218 %{
8219   match(Set dst (RShiftI dst shift));
8220   effect(KILL cr);  // the flags register is declared killed
8221 
8222   format %{ "sarl    $dst, $shift" %}
8223   opcode(0xC1, 0x7); /* C1 /7 ib */
8224   ins_encode(reg_opc_imm(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
       // Register/immediate operation with no memory operand: use the register
       // pipeline class, as salI_rReg_imm and shrI_rReg_imm do.
8225   ins_pipe(ialu_reg);
8226 %}
8227 
8228 // Arithmetic Shift Right by 8-bit immediate (int, memory): matches [dst] = LoadI(dst) >> shift.
8229 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8230 %{
8231   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8232   effect(KILL cr);  // the flags register is declared killed
8233 
8234   format %{ "sarl    $dst, $shift" %}
8235   opcode(0xC1, 0x7); /* C1 /7 ib */
8236   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x7) is the /7 extension; the count follows as the immediate
8237   ins_pipe(ialu_mem_imm);
8238 %}
8239 
8240 // Arithmetic Shift Right by variable (int, register): matches dst = dst >> shift,
     // with the count operand constrained to rcx (rcx_RegI).
8241 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8242 %{
8243   match(Set dst (RShiftI dst shift));
8244   effect(KILL cr);  // the flags register is declared killed
8245 
8246   format %{ "sarl    $dst, $shift" %}
8247   opcode(0xD3, 0x7); /* D3 /7 */
8248   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8249   ins_pipe(ialu_reg_reg);
8250 %}
8251 
8252 // Arithmetic Shift Right by variable (int, memory): matches [dst] = LoadI(dst) >> shift,
     // with the count operand constrained to rcx (rcx_RegI).
8253 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8254 %{
8255   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8256   effect(KILL cr);  // the flags register is declared killed
8257 
8258   format %{ "sarl    $dst, $shift" %}
8259   opcode(0xD3, 0x7); /* D3 /7 */
8260   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 extension; the count comes from CL
8261   ins_pipe(ialu_mem_reg);
8262 %}
8263 
8264 // Logical shift right by one (int, register): matches dst = dst >>> 1.
8265 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8266 %{
8267   match(Set dst (URShiftI dst shift));
8268   effect(KILL cr);  // the flags register is declared killed
8269 
8270   format %{ "shrl    $dst, $shift" %}
8271   opcode(0xD1, 0x5); /* D1 /5 */
8272   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form takes no count operand
8273   ins_pipe(ialu_reg);
8274 %}
8275 
8276 // Logical shift right by one (int, memory): matches [dst] = LoadI(dst) >>> 1.
8277 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8278 %{
8279   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8280   effect(KILL cr);  // the flags register is declared killed
8281 
8282   format %{ "shrl    $dst, $shift" %}
8283   opcode(0xD1, 0x5); /* D1 /5 */
8284   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 extension; $shift is not encoded
8285   ins_pipe(ialu_mem_imm);
8286 %}
8287 
8288 // Logical Shift Right by 8-bit immediate (int, register): matches dst = dst >>> shift.
8289 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8290 %{
8291   match(Set dst (URShiftI dst shift));
8292   effect(KILL cr);  // the flags register is declared killed
8293 
8294   format %{ "shrl    $dst, $shift" %}
8295   opcode(0xC1, 0x5); /* C1 /5 ib */
8296   ins_encode(reg_opc_imm(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
8297   ins_pipe(ialu_reg);
8298 %}
8299 
8300 // Logical Shift Right by 8-bit immediate (int, memory): matches [dst] = LoadI(dst) >>> shift.
8301 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8302 %{
8303   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8304   effect(KILL cr);  // the flags register is declared killed
8305 
8306   format %{ "shrl    $dst, $shift" %}
8307   opcode(0xC1, 0x5); /* C1 /5 ib */
8308   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x5) is the /5 extension; the count follows as the immediate
8309   ins_pipe(ialu_mem_imm);
8310 %}
8311 
8312 // Logical Shift Right by variable (int, register): matches dst = dst >>> shift,
     // with the count operand constrained to rcx (rcx_RegI).
8313 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8314 %{
8315   match(Set dst (URShiftI dst shift));
8316   effect(KILL cr);  // the flags register is declared killed
8317 
8318   format %{ "shrl    $dst, $shift" %}
8319   opcode(0xD3, 0x5); /* D3 /5 */
8320   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8321   ins_pipe(ialu_reg_reg);
8322 %}
8323 
8324 // Logical Shift Right by variable (int, memory): matches [dst] = LoadI(dst) >>> shift,
     // with the count operand constrained to rcx (rcx_RegI).
8325 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8326 %{
8327   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8328   effect(KILL cr);  // the flags register is declared killed
8329 
8330   format %{ "shrl    $dst, $shift" %}
8331   opcode(0xD3, 0x5); /* D3 /5 */
8332   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 extension; the count comes from CL
8333   ins_pipe(ialu_mem_reg);
8334 %}
8335 
8336 // Long Shift Instructions
8337 // Shift Left by one (long, register): matches dst = dst << 1.
     // The count operand is an int immediate (immI1) even though dst is a long.
8338 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8339 %{
8340   match(Set dst (LShiftL dst shift));
8341   effect(KILL cr);  // the flags register is declared killed
8342 
8343   format %{ "salq    $dst, $shift" %}
8344   opcode(0xD1, 0x4); /* D1 /4 */
8345   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form takes no count operand
8346   ins_pipe(ialu_reg);
8347 %}
8348 
8349 // Shift Left by one (long, memory): matches [dst] = LoadL(dst) << 1.
8350 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8351 %{
8352   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8353   effect(KILL cr);  // the flags register is declared killed
8354 
8355   format %{ "salq    $dst, $shift" %}
8356   opcode(0xD1, 0x4); /* D1 /4 */
8357   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 extension; $shift is not encoded
8358   ins_pipe(ialu_mem_imm);
8359 %}
8360 
8361 // Shift Left by 8-bit immediate (long, register): matches dst = dst << shift.
8362 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8363 %{
8364   match(Set dst (LShiftL dst shift));
8365   effect(KILL cr);  // the flags register is declared killed
8366 
8367   format %{ "salq    $dst, $shift" %}
8368   opcode(0xC1, 0x4); /* C1 /4 ib */
8369   ins_encode(reg_opc_imm_wide(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
8370   ins_pipe(ialu_reg);
8371 %}
8372 
8373 // Shift Left by 8-bit immediate (long, memory): matches [dst] = LoadL(dst) << shift.
8374 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8375 %{
8376   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8377   effect(KILL cr);  // the flags register is declared killed
8378 
8379   format %{ "salq    $dst, $shift" %}
8380   opcode(0xC1, 0x4); /* C1 /4 ib */
8381   ins_encode(REX_mem_wide(dst), OpcP,
8382              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x4) is the /4 extension; the count follows as the immediate
8383   ins_pipe(ialu_mem_imm);
8384 %}
8385 
8386 // Shift Left by variable (long, register): matches dst = dst << shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8387 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8388 %{
8389   match(Set dst (LShiftL dst shift));
8390   effect(KILL cr);  // the flags register is declared killed
8391 
8392   format %{ "salq    $dst, $shift" %}
8393   opcode(0xD3, 0x4); /* D3 /4 */
8394   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8395   ins_pipe(ialu_reg_reg);
8396 %}
8397 
8398 // Shift Left by variable (long, memory): matches [dst] = LoadL(dst) << shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8399 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8400 %{
8401   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8402   effect(KILL cr);  // the flags register is declared killed
8403 
8404   format %{ "salq    $dst, $shift" %}
8405   opcode(0xD3, 0x4); /* D3 /4 */
8406   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 extension; the count comes from CL
8407   ins_pipe(ialu_mem_reg);
8408 %}
8409 
8410 // Arithmetic shift right by one (long, register): matches dst = dst >> 1.
8411 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8412 %{
8413   match(Set dst (RShiftL dst shift));
8414   effect(KILL cr);  // the flags register is declared killed
8415 
8416   format %{ "sarq    $dst, $shift" %}
8417   opcode(0xD1, 0x7); /* D1 /7 */
8418   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D1 form takes no count operand
8419   ins_pipe(ialu_reg);
8420 %}
8421 
8422 // Arithmetic shift right by one (long, memory): matches [dst] = LoadL(dst) >> 1.
8423 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8424 %{
8425   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8426   effect(KILL cr);  // the flags register is declared killed
8427 
8428   format %{ "sarq    $dst, $shift" %}
8429   opcode(0xD1, 0x7); /* D1 /7 */
8430   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 extension; $shift is not encoded
8431   ins_pipe(ialu_mem_imm);
8432 %}
8433 
8434 // Arithmetic Shift Right by 8-bit immediate (long, register): matches dst = dst >> shift.
8435 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8436 %{
8437   match(Set dst (RShiftL dst shift));
8438   effect(KILL cr);  // the flags register is declared killed
8439 
8440   format %{ "sarq    $dst, $shift" %}
8441   opcode(0xC1, 0x7); /* C1 /7 ib */
8442   ins_encode(reg_opc_imm_wide(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
       // Register/immediate operation with no memory operand: use the register
       // pipeline class, as salL_rReg_imm and shrL_rReg_imm do.
8443   ins_pipe(ialu_reg);
8444 %}
8445 
8446 // Arithmetic Shift Right by 8-bit immediate (long, memory): matches [dst] = LoadL(dst) >> shift.
8447 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8448 %{
8449   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8450   effect(KILL cr);  // the flags register is declared killed
8451 
8452   format %{ "sarq    $dst, $shift" %}
8453   opcode(0xC1, 0x7); /* C1 /7 ib */
8454   ins_encode(REX_mem_wide(dst), OpcP,
8455              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x7) is the /7 extension; the count follows as the immediate
8456   ins_pipe(ialu_mem_imm);
8457 %}
8458 
8459 // Arithmetic Shift Right by variable (long, register): matches dst = dst >> shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8460 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8461 %{
8462   match(Set dst (RShiftL dst shift));
8463   effect(KILL cr);  // the flags register is declared killed
8464 
8465   format %{ "sarq    $dst, $shift" %}
8466   opcode(0xD3, 0x7); /* D3 /7 */
8467   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8468   ins_pipe(ialu_reg_reg);
8469 %}
8470 
8471 // Arithmetic Shift Right by variable (long, memory): matches [dst] = LoadL(dst) >> shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8472 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8473 %{
8474   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8475   effect(KILL cr);  // the flags register is declared killed
8476 
8477   format %{ "sarq    $dst, $shift" %}
8478   opcode(0xD3, 0x7); /* D3 /7 */
8479   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 extension; the count comes from CL
8480   ins_pipe(ialu_mem_reg);
8481 %}
8482 
8483 // Logical shift right by one (long, register): matches dst = dst >>> 1.
8484 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8485 %{
8486   match(Set dst (URShiftL dst shift));
8487   effect(KILL cr);  // the flags register is declared killed
8488 
8489   format %{ "shrq    $dst, $shift" %}
8490   opcode(0xD1, 0x5); /* D1 /5 */
8491   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // $shift is not encoded: the D1 form takes no count operand
8492   ins_pipe(ialu_reg);
8493 %}
8494 
8495 // Logical shift right by one (long, memory): matches [dst] = LoadL(dst) >>> 1.
8496 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8497 %{
8498   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8499   effect(KILL cr);  // the flags register is declared killed
8500 
8501   format %{ "shrq    $dst, $shift" %}
8502   opcode(0xD1, 0x5); /* D1 /5 */
8503   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 extension; $shift is not encoded
8504   ins_pipe(ialu_mem_imm);
8505 %}
8506 
8507 // Logical Shift Right by 8-bit immediate (long, register): matches dst = dst >>> shift.
8508 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8509 %{
8510   match(Set dst (URShiftL dst shift));
8511   effect(KILL cr);  // the flags register is declared killed
8512 
8513   format %{ "shrq    $dst, $shift" %}
8514   opcode(0xC1, 0x5); /* C1 /5 ib */
8515   ins_encode(reg_opc_imm_wide(dst, shift));  // single encoder defined elsewhere; presumably emits prefix, opcode, ModRM and imm8 itself
8516   ins_pipe(ialu_reg);
8517 %}
8518 
8519 
8520 // Logical Shift Right by 8-bit immediate (long, memory): matches [dst] = LoadL(dst) >>> shift.
8521 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8522 %{
8523   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8524   effect(KILL cr);  // the flags register is declared killed
8525 
8526   format %{ "shrq    $dst, $shift" %}
8527   opcode(0xC1, 0x5); /* C1 /5 ib */
8528   ins_encode(REX_mem_wide(dst), OpcP,
8529              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary (0x5) is the /5 extension; the count follows as the immediate
8530   ins_pipe(ialu_mem_imm);
8531 %}
8532 
8533 // Logical Shift Right by variable (long, register): matches dst = dst >>> shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8534 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8535 %{
8536   match(Set dst (URShiftL dst shift));
8537   effect(KILL cr);  // the flags register is declared killed
8538 
8539   format %{ "shrq    $dst, $shift" %}
8540   opcode(0xD3, 0x5); /* D3 /5 */
8541   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8542   ins_pipe(ialu_reg_reg);
8543 %}
8544 
8545 // Logical Shift Right by variable (long, memory): matches [dst] = LoadL(dst) >>> shift,
     // with the int count operand constrained to rcx (rcx_RegI).
8546 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8547 %{
8548   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8549   effect(KILL cr);  // the flags register is declared killed
8550 
8551   format %{ "shrq    $dst, $shift" %}
8552   opcode(0xD3, 0x5); /* D3 /5 */
8553   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 extension; the count comes from CL
8554   ins_pipe(ialu_mem_reg);
8555 %}
8556 
8557 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8558 // This idiom is used by the compiler for the i2b bytecode.
     // The whole shift pair is replaced by one sign-extending byte move
     // (movsbl); no flags effect is declared.
8559 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8560 %{
8561   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8562 
8563   format %{ "movsbl  $dst, $src\t# i2b" %}
8564   opcode(0x0F, 0xBE);  // two opcode bytes, emitted by OpcP then OpcS
8565   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // REX_reg_breg is defined elsewhere; presumably handles src as a byte register
8566   ins_pipe(ialu_reg_reg);
8567 %}
8568 
8569 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8570 // This idiom is used by the compiler for the i2s bytecode.
     // The whole shift pair is replaced by one sign-extending 16-bit move
     // (movswl); no flags effect is declared.
8571 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8572 %{
8573   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8574 
8575   format %{ "movswl  $dst, $src\t# i2s" %}
8576   opcode(0x0F, 0xBF);  // two opcode bytes, emitted by OpcP then OpcS
8577   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8578   ins_pipe(ialu_reg_reg);
8579 %}
8580 
8581 // ROL/ROR instructions
8582 
8583 // ROL expand
8584 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate the int in dst left by one.
       // Instantiated by rolI_rReg_i1.
8585   effect(KILL cr, USE_DEF dst);
8586 
8587   format %{ "roll    $dst" %}
8588   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8589   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8590   ins_pipe(ialu_reg);
8591 %}
8592 
8593 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate the int in dst left by an
       // 8-bit immediate count. Instantiated by rolI_rReg_i8.
8594   effect(USE_DEF dst, USE shift, KILL cr);
8595 
8596   format %{ "roll    $dst, $shift" %}
8597   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8598   ins_encode( reg_opc_imm(dst, shift) );
8599   ins_pipe(ialu_reg);
8600 %}
8601 
8602 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8603 %{
       // Expand-only helper (no match rule): rotate the int in dst left by a
       // variable count. The count operand is constrained to rcx and dst is
       // drawn from no_rcx_RegI. Instantiated by rolI_rReg_Var_C0 / _C32.
8604   effect(USE_DEF dst, USE shift, KILL cr);
8605 
8606   format %{ "roll    $dst, $shift" %}
8607   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8608   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8609   ins_pipe(ialu_reg_reg);
8610 %}
8611 // end of ROL expand
8612 
8613 // Rotate Left by one (int): matches (dst << 1) | (dst >>> -1).
     // lshift is an immI1 and rshift an immI_M1 operand; an int shift count of -1
     // acts as 31 because only the low 5 bits of the count are used.
     // cr is passed through to the expanded instruct, which declares KILL cr.
8614 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8615 %{
8616   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8617 
8618   expand %{
8619     rolI_rReg_imm1(dst, cr);
8620   %}
8621 %}
8622 
8623 // Rotate Left by 8-bit immediate (int): matches (dst << lshift) | (dst >>> rshift).
     // The rotate count handed to the expanded instruct is lshift.
8624 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8625 %{
       // n is the OrI node: in(1)->in(2) is the left-shift count and
       // in(2)->in(2) the right-shift count. Only a pair whose sum is a
       // multiple of 32 forms a rotate.
8626   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8627   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8628 
8629   expand %{
8630     rolI_rReg_imm8(dst, lshift, cr);
8631   %}
8632 %}
8633 
8634 // Rotate Left by variable (int): matches (dst << shift) | (dst >>> (0 - shift)).
     // The count operand is constrained to rcx; dst is drawn from no_rcx_RegI.
8635 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8636 %{
8637   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8638 
8639   expand %{
8640     rolI_rReg_CL(dst, shift, cr);
8641   %}
8642 %}
8643 
8644 // Rotate Left by variable (int): matches (dst << shift) | (dst >>> (32 - shift)).
     // Same expansion as rolI_rReg_Var_C0; only the constant in the subtraction differs.
8645 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8646 %{
8647   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8648 
8649   expand %{
8650     rolI_rReg_CL(dst, shift, cr);
8651   %}
8652 %}
8653 
8654 // ROR expand
8655 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8656 %{
       // Expand-only helper (no match rule): rotate the int in dst right by one.
       // Instantiated by rorI_rReg_i1.
8657   effect(USE_DEF dst, KILL cr);
8658 
8659   format %{ "rorl    $dst" %}
8660   opcode(0xD1, 0x1); /* D1 /1 */
8661   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8662   ins_pipe(ialu_reg);
8663 %}
8664 
8665 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8666 %{
       // Expand-only helper (no match rule): rotate the int in dst right by an
       // 8-bit immediate count. Instantiated by rorI_rReg_i8.
8667   effect(USE_DEF dst, USE shift, KILL cr);
8668 
8669   format %{ "rorl    $dst, $shift" %}
8670   opcode(0xC1, 0x1); /* C1 /1 ib */
8671   ins_encode(reg_opc_imm(dst, shift));
8672   ins_pipe(ialu_reg);
8673 %}
8674 
8675 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8676 %{
       // Expand-only helper (no match rule): rotate the int in dst right by a
       // variable count. The count operand is constrained to rcx and dst is
       // drawn from no_rcx_RegI. Instantiated by rorI_rReg_Var_C0 / _C32.
8677   effect(USE_DEF dst, USE shift, KILL cr);
8678 
8679   format %{ "rorl    $dst, $shift" %}
8680   opcode(0xD3, 0x1); /* D3 /1 */
8681   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8682   ins_pipe(ialu_reg_reg);
8683 %}
8684 // end of ROR expand
8685 
8686 // Rotate Right by one (int): matches (dst >>> 1) | (dst << -1).
     // rshift is an immI1 and lshift an immI_M1 operand; an int shift count of -1
     // acts as 31 because only the low 5 bits of the count are used.
     // cr is passed through to the expanded instruct, which declares KILL cr.
8687 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8688 %{
8689   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8690 
8691   expand %{
8692     rorI_rReg_imm1(dst, cr);
8693   %}
8694 %}
8695 
8696 // Rotate Right by 8-bit immediate (int): matches (dst >>> rshift) | (dst << lshift).
     // The rotate count handed to the expanded instruct is rshift.
8697 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8698 %{
       // n is the OrI node: in(1)->in(2) is the right-shift count and
       // in(2)->in(2) the left-shift count. Only a pair whose sum is a
       // multiple of 32 forms a rotate.
8699   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8700   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8701 
8702   expand %{
8703     rorI_rReg_imm8(dst, rshift, cr);
8704   %}
8705 %}
8706 
8707 // Rotate Right by variable (int): matches (dst >>> shift) | (dst << (0 - shift)).
     // The count operand is constrained to rcx; dst is drawn from no_rcx_RegI.
8708 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8709 %{
8710   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8711 
8712   expand %{
8713     rorI_rReg_CL(dst, shift, cr);
8714   %}
8715 %}
8716 
8717 // Rotate Right by variable (int): matches (dst >>> shift) | (dst << (32 - shift)).
     // Same expansion as rorI_rReg_Var_C0; only the constant in the subtraction differs.
8718 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8719 %{
8720   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8721 
8722   expand %{
8723     rorI_rReg_CL(dst, shift, cr);
8724   %}
8725 %}
8726 
8727 // for long rotate
8728 // ROL expand
8729 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate the long in dst left by one.
       // Instantiated by rolL_rReg_i1.
8730   effect(USE_DEF dst, KILL cr);
8731 
8732   format %{ "rolq    $dst" %}
8733   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8734   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8735   ins_pipe(ialu_reg);
8736 %}
8737 
8738 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate the long in dst left by an
       // 8-bit immediate count. Instantiated by rolL_rReg_i8.
8739   effect(USE_DEF dst, USE shift, KILL cr);
8740 
8741   format %{ "rolq    $dst, $shift" %}
8742   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8743   ins_encode( reg_opc_imm_wide(dst, shift) );
8744   ins_pipe(ialu_reg);
8745 %}
8746 
8747 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8748 %{
       // Expand-only helper (no match rule): rotate the long in dst left by a
       // variable count. The int count operand is constrained to rcx and dst is
       // drawn from no_rcx_RegL. Instantiated by rolL_rReg_Var_C0 / _C64.
8749   effect(USE_DEF dst, USE shift, KILL cr);
8750 
8751   format %{ "rolq    $dst, $shift" %}
8752   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8753   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded: the D3 form takes its count from CL
8754   ins_pipe(ialu_reg_reg);
8755 %}
8756 // end of ROL expand
8757 
8758 // Rotate Left by one (long): matches (dst << 1) | (dst >>> -1).
     // lshift is an immI1 and rshift an immI_M1 operand; a long shift count of -1
     // acts as 63 because only the low 6 bits of the count are used.
     // cr is passed through to the expanded instruct, which declares KILL cr.
8759 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8760 %{
8761   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8762 
8763   expand %{
8764     rolL_rReg_imm1(dst, cr);
8765   %}
8766 %}
8767 
8768 // Rotate Left by 8-bit immediate
// Match-only rule for a 64-bit rotate left by a constant, recognised as
// (dst << lshift) | (dst >>> rshift).  The predicate accepts the pattern only
// when the two constant counts add up to a multiple of 64 (sum & 0x3f == 0),
// i.e. when the shift pair really forms a rotate.  Expands to rolL_rReg_imm8
// using the left-shift count.
8769 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8770 %{
8771   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8772   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8773 
8774   expand %{
8775     rolL_rReg_imm8(dst, lshift, cr);
8776   %}
8777 %}
8778 
8779 // Rotate Left by variable
// Match-only rule for a 64-bit rotate left by a variable count, recognised
// as (dst << shift) | (dst >>> (0 - shift)).  Expands to rolL_rReg_CL; no
// encoding of its own.
8780 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8781 %{
8782   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8783 
8784   expand %{
8785     rolL_rReg_CL(dst, shift, cr);
8786   %}
8787 %}
8788 
8789 // Rotate Left by variable
// Same 64-bit rotate-left idiom as rolL_rReg_Var_C0, but with the right-shift
// count written as (64 - shift) instead of (0 - shift).  Expands to
// rolL_rReg_CL.
8790 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8791 %{
8792   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8793 
8794   expand %{
8795     rolL_rReg_CL(dst, shift, cr);
8796   %}
8797 %}
8798 
8799 // ROR expand
// Expand-only building block (no match rule): 64-bit rotate right by one,
// printed as "rorq $dst" and encoded as opcode D1 /1 through REX_reg_wide.
// dst is USE_DEF; the flags operand is declared killed.
// Used as the expand target of rorL_rReg_i1.
8800 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8801 %{
8802   effect(USE_DEF dst, KILL cr);
8803 
8804   format %{ "rorq    $dst" %}
8805   opcode(0xD1, 0x1); /* D1 /1 */
8806   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8807   ins_pipe(ialu_reg);
8808 %}
8809 
// Expand-only building block (no match rule): 64-bit rotate right by an 8-bit
// immediate count, "rorq $dst, $shift", opcode C1 /1 ib, emitted by the
// reg_opc_imm_wide encoding class.  dst is USE_DEF, shift is only read, and
// the flags operand is declared killed.
8810 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8811 %{
8812   effect(USE_DEF dst, USE shift, KILL cr);
8813 
8814   format %{ "rorq    $dst, $shift" %}
8815   opcode(0xC1, 0x1); /* C1 /1 ib */
8816   ins_encode(reg_opc_imm_wide(dst, shift));
8817   ins_pipe(ialu_reg);
8818 %}
8819 
// Expand-only building block (no match rule): 64-bit rotate right by a
// variable count, opcode D3 /1.  The count operand is constrained to the
// rcx_RegI class and is not passed to the encoding (only dst is encoded);
// dst comes from no_rcx_RegL.  Flags operand is declared killed.
8820 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8821 %{
8822   effect(USE_DEF dst, USE shift, KILL cr);
8823 
8824   format %{ "rorq    $dst, $shift" %}
8825   opcode(0xD3, 0x1); /* D3 /1 */
8826   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8827   ins_pipe(ialu_reg_reg);
8828 %}
8829 // end of ROR expand
8830 
8831 // Rotate Right by one
// Match-only rule for a 64-bit rotate right by one, recognised as
// (dst >>> rshift) | (dst << lshift) with rshift of type immI1 and lshift of
// type immI_M1.  Expands to rorL_rReg_imm1; the constants themselves are not
// forwarded.
// NOTE(review): immI1 / immI_M1 are presumably the constants 1 and -1; their
// operand definitions are outside this section - confirm.
8832 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8833 %{
8834   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8835 
8836   expand %{
8837     rorL_rReg_imm1(dst, cr);
8838   %}
8839 %}
8840 
8841 // Rotate Right by 8-bit immediate
// Match-only rule for a 64-bit rotate right by a constant, recognised as
// (dst >>> rshift) | (dst << lshift).  The predicate accepts the pattern only
// when the two constant counts add up to a multiple of 64 (sum & 0x3f == 0).
// Expands to rorL_rReg_imm8 using the right-shift count.
8842 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8843 %{
8844   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8845   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8846 
8847   expand %{
8848     rorL_rReg_imm8(dst, rshift, cr);
8849   %}
8850 %}
8851 
8852 // Rotate Right by variable
// Match-only rule for a 64-bit rotate right by a variable count, recognised
// as (dst >>> shift) | (dst << (0 - shift)).  Expands to rorL_rReg_CL; no
// encoding of its own.
8853 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8854 %{
8855   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8856 
8857   expand %{
8858     rorL_rReg_CL(dst, shift, cr);
8859   %}
8860 %}
8861 
8862 // Rotate Right by variable
// Same 64-bit rotate-right idiom as rorL_rReg_Var_C0, but with the left-shift
// count written as (64 - shift) instead of (0 - shift).  Expands to
// rorL_rReg_CL.
8863 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8864 %{
8865   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8866 
8867   expand %{
8868     rorL_rReg_CL(dst, shift, cr);
8869   %}
8870 %}
8871 
8872 // Logical Instructions
8873 
8874 // Integer Logical Instructions
8875 
8876 // And Instructions
8877 // And Register with Register
// 32-bit AND, register with register: dst = dst & src ("andl $dst, $src",
// opcode 0x23).  Two-address form (dst is also the first input); the flags
// operand is declared killed.
8878 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8879 %{
8880   match(Set dst (AndI dst src));
8881   effect(KILL cr);
8882 
8883   format %{ "andl    $dst, $src\t# int" %}
8884   opcode(0x23);
8885   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8886   ins_pipe(ialu_reg_reg);
8887 %}
8888 
8889 // And Register with Immediate 255
// Special case of 32-bit AND with the constant operand immI_255: instead of
// an AND it emits "movzbl $dst, $dst" (opcode bytes 0F B6), i.e. a
// zero-extension of the low byte.  Unlike the generic AND forms, no flags
// operand is declared.  The prefix is produced by REX_reg_breg.
// NOTE(review): REX_reg_breg presumably handles the REX prefix needed for
// byte-register sources; its definition is outside this section.
8890 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8891 %{
8892   match(Set dst (AndI dst src));
8893 
8894   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8895   opcode(0x0F, 0xB6);
8896   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8897   ins_pipe(ialu_reg);
8898 %}
8899 
8900 // And Register with Immediate 255 and promote to long
// Fuses (int & 0xFF) followed by int-to-long conversion into one
// "movzbl $dst, $src" (opcode bytes 0F B6).  dst is a long register and src a
// separate int register, so this is a three-operand-style form rather than
// the two-address form used by andI_rReg_imm255.  No flags operand declared.
8901 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8902 %{
8903   match(Set dst (ConvI2L (AndI src mask)));
8904 
8905   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8906   opcode(0x0F, 0xB6);
8907   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8908   ins_pipe(ialu_reg);
8909 %}
8910 
8911 // And Register with Immediate 65535
// Special case of 32-bit AND with the constant operand immI_65535: emits
// "movzwl $dst, $dst" (opcode bytes 0F B7), a zero-extension of the low
// 16 bits, instead of an AND.  No flags operand is declared.
8912 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8913 %{
8914   match(Set dst (AndI dst src));
8915 
8916   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8917   opcode(0x0F, 0xB7);
8918   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8919   ins_pipe(ialu_reg);
8920 %}
8921 
8922 // And Register with Immediate 65535 and promote to long
// Fuses (int & 0xFFFF) followed by int-to-long conversion into one
// "movzwl $dst, $src" (opcode bytes 0F B7).  dst is a long register, src a
// separate int register.  No flags operand is declared.
8923 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8924 %{
8925   match(Set dst (ConvI2L (AndI src mask)));
8926 
8927   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8928   opcode(0x0F, 0xB7);
8929   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8930   ins_pipe(ialu_reg);
8931 %}
8932 
8933 // And Register with Immediate
// 32-bit AND, register with a general int immediate: dst = dst & src
// ("andl $dst, $src", opcode 81 /4).  Flags operand is declared killed.
// NOTE(review): OpcSErm / Con8or32 presumably pick the sign-extended 8-bit
// immediate form when the constant fits - confirm in the encode section.
8934 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8935 %{
8936   match(Set dst (AndI dst src));
8937   effect(KILL cr);
8938 
8939   format %{ "andl    $dst, $src\t# int" %}
8940   opcode(0x81, 0x04); /* Opcode 81 /4 */
8941   ins_encode(OpcSErm(dst, src), Con8or32(src));
8942   ins_pipe(ialu_reg);
8943 %}
8944 
8945 // And Register with Memory
// 32-bit AND, register with a loaded memory operand: dst = dst & [src].  The
// LoadI is folded into the instruction ("andl $dst, $src", opcode 0x23).
// Cost is set to 125; flags operand is declared killed.
8946 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8947 %{
8948   match(Set dst (AndI dst (LoadI src)));
8949   effect(KILL cr);
8950 
8951   ins_cost(125);
8952   format %{ "andl    $dst, $src\t# int" %}
8953   opcode(0x23);
8954   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8955   ins_pipe(ialu_reg_mem);
8956 %}
8957 
8958 // And Memory with Register
// 32-bit read-modify-write AND on memory: [dst] = [dst] & src.  Matches a
// StoreI whose value is (AndI (LoadI dst) src) on the same address, emitted
// as one "andl $dst, $src" (opcode 21 /r).  Note the register is passed first
// to the encoding classes (src, dst).  Cost 150; flags declared killed.
8959 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8960 %{
8961   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8962   effect(KILL cr);
8963 
8964   ins_cost(150);
8965   format %{ "andl    $dst, $src\t# int" %}
8966   opcode(0x21); /* Opcode 21 /r */
8967   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8968   ins_pipe(ialu_mem_reg);
8969 %}
8970 
8971 // And Memory with Immediate
// 32-bit read-modify-write AND of memory with an immediate:
// [dst] = [dst] & src, opcode 81 /4 id.  The /4 extension is supplied as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
// NOTE(review): OpcSE / Con8or32 presumably select the short sign-extended
// immediate form when the constant fits in 8 bits - confirm.
8972 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
8973 %{
8974   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8975   effect(KILL cr);
8976 
8977   ins_cost(125);
8978   format %{ "andl    $dst, $src\t# int" %}
8979   opcode(0x81, 0x4); /* Opcode 81 /4 id */
8980   ins_encode(REX_mem(dst), OpcSE(src),
8981              RM_opc_mem(secondary, dst), Con8or32(src));
8982   ins_pipe(ialu_mem_imm);
8983 %}
8984 
8985 // Or Instructions
8986 // Or Register with Register
// 32-bit OR, register with register: dst = dst | src ("orl $dst, $src",
// opcode 0x0B).  Two-address form; flags operand is declared killed.
8987 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8988 %{
8989   match(Set dst (OrI dst src));
8990   effect(KILL cr);
8991 
8992   format %{ "orl     $dst, $src\t# int" %}
8993   opcode(0x0B);
8994   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8995   ins_pipe(ialu_reg_reg);
8996 %}
8997 
8998 // Or Register with Immediate
// 32-bit OR, register with an int immediate: dst = dst | src
// ("orl $dst, $src", opcode 81 /1 id).  Flags operand is declared killed.
8999 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9000 %{
9001   match(Set dst (OrI dst src));
9002   effect(KILL cr);
9003 
9004   format %{ "orl     $dst, $src\t# int" %}
9005   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9006   ins_encode(OpcSErm(dst, src), Con8or32(src));
9007   ins_pipe(ialu_reg);
9008 %}
9009 
9010 // Or Register with Memory
// 32-bit OR, register with a loaded memory operand: dst = dst | [src].  The
// LoadI is folded into "orl $dst, $src" (opcode 0x0B).  Cost 125; flags
// operand is declared killed.
9011 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9012 %{
9013   match(Set dst (OrI dst (LoadI src)));
9014   effect(KILL cr);
9015 
9016   ins_cost(125);
9017   format %{ "orl     $dst, $src\t# int" %}
9018   opcode(0x0B);
9019   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9020   ins_pipe(ialu_reg_mem);
9021 %}
9022 
9023 // Or Memory with Register
// 32-bit read-modify-write OR on memory: [dst] = [dst] | src.  Matches a
// StoreI of (OrI (LoadI dst) src) to the same address, emitted as one
// "orl $dst, $src" (opcode 09 /r).  Cost 150; flags declared killed.
9024 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9025 %{
9026   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9027   effect(KILL cr);
9028 
9029   ins_cost(150);
9030   format %{ "orl     $dst, $src\t# int" %}
9031   opcode(0x09); /* Opcode 09 /r */
9032   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9033   ins_pipe(ialu_mem_reg);
9034 %}
9035 
9036 // Or Memory with Immediate
// 32-bit read-modify-write OR of memory with an immediate:
// [dst] = [dst] | src, opcode 81 /1 id, with the /1 extension passed as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
9037 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9038 %{
9039   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9040   effect(KILL cr);
9041 
9042   ins_cost(125);
9043   format %{ "orl     $dst, $src\t# int" %}
9044   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9045   ins_encode(REX_mem(dst), OpcSE(src),
9046              RM_opc_mem(secondary, dst), Con8or32(src));
9047   ins_pipe(ialu_mem_imm);
9048 %}
9049 
9050 // Xor Instructions
9051 // Xor Register with Register
// 32-bit XOR, register with register: dst = dst ^ src ("xorl $dst, $src",
// opcode 0x33).  Two-address form; flags operand is declared killed.
9052 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9053 %{
9054   match(Set dst (XorI dst src));
9055   effect(KILL cr);
9056 
9057   format %{ "xorl    $dst, $src\t# int" %}
9058   opcode(0x33);
9059   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9060   ins_pipe(ialu_reg_reg);
9061 %}
9062 
9063 // Xor Register with Immediate -1
// Special case of 32-bit XOR with the constant operand immI_M1: emitted as a
// bitwise complement through the macro assembler's notl() instead of an XOR
// with an immediate.  No flags operand is declared for this form.
// The format text names the instruction actually emitted (notl), in line with
// the 64-bit sibling xorL_rReg_im1, which prints "notq".
9064 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9065   match(Set dst (XorI dst imm));
9066 
9067   format %{ "notl   $dst" %}
9068   ins_encode %{
9069      __ notl($dst$$Register);
9070   %}
9071   ins_pipe(ialu_reg);
9072 %}
9073 
9074 // Xor Register with Immediate
// 32-bit XOR, register with a general int immediate: dst = dst ^ src
// ("xorl $dst, $src", opcode 81 /6 id).  Flags operand is declared killed.
9075 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9076 %{
9077   match(Set dst (XorI dst src));
9078   effect(KILL cr);
9079 
9080   format %{ "xorl    $dst, $src\t# int" %}
9081   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9082   ins_encode(OpcSErm(dst, src), Con8or32(src));
9083   ins_pipe(ialu_reg);
9084 %}
9085 
9086 // Xor Register with Memory
// 32-bit XOR, register with a loaded memory operand: dst = dst ^ [src].  The
// LoadI is folded into "xorl $dst, $src" (opcode 0x33).  Cost 125; flags
// operand is declared killed.
9087 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9088 %{
9089   match(Set dst (XorI dst (LoadI src)));
9090   effect(KILL cr);
9091 
9092   ins_cost(125);
9093   format %{ "xorl    $dst, $src\t# int" %}
9094   opcode(0x33);
9095   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9096   ins_pipe(ialu_reg_mem);
9097 %}
9098 
9099 // Xor Memory with Register
// 32-bit read-modify-write XOR on memory: [dst] = [dst] ^ src.  Matches a
// StoreI of (XorI (LoadI dst) src) to the same address, emitted as one
// "xorl $dst, $src" (opcode 31 /r).  Cost 150; flags declared killed.
9100 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9101 %{
9102   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9103   effect(KILL cr);
9104 
9105   ins_cost(150);
9106   format %{ "xorl    $dst, $src\t# int" %}
9107   opcode(0x31); /* Opcode 31 /r */
9108   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9109   ins_pipe(ialu_mem_reg);
9110 %}
9111 
9112 // Xor Memory with Immediate
// 32-bit read-modify-write XOR of memory with an immediate:
// [dst] = [dst] ^ src, opcode 81 /6 id, with the /6 extension passed as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
9113 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9114 %{
9115   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9116   effect(KILL cr);
9117 
9118   ins_cost(125);
9119   format %{ "xorl    $dst, $src\t# int" %}
9120   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9121   ins_encode(REX_mem(dst), OpcSE(src),
9122              RM_opc_mem(secondary, dst), Con8or32(src));
9123   ins_pipe(ialu_mem_imm);
9124 %}
9125 
9126 
9127 // Long Logical Instructions
9128 
9129 // And Instructions
9130 // And Register with Register
// 64-bit AND, register with register: dst = dst & src ("andq $dst, $src",
// opcode 0x23 with the REX_reg_reg_wide prefix class).  Flags operand is
// declared killed.
9131 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9132 %{
9133   match(Set dst (AndL dst src));
9134   effect(KILL cr);
9135 
9136   format %{ "andq    $dst, $src\t# long" %}
9137   opcode(0x23);
9138   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9139   ins_pipe(ialu_reg_reg);
9140 %}
9141 
9142 // And Register with Immediate 255
// Special case of 64-bit AND with the constant operand immL_255: emits
// "movzbq $dst, $dst" (opcode bytes 0F B6 with a wide REX prefix class),
// a zero-extension of the low byte, instead of an AND.  No flags operand is
// declared.
9143 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9144 %{
9145   match(Set dst (AndL dst src));
9146 
9147   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9148   opcode(0x0F, 0xB6);
9149   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9150   ins_pipe(ialu_reg);
9151 %}
9152 
9153 // And Register with Immediate 65535
// Special case of 64-bit AND with the constant operand immL_65535: emits
// "movzwq $dst, $dst" (opcode bytes 0F B7 with a wide REX prefix class),
// a zero-extension of the low 16 bits, instead of an AND.  No flags operand
// is declared.
9154 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9155 %{
9156   match(Set dst (AndL dst src));
9157 
9158   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9159   opcode(0x0F, 0xB7);
9160   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9161   ins_pipe(ialu_reg);
9162 %}
9163 
9164 // And Register with Immediate
// 64-bit AND, register with an immediate of operand type immL32:
// dst = dst & src ("andq $dst, $src", opcode 81 /4).  Flags operand is
// declared killed.
// NOTE(review): immL32 is presumably a long constant representable as a
// sign-extended 32-bit immediate - confirm against the operand definition.
9165 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9166 %{
9167   match(Set dst (AndL dst src));
9168   effect(KILL cr);
9169 
9170   format %{ "andq    $dst, $src\t# long" %}
9171   opcode(0x81, 0x04); /* Opcode 81 /4 */
9172   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9173   ins_pipe(ialu_reg);
9174 %}
9175 
9176 // And Register with Memory
// 64-bit AND, register with a loaded memory operand: dst = dst & [src].  The
// LoadL is folded into "andq $dst, $src" (opcode 0x23).  Cost 125; flags
// operand is declared killed.
9177 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9178 %{
9179   match(Set dst (AndL dst (LoadL src)));
9180   effect(KILL cr);
9181 
9182   ins_cost(125);
9183   format %{ "andq    $dst, $src\t# long" %}
9184   opcode(0x23);
9185   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9186   ins_pipe(ialu_reg_mem);
9187 %}
9188 
9189 // And Memory with Register
// 64-bit read-modify-write AND on memory: [dst] = [dst] & src.  Matches a
// StoreL of (AndL (LoadL dst) src) to the same address, emitted as one
// "andq $dst, $src" (opcode 21 /r).  Cost 150; flags declared killed.
9190 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9191 %{
9192   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9193   effect(KILL cr);
9194 
9195   ins_cost(150);
9196   format %{ "andq    $dst, $src\t# long" %}
9197   opcode(0x21); /* Opcode 21 /r */
9198   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9199   ins_pipe(ialu_mem_reg);
9200 %}
9201 
9202 // And Memory with Immediate
// 64-bit read-modify-write AND of memory with an immL32 immediate:
// [dst] = [dst] & src, opcode 81 /4 id, with the /4 extension passed as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
9203 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9204 %{
9205   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9206   effect(KILL cr);
9207 
9208   ins_cost(125);
9209   format %{ "andq    $dst, $src\t# long" %}
9210   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9211   ins_encode(REX_mem_wide(dst), OpcSE(src),
9212              RM_opc_mem(secondary, dst), Con8or32(src));
9213   ins_pipe(ialu_mem_imm);
9214 %}
9215 
9216 // Or Instructions
9217 // Or Register with Register
// 64-bit OR, register with register: dst = dst | src ("orq $dst, $src",
// opcode 0x0B with the REX_reg_reg_wide prefix class).  Flags operand is
// declared killed.
9218 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9219 %{
9220   match(Set dst (OrL dst src));
9221   effect(KILL cr);
9222 
9223   format %{ "orq     $dst, $src\t# long" %}
9224   opcode(0x0B);
9225   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9226   ins_pipe(ialu_reg_reg);
9227 %}
9228 
9229 // Use any_RegP to match R15 (TLS register) without spilling.
// 64-bit OR of a long register with a pointer reinterpreted as an integer:
// dst = dst | (CastP2X src).  The cast is folded into the match, so the
// pointer register is used directly as the source of "orq $dst, $src"
// (opcode 0x0B).  src uses the any_RegP class; flags declared killed.
9230 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9231   match(Set dst (OrL dst (CastP2X src)));
9232   effect(KILL cr);
9233 
9234   format %{ "orq     $dst, $src\t# long" %}
9235   opcode(0x0B);
9236   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9237   ins_pipe(ialu_reg_reg);
9238 %}
9239 
9240 
9241 // Or Register with Immediate
// 64-bit OR, register with an immL32 immediate: dst = dst | src
// ("orq $dst, $src", opcode 81 /1 id).  Flags operand is declared killed.
9242 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9243 %{
9244   match(Set dst (OrL dst src));
9245   effect(KILL cr);
9246 
9247   format %{ "orq     $dst, $src\t# long" %}
9248   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9249   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9250   ins_pipe(ialu_reg);
9251 %}
9252 
9253 // Or Register with Memory
// 64-bit OR, register with a loaded memory operand: dst = dst | [src].  The
// LoadL is folded into "orq $dst, $src" (opcode 0x0B).  Cost 125; flags
// operand is declared killed.
9254 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9255 %{
9256   match(Set dst (OrL dst (LoadL src)));
9257   effect(KILL cr);
9258 
9259   ins_cost(125);
9260   format %{ "orq     $dst, $src\t# long" %}
9261   opcode(0x0B);
9262   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9263   ins_pipe(ialu_reg_mem);
9264 %}
9265 
9266 // Or Memory with Register
// 64-bit read-modify-write OR on memory: [dst] = [dst] | src.  Matches a
// StoreL of (OrL (LoadL dst) src) to the same address, emitted as one
// "orq $dst, $src" (opcode 09 /r).  Cost 150; flags declared killed.
9267 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9268 %{
9269   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9270   effect(KILL cr);
9271 
9272   ins_cost(150);
9273   format %{ "orq     $dst, $src\t# long" %}
9274   opcode(0x09); /* Opcode 09 /r */
9275   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9276   ins_pipe(ialu_mem_reg);
9277 %}
9278 
9279 // Or Memory with Immediate
// 64-bit read-modify-write OR of memory with an immL32 immediate:
// [dst] = [dst] | src, opcode 81 /1 id, with the /1 extension passed as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
9280 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9281 %{
9282   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9283   effect(KILL cr);
9284 
9285   ins_cost(125);
9286   format %{ "orq     $dst, $src\t# long" %}
9287   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9288   ins_encode(REX_mem_wide(dst), OpcSE(src),
9289              RM_opc_mem(secondary, dst), Con8or32(src));
9290   ins_pipe(ialu_mem_imm);
9291 %}
9292 
9293 // Xor Instructions
9294 // Xor Register with Register
// 64-bit XOR, register with register: dst = dst ^ src ("xorq $dst, $src",
// opcode 0x33 with the REX_reg_reg_wide prefix class).  Flags operand is
// declared killed.
9295 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9296 %{
9297   match(Set dst (XorL dst src));
9298   effect(KILL cr);
9299 
9300   format %{ "xorq    $dst, $src\t# long" %}
9301   opcode(0x33);
9302   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9303   ins_pipe(ialu_reg_reg);
9304 %}
9305 
9306 // Xor Register with Immediate -1
// Special case of 64-bit XOR with the constant operand immL_M1: emitted as a
// bitwise complement through the macro assembler's notq() instead of an XOR
// with an immediate.  No flags operand is declared for this form.
9307 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9308   match(Set dst (XorL dst imm));
9309 
9310   format %{ "notq   $dst" %}
9311   ins_encode %{
9312      __ notq($dst$$Register);
9313   %}
9314   ins_pipe(ialu_reg);
9315 %}
9316 
9317 // Xor Register with Immediate
// 64-bit XOR, register with an immL32 immediate: dst = dst ^ src
// ("xorq $dst, $src", opcode 81 /6 id).  Flags operand is declared killed.
9318 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9319 %{
9320   match(Set dst (XorL dst src));
9321   effect(KILL cr);
9322 
9323   format %{ "xorq    $dst, $src\t# long" %}
9324   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9325   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9326   ins_pipe(ialu_reg);
9327 %}
9328 
9329 // Xor Register with Memory
// 64-bit XOR, register with a loaded memory operand: dst = dst ^ [src].  The
// LoadL is folded into "xorq $dst, $src" (opcode 0x33).  Cost 125; flags
// operand is declared killed.
9330 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9331 %{
9332   match(Set dst (XorL dst (LoadL src)));
9333   effect(KILL cr);
9334 
9335   ins_cost(125);
9336   format %{ "xorq    $dst, $src\t# long" %}
9337   opcode(0x33);
9338   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9339   ins_pipe(ialu_reg_mem);
9340 %}
9341 
9342 // Xor Memory with Register
// 64-bit read-modify-write XOR on memory: [dst] = [dst] ^ src.  Matches a
// StoreL of (XorL (LoadL dst) src) to the same address, emitted as one
// "xorq $dst, $src" (opcode 31 /r).  Cost 150; flags declared killed.
9343 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9344 %{
9345   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9346   effect(KILL cr);
9347 
9348   ins_cost(150);
9349   format %{ "xorq    $dst, $src\t# long" %}
9350   opcode(0x31); /* Opcode 31 /r */
9351   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9352   ins_pipe(ialu_mem_reg);
9353 %}
9354 
9355 // Xor Memory with Immediate
// 64-bit read-modify-write XOR of memory with an immL32 immediate:
// [dst] = [dst] ^ src, opcode 81 /6 id, with the /6 extension passed as the
// secondary opcode to RM_opc_mem.  Cost 125; flags declared killed.
9356 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9357 %{
9358   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9359   effect(KILL cr);
9360 
9361   ins_cost(125);
9362   format %{ "xorq    $dst, $src\t# long" %}
9363   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9364   ins_encode(REX_mem_wide(dst), OpcSE(src),
9365              RM_opc_mem(secondary, dst), Con8or32(src));
9366   ins_pipe(ialu_mem_imm);
9367 %}
9368 
9369 // Convert Int to Boolean
// Conv2B on an int: dst = (src != 0) ? 1 : 0.  Three-instruction sequence:
// testl src,src sets the flags, setnz writes the low byte of dst, and movzbl
// zero-extends that byte to the full 32-bit register.  Flags declared killed.
9370 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9371 %{
9372   match(Set dst (Conv2B src));
9373   effect(KILL cr);
9374 
9375   format %{ "testl   $src, $src\t# ci2b\n\t"
9376             "setnz   $dst\n\t"
9377             "movzbl  $dst, $dst" %}
9378   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9379              setNZ_reg(dst),
9380              REX_reg_breg(dst, dst), // movzbl
9381              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9382   ins_pipe(pipe_slow); // XXX
9383 %}
9384 
9385 // Convert Pointer to Boolean
// Conv2B on a pointer: dst = (src != NULL) ? 1 : 0.  Same sequence as
// convI2B except that the test is the 64-bit testq (wide REX prefix class),
// followed by setnz on dst and a movzbl to zero-extend the result byte.
// Flags declared killed.
9386 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9387 %{
9388   match(Set dst (Conv2B src));
9389   effect(KILL cr);
9390 
9391   format %{ "testq   $src, $src\t# cp2b\n\t"
9392             "setnz   $dst\n\t"
9393             "movzbl  $dst, $dst" %}
9394   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9395              setNZ_reg(dst),
9396              REX_reg_breg(dst, dst), // movzbl
9397              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9398   ins_pipe(pipe_slow); // XXX
9399 %}
9400 
// CmpLTMask p q: produces an all-ones / all-zeros mask in dst.  The sequence
// is cmpl p,q; setlt dst (0 or 1 in the low byte); movzbl dst,dst to widen;
// negl dst to turn 1 into -1.  Result: dst = (p < q) ? -1 : 0.
// Cost 400; flags declared killed.
9401 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9402 %{
9403   match(Set dst (CmpLTMask p q));
9404   effect(KILL cr);
9405 
9406   ins_cost(400); // XXX
9407   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9408             "setlt   $dst\n\t"
9409             "movzbl  $dst, $dst\n\t"
9410             "negl    $dst" %}
9411   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9412              setLT_reg(dst),
9413              REX_reg_breg(dst, dst), // movzbl
9414              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9415              neg_reg(dst));
9416   ins_pipe(pipe_slow);
9417 %}
9418 
// CmpLTMask against the constant zero operand: dst = (dst < 0) ? -1 : 0.
// Implemented as a single arithmetic right shift by 31 ("sarl $dst, #31",
// opcode C1 /7 ib with immediate 0x1F), which replicates the sign bit.
// Cost 100; flags declared killed.
9419 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9420 %{
9421   match(Set dst (CmpLTMask dst zero));
9422   effect(KILL cr);
9423 
9424   ins_cost(100); // XXX
9425   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9426   opcode(0xC1, 0x7);  /* C1 /7 ib */
9427   ins_encode(reg_opc_imm(dst, 0x1F));
9428   ins_pipe(ialu_reg);
9429 %}
9430 
9431 
// Fused conditional add: p = (p - q) + ((p < q) ? y : 0), matching
// (AddI (AndI (CmpLTMask p q) y) (SubI p q)).
//
// CmpLTMask is a signed comparison.  The earlier subl/sbbl/andl/addl sequence
// derived the mask from the carry flag, i.e. from the unsigned borrow of
// p - q, which disagrees with the signed result whenever p and q differ in
// sign (e.g. p = -1, q = 1 produced no borrow and therefore dropped y).
// The signed condition is now taken directly from the flags of the
// subtraction: y is added only when p < q as signed ints.
//
// tmp is no longer used by the encoding; it stays declared as a TEMP operand
// only so that this instruct's operand list is unchanged.
9432 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr)
9433 %{
9434   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9435   effect(TEMP tmp, KILL cr);
9436 
9437   ins_cost(400); // XXX
9438   format %{ "subl    $p, $q\t# cadd_cmpLTMask\n\t"
9439             "jge,s   done\n\t"
9440             "addl    $p, $y\n"
9441     "done:" %}
9442   ins_encode %{
9443     Register Rp = $p$$Register;
9444     Register Rq = $q$$Register;
9445     Register Ry = $y$$Register;
9446     Label done;
9447     __ subl(Rp, Rq);                          // p -= q; flags as for cmpl p, q
9448     __ jccb(Assembler::greaterEqual, done);   // signed p >= q: mask is 0
9449     __ addl(Rp, Ry);                          // signed p <  q: add y
9450     __ bind(done);
9451   %}
9452   ins_pipe(pipe_cmplt);
9453 %}
9454 
9455 //---------- FP Instructions------------------------------------------------
9456 
// Float compare of two registers into the flags operand (rFlagsRegU).
// Emits ucomiss followed by emit_cmpfp_fixup().  Per the format text the
// fixup is skipped when the parity flag is clear; otherwise (comment in the
// listing: "saw NaN, set CF") the flags are rewritten through
// pushfq / andq [rsp], #0xffffff2b / popfq.  Cost 145, versus 100 for the
// fixup-free rFlagsRegUCF variant cmpF_cc_reg_CF.
9457 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9458 %{
9459   match(Set cr (CmpF src1 src2));
9460 
9461   ins_cost(145);
9462   format %{ "ucomiss $src1, $src2\n\t"
9463             "jnp,s   exit\n\t"
9464             "pushfq\t# saw NaN, set CF\n\t"
9465             "andq    [rsp], #0xffffff2b\n\t"
9466             "popfq\n"
9467     "exit:" %}
9468   ins_encode %{
9469     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9470     emit_cmpfp_fixup(_masm);
9471   %}
9472   ins_pipe(pipe_slow);
9473 %}
9474 
// Float compare of two registers into an rFlagsRegUCF flags operand: a bare
// ucomiss with no NaN fixup, at cost 100 (cheaper than cmpF_cc_reg).
// NOTE(review): presumably chosen when the flag consumer can work with the
// raw ucomiss result - confirm against the rFlagsRegUCF operand definition.
9475 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9476   match(Set cr (CmpF src1 src2));
9477 
9478   ins_cost(100);
9479   format %{ "ucomiss $src1, $src2" %}
9480   ins_encode %{
9481     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9482   %}
9483   ins_pipe(pipe_slow);
9484 %}
9485 
// Float compare of a register against a loaded memory operand into the flags
// operand (rFlagsRegU).  The LoadF is folded into ucomiss as a memory source;
// emit_cmpfp_fixup() follows, with the same flag-rewrite sequence shown in
// the format text as for cmpF_cc_reg.  Cost 145.
9486 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9487 %{
9488   match(Set cr (CmpF src1 (LoadF src2)));
9489 
9490   ins_cost(145);
9491   format %{ "ucomiss $src1, $src2\n\t"
9492             "jnp,s   exit\n\t"
9493             "pushfq\t# saw NaN, set CF\n\t"
9494             "andq    [rsp], #0xffffff2b\n\t"
9495             "popfq\n"
9496     "exit:" %}
9497   ins_encode %{
9498     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9499     emit_cmpfp_fixup(_masm);
9500   %}
9501   ins_pipe(pipe_slow);
9502 %}
9503 
// Float compare of a register against a loaded memory operand into an
// rFlagsRegUCF flags operand: bare ucomiss with a memory source and no NaN
// fixup.  Cost 100.
9504 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9505   match(Set cr (CmpF src1 (LoadF src2)));
9506 
9507   ins_cost(100);
9508   format %{ "ucomiss $src1, $src2" %}
9509   ins_encode %{
9510     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9511   %}
9512   ins_pipe(pipe_slow);
9513 %}
9514 
// Float compare of a register against a float constant into the flags
// operand (rFlagsRegU).  The constant is addressed through
// $constantaddress($con) (the format text calls this a load from the
// constant table) and used as the memory source of ucomiss;
// emit_cmpfp_fixup() follows.  Cost 145.
9515 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
9516   match(Set cr (CmpF src con));
9517 
9518   ins_cost(145);
9519   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9520             "jnp,s   exit\n\t"
9521             "pushfq\t# saw NaN, set CF\n\t"
9522             "andq    [rsp], #0xffffff2b\n\t"
9523             "popfq\n"
9524     "exit:" %}
9525   ins_encode %{
9526     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9527     emit_cmpfp_fixup(_masm);
9528   %}
9529   ins_pipe(pipe_slow);
9530 %}
9531 
// Float compare of a register against a float constant into an rFlagsRegUCF
// flags operand: bare ucomiss against $constantaddress($con), no NaN fixup.
// Cost 100.
9532 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
9533   match(Set cr (CmpF src con));
9534   ins_cost(100);
9535   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9536   ins_encode %{
9537     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9538   %}
9539   ins_pipe(pipe_slow);
9540 %}
9541 
// Double compare of two registers into the flags operand (rFlagsRegU).
// Emits ucomisd followed by emit_cmpfp_fixup().  Per the format text the
// fixup is skipped when the parity flag is clear; otherwise ("saw NaN, set
// CF") the flags are rewritten through pushfq / andq [rsp], #0xffffff2b /
// popfq.  Cost 145, versus 100 for the fixup-free variant cmpD_cc_reg_CF.
9542 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9543 %{
9544   match(Set cr (CmpD src1 src2));
9545 
9546   ins_cost(145);
9547   format %{ "ucomisd $src1, $src2\n\t"
9548             "jnp,s   exit\n\t"
9549             "pushfq\t# saw NaN, set CF\n\t"
9550             "andq    [rsp], #0xffffff2b\n\t"
9551             "popfq\n"
9552     "exit:" %}
9553   ins_encode %{
9554     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9555     emit_cmpfp_fixup(_masm);
9556   %}
9557   ins_pipe(pipe_slow);
9558 %}
9559 
// Double compare of two registers into an rFlagsRegUCF flags operand: a bare
// ucomisd with no NaN fixup, at cost 100 (cheaper than cmpD_cc_reg).
// The format text previously ended in a stray " test" token that does not
// correspond to any emitted instruction; it now matches the float sibling
// cmpF_cc_reg_CF.
9560 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
9561   match(Set cr (CmpD src1 src2));
9562 
9563   ins_cost(100);
9564   format %{ "ucomisd $src1, $src2" %}
9565   ins_encode %{
9566     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9567   %}
9568   ins_pipe(pipe_slow);
9569 %}
9570 
// Double compare of a register against a loaded memory operand into the
// flags operand (rFlagsRegU).  The LoadD is folded into ucomisd as a memory
// source; emit_cmpfp_fixup() follows, with the flag-rewrite sequence shown
// in the format text.  Cost 145.
9571 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9572 %{
9573   match(Set cr (CmpD src1 (LoadD src2)));
9574 
9575   ins_cost(145);
9576   format %{ "ucomisd $src1, $src2\n\t"
9577             "jnp,s   exit\n\t"
9578             "pushfq\t# saw NaN, set CF\n\t"
9579             "andq    [rsp], #0xffffff2b\n\t"
9580             "popfq\n"
9581     "exit:" %}
9582   ins_encode %{
9583     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9584     emit_cmpfp_fixup(_masm);
9585   %}
9586   ins_pipe(pipe_slow);
9587 %}
9588 
// Double compare of a register against a loaded memory operand into an
// rFlagsRegUCF flags operand: bare ucomisd with a memory source and no NaN
// fixup.  Cost 100.
9589 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
9590   match(Set cr (CmpD src1 (LoadD src2)));
9591 
9592   ins_cost(100);
9593   format %{ "ucomisd $src1, $src2" %}
9594   ins_encode %{
9595     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9596   %}
9597   ins_pipe(pipe_slow);
9598 %}
9599 
// Double compare of a register against a double constant into the flags
// operand (rFlagsRegU).  The constant is addressed through
// $constantaddress($con) (the format text calls this a load from the
// constant table) and used as the memory source of ucomisd;
// emit_cmpfp_fixup() follows.  Cost 145.
9600 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
9601   match(Set cr (CmpD src con));
9602 
9603   ins_cost(145);
9604   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9605             "jnp,s   exit\n\t"
9606             "pushfq\t# saw NaN, set CF\n\t"
9607             "andq    [rsp], #0xffffff2b\n\t"
9608             "popfq\n"
9609     "exit:" %}
9610   ins_encode %{
9611     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9612     emit_cmpfp_fixup(_masm);
9613   %}
9614   ins_pipe(pipe_slow);
9615 %}
9616 
// Double compare of a register against a double constant into an
// rFlagsRegUCF flags operand: bare ucomisd against $constantaddress($con),
// no NaN fixup.  Cost 100.
9617 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
9618   match(Set cr (CmpD src con));
9619   ins_cost(100);
9620   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
9621   ins_encode %{
9622     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9623   %}
9624   ins_pipe(pipe_slow);
9625 %}
9626 
9627 // Compare into -1,0,1
// Three-way float compare (CmpF3) of two registers into an int register.
// Emits ucomiss followed by emit_cmpfp3(dst).  Per the format text: dst is
// preset to -1 and kept when the parity flag is set or when src1 is below
// src2; otherwise setne + movzbl leave 0 for equal and 1 for not equal.
// Cost 275; flags declared killed.
9628 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9629 %{
9630   match(Set dst (CmpF3 src1 src2));
9631   effect(KILL cr);
9632 
9633   ins_cost(275);
9634   format %{ "ucomiss $src1, $src2\n\t"
9635             "movl    $dst, #-1\n\t"
9636             "jp,s    done\n\t"
9637             "jb,s    done\n\t"
9638             "setne   $dst\n\t"
9639             "movzbl  $dst, $dst\n"
9640     "done:" %}
9641   ins_encode %{
9642     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9643     emit_cmpfp3(_masm, $dst$$Register);
9644   %}
9645   ins_pipe(pipe_slow);
9646 %}
9647 
9648 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a loaded memory
// operand.  The LoadF is folded into ucomiss as a memory source, followed by
// emit_cmpfp3(dst); the format text shows the same -1 / 0 / 1 selection as
// cmpF_reg.  Cost 275; flags declared killed.
9649 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9650 %{
9651   match(Set dst (CmpF3 src1 (LoadF src2)));
9652   effect(KILL cr);
9653 
9654   ins_cost(275);
9655   format %{ "ucomiss $src1, $src2\n\t"
9656             "movl    $dst, #-1\n\t"
9657             "jp,s    done\n\t"
9658             "jb,s    done\n\t"
9659             "setne   $dst\n\t"
9660             "movzbl  $dst, $dst\n"
9661     "done:" %}
9662   ins_encode %{
9663     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9664     emit_cmpfp3(_masm, $dst$$Register);
9665   %}
9666   ins_pipe(pipe_slow);
9667 %}
9668 
9669 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a float constant.
// ucomiss reads the constant through $constantaddress($con), followed by
// emit_cmpfp3(dst); the format text shows the same -1 / 0 / 1 selection as
// cmpF_reg.  Cost 275; flags declared killed.
9670 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
9671   match(Set dst (CmpF3 src con));
9672   effect(KILL cr);
9673 
9674   ins_cost(275);
9675   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9676             "movl    $dst, #-1\n\t"
9677             "jp,s    done\n\t"
9678             "jb,s    done\n\t"
9679             "setne   $dst\n\t"
9680             "movzbl  $dst, $dst\n"
9681     "done:" %}
9682   ins_encode %{
9683     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9684     emit_cmpfp3(_masm, $dst$$Register);
9685   %}
9686   ins_pipe(pipe_slow);
9687 %}
9688 
9689 // Compare into -1,0,1
// Three-way double compare (CmpD3) of two registers into an int register.
// Emits ucomisd followed by emit_cmpfp3(dst).  Per the format text: dst is
// preset to -1 and kept when the parity flag is set or when src1 is below
// src2; otherwise setne + movzbl leave 0 for equal and 1 for not equal.
// Cost 275; flags declared killed.
9690 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9691 %{
9692   match(Set dst (CmpD3 src1 src2));
9693   effect(KILL cr);
9694 
9695   ins_cost(275);
9696   format %{ "ucomisd $src1, $src2\n\t"
9697             "movl    $dst, #-1\n\t"
9698             "jp,s    done\n\t"
9699             "jb,s    done\n\t"
9700             "setne   $dst\n\t"
9701             "movzbl  $dst, $dst\n"
9702     "done:" %}
9703   ins_encode %{
9704     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9705     emit_cmpfp3(_masm, $dst$$Register);
9706   %}
9707   ins_pipe(pipe_slow);
9708 %}
9709 
9710 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a loaded memory
// operand.  The LoadD is folded into ucomisd as a memory source, followed by
// emit_cmpfp3(dst); the format text shows the same -1 / 0 / 1 selection as
// cmpD_reg.  Cost 275; flags declared killed.
9711 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9712 %{
9713   match(Set dst (CmpD3 src1 (LoadD src2)));
9714   effect(KILL cr);
9715 
9716   ins_cost(275);
9717   format %{ "ucomisd $src1, $src2\n\t"
9718             "movl    $dst, #-1\n\t"
9719             "jp,s    done\n\t"
9720             "jb,s    done\n\t"
9721             "setne   $dst\n\t"
9722             "movzbl  $dst, $dst\n"
9723     "done:" %}
9724   ins_encode %{
9725     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9726     emit_cmpfp3(_masm, $dst$$Register);
9727   %}
9728   ins_pipe(pipe_slow);
9729 %}
9730 
9731 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a double constant.
// ucomisd reads the constant through $constantaddress($con), followed by
// emit_cmpfp3(dst); the format text shows the same -1 / 0 / 1 selection as
// cmpD_reg.  Cost 275; flags declared killed.
9732 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
9733   match(Set dst (CmpD3 src con));
9734   effect(KILL cr);
9735 
9736   ins_cost(275);
9737   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9738             "movl    $dst, #-1\n\t"
9739             "jp,s    done\n\t"
9740             "jb,s    done\n\t"
9741             "setne   $dst\n\t"
9742             "movzbl  $dst, $dst\n"
9743     "done:" %}
9744   ins_encode %{
9745     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9746     emit_cmpfp3(_masm, $dst$$Register);
9747   %}
9748   ins_pipe(pipe_slow);
9749 %}
9750 
9751 // -----------Trig and Transcendental Instructions-----------------------------
// Cosine of a double, in place in an XMM register (dst = CosD dst).
// The encoding brackets the opcode bytes D9 FF between Push_SrcXD(dst) and
// Push_ResultXD(dst).
// NOTE(review): Push_SrcXD / Push_ResultXD presumably move the value from the
// XMM register to the x87 stack and back; they are defined outside this
// section - confirm.
9752 instruct cosD_reg(regD dst) %{
9753   match(Set dst (CosD dst));
9754 
9755   format %{ "dcos   $dst\n\t" %}
9756   opcode(0xD9, 0xFF);
9757   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9758   ins_pipe( pipe_slow );
9759 %}
9760 
// Sine of a double, in place in an XMM register (dst = SinD dst).
// The encoding brackets the opcode bytes D9 FE between Push_SrcXD(dst) and
// Push_ResultXD(dst).
// NOTE(review): Push_SrcXD / Push_ResultXD presumably move the value from the
// XMM register to the x87 stack and back - confirm.
9761 instruct sinD_reg(regD dst) %{
9762   match(Set dst (SinD dst));
9763 
9764   format %{ "dsin   $dst\n\t" %}
9765   opcode(0xD9, 0xFE);
9766   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9767   ins_pipe( pipe_slow );
9768 %}
9769 
// Tangent of a double, in place in an XMM register (dst = TanD dst).
// Between Push_SrcXD(dst) and Push_ResultXD(dst) the encoding emits fptan
// (D9 F2) followed by "fstp st" (DD D8), which pops one x87 stack entry
// before the result is taken.
// NOTE(review): the popped entry is presumably the extra value fptan pushes
// on top of the tangent - confirm against the instruction reference.
9770 instruct tanD_reg(regD dst) %{
9771   match(Set dst (TanD dst));
9772 
9773   format %{ "dtan   $dst\n\t" %}
9774   ins_encode( Push_SrcXD(dst),
9775               Opcode(0xD9), Opcode(0xF2),   //fptan
9776               Opcode(0xDD), Opcode(0xD8),   //fstp st
9777               Push_ResultXD(dst) );
9778   ins_pipe( pipe_slow );
9779 %}
9780 
// Base-10 logarithm of a double, in place in an XMM register
// (dst = Log10D dst).  Order of the emitted pieces matters: fldlg2 is
// emitted first so log_10(2) sits below the operand pushed by Push_SrcXD;
// fyl2x then forms log_10(2) * log_2(x), and Push_ResultXD moves the result
// back to dst.
9781 instruct log10D_reg(regD dst) %{
9782   // The source and result Double operands in XMM registers
9783   match(Set dst (Log10D dst));
9784   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9785   // fyl2x        ; compute log_10(2) * log_2(x)
9786   format %{ "fldlg2\t\t\t#Log10\n\t"
9787             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
9788          %}
9789    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
9790               Push_SrcXD(dst),
9791               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9792               Push_ResultXD(dst));
9793 
9794   ins_pipe( pipe_slow );
9795 %}
9796 
9797 instruct logD_reg(regD dst) %{
       // In-place natural logarithm of a double held in an XMM register.
       // Emission order matters: the log_e(2) constant is pushed first, then
       // the source operand, so that fyl2x multiplies the constant by log_2(x).
9798   // The source and result Double operands in XMM registers
9799   match(Set dst (LogD dst));
9800   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9801   // fyl2x        ; compute log_e(2) * log_2(x)
9802   format %{ "fldln2\t\t\t#Log_e\n\t"
9803             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
9804          %}
9805   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9806               Push_SrcXD(dst),
9807               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9808               Push_ResultXD(dst));
9809   ins_pipe( pipe_slow );
9810 %}
9811 
9812 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
       // dst = src0 raised to the power src1, computed by the macro assembler's
       // fast_pow() on the x87 stack.  rax, rdx, rcx and the flags are killed.
9813   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9814   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9815   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9816   ins_encode %{
         // Reserve an 8-byte scratch slot used to shuttle doubles XMM <-> x87.
9817     __ subptr(rsp, 8);
         // Exponent (src1) is loaded first, base (src0) second, so the base
         // ends up on top of the x87 stack when fast_pow() runs.
9818     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9819     __ fld_d(Address(rsp, 0));
9820     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9821     __ fld_d(Address(rsp, 0));
9822     __ fast_pow();
         // Pop the x87 result through the scratch slot into dst, then release it.
9823     __ fstp_d(Address(rsp, 0));
9824     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9825     __ addptr(rsp, 8);
9826   %}
9827   ins_pipe( pipe_slow );
9828 %}
9829 
9830 instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
       // dst = exp(src), computed by the macro assembler's fast_exp() on the
       // x87 stack.  rax, rcx, rdx and the flags are killed.
       // The format string prints the operands in data-flow order
       // (source -> destination), matching the fast_pow listing.
9831   match(Set dst (ExpD src));
9832   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9833   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
9834   ins_encode %{
         // Reserve an 8-byte scratch slot used to shuttle the double XMM <-> x87.
9835     __ subptr(rsp, 8);
9836     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9837     __ fld_d(Address(rsp, 0));
9838     __ fast_exp();
         // Pop the x87 result through the scratch slot into dst, then release it.
9839     __ fstp_d(Address(rsp, 0));
9840     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9841     __ addptr(rsp, 8);
9842   %}
9843   ins_pipe( pipe_slow );
9844 %}
9845 
9846 //----------Arithmetic Conversion Instructions---------------------------------
9847 
9848 instruct roundFloat_nop(regF dst)
9849 %{
       // RoundFloat matched in place on $dst: zero cost, empty encoding and the
       // empty pipeline class, so no machine code is emitted for this node.
9850   match(Set dst (RoundFloat dst));
9851 
9852   ins_cost(0);
9853   ins_encode();
9854   ins_pipe(empty);
9855 %}
9856 
9857 instruct roundDouble_nop(regD dst)
9858 %{
       // RoundDouble matched in place on $dst: zero cost, empty encoding and the
       // empty pipeline class, so no machine code is emitted for this node.
9859   match(Set dst (RoundDouble dst));
9860 
9861   ins_cost(0);
9862   ins_encode();
9863   ins_pipe(empty);
9864 %}
9865 
9866 instruct convF2D_reg_reg(regD dst, regF src)
9867 %{
       // float -> double conversion, register to register (cvtss2sd).
9868   match(Set dst (ConvF2D src));
9869 
9870   format %{ "cvtss2sd $dst, $src" %}
9871   ins_encode %{
9872     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
9873   %}
9874   ins_pipe(pipe_slow); // XXX
9875 %}
9876 
9877 instruct convF2D_reg_mem(regD dst, memory src)
9878 %{
       // float -> double conversion with the LoadF folded into the instruction:
       // cvtss2sd reads its float operand directly from memory.
9879   match(Set dst (ConvF2D (LoadF src)));
9880 
9881   format %{ "cvtss2sd $dst, $src" %}
9882   ins_encode %{
9883     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
9884   %}
9885   ins_pipe(pipe_slow); // XXX
9886 %}
9887 
9888 instruct convD2F_reg_reg(regF dst, regD src)
9889 %{
       // double -> float conversion, register to register (cvtsd2ss).
9890   match(Set dst (ConvD2F src));
9891 
9892   format %{ "cvtsd2ss $dst, $src" %}
9893   ins_encode %{
9894     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
9895   %}
9896   ins_pipe(pipe_slow); // XXX
9897 %}
9898 
9899 instruct convD2F_reg_mem(regF dst, memory src)
9900 %{
       // double -> float conversion with the LoadD folded into the instruction:
       // cvtsd2ss reads its double operand directly from memory.
9901   match(Set dst (ConvD2F (LoadD src)));
9902 
9903   format %{ "cvtsd2ss $dst, $src" %}
9904   ins_encode %{
9905     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
9906   %}
9907   ins_pipe(pipe_slow); // XXX
9908 %}
9909 
9910 // XXX do mem variants
9911 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
9912 %{
       // float -> int conversion with truncation (cvttss2sil).  When the raw
       // result equals the sentinel 0x80000000 the slow path runs: the source
       // float is spilled to a fresh 8-byte stack slot, the f2i_fixup stub is
       // called, and the corrected value is popped into $dst.  Flags are killed
       // by the compare.
       // NOTE(review): 0x80000000 is presumably the value cvttss2si yields for
       // NaN / out-of-range inputs (Intel SDM) -- confirm.
9913   match(Set dst (ConvF2I src));
9914   effect(KILL cr);
9915 
9916   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
9917             "cmpl    $dst, #0x80000000\n\t"
9918             "jne,s   done\n\t"
9919             "subq    rsp, #8\n\t"
9920             "movss   [rsp], $src\n\t"
9921             "call    f2i_fixup\n\t"
9922             "popq    $dst\n"
9923     "done:   "%}
9924   ins_encode %{
9925     Label done;
9926     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
9927     __ cmpl($dst$$Register, 0x80000000);
         // Fast path: any other result is already final.
9928     __ jccb(Assembler::notEqual, done);
         // Slow path: the stack slot carries the argument in and the result out.
9929     __ subptr(rsp, 8);
9930     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9931     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
9932     __ pop($dst$$Register);
9933     __ bind(done);
9934   %}
9935   ins_pipe(pipe_slow);
9936 %}
9937 
9938 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
9939 %{
       // float -> long conversion with truncation (cvttss2siq).  The result is
       // compared against the 64-bit value stored at
       // StubRoutines::x86::double_sign_flip() (shown as 0x8000000000000000 in
       // the format string); on a match the source is spilled to a fresh 8-byte
       // stack slot, f2l_fixup is called, and the corrected value is popped into
       // $dst.  Flags are killed by the compare.
9940   match(Set dst (ConvF2L src));
9941   effect(KILL cr);
9942 
9943   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
9944             "cmpq    $dst, [0x8000000000000000]\n\t"
9945             "jne,s   done\n\t"
9946             "subq    rsp, #8\n\t"
9947             "movss   [rsp], $src\n\t"
9948             "call    f2l_fixup\n\t"
9949             "popq    $dst\n"
9950     "done:   "%}
9951   ins_encode %{
9952     Label done;
9953     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
9954     __ cmp64($dst$$Register,
9955              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
         // Fast path: any other result is already final.
9956     __ jccb(Assembler::notEqual, done);
         // Slow path: the stack slot carries the argument in and the result out.
9957     __ subptr(rsp, 8);
9958     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9959     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
9960     __ pop($dst$$Register);
9961     __ bind(done);
9962   %}
9963   ins_pipe(pipe_slow);
9964 %}
9965 
9966 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
9967 %{
       // double -> int conversion with truncation (cvttsd2sil).  When the raw
       // result equals the sentinel 0x80000000 the slow path runs: the source
       // double is spilled to a fresh 8-byte stack slot, the d2i_fixup stub is
       // called, and the corrected value is popped into $dst.  Flags are killed
       // by the compare.
9968   match(Set dst (ConvD2I src));
9969   effect(KILL cr);
9970 
9971   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
9972             "cmpl    $dst, #0x80000000\n\t"
9973             "jne,s   done\n\t"
9974             "subq    rsp, #8\n\t"
9975             "movsd   [rsp], $src\n\t"
9976             "call    d2i_fixup\n\t"
9977             "popq    $dst\n"
9978     "done:   "%}
9979   ins_encode %{
9980     Label done;
9981     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
9982     __ cmpl($dst$$Register, 0x80000000);
         // Fast path: any other result is already final.
9983     __ jccb(Assembler::notEqual, done);
         // Slow path: the stack slot carries the argument in and the result out.
9984     __ subptr(rsp, 8);
9985     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9986     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
9987     __ pop($dst$$Register);
9988     __ bind(done);
9989   %}
9990   ins_pipe(pipe_slow);
9991 %}
9992 
9993 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
9994 %{
       // double -> long conversion with truncation (cvttsd2siq).  The result is
       // compared against the 64-bit value stored at
       // StubRoutines::x86::double_sign_flip() (shown as 0x8000000000000000 in
       // the format string); on a match the source is spilled to a fresh 8-byte
       // stack slot, d2l_fixup is called, and the corrected value is popped into
       // $dst.  Flags are killed by the compare.
9995   match(Set dst (ConvD2L src));
9996   effect(KILL cr);
9997 
9998   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
9999             "cmpq    $dst, [0x8000000000000000]\n\t"
10000             "jne,s   done\n\t"
10001             "subq    rsp, #8\n\t"
10002             "movsd   [rsp], $src\n\t"
10003             "call    d2l_fixup\n\t"
10004             "popq    $dst\n"
10005     "done:   "%}
10006   ins_encode %{
10007     Label done;
10008     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10009     __ cmp64($dst$$Register,
10010              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
          // Fast path: any other result is already final.
10011     __ jccb(Assembler::notEqual, done);
          // Slow path: the stack slot carries the argument in and the result out.
10012     __ subptr(rsp, 8);
10013     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10014     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10015     __ pop($dst$$Register);
10016     __ bind(done);
10017   %}
10018   ins_pipe(pipe_slow);
10019 %}
10020 
10021 instruct convI2F_reg_reg(regF dst, rRegI src)
10022 %{
        // int -> float conversion from a general register (cvtsi2ssl).
        // Selected only when UseXmmI2F is off; convXI2F_reg covers the other case.
10023   predicate(!UseXmmI2F);
10024   match(Set dst (ConvI2F src));
10025 
10026   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10027   ins_encode %{
10028     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10029   %}
10030   ins_pipe(pipe_slow); // XXX
10031 %}
10032 
10033 instruct convI2F_reg_mem(regF dst, memory src)
10034 %{
        // int -> float conversion with the LoadI folded in: cvtsi2ssl reads the
        // int directly from memory.  Unlike the register form, this rule carries
        // no UseXmmI2F predicate.
10035   match(Set dst (ConvI2F (LoadI src)));
10036 
10037   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10038   ins_encode %{
10039     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10040   %}
10041   ins_pipe(pipe_slow); // XXX
10042 %}
10043 
10044 instruct convI2D_reg_reg(regD dst, rRegI src)
10045 %{
        // int -> double conversion from a general register (cvtsi2sdl).
        // Selected only when UseXmmI2D is off; convXI2D_reg covers the other case.
10046   predicate(!UseXmmI2D);
10047   match(Set dst (ConvI2D src));
10048 
10049   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10050   ins_encode %{
10051     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10052   %}
10053   ins_pipe(pipe_slow); // XXX
10054 %}
10055 
10056 instruct convI2D_reg_mem(regD dst, memory src)
10057 %{
        // int -> double conversion with the LoadI folded in: cvtsi2sdl reads the
        // int directly from memory.  Unlike the register form, this rule carries
        // no UseXmmI2D predicate.
10058   match(Set dst (ConvI2D (LoadI src)));
10059 
10060   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10061   ins_encode %{
10062     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10063   %}
10064   ins_pipe(pipe_slow); // XXX
10065 %}
10066 
10067 instruct convXI2F_reg(regF dst, rRegI src)
10068 %{
        // int -> float conversion used when UseXmmI2F is on: the int is first
        // moved into the XMM destination (movdl) and then converted in place
        // with the packed cvtdq2ps instruction.
10069   predicate(UseXmmI2F);
10070   match(Set dst (ConvI2F src));
10071 
10072   format %{ "movdl $dst, $src\n\t"
10073             "cvtdq2psl $dst, $dst\t# i2f" %}
10074   ins_encode %{
10075     __ movdl($dst$$XMMRegister, $src$$Register);
10076     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10077   %}
10078   ins_pipe(pipe_slow); // XXX
10079 %}
10080 
10081 instruct convXI2D_reg(regD dst, rRegI src)
10082 %{
        // int -> double conversion used when UseXmmI2D is on: the int is first
        // moved into the XMM destination (movdl) and then converted in place
        // with the packed cvtdq2pd instruction.
10083   predicate(UseXmmI2D);
10084   match(Set dst (ConvI2D src));
10085 
10086   format %{ "movdl $dst, $src\n\t"
10087             "cvtdq2pdl $dst, $dst\t# i2d" %}
10088   ins_encode %{
10089     __ movdl($dst$$XMMRegister, $src$$Register);
10090     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10091   %}
10092   ins_pipe(pipe_slow); // XXX
10093 %}
10094 
10095 instruct convL2F_reg_reg(regF dst, rRegL src)
10096 %{
        // long -> float conversion from a general register (cvtsi2ssq).
10097   match(Set dst (ConvL2F src));
10098 
10099   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10100   ins_encode %{
10101     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10102   %}
10103   ins_pipe(pipe_slow); // XXX
10104 %}
10105 
10106 instruct convL2F_reg_mem(regF dst, memory src)
10107 %{
        // long -> float conversion with the LoadL folded in: cvtsi2ssq reads the
        // long directly from memory.
10108   match(Set dst (ConvL2F (LoadL src)));
10109 
10110   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10111   ins_encode %{
10112     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10113   %}
10114   ins_pipe(pipe_slow); // XXX
10115 %}
10116 
10117 instruct convL2D_reg_reg(regD dst, rRegL src)
10118 %{
        // long -> double conversion from a general register (cvtsi2sdq).
10119   match(Set dst (ConvL2D src));
10120 
10121   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10122   ins_encode %{
10123     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10124   %}
10125   ins_pipe(pipe_slow); // XXX
10126 %}
10127 
10128 instruct convL2D_reg_mem(regD dst, memory src)
10129 %{
        // long -> double conversion with the LoadL folded in: cvtsi2sdq reads the
        // long directly from memory.
10130   match(Set dst (ConvL2D (LoadL src)));
10131 
10132   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10133   ins_encode %{
10134     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10135   %}
10136   ins_pipe(pipe_slow); // XXX
10137 %}
10138 
10139 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10140 %{
        // int -> long conversion by sign extension (movslq), register to register.
10141   match(Set dst (ConvI2L src));
10142 
10143   ins_cost(125);
10144   format %{ "movslq  $dst, $src\t# i2l" %}
10145   ins_encode %{
10146     __ movslq($dst$$Register, $src$$Register);
10147   %}
10148   ins_pipe(ialu_reg_reg);
10149 %}
10150 
10151 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10152 // %{
10153 //   match(Set dst (ConvI2L src));
10154 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10155 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10156 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10157 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10158 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10159 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10160 
10161 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10162 //   ins_encode(enc_copy(dst, src));
10163 // //   opcode(0x63); // needs REX.W
10164 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10165 //   ins_pipe(ialu_reg_reg);
10166 // %}
10167 
10168 // Zero-extend convert int to long
10169 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10170 %{
        // Folds (ConvI2L src) & mask, where mask is an immL_32bits operand, into
        // a single 32-bit register move instead of a sign extension plus AND.
10171   match(Set dst (AndL (ConvI2L src) mask));
10172 
10173   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10174   ins_encode %{
          // No instruction is emitted when source and destination were
          // allocated to the same register.
10175     if ($dst$$reg != $src$$reg) {
10176       __ movl($dst$$Register, $src$$Register);
10177     }
10178   %}
10179   ins_pipe(ialu_reg_reg);
10180 %}
10181 
10182 // Zero-extend convert int to long
10183 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10184 %{
        // Folds (ConvI2L (LoadI src)) & mask, where mask is an immL_32bits
        // operand, into a single 32-bit load from memory (movl).
10185   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10186 
10187   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10188   ins_encode %{
10189     __ movl($dst$$Register, $src$$Address);
10190   %}
10191   ins_pipe(ialu_reg_mem);
10192 %}
10193 
10194 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10195 %{
        // Implements (long src) & mask, where mask is an immL_32bits operand, as
        // a 32-bit register move (movl), per the "zero-extend long" format note.
10196   match(Set dst (AndL src mask));
10197 
10198   format %{ "movl    $dst, $src\t# zero-extend long" %}
10199   ins_encode %{
10200     __ movl($dst$$Register, $src$$Register);
10201   %}
10202   ins_pipe(ialu_reg_reg);
10203 %}
10204 
10205 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10206 %{
        // long -> int conversion: a 32-bit register move (movl) copies the low
        // half of $src into $dst.
10207   match(Set dst (ConvL2I src));
10208 
10209   format %{ "movl    $dst, $src\t# l2i" %}
10210   ins_encode %{
10211     __ movl($dst$$Register, $src$$Register);
10212   %}
10213   ins_pipe(ialu_reg_reg);
10214 %}
10215 
10216 
10217 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I, stack slot -> integer register: a plain 32-bit load (movl)
        // from the float's stack slot at rsp + $src$$disp, with no conversion.
10218   match(Set dst (MoveF2I src));
10219   effect(DEF dst, USE src);
10220 
10221   ins_cost(125);
10222   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10223   ins_encode %{
10224     __ movl($dst$$Register, Address(rsp, $src$$disp));
10225   %}
10226   ins_pipe(ialu_reg_mem);
10227 %}
10228 
10229 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F, stack slot -> XMM register: a scalar float load (movflt) from
        // the int's stack slot at rsp + $src$$disp, with no conversion.
10230   match(Set dst (MoveI2F src));
10231   effect(DEF dst, USE src);
10232 
10233   ins_cost(125);
10234   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10235   ins_encode %{
10236     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
10237   %}
10238   ins_pipe(pipe_slow);
10239 %}
10240 
10241 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L, stack slot -> long register: a plain 64-bit load (movq) from
        // the double's stack slot at rsp + $src$$disp, with no conversion.
10242   match(Set dst (MoveD2L src));
10243   effect(DEF dst, USE src);
10244 
10245   ins_cost(125);
10246   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10247   ins_encode %{
10248     __ movq($dst$$Register, Address(rsp, $src$$disp));
10249   %}
10250   ins_pipe(ialu_reg_mem);
10251 %}
10252 
10253 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D, stack slot -> XMM register, chosen when
        // UseXmmLoadAndClearUpper is off.  The encoding is the same movdbl call
        // as in MoveL2D_stack_reg; only the predicate and the printed mnemonic
        // (movlpd) differ.
        // NOTE(review): presumably movdbl itself picks movlpd vs movsd from the
        // same flag -- confirm in the macro assembler.
10254   predicate(!UseXmmLoadAndClearUpper);
10255   match(Set dst (MoveL2D src));
10256   effect(DEF dst, USE src);
10257 
10258   ins_cost(125);
10259   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10260   ins_encode %{
10261     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10262   %}
10263   ins_pipe(pipe_slow);
10264 %}
10265 
10266 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D, stack slot -> XMM register, chosen when
        // UseXmmLoadAndClearUpper is on.  Loads the 64-bit slot at
        // rsp + $src$$disp with movdbl (printed as movsd), with no conversion.
10267   predicate(UseXmmLoadAndClearUpper);
10268   match(Set dst (MoveL2D src));
10269   effect(DEF dst, USE src);
10270 
10271   ins_cost(125);
10272   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10273   ins_encode %{
10274     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10275   %}
10276   ins_pipe(pipe_slow);
10277 %}
10278 
10279 
10280 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I, XMM register -> stack slot: stores the float (movflt) into
        // the int stack slot at rsp + $dst$$disp, with no conversion.
10281   match(Set dst (MoveF2I src));
10282   effect(DEF dst, USE src);
10283 
10284   ins_cost(95); // XXX
10285   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10286   ins_encode %{
10287     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
10288   %}
10289   ins_pipe(pipe_slow);
10290 %}
10291 
10292 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F, integer register -> stack slot: a plain 32-bit store (movl)
        // into the float stack slot at rsp + $dst$$disp, with no conversion.
10293   match(Set dst (MoveI2F src));
10294   effect(DEF dst, USE src);
10295 
10296   ins_cost(100);
10297   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10298   ins_encode %{
10299     __ movl(Address(rsp, $dst$$disp), $src$$Register);
10300   %}
10301   ins_pipe( ialu_mem_reg );
10302 %}
10303 
10304 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L, XMM register -> stack slot: stores the double (movdbl) into
        // the long stack slot at rsp + $dst$$disp, with no conversion.
        // The format tag names this instruct (MoveD2L_reg_stack) so listings are
        // not confused with the separate MoveL2D_reg_stack rule.
10305   match(Set dst (MoveD2L src));
10306   effect(DEF dst, USE src);
10307 
10308   ins_cost(95); // XXX
10309   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10310   ins_encode %{
10311     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
10312   %}
10313   ins_pipe(pipe_slow);
10314 %}
10315 
10316 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D, long register -> stack slot: a plain 64-bit store (movq) into
        // the double stack slot at rsp + $dst$$disp, with no conversion.
10317   match(Set dst (MoveL2D src));
10318   effect(DEF dst, USE src);
10319 
10320   ins_cost(100);
10321   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10322   ins_encode %{
10323     __ movq(Address(rsp, $dst$$disp), $src$$Register);
10324   %}
10325   ins_pipe(ialu_mem_reg);
10326 %}
10327 
10328 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I, XMM register -> integer register, done directly with movdl
        // (no stack round trip).  Costed at 85, below the stack-based forms.
10329   match(Set dst (MoveF2I src));
10330   effect(DEF dst, USE src);
10331   ins_cost(85);
10332   format %{ "movd    $dst,$src\t# MoveF2I" %}
10333   ins_encode %{
10334     __ movdl($dst$$Register, $src$$XMMRegister);
10335   %}
10336   ins_pipe( pipe_slow );
10337 %}
10338 
10339 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L, XMM register -> long register, done directly with movdq
        // (no stack round trip).  Costed at 85, below the stack-based forms.
10340   match(Set dst (MoveD2L src));
10341   effect(DEF dst, USE src);
10342   ins_cost(85);
10343   format %{ "movd    $dst,$src\t# MoveD2L" %}
10344   ins_encode %{
10345     __ movdq($dst$$Register, $src$$XMMRegister);
10346   %}
10347   ins_pipe( pipe_slow );
10348 %}
10349 
10350 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F, integer register -> XMM register, done directly with movdl
        // (no stack round trip).
10351   match(Set dst (MoveI2F src));
10352   effect(DEF dst, USE src);
10353   ins_cost(100);
10354   format %{ "movd    $dst,$src\t# MoveI2F" %}
10355   ins_encode %{
10356     __ movdl($dst$$XMMRegister, $src$$Register);
10357   %}
10358   ins_pipe( pipe_slow );
10359 %}
10360 
10361 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D, long register -> XMM register, done directly with movdq
        // (no stack round trip).
10362   match(Set dst (MoveL2D src));
10363   effect(DEF dst, USE src);
10364   ins_cost(100);
10365   format %{ "movd    $dst,$src\t# MoveL2D" %}
10366   ins_encode %{
10367      __ movdq($dst$$XMMRegister, $src$$Register);
10368   %}
10369   ins_pipe( pipe_slow );
10370 %}
10371 
10372 
10373 // =======================================================================
10374 // fast clearing of an array
10375 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10376                   rFlagsReg cr)
10377 %{
        // ClearArray variant selected when UseFastStosb is off.  Operands are
        // pinned to rcx (count), rdi (base) and rax (zero value); count and base
        // are consumed and destroyed (USE_KILL), rax and the flags are killed.
        // The code itself is produced by the macro assembler's clear_mem().
10378   predicate(!UseFastStosb);
10379   match(Set dummy (ClearArray cnt base));
10380   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10381 
10382   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10383             "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
10384   ins_encode %{ 
10385     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10386   %}
10387   ins_pipe(pipe_slow);
10388 %}
10389 
10390 instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10391                         rFlagsReg cr)
10392 %{
        // ClearArray variant selected when UseFastStosb is on.  Same operands,
        // effects and clear_mem() call as rep_stos; only the predicate and the
        // printed listing (byte-granular "rep stosb") differ.
        // NOTE(review): presumably clear_mem() itself checks UseFastStosb to pick
        // the emitted sequence -- confirm in the macro assembler.
10393   predicate(UseFastStosb);
10394   match(Set dummy (ClearArray cnt base));
10395   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10396   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10397             "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10398             "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
10399   ins_encode %{ 
10400     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10401   %}
10402   ins_pipe( pipe_slow );
10403 %}
10404 
10405 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10406                         rax_RegI result, regD tmp1, rFlagsReg cr)
10407 %{
        // StrComp intrinsic: delegates to the macro assembler's string_compare().
        // Inputs are pinned to rdi/rcx (str1/cnt1) and rsi/rdx (str2/cnt2) and
        // are all destroyed (USE_KILL); the result is produced in rax.  One XMM
        // temporary is reserved and the flags are killed.
10408   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10409   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10410 
10411   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10412   ins_encode %{
10413     __ string_compare($str1$$Register, $str2$$Register,
10414                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10415                       $tmp1$$XMMRegister);
10416   %}
10417   ins_pipe( pipe_slow );
10418 %}
10419 
10420 // fast search of substring with known size.
10421 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
10422                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
10423 %{
        // StrIndexOf intrinsic for a substring whose length is a compile-time
        // constant (int_cnt2); only available with UseSSE42Intrinsics.
        // str1/str2/cnt1 are destroyed (USE_KILL); rax (cnt2) and rcx (tmp) are
        // scratch registers that are killed; the result is produced in rbx.
10424   predicate(UseSSE42Intrinsics);
10425   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
10426   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
10427 
10428   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
10429   ins_encode %{
          // The helper is chosen at code-emission time from the constant length.
10430     int icnt2 = (int)$int_cnt2$$constant;
10431     if (icnt2 >= 8) {
10432       // IndexOf for constant substrings with size >= 8 elements
10433       // which don't need to be loaded through stack.
10434       __ string_indexofC8($str1$$Register, $str2$$Register,
10435                           $cnt1$$Register, $cnt2$$Register,
10436                           icnt2, $result$$Register,
10437                           $vec$$XMMRegister, $tmp$$Register);
10438     } else {
10439       // Small strings are loaded through stack if they cross page boundary.
10440       __ string_indexof($str1$$Register, $str2$$Register,
10441                         $cnt1$$Register, $cnt2$$Register,
10442                         icnt2, $result$$Register,
10443                         $vec$$XMMRegister, $tmp$$Register);
10444     }
10445   %}
10446   ins_pipe( pipe_slow );
10447 %}
10448 
10449 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
10450                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
10451 %{
        // StrIndexOf intrinsic with a run-time substring length in rax (cnt2);
        // only available with UseSSE42Intrinsics.  All four inputs are destroyed
        // (USE_KILL), rcx and the flags are killed, the result lands in rbx.
10452   predicate(UseSSE42Intrinsics);
10453   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
10454   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
10455 
10456   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
10457   ins_encode %{
          // NOTE(review): the literal -1 sits in the slot where
          // string_indexof_con passes its constant length; presumably it tells
          // the helper that the length is not a compile-time constant.
10458     __ string_indexof($str1$$Register, $str2$$Register,
10459                       $cnt1$$Register, $cnt2$$Register,
10460                       (-1), $result$$Register,
10461                       $vec$$XMMRegister, $tmp$$Register);
10462   %}
10463   ins_pipe( pipe_slow );
10464 %}
10465 
10466 // fast string equals
10467 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
10468                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
10469 %{
        // StrEquals intrinsic: calls char_arrays_equals() with its first argument
        // set to false (array_equals passes true), supplying the element count
        // in rcx.  str1, str2 and cnt are destroyed (USE_KILL), rbx and the flags
        // are killed, two XMM temporaries are reserved; the result is in rax.
10470   match(Set result (StrEquals (Binary str1 str2) cnt));
10471   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
10472 
10473   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
10474   ins_encode %{
10475     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
10476                           $cnt$$Register, $result$$Register, $tmp3$$Register,
10477                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10478   %}
10479   ins_pipe( pipe_slow );
10480 %}
10481 
10482 // fast array equals
10483 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
10484                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
10485 %{
        // AryEq intrinsic: calls char_arrays_equals() with its first argument set
        // to true.  No length operand is matched; rcx (tmp3) is passed in the
        // position where string_equals supplies its count, as a scratch register.
        // Both array pointers are destroyed (USE_KILL); rcx, rbx and the flags
        // are killed; the result is produced in rax.
10486   match(Set result (AryEq ary1 ary2));
10487   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
10488   //ins_cost(300);
10489 
10490   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
10491   ins_encode %{
10492     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
10493                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
10494                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10495   %}
10496   ins_pipe( pipe_slow );
10497 %}
10498 
10499 // encode char[] to byte[] in ISO_8859_1
10500 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
10501                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
10502                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray intrinsic: delegates to the macro assembler's
        // encode_iso_array().  src (rsi), dst (rdi) and len (rdx) are destroyed
        // (USE_KILL); rcx and the flags are killed; four XMM temporaries are
        // reserved; the result is produced in rax.
10503   match(Set result (EncodeISOArray src (Binary dst len)));
10504   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
10505 
10506   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
10507   ins_encode %{
10508     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
10509                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
10510                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
10511   %}
10512   ins_pipe( pipe_slow );
10513 %}
10514 
10515 
10516 //----------Control Flow Instructions------------------------------------------
10517 // Signed compare Instructions
10518 
10519 // XXX more variants!!
10520 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10521 %{
        // Signed 32-bit compare of two registers; the outcome is the flags
        // register.  Encoded byte-wise: REX prefix as computed by REX_reg_reg,
        // primary opcode 0x3B, then the register/register ModRM byte.
10522   match(Set cr (CmpI op1 op2));
10523   effect(DEF cr, USE op1, USE op2);
10524 
10525   format %{ "cmpl    $op1, $op2" %}
10526   opcode(0x3B);  /* Opcode 3B /r */
10527   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10528   ins_pipe(ialu_cr_reg_reg);
10529 %}
10530 
10531 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10532 %{
        // Signed 32-bit compare of a register against an int immediate
        // (opcode 0x81 with /7 opcode extension).
        // NOTE(review): OpcSErm/Con8or32 presumably switch to the shorter
        // sign-extended 8-bit immediate form when the constant fits -- confirm
        // against the encoding class definitions.
10533   match(Set cr (CmpI op1 op2));
10534 
10535   format %{ "cmpl    $op1, $op2" %}
10536   opcode(0x81, 0x07); /* Opcode 81 /7 */
10537   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10538   ins_pipe(ialu_cr_reg_imm);
10539 %}
10540 
10541 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10542 %{
        // Signed 32-bit compare of a register against an int in memory, with the
        // LoadI folded into the cmpl (opcode 0x3B, register/memory ModRM form).
10543   match(Set cr (CmpI op1 (LoadI op2)));
10544 
10545   ins_cost(500); // XXX
10546   format %{ "cmpl    $op1, $op2" %}
10547   opcode(0x3B); /* Opcode 3B /r */
10548   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10549   ins_pipe(ialu_cr_reg_mem);
10550 %}
10551 
10552 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10553 %{
        // Compare of an int register against the constant zero, implemented as
        // "testl src, src" (opcode 0x85) rather than a cmpl with an immediate.
10554   match(Set cr (CmpI src zero));
10555 
10556   format %{ "testl   $src, $src" %}
10557   opcode(0x85);
10558   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10559   ins_pipe(ialu_cr_reg_imm);
10560 %}
10561 
10562 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10563 %{
        // Folds "(src & con) compared with zero" into a single testl with a
        // 32-bit immediate (opcode 0xF7 with /0 extension); the AND result is
        // never written to a register.
10564   match(Set cr (CmpI (AndI src con) zero));
10565 
10566   format %{ "testl   $src, $con" %}
10567   opcode(0xF7, 0x00);
10568   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10569   ins_pipe(ialu_cr_reg_imm);
10570 %}
10571 
10572 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10573 %{
        // Folds "(src & LoadI(mem)) compared with zero" into a single testl with
        // a memory operand (opcode 0x85, register/memory ModRM form).
10574   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10575 
10576   format %{ "testl   $src, $mem" %}
10577   opcode(0x85);
10578   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10579   ins_pipe(ialu_cr_reg_mem);
10580 %}
10581 
10582 // Unsigned compare Instructions; really, same as signed except they
10583 // produce an rFlagsRegU instead of rFlagsReg.
10584 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10585 %{
        // Unsigned 32-bit compare of two registers.  The emitted bytes are the
        // same cmpl (opcode 0x3B) as the signed rule; the difference is that the
        // result is typed as rFlagsRegU.
10586   match(Set cr (CmpU op1 op2));
10587 
10588   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10589   opcode(0x3B); /* Opcode 3B /r */
10590   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10591   ins_pipe(ialu_cr_reg_reg);
10592 %}
10593 
10594 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10595 %{
        // Unsigned 32-bit compare of a register against an int immediate
        // (opcode 0x81 /7, same encoding classes as compI_rReg_imm); the result
        // is typed as rFlagsRegU.
10596   match(Set cr (CmpU op1 op2));
10597 
10598   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10599   opcode(0x81,0x07); /* Opcode 81 /7 */
10600   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10601   ins_pipe(ialu_cr_reg_imm);
10602 %}
10603 
10604 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10605 %{
        // Unsigned 32-bit compare of a register against an int in memory, with
        // the LoadI folded into the cmpl (opcode 0x3B); result typed rFlagsRegU.
10606   match(Set cr (CmpU op1 (LoadI op2)));
10607 
10608   ins_cost(500); // XXX
10609   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10610   opcode(0x3B); /* Opcode 3B /r */
10611   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10612   ins_pipe(ialu_cr_reg_mem);
10613 %}
10614 
10615 // // // Cisc-spilled version of cmpU_rReg
10616 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10617 // //%{
10618 // //  match(Set cr (CmpU (LoadI op1) op2));
10619 // //
10620 // //  format %{ "CMPu   $op1,$op2" %}
10621 // //  ins_cost(500);
10622 // //  opcode(0x39);  /* Opcode 39 /r */
10623 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10624 // //%}
10625 
10626 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10627 %{
        // Unsigned compare of an int register against the constant zero,
        // implemented as "testl src, src" (opcode 0x85); result typed rFlagsRegU.
10628   match(Set cr (CmpU src zero));
10629 
10630   format %{ "testl  $src, $src\t# unsigned" %}
10631   opcode(0x85);
10632   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10633   ins_pipe(ialu_cr_reg_imm);
10634 %}
10635 
10636 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10637 %{
        // Pointer compare of two registers: the same 0x3B compare opcode as the
        // int rules, but with the wide REX prefix (REX_reg_reg_wide) so the
        // comparison is 64-bit (cmpq).  The result is typed rFlagsRegU.
10638   match(Set cr (CmpP op1 op2));
10639 
10640   format %{ "cmpq    $op1, $op2\t# ptr" %}
10641   opcode(0x3B); /* Opcode 3B /r */
10642   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10643   ins_pipe(ialu_cr_reg_reg);
10644 %}
10645 
10646 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10647 %{
        // Pointer compare of a register against a pointer in memory, with the
        // LoadP folded into a 64-bit cmpq (opcode 0x3B, wide REX prefix).
        // Carries an explicit cost of 500 and no predicate.
10648   match(Set cr (CmpP op1 (LoadP op2)));
10649 
10650   ins_cost(500); // XXX
10651   format %{ "cmpq    $op1, $op2\t# ptr" %}
10652   opcode(0x3B); /* Opcode 3B /r */
10653   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10654   ins_pipe(ialu_cr_reg_mem);
10655 %}
10656 
10657 // // // Cisc-spilled version of cmpP_rReg
10658 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10659 // //%{
10660 // //  match(Set cr (CmpP (LoadP op1) op2));
10661 // //
10662 // //  format %{ "CMPu   $op1,$op2" %}
10663 // //  ins_cost(500);
10664 // //  opcode(0x39);  /* Opcode 39 /r */
10665 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10666 // //%}
10667 
10668 // XXX this is generalized by compP_rReg_mem???
10669 // Compare raw pointer (used in out-of-heap check).
10670 // Only works because non-oop pointers must be raw pointers
10671 // and raw pointers have no anti-dependencies.
10672 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10673 %{
        // Same match pattern and encoding as compP_rReg_mem, but without the
        // explicit ins_cost and guarded by a predicate: the address input of the
        // LoadP (n->in(2)->in(2)) must have a bottom type whose relocation kind
        // is relocInfo::none, i.e. the raw-pointer case described above.
10674   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
10675   match(Set cr (CmpP op1 (LoadP op2)));
10676 
10677   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10678   opcode(0x3B); /* Opcode 3B /r */
10679   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10680   ins_pipe(ialu_cr_reg_mem);
10681 %}
10682 
10683 // This will generate a signed flags result. This should be OK since
10684 // any compare to a zero should be eq/neq.
10685 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10686 %{
        // Null check of a pointer register: "testq src, src" (opcode 0x85 with
        // the wide REX prefix).  Note the result is typed as the signed
        // rFlagsReg, unlike the other pointer compares.
10687   match(Set cr (CmpP src zero));
10688 
10689   format %{ "testq   $src, $src\t# ptr" %}
10690   opcode(0x85);
10691   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10692   ins_pipe(ialu_cr_reg_imm);
10693 %}
10694 
10695 // This will generate a signed flags result. This should be OK since
10696 // any compare to a zero should be eq/neq.
10697 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
10698 %{
        // Null check of a pointer held in memory, with the LoadP folded into a
        // testq against an all-ones mask (opcode 0xF7 /0, wide REX prefix).
        // Used when compressed oops are off or the narrow-oop base is non-null;
        // testP_mem_reg0 handles the remaining configuration.
        // NOTE(review): the emitted immediate is the 32-bit 0xFFFFFFFF; the
        // format string shows it as a full 64-bit mask, presumably because the
        // CPU sign-extends imm32 for the 64-bit form (Intel SDM) -- confirm.
10699   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
10700   match(Set cr (CmpP (LoadP op) zero));
10701 
10702   ins_cost(500); // XXX
10703   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
10704   opcode(0xF7); /* Opcode F7 /0 */
10705   ins_encode(REX_mem_wide(op),
10706              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
10707   ins_pipe(ialu_cr_reg_imm);
10708 %}
10709 
10710 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
10711 %{  // Null test of a pointer in memory, done as a compare against R12.
10712   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); // both narrow bases must be NULL
10713   match(Set cr (CmpP (LoadP mem) zero));
10714 
10715   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
10716   ins_encode %{
10717     __ cmpq(r12, $mem$$Address); // relies on r12 holding zero under the predicate above (per the format note) - TODO confirm
10718   %}
10719   ins_pipe(ialu_cr_reg_mem);
10720 %}
10721 
10722 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
10723 %{  // Compare two compressed pointers held in registers.
10724   match(Set cr (CmpN op1 op2));
10725 
10726   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10727   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
10728   ins_pipe(ialu_cr_reg_reg);
10729 %}
10730 
10731 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
10732 %{  // Compare a compressed pointer register with one loaded from memory.
10733   match(Set cr (CmpN src (LoadN mem)));
10734 
10735   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
10736   ins_encode %{
10737     __ cmpl($src$$Register, $mem$$Address); // the load is folded into the compare's memory operand
10738   %}
10739   ins_pipe(ialu_cr_reg_mem);
10740 %}
10741 
10742 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
10743   match(Set cr (CmpN op1 op2)); // compressed pointer register vs. an immN constant
10744 
10745   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10746   ins_encode %{
10747     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); // the constant is handed over cast to jobject
10748   %}
10749   ins_pipe(ialu_cr_reg_imm);
10750 %}
10751 
10752 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
10753 %{  // Compare a compressed pointer in memory with an immN constant.
10754   match(Set cr (CmpN src (LoadN mem)));
10755 
10756   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
10757   ins_encode %{
10758     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); // NOTE(review): operands are (mem, src) here but (src, LoadN mem) in the match rule; harmless only if consumers test eq/ne - confirm
10759   %}
10760   ins_pipe(ialu_cr_reg_mem);
10761 %}
10762 
10763 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
10764   match(Set cr (CmpN op1 op2)); // compressed pointer register vs. an immNKlass constant
10765 
10766   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
10767   ins_encode %{
10768     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant); // the constant is handed over cast to Klass*
10769   %}
10770   ins_pipe(ialu_cr_reg_imm);
10771 %}
10772 
10773 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
10774 %{  // Compare a compressed klass pointer in memory with an immNKlass constant.
10775   match(Set cr (CmpN src (LoadNKlass mem)));
10776 
10777   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
10778   ins_encode %{
10779     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); // NOTE(review): operands are (mem, src) here but (src, LoadNKlass mem) in the match rule - confirm only eq/ne is consumed
10780   %}
10781   ins_pipe(ialu_cr_reg_mem);
10782 %}
10783 
10784 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
10785   match(Set cr (CmpN src zero)); // null test of a compressed pointer register
10786 
10787   format %{ "testl   $src, $src\t# compressed ptr" %}
10788   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
10789   ins_pipe(ialu_cr_reg_imm);
10790 %}
10791 
10792 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
10793 %{  // Null test of a compressed pointer loaded from memory.
10794   predicate(Universe::narrow_oop_base() != NULL);
10795   match(Set cr (CmpN (LoadN mem) zero));
10796 
10797   ins_cost(500); // XXX
10798   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
10799   ins_encode %{
10800     __ cmpl($mem$$Address, (int)0); // ZF must reflect "loaded value == 0"; comparing against all-ones would only flag 0xFFFFFFFF
10801   %}
10802   ins_pipe(ialu_cr_reg_mem);
10803 %}
10804 
10805 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
10806 %{  // Null test of a compressed pointer in memory, done as a compare against R12.
10807   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL)); // both narrow bases must be NULL
10808   match(Set cr (CmpN (LoadN mem) zero));
10809 
10810   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
10811   ins_encode %{
10812     __ cmpl(r12, $mem$$Address); // relies on r12 holding zero under the predicate above (per the format note) - TODO confirm
10813   %}
10814   ins_pipe(ialu_cr_reg_mem);
10815 %}
10816 
10817 // Yanked all unsigned pointer compare operations.
10818 // Pointer compares are done with CmpP which is already unsigned.
10819 
10820 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10821 %{  // Compare two long registers.
10822   match(Set cr (CmpL op1 op2));
10823 
10824   format %{ "cmpq    $op1, $op2" %}
10825   opcode(0x3B);  /* Opcode 3B /r */
10826   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10827   ins_pipe(ialu_cr_reg_reg);
10828 %}
10829 
10830 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10831 %{  // Compare a long register with an immL32 constant.
10832   match(Set cr (CmpL op1 op2));
10833 
10834   format %{ "cmpq    $op1, $op2" %}
10835   opcode(0x81, 0x07); /* Opcode 81 /7 */
10836   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2)); // presumably chooses an 8- or 32-bit immediate form by constant size - confirm against the enc_class
10837   ins_pipe(ialu_cr_reg_imm);
10838 %}
10839 
10840 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10841 %{  // Compare a long register with a long loaded from memory.
10842   match(Set cr (CmpL op1 (LoadL op2)));
10843 
10844   format %{ "cmpq    $op1, $op2" %}
10845   opcode(0x3B); /* Opcode 3B /r */
10846   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10847   ins_pipe(ialu_cr_reg_mem);
10848 %}
10849 
10850 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10851 %{  // Compare a long register with zero by testing it against itself.
10852   match(Set cr (CmpL src zero));
10853 
10854   format %{ "testq   $src, $src" %}
10855   opcode(0x85);
10856   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src)); // src is used as both operands, as in the format
10857   ins_pipe(ialu_cr_reg_imm);
10858 %}
10859 
10860 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
10861 %{  // Folds (src & con) compared with zero into one test instruction.
10862   match(Set cr (CmpL (AndL src con) zero));
10863 
10864   format %{ "testq   $src, $con\t# long" %}
10865   opcode(0xF7, 0x00); // two-part opcode consumed by OpcP / reg_opc below (presumably the F7 /0 form - confirm)
10866   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
10867   ins_pipe(ialu_cr_reg_imm);
10868 %}
10869 
10870 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
10871 %{  // Folds (src & loaded long) compared with zero into one test instruction.
10872   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
10873 
10874   format %{ "testq   $src, $mem" %}
10875   opcode(0x85);
10876   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
10877   ins_pipe(ialu_cr_reg_mem);
10878 %}
10879 
10880 // Manifest a CmpL result in an integer register.  Very painful.
10881 // This is the test to avoid.  The format below shows the intended sequence.
10882 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
10883 %{
10884   match(Set dst (CmpL3 src1 src2));
10885   effect(KILL flags); // flags are declared killed by this rule
10886 
10887   ins_cost(275); // XXX
10888   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
10889             "movl    $dst, -1\n\t"
10890             "jl,s    done\n\t"
10891             "setne   $dst\n\t"
10892             "movzbl  $dst, $dst\n\t"
10893     "done:" %}
10894   ins_encode(cmpl3_flag(src1, src2, dst)); // per the format: dst = -1 if less, otherwise 0 if equal or 1 if not (enc_class defined elsewhere)
10895   ins_pipe(pipe_slow);
10896 %}
10897 
10898 //----------Max and Min--------------------------------------------------------
10899 // Min Instructions
10900 
10901 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
10902 %{  // Helper without a match rule; instantiated by the minI_rReg expand.
10903   effect(USE_DEF dst, USE src, USE cr); // dst is read and written; src and the flags are only read
10904 
10905   format %{ "cmovlgt $dst, $src\t# min" %}
10906   opcode(0x0F, 0x4F); // two opcode bytes consumed by OpcP and OpcS below (presumably CMOVG - confirm)
10907   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10908   ins_pipe(pipe_cmov_reg);
10909 %}
10910 
10911 
10912 instruct minI_rReg(rRegI dst, rRegI src)
10913 %{  // Integer minimum into dst, expanded into a compare followed by a conditional move.
10914   match(Set dst (MinI dst src));
10915 
10916   ins_cost(200);
10917   expand %{
10918     rFlagsReg cr;
10919     compI_rReg(cr, dst, src);
10920     cmovI_reg_g(dst, src, cr);
10921   %}
10922 %}
10923 
10924 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
10925 %{  // Helper without a match rule; instantiated by the maxI_rReg expand.
10926   effect(USE_DEF dst, USE src, USE cr); // dst is read and written; src and the flags are only read
10927 
10928   format %{ "cmovllt $dst, $src\t# max" %}
10929   opcode(0x0F, 0x4C); // two opcode bytes consumed by OpcP and OpcS below (presumably CMOVL - confirm)
10930   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10931   ins_pipe(pipe_cmov_reg);
10932 %}
10933 
10934 
10935 instruct maxI_rReg(rRegI dst, rRegI src)
10936 %{  // Integer maximum into dst, expanded into a compare followed by a conditional move.
10937   match(Set dst (MaxI dst src));
10938 
10939   ins_cost(200);
10940   expand %{
10941     rFlagsReg cr;
10942     compI_rReg(cr, dst, src);
10943     cmovI_reg_l(dst, src, cr);
10944   %}
10945 %}
10946 
10947 // ============================================================================
10948 // Branch Instructions
10949 
10950 // Jump Direct (unconditional, long form) - Label defines a relative address from JMP+1
10951 instruct jmpDir(label labl)
10952 %{
10953   match(Goto);
10954   effect(USE labl);
10955 
10956   ins_cost(300);
10957   format %{ "jmp     $labl" %}
10958   size(5); // declared size of the long jump emitted below
10959   ins_encode %{
10960     Label* L = $labl$$label;
10961     __ jmp(*L, false); // Always long jump
10962   %}
10963   ins_pipe(pipe_jmp);
10964 %}
10965 
10966 // Jump Direct Conditional (long form) - Label defines a relative address from Jcc+1
10967 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
10968 %{
10969   match(If cop cr);
10970   effect(USE labl);
10971 
10972   ins_cost(300);
10973   format %{ "j$cop     $labl" %}
10974   size(6); // declared size of the long conditional jump emitted below
10975   ins_encode %{
10976     Label* L = $labl$$label;
10977     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10978   %}
10979   ins_pipe(pipe_jcc);
10980 %}
10981 
10982 // Jump Direct Conditional for CountedLoopEnd (long form) - Label defines a relative address from Jcc+1
10983 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
10984 %{
10985   match(CountedLoopEnd cop cr);
10986   effect(USE labl);
10987 
10988   ins_cost(300);
10989   format %{ "j$cop     $labl\t# loop end" %}
10990   size(6); // declared size of the long conditional jump emitted below
10991   ins_encode %{
10992     Label* L = $labl$$label;
10993     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10994   %}
10995   ins_pipe(pipe_jcc);
10996 %}
10997 
10998 // Jump Direct Conditional for CountedLoopEnd, cmpOpU/rFlagsRegU operands (long form) - Label defines a relative address from Jcc+1
10999 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11000   match(CountedLoopEnd cop cmp);
11001   effect(USE labl);
11002 
11003   ins_cost(300);
11004   format %{ "j$cop,u   $labl\t# loop end" %}
11005   size(6); // declared size of the long conditional jump emitted below
11006   ins_encode %{
11007     Label* L = $labl$$label;
11008     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11009   %}
11010   ins_pipe(pipe_jcc);
11011 %}
11012 
11013 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11014   match(CountedLoopEnd cop cmp); // CountedLoopEnd variant for cmpOpUCF/rFlagsRegUCF operands (long form)
11015   effect(USE labl);
11016 
11017   ins_cost(200); // lower than the 300 used by jmpLoopEnd/jmpLoopEndU
11018   format %{ "j$cop,u   $labl\t# loop end" %}
11019   size(6); // declared size of the long conditional jump emitted below
11020   ins_encode %{
11021     Label* L = $labl$$label;
11022     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11023   %}
11024   ins_pipe(pipe_jcc);
11025 %}
11026 
11027 // Jump Direct Conditional - using unsigned comparison (long form)
11028 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11029   match(If cop cmp);
11030   effect(USE labl);
11031 
11032   ins_cost(300);
11033   format %{ "j$cop,u  $labl" %}
11034   size(6); // declared size of the long conditional jump emitted below
11035   ins_encode %{
11036     Label* L = $labl$$label;
11037     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11038   %}
11039   ins_pipe(pipe_jcc);
11040 %}
11041 
11042 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11043   match(If cop cmp); // If variant for cmpOpUCF/rFlagsRegUCF operands (long form)
11044   effect(USE labl);
11045 
11046   ins_cost(200); // lower than the 300 used by jmpCon/jmpConU
11047   format %{ "j$cop,u  $labl" %}
11048   size(6); // declared size of the long conditional jump emitted below
11049   ins_encode %{
11050     Label* L = $labl$$label;
11051     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11052   %}
11053   ins_pipe(pipe_jcc);
11054 %}
11055 
11056 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11057   match(If cop cmp); // conditional branch that also consults the parity flag (see the encoding)
11058   effect(USE labl);
11059 
11060   ins_cost(200);
11061   format %{ $$template
11062     if ($cop$$cmpcode == Assembler::notEqual) {
11063       $$emit$$"jp,u   $labl\n\t"
11064       $$emit$$"j$cop,u   $labl"
11065     } else {
11066       $$emit$$"jp,u   done\n\t"
11067       $$emit$$"j$cop,u   $labl\n\t"
11068       $$emit$$"done:"
11069     }
11070   %}
11071   ins_encode %{
11072     Label* l = $labl$$label;
11073     if ($cop$$cmpcode == Assembler::notEqual) {
11074       __ jcc(Assembler::parity, *l, false); // parity set: take the branch
11075       __ jcc(Assembler::notEqual, *l, false);
11076     } else if ($cop$$cmpcode == Assembler::equal) {
11077       Label done;
11078       __ jccb(Assembler::parity, done); // parity set: skip the branch
11079       __ jcc(Assembler::equal, *l, false);
11080       __ bind(done);
11081     } else {
11082        ShouldNotReachHere(); // only equal and notEqual are handled
11083     }
11084   %}
11085   ins_pipe(pipe_jcc);
11086 %}
11087 
11088 // ============================================================================
11089 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11090 // superklass array for an instance of the superklass.  Set a hidden
11091 // internal cache on a hit (cache is checked with exposed code in
11092 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11093 // encoding ALSO sets flags.
11094 
11095 instruct partialSubtypeCheck(rdi_RegP result,
11096                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11097                              rFlagsReg cr)
11098 %{  // Value-producing form of the secondary-supers scan: result is left in rdi.
11099   match(Set result (PartialSubtypeCheck sub super));
11100   effect(KILL rcx, KILL cr); // rcx is the scan counter in the sequence below; flags are overwritten
11101 
11102   ins_cost(1100);  // slightly larger than the next version
11103   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11104             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11105             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11106             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11107             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11108             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11109             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11110     "miss:\t" %}
11111 
11112   opcode(0x1); // Force a XOR of RDI
11113   ins_encode(enc_PartialSubtypeCheck());
11114   ins_pipe(pipe_slow);
11115 %}
11116 
11117 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11118                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11119                                      immP0 zero,
11120                                      rdi_RegP result)
11121 %{  // Flags-only form: the check result is consumed directly by a compare with zero.
11122   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11123   effect(KILL rcx, KILL result); // rdi is only scratch here, so it is declared killed instead of defined
11124 
11125   ins_cost(1000);
11126   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11127             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11128             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11129             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11130             "jne,s   miss\t\t# Missed: flags nz\n\t"
11131             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11132     "miss:\t" %}
11133 
11134   opcode(0x0); // No need to XOR RDI
11135   ins_encode(enc_PartialSubtypeCheck()); // same enc_class as partialSubtypeCheck; the opcode value above selects whether rdi is cleared
11136   ins_pipe(pipe_slow);
11137 %}
11138 
11139 // ============================================================================
11140 // Branch Instructions -- short offset versions
11141 //
11142 // These instructions are used to replace jumps of a long offset (the default
11143 // match) with jumps of a shorter offset.  These instructions are all tagged
11144 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11145 // match rules in general matching.  Instead, the ADLC generates a conversion
11146 // method in the MachNode which can be used to do in-place replacement of the
11147 // long variant with the shorter variant.  The compiler will determine if a
11148 // branch can be taken by the is_short_branch_offset() predicate in the machine
11149 // specific code section of the file.
11150 
11151 // Jump Direct (short-offset replacement for jmpDir) - Label defines a relative address from JMP+1
11152 instruct jmpDir_short(label labl) %{
11153   match(Goto);
11154   effect(USE labl);
11155 
11156   ins_cost(300);
11157   format %{ "jmp,s   $labl" %}
11158   size(2); // declared size of the short jump emitted below
11159   ins_encode %{
11160     Label* L = $labl$$label;
11161     __ jmpb(*L);
11162   %}
11163   ins_pipe(pipe_jmp);
11164   ins_short_branch(1); // tags this rule as a short-branch variant
11165 %}
11166 
11167 // Jump Direct Conditional (short-offset replacement for jmpCon) - Label defines a relative address from Jcc+1
11168 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
11169   match(If cop cr);
11170   effect(USE labl);
11171 
11172   ins_cost(300);
11173   format %{ "j$cop,s   $labl" %}
11174   size(2); // declared size of the short conditional jump emitted below
11175   ins_encode %{
11176     Label* L = $labl$$label;
11177     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11178   %}
11179   ins_pipe(pipe_jcc);
11180   ins_short_branch(1); // tags this rule as a short-branch variant
11181 %}
11182 
11183 // Jump Direct Conditional for CountedLoopEnd (short-offset replacement for jmpLoopEnd) - Label defines a relative address from Jcc+1
11184 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
11185   match(CountedLoopEnd cop cr);
11186   effect(USE labl);
11187 
11188   ins_cost(300);
11189   format %{ "j$cop,s   $labl\t# loop end" %}
11190   size(2); // declared size of the short conditional jump emitted below
11191   ins_encode %{
11192     Label* L = $labl$$label;
11193     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11194   %}
11195   ins_pipe(pipe_jcc);
11196   ins_short_branch(1); // tags this rule as a short-branch variant
11197 %}
11198 
11199 // Jump Direct Conditional for CountedLoopEnd, cmpOpU operands (short-offset replacement for jmpLoopEndU)
11200 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11201   match(CountedLoopEnd cop cmp);
11202   effect(USE labl);
11203 
11204   ins_cost(300);
11205   format %{ "j$cop,us  $labl\t# loop end" %}
11206   size(2); // declared size of the short conditional jump emitted below
11207   ins_encode %{
11208     Label* L = $labl$$label;
11209     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11210   %}
11211   ins_pipe(pipe_jcc);
11212   ins_short_branch(1); // tags this rule as a short-branch variant
11213 %}
11214 
11215 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11216   match(CountedLoopEnd cop cmp); // short-offset replacement for jmpLoopEndUCF
11217   effect(USE labl);
11218 
11219   ins_cost(300);
11220   format %{ "j$cop,us  $labl\t# loop end" %}
11221   size(2); // declared size of the short conditional jump emitted below
11222   ins_encode %{
11223     Label* L = $labl$$label;
11224     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11225   %}
11226   ins_pipe(pipe_jcc);
11227   ins_short_branch(1); // tags this rule as a short-branch variant
11228 %}
11229 
11230 // Jump Direct Conditional - using unsigned comparison (short-offset replacement for jmpConU)
11231 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11232   match(If cop cmp);
11233   effect(USE labl);
11234 
11235   ins_cost(300);
11236   format %{ "j$cop,us  $labl" %}
11237   size(2); // declared size of the short conditional jump emitted below
11238   ins_encode %{
11239     Label* L = $labl$$label;
11240     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11241   %}
11242   ins_pipe(pipe_jcc);
11243   ins_short_branch(1); // tags this rule as a short-branch variant
11244 %}
11245 
11246 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11247   match(If cop cmp); // short-offset replacement for jmpConUCF
11248   effect(USE labl);
11249 
11250   ins_cost(300);
11251   format %{ "j$cop,us  $labl" %}
11252   size(2); // declared size of the short conditional jump emitted below
11253   ins_encode %{
11254     Label* L = $labl$$label;
11255     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11256   %}
11257   ins_pipe(pipe_jcc);
11258   ins_short_branch(1); // tags this rule as a short-branch variant
11259 %}
11260 
11261 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11262   match(If cop cmp); // short-offset replacement for jmpConUCF2
11263   effect(USE labl);
11264 
11265   ins_cost(300);
11266   format %{ $$template
11267     if ($cop$$cmpcode == Assembler::notEqual) {
11268       $$emit$$"jp,u,s   $labl\n\t"
11269       $$emit$$"j$cop,u,s   $labl"
11270     } else {
11271       $$emit$$"jp,u,s   done\n\t"
11272       $$emit$$"j$cop,u,s  $labl\n\t"
11273       $$emit$$"done:"
11274     }
11275   %}
11276   size(4); // both encoding paths below emit exactly two jccb instructions
11277   ins_encode %{
11278     Label* l = $labl$$label;
11279     if ($cop$$cmpcode == Assembler::notEqual) {
11280       __ jccb(Assembler::parity, *l); // parity set: take the branch
11281       __ jccb(Assembler::notEqual, *l);
11282     } else if ($cop$$cmpcode == Assembler::equal) {
11283       Label done;
11284       __ jccb(Assembler::parity, done); // parity set: skip the branch
11285       __ jccb(Assembler::equal, *l);
11286       __ bind(done);
11287     } else {
11288        ShouldNotReachHere(); // only equal and notEqual are handled
11289     }
11290   %}
11291   ins_pipe(pipe_jcc);
11292   ins_short_branch(1); // tags this rule as a short-branch variant
11293 %}
11294 
11295 // ============================================================================
11296 // inlined locking and unlocking
11297 
11298 instruct cmpFastLock(rFlagsReg cr,
11299                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11300 %{  // Inlined lock attempt; the outcome is delivered in the flags register cr.
11301   match(Set cr (FastLock object box));
11302   effect(TEMP tmp, TEMP scr, USE_KILL box); // tmp and scr are scratch; box is consumed and clobbered
11303 
11304   ins_cost(300);
11305   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
11306   ins_encode(Fast_Lock(object, box, tmp, scr)); // enc_class defined elsewhere in this file
11307   ins_pipe(pipe_slow);
11308 %}
11309 
11310 instruct cmpFastUnlock(rFlagsReg cr,
11311                        rRegP object, rax_RegP box, rRegP tmp)
11312 %{  // Inlined unlock attempt; the outcome is delivered in the flags register cr.
11313   match(Set cr (FastUnlock object box));
11314   effect(TEMP tmp, USE_KILL box); // tmp is scratch; box is consumed and clobbered
11315 
11316   ins_cost(300);
11317   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
11318   ins_encode(Fast_Unlock(object, box, tmp)); // enc_class defined elsewhere in this file
11319   ins_pipe(pipe_slow);
11320 %}
11321 
11322 
11323 // ============================================================================
11324 // Safepoint Instructions
11325 instruct safePoint_poll(rFlagsReg cr)
11326 %{  // Safepoint poll that addresses the polling page through an AddressLiteral.
11327   predicate(!Assembler::is_polling_page_far()); // the far case is handled by safePoint_poll_far
11328   match(SafePoint);
11329   effect(KILL cr);
11330 
11331   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11332             "# Safepoint: poll for GC" %}
11333   ins_cost(125);
11334   ins_encode %{
11335     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); // polling-page address tagged with a poll_type relocation
11336     __ testl(rax, addr); // the instruction is a test, so flags are overwritten (hence KILL cr)
11337   %}
11338   ins_pipe(ialu_reg_mem);
11339 %}
11340 
11341 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11342 %{  // Safepoint poll through a register that holds the polling-page address.
11343   predicate(Assembler::is_polling_page_far()); // the near case is handled by safePoint_poll
11344   match(SafePoint poll);
11345   effect(KILL cr, USE poll);
11346 
11347   format %{ "testl  rax, [$poll]\t"
11348             "# Safepoint: poll for GC" %}
11349   ins_cost(125);
11350   ins_encode %{
11351     __ relocate(relocInfo::poll_type); // record the poll relocation before emitting the test
11352     __ testl(rax, Address($poll$$Register, 0)); // load-and-test at offset 0 from the poll register
11353   %}
11354   ins_pipe(ialu_reg_mem);
11355 %}
11356 
11357 // ============================================================================
11358 // Procedure Call/Return Instructions
11359 // Call Java Static Instruction (all static calls except method-handle invokes)
11360 // Note: If this code changes, the corresponding ret_addr_offset() and
11361 //       compute_padding() functions will have to be adjusted.
11362 instruct CallStaticJavaDirect(method meth) %{
11363   match(CallStaticJava);
11364   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke()); // method-handle invokes match CallStaticJavaHandle instead
11365   effect(USE meth);
11366 
11367   ins_cost(300);
11368   format %{ "call,static " %}
11369   opcode(0xE8); /* E8 cd */
11370   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog); // enc_classes defined elsewhere in this file
11371   ins_pipe(pipe_slow);
11372   ins_alignment(4);
11373 %}
11374 
11375 // Call Java Static Instruction (method handle version)
11376 // Note: If this code changes, the corresponding ret_addr_offset() and
11377 //       compute_padding() functions will have to be adjusted.
11378 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
11379   match(CallStaticJava);
11380   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke()); // complement of the CallStaticJavaDirect predicate
11381   effect(USE meth);
11382   // RBP is saved by all callees (for interpreter stack correction).
11383   // We use it here for a similar purpose, in {preserve,restore}_SP.
11384 
11385   ins_cost(300);
11386   format %{ "call,static/MethodHandle " %}
11387   opcode(0xE8); /* E8 cd */
11388   ins_encode(clear_avx, preserve_SP,
11389              Java_Static_Call(meth),
11390              restore_SP, // SP handling brackets the call, per the RBP comment above
11391              call_epilog);
11392   ins_pipe(pipe_slow);
11393   ins_alignment(4);
11394 %}
11395 
11396 // Call Java Dynamic Instruction
11397 // Note: If this code changes, the corresponding ret_addr_offset() and
11398 //       compute_padding() functions will have to be adjusted.
11399 instruct CallDynamicJavaDirect(method meth)
11400 %{
11401   match(CallDynamicJava);
11402   effect(USE meth);
11403 
11404   ins_cost(300);
11405   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11406             "call,dynamic " %}
11407   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog); // per the format, the call is preceded by a load of the non-oop word into rax
11408   ins_pipe(pipe_slow);
11409   ins_alignment(4);
11410 %}
11411 
11412 // Call Runtime Instruction (matches the CallRuntime node)
11413 instruct CallRuntimeDirect(method meth)
11414 %{
11415   match(CallRuntime);
11416   effect(USE meth);
11417 
11418   ins_cost(300);
11419   format %{ "call,runtime " %}
11420   ins_encode(clear_avx, Java_To_Runtime(meth)); // enc_classes defined elsewhere in this file
11421   ins_pipe(pipe_slow);
11422 %}
11423 
11424 // Call runtime without safepoint (matches the CallLeaf node)
11425 instruct CallLeafDirect(method meth)
11426 %{
11427   match(CallLeaf);
11428   effect(USE meth);
11429 
11430   ins_cost(300);
11431   format %{ "call_leaf,runtime " %}
11432   ins_encode(clear_avx, Java_To_Runtime(meth)); // same encoding as CallRuntimeDirect
11433   ins_pipe(pipe_slow);
11434 %}
11435 
11436 // Call runtime without safepoint (matches the CallLeafNoFP node)
11437 instruct CallLeafNoFPDirect(method meth)
11438 %{
11439   match(CallLeafNoFP);
11440   effect(USE meth);
11441 
11442   ins_cost(300);
11443   format %{ "call_leaf_nofp,runtime " %}
11444   ins_encode(Java_To_Runtime(meth)); // NOTE(review): unlike CallLeafDirect there is no clear_avx step here - presumably intentional, confirm
11445   ins_pipe(pipe_slow);
11446 %}
11447 
11448 // Return Instruction
11449 // Remove the return address & jump to it.
11450 // Notice: We always emit a nop after a ret to make sure there is room
11451 // for safepoint patching
11452 instruct Ret()
11453 %{
11454   match(Return);
11455 
11456   format %{ "ret" %}
11457   opcode(0xC3);
11458   ins_encode(OpcP); // NOTE(review): only the opcode byte is encoded here; the nop mentioned above is not emitted by this rule - confirm where (or whether) it is added
11459   ins_pipe(pipe_jmp);
11460 %}
11461 
11462 // Tail Call; Jump from runtime stub to Java code.
11463 // Also known as an 'interprocedural jump'.
11464 // Target of jump will eventually return to caller.
11465 // TailJump below removes the return address; this rule leaves it in place.
11466 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11467 %{
11468   match(TailCall jump_target method_oop);
11469 
11470   ins_cost(300);
11471   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11472   opcode(0xFF, 0x4); /* Opcode FF /4 */
11473   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target)); // indirect jump through the jump_target register
11474   ins_pipe(pipe_jmp);
11475 %}
11476 
11477 // Tail Jump; remove the return address (popped into rdx); jump to target.
11478 // TailCall above leaves the return address around.
11479 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11480 %{
11481   match(TailJump jump_target ex_oop);
11482 
11483   ins_cost(300);
11484   format %{ "popq    rdx\t# pop return address\n\t"
11485             "jmp     $jump_target" %}
11486   opcode(0xFF, 0x4); /* Opcode FF /4 */
11487   ins_encode(Opcode(0x5a), // popq rdx
11488              REX_reg(jump_target), OpcP, reg_opc(jump_target)); // then the same indirect jump encoding as TailCalljmpInd
11489   ins_pipe(pipe_jmp);
11490 %}
11491 
11492 // Create exception oop: created by stack-crawling runtime code.
11493 // Created exception is now available to this handler, and is setup
11494 // just prior to jumping to this handler.  No code emitted.
11495 instruct CreateException(rax_RegP ex_oop)
11496 %{
11497   match(Set ex_oop (CreateEx));
11498 
11499   size(0); // zero bytes: the encoding below is empty
11500   // use the following format syntax
11501   format %{ "# exception oop is in rax; no code emitted" %}
11502   ins_encode(); // intentionally empty
11503   ins_pipe(empty);
11504 %}
11505 
11506 // Rethrow exception:
11507 // The exception oop will come in the first argument position.
11508 // Then JUMP (not call) to the rethrow stub code.
11509 instruct RethrowException()
11510 %{
11511   match(Rethrow);
11512 
11513   // use the following format syntax
11514   format %{ "jmp     rethrow_stub" %}
11515   ins_encode(enc_rethrow); // enc_class defined elsewhere in this file
11516   ins_pipe(pipe_jmp);
11517 %}
11518 
11519 
11520 // ============================================================================
11521 // Thread-local base "load": this name is KNOWN by the ADLC and cannot be
11522 // changed.  The ADLC forces a 'TypeRawPtr::BOTTOM' output type
11523 // for this guy.
11524 instruct tlsLoadP(r15_RegP dst) %{
11525   match(Set dst (ThreadLocal));
11526   effect(DEF dst);
11527 
11528   size(0); // zero bytes: the encoding below is empty
11529   format %{ "# TLS is in R15" %}
11530   ins_encode( /*empty encoding*/ );
11531   ins_pipe(ialu_reg_reg);
11532 %}
11533 
11534 
11535 //----------PEEPHOLE RULES-----------------------------------------------------
11536 // These must follow all instruction definitions as they use the names
11537 // defined in the instruction definitions.
11538 //
11539 // peepmatch ( root_instr_name [preceding_instruction]* );
11540 //
11541 // peepconstraint %{
11542 // (instruction_number.operand_name relational_op instruction_number.operand_name
11543 //  [, ...] );
11544 // // instruction numbers are zero-based using left to right order in peepmatch
11545 //
11546 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11547 // // provide an instruction_number.operand_name for each operand that appears
11548 // // in the replacement instruction's match rule
11549 //
11550 // ---------VM FLAGS---------------------------------------------------------
11551 //
11552 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11553 //
11554 // Each peephole rule is given an identifying number starting with zero and
11555 // increasing by one in the order seen by the parser.  An individual peephole
11556 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11557 // on the command-line.
11558 //
11559 // ---------CURRENT LIMITATIONS----------------------------------------------
11560 //
11561 // Only match adjacent instructions in same basic block
11562 // Only equality constraints
11563 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11564 // Only one replacement instruction
11565 //
11566 // ---------EXAMPLE----------------------------------------------------------
11567 //
11568 // // pertinent parts of existing instructions in architecture description
11569 // instruct movI(rRegI dst, rRegI src)
11570 // %{
11571 //   match(Set dst (CopyI src));
11572 // %}
11573 //
11574 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11575 // %{
11576 //   match(Set dst (AddI dst src));
11577 //   effect(KILL cr);
11578 // %}
11579 //
11580 // // Change (inc mov) to lea
11581 // peephole %{
11582 //   // increment preceded by register-register move
11583 //   peepmatch ( incI_rReg movI );
11584 //   // require that the destination register of the increment
11585 //   // match the destination register of the move
11586 //   peepconstraint ( 0.dst == 1.dst );
11587 //   // construct a replacement instruction that sets
11588 //   // the destination to ( move's source register + one )
11589 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11590 // %}
11591 //
11592 
11593 // Implementation no longer uses movX instructions since
11594 // machine-independent system no longer uses CopyX nodes.
11595 //
11596 // peephole
11597 // %{
11598 //   peepmatch (incI_rReg movI);
11599 //   peepconstraint (0.dst == 1.dst);
11600 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11601 // %}
11602 
11603 // peephole
11604 // %{
11605 //   peepmatch (decI_rReg movI);
11606 //   peepconstraint (0.dst == 1.dst);
11607 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11608 // %}
11609 
11610 // peephole
11611 // %{
11612 //   peepmatch (addI_rReg_imm movI);
11613 //   peepconstraint (0.dst == 1.dst);
11614 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11615 // %}
11616 
11617 // peephole
11618 // %{
11619 //   peepmatch (incL_rReg movL);
11620 //   peepconstraint (0.dst == 1.dst);
11621 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11622 // %}
11623 
11624 // peephole
11625 // %{
11626 //   peepmatch (decL_rReg movL);
11627 //   peepconstraint (0.dst == 1.dst);
11628 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11629 // %}
11630 
11631 // peephole
11632 // %{
11633 //   peepmatch (addL_rReg_imm movL);
11634 //   peepconstraint (0.dst == 1.dst);
11635 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11636 // %}
11637 
11638 // peephole
11639 // %{
11640 //   peepmatch (addP_rReg_imm movP);
11641 //   peepconstraint (0.dst == 1.dst);
11642 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11643 // %}
11644 
11645 // // Change load of spilled value to only a spill
11646 // instruct storeI(memory mem, rRegI src)
11647 // %{
11648 //   match(Set mem (StoreI mem src));
11649 // %}
11650 //
11651 // instruct loadI(rRegI dst, memory mem)
11652 // %{
11653 //   match(Set dst (LoadI mem));
11654 // %}
11655 //
11656 
11657 peephole
11658 %{  // Change load of spilled int value to only a spill.
11659   peepmatch (loadI storeI); // instruction 0 = loadI (root), instruction 1 = the preceding storeI
11660   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads back, into the same register, what the store just wrote
11661   peepreplace (storeI(1.mem 1.mem 1.src)); // keep only the store
11662 %}
11663 
11664 peephole
11665 %{  // Change load of spilled long value to only a spill.
11666   peepmatch (loadL storeL); // instruction 0 = loadL (root), instruction 1 = the preceding storeL
11667   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads back, into the same register, what the store just wrote
11668   peepreplace (storeL(1.mem 1.mem 1.src)); // keep only the store
11669 %}
11670 
11671 //----------SMARTSPILL RULES---------------------------------------------------
11672 // These must follow all instruction definitions as they use the names
11673 // defined in the instructions definitions.