1 //
   2 // Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
     // NOTE(review): every RSI/RDI definition below still has SOC in the first
     // (register save type) column; only the C convention column differs per
     // platform, so the last sentence above looks stale -- confirm.
     //
     // Each register gets two definitions: NAME is bound to the register's own
     // VMReg and NAME_H to ->next() of that VMReg.  Both carry the same encoding
     // number (presumably the low and high halves of the 64-bit register --
     // confirm).
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
     // The stack pointer is No-Save in both columns.
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
     // RSI and RDI differ by platform only in the C convention column:
     // SOE when _WIN64 is defined, SOC otherwise.
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
     // R8-R11 are SOC in both columns.
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
     // R12-R15 are SOC for the allocator but SOE in the C convention column.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
     // NOTE(review): no floating point reg_defs follow this heading in this
     // part of the file -- presumably they are defined elsewhere; confirm.
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
     // chunk0 lists all sixteen general registers defined above, each as a
     // NAME/NAME_H pair, starting with R10 and ending with RSP.
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Class for all pointer registers (including RSP)
 170 reg_class any_reg(RAX, RAX_H,
 171                   RDX, RDX_H,
 172                   RBP, RBP_H,
 173                   RDI, RDI_H,
 174                   RSI, RSI_H,
 175                   RCX, RCX_H,
 176                   RBX, RBX_H,
 177                   RSP, RSP_H,
 178                   R8,  R8_H,
 179                   R9,  R9_H,
 180                   R10, R10_H,
 181                   R11, R11_H,
 182                   R12, R12_H,
 183                   R13, R13_H,
 184                   R14, R14_H,
 185                   R15, R15_H);
 186 
 187 // Class for all pointer registers except RSP (R12 and R15 are also not listed)
 188 reg_class ptr_reg(RAX, RAX_H,
 189                   RDX, RDX_H,
 190                   RBP, RBP_H,
 191                   RDI, RDI_H,
 192                   RSI, RSI_H,
 193                   RCX, RCX_H,
 194                   RBX, RBX_H,
 195                   R8,  R8_H,
 196                   R9,  R9_H,
 197                   R10, R10_H,
 198                   R11, R11_H,
 199                   R13, R13_H,
 200                   R14, R14_H);
 201 
 202 // Class for all pointer registers except RAX and RSP (R12 and R15 are also not listed)
 203 reg_class ptr_no_rax_reg(RDX, RDX_H,
 204                          RBP, RBP_H,
 205                          RDI, RDI_H,
 206                          RSI, RSI_H,
 207                          RCX, RCX_H,
 208                          RBX, RBX_H,
 209                          R8,  R8_H,
 210                          R9,  R9_H,
 211                          R10, R10_H,
 212                          R11, R11_H,
 213                          R13, R13_H,
 214                          R14, R14_H);
 215 
     // Class for all pointer registers except RBP and RSP (R12 and R15 are also not listed)
 216 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 217                          RAX, RAX_H,
 218                          RDI, RDI_H,
 219                          RSI, RSI_H,
 220                          RCX, RCX_H,
 221                          RBX, RBX_H,
 222                          R8,  R8_H,
 223                          R9,  R9_H,
 224                          R10, R10_H,
 225                          R11, R11_H,
 226                          R13, R13_H,
 227                          R14, R14_H);
 228 
 229 // Class for all pointer registers except RAX, RBX and RSP (R12 and R15 are also not listed)
 230 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 231                              RBP, RBP_H,
 232                              RDI, RDI_H,
 233                              RSI, RSI_H,
 234                              RCX, RCX_H,
 235                              R8,  R8_H,
 236                              R9,  R9_H,
 237                              R10, R10_H,
 238                              R11, R11_H,
 239                              R13, R13_H,
 240                              R14, R14_H);
 241 
 242 // Singleton class for RAX pointer register
 243 reg_class ptr_rax_reg(RAX, RAX_H);
 244 
 245 // Singleton class for RBX pointer register
 246 reg_class ptr_rbx_reg(RBX, RBX_H);
 247 
 248 // Singleton class for RSI pointer register
 249 reg_class ptr_rsi_reg(RSI, RSI_H);
 250 
 251 // Singleton class for RDI pointer register
 252 reg_class ptr_rdi_reg(RDI, RDI_H);
 253 
 254 // Singleton class for RBP pointer register
 255 reg_class ptr_rbp_reg(RBP, RBP_H);
 256 
 257 // Singleton class for stack pointer
 258 reg_class ptr_rsp_reg(RSP, RSP_H);
 259 
 260 // Singleton class for TLS pointer (R15)
 261 reg_class ptr_r15_reg(R15, R15_H);
 262 
 263 // Class for all long registers (except RSP; R12 and R15 are also not listed)
 264 reg_class long_reg(RAX, RAX_H,
 265                    RDX, RDX_H,
 266                    RBP, RBP_H,
 267                    RDI, RDI_H,
 268                    RSI, RSI_H,
 269                    RCX, RCX_H,
 270                    RBX, RBX_H,
 271                    R8,  R8_H,
 272                    R9,  R9_H,
 273                    R10, R10_H,
 274                    R11, R11_H,
 275                    R13, R13_H,
 276                    R14, R14_H);
 277 
 278 // Class for all long registers except RAX, RDX (and RSP; R12 and R15 are also not listed)
 279 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 280                               RDI, RDI_H,
 281                               RSI, RSI_H,
 282                               RCX, RCX_H,
 283                               RBX, RBX_H,
 284                               R8,  R8_H,
 285                               R9,  R9_H,
 286                               R10, R10_H,
 287                               R11, R11_H,
 288                               R13, R13_H,
 289                               R14, R14_H);
 290 
 291 // Class for all long registers except RCX (and RSP; R12 and R15 are also not listed)
 292 reg_class long_no_rcx_reg(RBP, RBP_H,
 293                           RDI, RDI_H,
 294                           RSI, RSI_H,
 295                           RAX, RAX_H,
 296                           RDX, RDX_H,
 297                           RBX, RBX_H,
 298                           R8,  R8_H,
 299                           R9,  R9_H,
 300                           R10, R10_H,
 301                           R11, R11_H,
 302                           R13, R13_H,
 303                           R14, R14_H);
 304 
 305 // Class for all long registers except RAX (and RSP; R12 and R15 are also not listed)
 306 reg_class long_no_rax_reg(RBP, RBP_H,
 307                           RDX, RDX_H,
 308                           RDI, RDI_H,
 309                           RSI, RSI_H,
 310                           RCX, RCX_H,
 311                           RBX, RBX_H,
 312                           R8,  R8_H,
 313                           R9,  R9_H,
 314                           R10, R10_H,
 315                           R11, R11_H,
 316                           R13, R13_H,
 317                           R14, R14_H);
 318 
 319 // Singleton class for RAX long register
 320 reg_class long_rax_reg(RAX, RAX_H);
 321 
 322 // Singleton class for RCX long register
 323 reg_class long_rcx_reg(RCX, RCX_H);
 324 
 325 // Singleton class for RDX long register
 326 reg_class long_rdx_reg(RDX, RDX_H);
 327 
 328 // Class for all int registers (except RSP; R12 and R15 are also not listed)
     // The int classes name only the NAME definitions, never the NAME_H ones.
 329 reg_class int_reg(RAX,
 330                   RDX,
 331                   RBP,
 332                   RDI,
 333                   RSI,
 334                   RCX,
 335                   RBX,
 336                   R8,
 337                   R9,
 338                   R10,
 339                   R11,
 340                   R13,
 341                   R14);
 342 
 343 // Class for all int registers except RCX (and RSP; R12 and R15 are also not listed)
 344 reg_class int_no_rcx_reg(RAX,
 345                          RDX,
 346                          RBP,
 347                          RDI,
 348                          RSI,
 349                          RBX,
 350                          R8,
 351                          R9,
 352                          R10,
 353                          R11,
 354                          R13,
 355                          R14);
 356 
 357 // Class for all int registers except RAX, RDX (and RSP; R12 and R15 are also not listed)
 358 reg_class int_no_rax_rdx_reg(RBP,
 359                              RDI,
 360                              RSI,
 361                              RCX,
 362                              RBX,
 363                              R8,
 364                              R9,
 365                              R10,
 366                              R11,
 367                              R13,
 368                              R14);
 369 
 370 // Singleton class for RAX int register
 371 reg_class int_rax_reg(RAX);
 372 
 373 // Singleton class for RBX int register
 374 reg_class int_rbx_reg(RBX);
 375 
 376 // Singleton class for RCX int register
 377 reg_class int_rcx_reg(RCX);
 378 
 379 // Singleton class for RDX int register
 380 reg_class int_rdx_reg(RDX);
 381 
 382 // Singleton class for RDI int register
 383 reg_class int_rdi_reg(RDI);
 384 
 385 // Singleton class for instruction pointer
 386 // reg_class ip_reg(RIP);
 387 
 388 %}
 389 
 390 //----------SOURCE BLOCK-------------------------------------------------------
 391 // This is a block of C++ code which provides values, functions, and
 392 // definitions necessary in the rest of the architecture description
 393 source %{
     // Short aliases for two Assembler operand kinds.  RELOC_DISP32 is passed
     // below as the `format` argument of the relocation-emitting helpers.
 394 #define   RELOC_IMM64    Assembler::imm_operand
 395 #define   RELOC_DISP32   Assembler::disp32_operand
 396 
     // Shorthand so that `__ foo(...)` expands to `_masm.foo(...)`; it only
     // works where an object named _masm is in scope.
 397 #define __ _masm.
 398 
 // Byte length of the instruction that preserves SP around a method handle
 // call: one byte each for the rex.w prefix, the opcode and the rm(reg/reg)
 // byte.
 static int preserve_SP_size()
 {
   const int rex_w_byte  = 1;
   const int opcode_byte = 1;
   const int modrm_byte  = 1;
   return rex_w_byte + opcode_byte + modrm_byte;
 }
 402 static int clear_avx_size() {
 403   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 404 }
 405 
 406 // !!!!! Special hack to get all types of calls to specify the byte offset
 407 //       from the start of the call to the point where the return address
 408 //       will point.
 409 int MachCallStaticJavaNode::ret_addr_offset()
 410 {
 411   int offset = 5; // 5 bytes from start of call to where return address points
 412   offset += clear_avx_size();
 413   if (_method_handle_invoke)
 414     offset += preserve_SP_size();
 415   return offset;
 416 }
 417 
 418 int MachCallDynamicJavaNode::ret_addr_offset()
 419 {
 420   int offset = 15; // 15 bytes from start of call to where return address points
 421   offset += clear_avx_size();
 422   return offset;
 423 }
 424 
 425 int MachCallRuntimeNode::ret_addr_offset() {
 426   int offset = 13; // movq r10,#addr; callq (r10)
 427   offset += clear_avx_size();
 428   return offset;
 429 }
 430 
 431 // Indicate if the safepoint node needs the polling page as an input,
 432 // it does if the polling page is more than disp32 away.
 433 bool SafePointNode::needs_polling_address_input()
 434 {
 435   return Assembler::is_polling_page_far();
 436 }
 437 
 438 //
 439 // Compute padding required for nodes which need alignment
 440 //
 441 
 442 // The address of the call instruction needs to be 4-byte aligned to
 443 // ensure that it does not span a cache line so that it can be patched.
 444 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 445 {
 446   current_offset += clear_avx_size(); // skip vzeroupper
 447   current_offset += 1; // skip call opcode byte
 448   return round_to(current_offset, alignment_required()) - current_offset;
 449 }
 450 
 451 // The address of the call instruction needs to be 4-byte aligned to
 452 // ensure that it does not span a cache line so that it can be patched.
 453 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 454 {
 455   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 456   current_offset += clear_avx_size(); // skip vzeroupper
 457   current_offset += 1; // skip call opcode byte
 458   return round_to(current_offset, alignment_required()) - current_offset;
 459 }
 460 
 461 // The address of the call instruction needs to be 4-byte aligned to
 462 // ensure that it does not span a cache line so that it can be patched.
 463 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 464 {
 465   current_offset += clear_avx_size(); // skip vzeroupper
 466   current_offset += 11; // skip movq instruction + call opcode byte
 467   return round_to(current_offset, alignment_required()) - current_offset;
 468 }
 469 
 470 // EMIT_RM()
 471 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 472   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 473   cbuf.insts()->emit_int8(c);
 474 }
 475 
 476 // EMIT_CC()
 477 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 478   unsigned char c = (unsigned char) (f1 | f2);
 479   cbuf.insts()->emit_int8(c);
 480 }
 481 
 482 // EMIT_OPCODE()
 483 void emit_opcode(CodeBuffer &cbuf, int code) {
 484   cbuf.insts()->emit_int8((unsigned char) code);
 485 }
 486 
 487 // EMIT_OPCODE() w/ relocation information
 488 void emit_opcode(CodeBuffer &cbuf,
 489                  int code, relocInfo::relocType reloc, int offset, int format)
 490 {
 491   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 492   emit_opcode(cbuf, code);
 493 }
 494 
 495 // EMIT_D8()
 496 void emit_d8(CodeBuffer &cbuf, int d8) {
 497   cbuf.insts()->emit_int8((unsigned char) d8);
 498 }
 499 
 500 // EMIT_D16()
 501 void emit_d16(CodeBuffer &cbuf, int d16) {
 502   cbuf.insts()->emit_int16(d16);
 503 }
 504 
 505 // EMIT_D32()
 506 void emit_d32(CodeBuffer &cbuf, int d32) {
 507   cbuf.insts()->emit_int32(d32);
 508 }
 509 
 510 // EMIT_D64()
 511 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 512   cbuf.insts()->emit_int64(d64);
 513 }
 514 
 515 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 516 void emit_d32_reloc(CodeBuffer& cbuf,
 517                     int d32,
 518                     relocInfo::relocType reloc,
 519                     int format)
 520 {
 521   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 522   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 523   cbuf.insts()->emit_int32(d32);
 524 }
 525 
 526 // emit 32 bit value and construct relocation entry from RelocationHolder
 527 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 528 #ifdef ASSERT
 529   if (rspec.reloc()->type() == relocInfo::oop_type &&
 530       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 531     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 532     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 533   }
 534 #endif
 535   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 536   cbuf.insts()->emit_int32(d32);
 537 }
 538 
 539 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 540   address next_ip = cbuf.insts_end() + 4;
 541   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 542                  external_word_Relocation::spec(addr),
 543                  RELOC_DISP32);
 544 }
 545 
 546 
 547 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 548 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 549   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 550   cbuf.insts()->emit_int64(d64);
 551 }
 552 
 553 // emit 64 bit value and construct relocation entry from RelocationHolder
 554 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 555 #ifdef ASSERT
 556   if (rspec.reloc()->type() == relocInfo::oop_type &&
 557       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 558     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 559     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 560            "cannot embed scavengable oops in code");
 561   }
 562 #endif
 563   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 564   cbuf.insts()->emit_int64(d64);
 565 }
 566 
 567 // Access stack slot for load or store
 568 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 569 {
 570   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 571   if (-0x80 <= disp && disp < 0x80) {
 572     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 573     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 574     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 575   } else {
 576     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 577     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 578     emit_d32(cbuf, disp);     // Displacement // R/M byte
 579   }
 580 }
 581 
 582    // rRegI ereg, memory mem) %{    // emit_reg_mem
 583 void encode_RegMem(CodeBuffer &cbuf,
 584                    int reg,
 585                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 586 {
 587   assert(disp_reloc == relocInfo::none, "cannot have disp");
 588   int regenc = reg & 7;
 589   int baseenc = base & 7;
 590   int indexenc = index & 7;
 591 
 592   // There is no index & no scale, use form without SIB byte
 593   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 594     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 595     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 596       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 597     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 598       // If 8-bit displacement, mode 0x1
 599       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 600       emit_d8(cbuf, disp);
 601     } else {
 602       // If 32-bit displacement
 603       if (base == -1) { // Special flag for absolute address
 604         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 605         if (disp_reloc != relocInfo::none) {
 606           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 607         } else {
 608           emit_d32(cbuf, disp);
 609         }
 610       } else {
 611         // Normal base + offset
 612         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 613         if (disp_reloc != relocInfo::none) {
 614           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 615         } else {
 616           emit_d32(cbuf, disp);
 617         }
 618       }
 619     }
 620   } else {
 621     // Else, encode with the SIB byte
 622     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 623     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 624       // If no displacement
 625       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 626       emit_rm(cbuf, scale, indexenc, baseenc);
 627     } else {
 628       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 629         // If 8-bit displacement, mode 0x1
 630         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 631         emit_rm(cbuf, scale, indexenc, baseenc);
 632         emit_d8(cbuf, disp);
 633       } else {
 634         // If 32-bit displacement
 635         if (base == 0x04 ) {
 636           emit_rm(cbuf, 0x2, regenc, 0x4);
 637           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 638         } else {
 639           emit_rm(cbuf, 0x2, regenc, 0x4);
 640           emit_rm(cbuf, scale, indexenc, baseenc); // *
 641         }
 642         if (disp_reloc != relocInfo::none) {
 643           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 644         } else {
 645           emit_d32(cbuf, disp);
 646         }
 647       }
 648     }
 649   }
 650 }
 651 
 652 // This could be in MacroAssembler but it's fairly C2 specific
 653 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 654   Label exit;
 655   __ jccb(Assembler::noParity, exit);
 656   __ pushf();
 657   //
 658   // comiss/ucomiss instructions set ZF,PF,CF flags and
 659   // zero OF,AF,SF for NaN values.
 660   // Fixup flags by zeroing ZF,PF so that compare of NaN
 661   // values returns 'less than' result (CF is set).
 662   // Leave the rest of flags unchanged.
 663   //
 664   //    7 6 5 4 3 2 1 0
 665   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 666   //    0 0 1 0 1 0 1 1   (0x2B)
 667   //
 668   __ andq(Address(rsp, 0), 0xffffff2b);
 669   __ popf();
 670   __ bind(exit);
 671 }
 672 
 673 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 674   Label done;
 675   __ movl(dst, -1);
 676   __ jcc(Assembler::parity, done);
 677   __ jcc(Assembler::below, done);
 678   __ setb(Assembler::notEqual, dst);
 679   __ movzbl(dst, dst);
 680   __ bind(done);
 681 }
 682 
 683 
 684 //=============================================================================
 685 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 686 
 687 int Compile::ConstantTable::calculate_table_base_offset() const {
 688   return 0;  // absolute addressing, no offset
 689 }
 690 
 691 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 692   // Empty encoding
 693 }
 694 
 695 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 696   return 0;
 697 }
 698 
 699 #ifndef PRODUCT
 700 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 701   st->print("# MachConstantBaseNode (empty encoding)");
 702 }
 703 #endif
 704 
 705 
 706 //=============================================================================
 707 #ifndef PRODUCT
 708 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 709   Compile* C = ra_->C;
 710 
 711   int framesize = C->frame_slots() << LogBytesPerInt;
 712   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 713   // Remove wordSize for return addr which is already pushed.
 714   framesize -= wordSize;
 715 
 716   if (C->need_stack_bang(framesize)) {
 717     framesize -= wordSize;
 718     st->print("# stack bang");
 719     st->print("\n\t");
 720     st->print("pushq   rbp\t# Save rbp");
 721     if (framesize) {
 722       st->print("\n\t");
 723       st->print("subq    rsp, #%d\t# Create frame",framesize);
 724     }
 725   } else {
 726     st->print("subq    rsp, #%d\t# Create frame",framesize);
 727     st->print("\n\t");
 728     framesize -= wordSize;
 729     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 730   }
 731 
 732   if (VerifyStackAtCalls) {
 733     st->print("\n\t");
 734     framesize -= wordSize;
 735     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 736 #ifdef ASSERT
 737     st->print("\n\t");
 738     st->print("# stack alignment check");
 739 #endif
 740   }
 741   st->cr();
 742 }
 743 #endif
 744 
 745 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 746   Compile* C = ra_->C;
 747   MacroAssembler _masm(&cbuf);
 748 
 749   int framesize = C->frame_slots() << LogBytesPerInt;
 750 
 751   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 752 
 753   C->set_frame_complete(cbuf.insts_size());
 754 
 755   if (C->has_mach_constant_base_node()) {
 756     // NOTE: We set the table base offset here because users might be
 757     // emitted before MachConstantBaseNode.
 758     Compile::ConstantTable& constant_table = C->constant_table();
 759     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 760   }
 761 }
 762 
 763 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 764 {
 765   return MachNode::size(ra_); // too many variables; just compute it
 766                               // the hard way
 767 }
 768 
 769 int MachPrologNode::reloc() const
 770 {
 771   return 0; // a large enough number
 772 }
 773 
 774 //=============================================================================
 775 #ifndef PRODUCT
 776 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 777 {
 778   Compile* C = ra_->C;
 779   if (C->max_vector_size() > 16) {
 780     st->print("vzeroupper");
 781     st->cr(); st->print("\t");
 782   }
 783 
 784   int framesize = C->frame_slots() << LogBytesPerInt;
 785   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 786   // Remove word for return adr already pushed
 787   // and RBP
 788   framesize -= 2*wordSize;
 789 
 790   if (framesize) {
 791     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 792     st->print("\t");
 793   }
 794 
 795   st->print_cr("popq   rbp");
 796   if (do_polling() && C->is_method_compilation()) {
 797     st->print("\t");
 798     if (Assembler::is_polling_page_far()) {
 799       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 800                    "testl  rax, [rscratch1]\t"
 801                    "# Safepoint: poll for GC");
 802     } else {
 803       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 804                    "# Safepoint: poll for GC");
 805     }
 806   }
 807 }
 808 #endif
 809 
 810 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 811 {
 812   Compile* C = ra_->C;
 813   if (C->max_vector_size() > 16) {
 814     // Clear upper bits of YMM registers when current compiled code uses
 815     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 816     MacroAssembler _masm(&cbuf);
 817     __ vzeroupper();
 818   }
 819 
 820   int framesize = C->frame_slots() << LogBytesPerInt;
 821   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 822   // Remove word for return adr already pushed
 823   // and RBP
 824   framesize -= 2*wordSize;
 825 
 826   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 827 
 828   if (framesize) {
 829     emit_opcode(cbuf, Assembler::REX_W);
 830     if (framesize < 0x80) {
 831       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 832       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 833       emit_d8(cbuf, framesize);
 834     } else {
 835       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 836       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 837       emit_d32(cbuf, framesize);
 838     }
 839   }
 840 
 841   // popq rbp
 842   emit_opcode(cbuf, 0x58 | RBP_enc);
 843 
 844   if (do_polling() && C->is_method_compilation()) {
 845     MacroAssembler _masm(&cbuf);
 846     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 847     if (Assembler::is_polling_page_far()) {
 848       __ lea(rscratch1, polling_page);
 849       __ relocate(relocInfo::poll_return_type);
 850       __ testl(rax, Address(rscratch1, 0));
 851     } else {
 852       __ testl(rax, polling_page);
 853     }
 854   }
 855 }
 856 
 857 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 858 {
 859   return MachNode::size(ra_); // too many variables; just compute it
 860                               // the hard way
 861 }
 862 
 863 int MachEpilogNode::reloc() const
 864 {
 865   return 2; // a large enough number
 866 }
 867 
 868 const Pipeline* MachEpilogNode::pipeline() const
 869 {
 870   return MachNode::pipeline_class();
 871 }
 872 
 873 int MachEpilogNode::safepoint_offset() const
 874 {
 875   return 0;
 876 }
 877 
 878 //=============================================================================
 879 
 880 enum RC {
 881   rc_bad,
 882   rc_int,
 883   rc_float,
 884   rc_stack
 885 };
 886 
 887 static enum RC rc_class(OptoReg::Name reg)
 888 {
 889   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 890 
 891   if (OptoReg::is_stack(reg)) return rc_stack;
 892 
 893   VMReg r = OptoReg::as_VMReg(reg);
 894 
 895   if (r->is_Register()) return rc_int;
 896 
 897   assert(r->is_XMMRegister(), "must be");
 898   return rc_float;
 899 }
 900 
 901 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 902 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 903                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 904 
 905 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 906                             int stack_offset, int reg, uint ireg, outputStream* st);
 907 
 908 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 909                                       int dst_offset, uint ireg, outputStream* st) {
 910   if (cbuf) {
 911     MacroAssembler _masm(cbuf);
 912     switch (ireg) {
 913     case Op_VecS:
 914       __ movq(Address(rsp, -8), rax);
 915       __ movl(rax, Address(rsp, src_offset));
 916       __ movl(Address(rsp, dst_offset), rax);
 917       __ movq(rax, Address(rsp, -8));
 918       break;
 919     case Op_VecD:
 920       __ pushq(Address(rsp, src_offset));
 921       __ popq (Address(rsp, dst_offset));
 922       break;
 923     case Op_VecX:
 924       __ pushq(Address(rsp, src_offset));
 925       __ popq (Address(rsp, dst_offset));
 926       __ pushq(Address(rsp, src_offset+8));
 927       __ popq (Address(rsp, dst_offset+8));
 928       break;
 929     case Op_VecY:
 930       __ vmovdqu(Address(rsp, -32), xmm0);
 931       __ vmovdqu(xmm0, Address(rsp, src_offset));
 932       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 933       __ vmovdqu(xmm0, Address(rsp, -32));
 934       break;
 935     default:
 936       ShouldNotReachHere();
 937     }
 938 #ifndef PRODUCT
 939   } else {
 940     switch (ireg) {
 941     case Op_VecS:
 942       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 943                 "movl    rax, [rsp + #%d]\n\t"
 944                 "movl    [rsp + #%d], rax\n\t"
 945                 "movq    rax, [rsp - #8]",
 946                 src_offset, dst_offset);
 947       break;
 948     case Op_VecD:
 949       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 950                 "popq    [rsp + #%d]",
 951                 src_offset, dst_offset);
 952       break;
 953      case Op_VecX:
 954       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 955                 "popq    [rsp + #%d]\n\t"
 956                 "pushq   [rsp + #%d]\n\t"
 957                 "popq    [rsp + #%d]",
 958                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 959       break;
 960     case Op_VecY:
 961       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 962                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 963                 "vmovdqu [rsp + #%d], xmm0\n\t"
 964                 "vmovdqu xmm0, [rsp - #32]",
 965                 src_offset, dst_offset);
 966       break;
 967     default:
 968       ShouldNotReachHere();
 969     }
 970 #endif
 971   }
 972 }
 973 
 974 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 975                                        PhaseRegAlloc* ra_,
 976                                        bool do_size,
 977                                        outputStream* st) const {
 978   assert(cbuf != NULL || st  != NULL, "sanity");
 979   // Get registers to move
 980   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 981   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 982   OptoReg::Name dst_second = ra_->get_reg_second(this);
 983   OptoReg::Name dst_first = ra_->get_reg_first(this);
 984 
 985   enum RC src_second_rc = rc_class(src_second);
 986   enum RC src_first_rc = rc_class(src_first);
 987   enum RC dst_second_rc = rc_class(dst_second);
 988   enum RC dst_first_rc = rc_class(dst_first);
 989 
 990   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 991          "must move at least 1 register" );
 992 
 993   if (src_first == dst_first && src_second == dst_second) {
 994     // Self copy, no move
 995     return 0;
 996   }
 997   if (bottom_type()->isa_vect() != NULL) {
 998     uint ireg = ideal_reg();
 999     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1000     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
1001     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1002       // mem -> mem
1003       int src_offset = ra_->reg2offset(src_first);
1004       int dst_offset = ra_->reg2offset(dst_first);
1005       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1006     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1007       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1008     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1009       int stack_offset = ra_->reg2offset(dst_first);
1010       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1011     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1012       int stack_offset = ra_->reg2offset(src_first);
1013       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1014     } else {
1015       ShouldNotReachHere();
1016     }
1017     return 0;
1018   }
1019   if (src_first_rc == rc_stack) {
1020     // mem ->
1021     if (dst_first_rc == rc_stack) {
1022       // mem -> mem
1023       assert(src_second != dst_first, "overlap");
1024       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1025           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1026         // 64-bit
1027         int src_offset = ra_->reg2offset(src_first);
1028         int dst_offset = ra_->reg2offset(dst_first);
1029         if (cbuf) {
1030           MacroAssembler _masm(cbuf);
1031           __ pushq(Address(rsp, src_offset));
1032           __ popq (Address(rsp, dst_offset));
1033 #ifndef PRODUCT
1034         } else {
1035           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                     "popq    [rsp + #%d]",
1037                      src_offset, dst_offset);
1038 #endif
1039         }
1040       } else {
1041         // 32-bit
1042         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1043         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1044         // No pushl/popl, so:
1045         int src_offset = ra_->reg2offset(src_first);
1046         int dst_offset = ra_->reg2offset(dst_first);
1047         if (cbuf) {
1048           MacroAssembler _masm(cbuf);
1049           __ movq(Address(rsp, -8), rax);
1050           __ movl(rax, Address(rsp, src_offset));
1051           __ movl(Address(rsp, dst_offset), rax);
1052           __ movq(rax, Address(rsp, -8));
1053 #ifndef PRODUCT
1054         } else {
1055           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1056                     "movl    rax, [rsp + #%d]\n\t"
1057                     "movl    [rsp + #%d], rax\n\t"
1058                     "movq    rax, [rsp - #8]",
1059                      src_offset, dst_offset);
1060 #endif
1061         }
1062       }
1063       return 0;
1064     } else if (dst_first_rc == rc_int) {
1065       // mem -> gpr
1066       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1067           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1068         // 64-bit
1069         int offset = ra_->reg2offset(src_first);
1070         if (cbuf) {
1071           MacroAssembler _masm(cbuf);
1072           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1073 #ifndef PRODUCT
1074         } else {
1075           st->print("movq    %s, [rsp + #%d]\t# spill",
1076                      Matcher::regName[dst_first],
1077                      offset);
1078 #endif
1079         }
1080       } else {
1081         // 32-bit
1082         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1083         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1084         int offset = ra_->reg2offset(src_first);
1085         if (cbuf) {
1086           MacroAssembler _masm(cbuf);
1087           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1088 #ifndef PRODUCT
1089         } else {
1090           st->print("movl    %s, [rsp + #%d]\t# spill",
1091                      Matcher::regName[dst_first],
1092                      offset);
1093 #endif
1094         }
1095       }
1096       return 0;
1097     } else if (dst_first_rc == rc_float) {
1098       // mem-> xmm
1099       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1100           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1101         // 64-bit
1102         int offset = ra_->reg2offset(src_first);
1103         if (cbuf) {
1104           MacroAssembler _masm(cbuf);
1105           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1106 #ifndef PRODUCT
1107         } else {
1108           st->print("%s  %s, [rsp + #%d]\t# spill",
1109                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1110                      Matcher::regName[dst_first],
1111                      offset);
1112 #endif
1113         }
1114       } else {
1115         // 32-bit
1116         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1117         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1118         int offset = ra_->reg2offset(src_first);
1119         if (cbuf) {
1120           MacroAssembler _masm(cbuf);
1121           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1122 #ifndef PRODUCT
1123         } else {
1124           st->print("movss   %s, [rsp + #%d]\t# spill",
1125                      Matcher::regName[dst_first],
1126                      offset);
1127 #endif
1128         }
1129       }
1130       return 0;
1131     }
1132   } else if (src_first_rc == rc_int) {
1133     // gpr ->
1134     if (dst_first_rc == rc_stack) {
1135       // gpr -> mem
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int offset = ra_->reg2offset(dst_first);
1140         if (cbuf) {
1141           MacroAssembler _masm(cbuf);
1142           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1143 #ifndef PRODUCT
1144         } else {
1145           st->print("movq    [rsp + #%d], %s\t# spill",
1146                      offset,
1147                      Matcher::regName[src_first]);
1148 #endif
1149         }
1150       } else {
1151         // 32-bit
1152         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1153         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1154         int offset = ra_->reg2offset(dst_first);
1155         if (cbuf) {
1156           MacroAssembler _masm(cbuf);
1157           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1158 #ifndef PRODUCT
1159         } else {
1160           st->print("movl    [rsp + #%d], %s\t# spill",
1161                      offset,
1162                      Matcher::regName[src_first]);
1163 #endif
1164         }
1165       }
1166       return 0;
1167     } else if (dst_first_rc == rc_int) {
1168       // gpr -> gpr
1169       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1170           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1171         // 64-bit
1172         if (cbuf) {
1173           MacroAssembler _masm(cbuf);
1174           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1175                   as_Register(Matcher::_regEncode[src_first]));
1176 #ifndef PRODUCT
1177         } else {
1178           st->print("movq    %s, %s\t# spill",
1179                      Matcher::regName[dst_first],
1180                      Matcher::regName[src_first]);
1181 #endif
1182         }
1183         return 0;
1184       } else {
1185         // 32-bit
1186         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1187         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1188         if (cbuf) {
1189           MacroAssembler _masm(cbuf);
1190           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1191                   as_Register(Matcher::_regEncode[src_first]));
1192 #ifndef PRODUCT
1193         } else {
1194           st->print("movl    %s, %s\t# spill",
1195                      Matcher::regName[dst_first],
1196                      Matcher::regName[src_first]);
1197 #endif
1198         }
1199         return 0;
1200       }
1201     } else if (dst_first_rc == rc_float) {
1202       // gpr -> xmm
1203       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1204           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1205         // 64-bit
1206         if (cbuf) {
1207           MacroAssembler _masm(cbuf);
1208           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1209 #ifndef PRODUCT
1210         } else {
1211           st->print("movdq   %s, %s\t# spill",
1212                      Matcher::regName[dst_first],
1213                      Matcher::regName[src_first]);
1214 #endif
1215         }
1216       } else {
1217         // 32-bit
1218         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1219         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1220         if (cbuf) {
1221           MacroAssembler _masm(cbuf);
1222           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1223 #ifndef PRODUCT
1224         } else {
1225           st->print("movdl   %s, %s\t# spill",
1226                      Matcher::regName[dst_first],
1227                      Matcher::regName[src_first]);
1228 #endif
1229         }
1230       }
1231       return 0;
1232     }
1233   } else if (src_first_rc == rc_float) {
1234     // xmm ->
1235     if (dst_first_rc == rc_stack) {
1236       // xmm -> mem
1237       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1238           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1239         // 64-bit
1240         int offset = ra_->reg2offset(dst_first);
1241         if (cbuf) {
1242           MacroAssembler _masm(cbuf);
1243           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1244 #ifndef PRODUCT
1245         } else {
1246           st->print("movsd   [rsp + #%d], %s\t# spill",
1247                      offset,
1248                      Matcher::regName[src_first]);
1249 #endif
1250         }
1251       } else {
1252         // 32-bit
1253         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1254         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1255         int offset = ra_->reg2offset(dst_first);
1256         if (cbuf) {
1257           MacroAssembler _masm(cbuf);
1258           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1259 #ifndef PRODUCT
1260         } else {
1261           st->print("movss   [rsp + #%d], %s\t# spill",
1262                      offset,
1263                      Matcher::regName[src_first]);
1264 #endif
1265         }
1266       }
1267       return 0;
1268     } else if (dst_first_rc == rc_int) {
1269       // xmm -> gpr
1270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1272         // 64-bit
1273         if (cbuf) {
1274           MacroAssembler _masm(cbuf);
1275           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1276 #ifndef PRODUCT
1277         } else {
1278           st->print("movdq   %s, %s\t# spill",
1279                      Matcher::regName[dst_first],
1280                      Matcher::regName[src_first]);
1281 #endif
1282         }
1283       } else {
1284         // 32-bit
1285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1287         if (cbuf) {
1288           MacroAssembler _masm(cbuf);
1289           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1290 #ifndef PRODUCT
1291         } else {
1292           st->print("movdl   %s, %s\t# spill",
1293                      Matcher::regName[dst_first],
1294                      Matcher::regName[src_first]);
1295 #endif
1296         }
1297       }
1298       return 0;
1299     } else if (dst_first_rc == rc_float) {
1300       // xmm -> xmm
1301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1303         // 64-bit
1304         if (cbuf) {
1305           MacroAssembler _masm(cbuf);
1306           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1307 #ifndef PRODUCT
1308         } else {
1309           st->print("%s  %s, %s\t# spill",
1310                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1311                      Matcher::regName[dst_first],
1312                      Matcher::regName[src_first]);
1313 #endif
1314         }
1315       } else {
1316         // 32-bit
1317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1319         if (cbuf) {
1320           MacroAssembler _masm(cbuf);
1321           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1322 #ifndef PRODUCT
1323         } else {
1324           st->print("%s  %s, %s\t# spill",
1325                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1326                      Matcher::regName[dst_first],
1327                      Matcher::regName[src_first]);
1328 #endif
1329         }
1330       }
1331       return 0;
1332     }
1333   }
1334 
1335   assert(0," foo ");
1336   Unimplemented();
1337   return 0;
1338 }
1339 
1340 #ifndef PRODUCT
1341 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1342   implementation(NULL, ra_, false, st);
1343 }
1344 #endif
1345 
1346 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1347   implementation(&cbuf, ra_, false, NULL);
1348 }
1349 
1350 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1351   return MachNode::size(ra_);
1352 }
1353 
1354 //=============================================================================
1355 #ifndef PRODUCT
1356 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1357 {
1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1359   int reg = ra_->get_reg_first(this);
1360   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1361             Matcher::regName[reg], offset);
1362 }
1363 #endif
1364 
1365 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1366 {
1367   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1368   int reg = ra_->get_encode(this);
1369   if (offset >= 0x80) {
1370     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1371     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1372     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1373     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1374     emit_d32(cbuf, offset);
1375   } else {
1376     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1377     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1378     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1379     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1380     emit_d8(cbuf, offset);
1381   }
1382 }
1383 
1384 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1385 {
1386   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1387   return (offset < 0x80) ? 5 : 8; // REX
1388 }
1389 
1390 //=============================================================================
1391 #ifndef PRODUCT
1392 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1393 {
1394   if (UseCompressedKlassPointers) {
1395     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1396     if (Universe::narrow_klass_shift() != 0) {
1397       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1398     }
1399     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1400   } else {
1401     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1402                  "# Inline cache check");
1403   }
1404   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1405   st->print_cr("\tnop\t# nops to align entry point");
1406 }
1407 #endif
1408 
1409 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1410 {
1411   MacroAssembler masm(&cbuf);
1412   uint insts_size = cbuf.insts_size();
1413   if (UseCompressedKlassPointers) {
1414     masm.load_klass(rscratch1, j_rarg0);
1415     masm.cmpptr(rax, rscratch1);
1416   } else {
1417     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1418   }
1419 
1420   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1421 
1422   /* WARNING these NOPs are critical so that verified entry point is properly
1423      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1424   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1425   if (OptoBreakpoint) {
1426     // Leave space for int3
1427     nops_cnt -= 1;
1428   }
1429   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1430   if (nops_cnt > 0)
1431     masm.nop(nops_cnt);
1432 }
1433 
1434 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1435 {
1436   return MachNode::size(ra_); // too many variables; just compute it
1437                               // the hard way
1438 }
1439 
1440 
1441 //=============================================================================
1442 uint size_exception_handler()
1443 {
1444   // NativeCall instruction size is the same as NativeJump.
1445   // Note that this value is also credited (in output.cpp) to
1446   // the size of the code section.
1447   return NativeJump::instruction_size;
1448 }
1449 
1450 // Emit exception handler code.
1451 int emit_exception_handler(CodeBuffer& cbuf)
1452 {
1453 
1454   // Note that the code buffer's insts_mark is always relative to insts.
1455   // That's why we must use the macroassembler to generate a handler.
1456   MacroAssembler _masm(&cbuf);
1457   address base =
1458   __ start_a_stub(size_exception_handler());
1459   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1460   int offset = __ offset();
1461   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1462   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1463   __ end_a_stub();
1464   return offset;
1465 }
1466 
1467 uint size_deopt_handler()
1468 {
1469   // three 5 byte instructions
1470   return 15;
1471 }
1472 
1473 // Emit deopt handler code.
1474 int emit_deopt_handler(CodeBuffer& cbuf)
1475 {
1476 
1477   // Note that the code buffer's insts_mark is always relative to insts.
1478   // That's why we must use the macroassembler to generate a handler.
1479   MacroAssembler _masm(&cbuf);
1480   address base =
1481   __ start_a_stub(size_deopt_handler());
1482   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1483   int offset = __ offset();
1484   address the_pc = (address) __ pc();
1485   Label next;
1486   // push a "the_pc" on the stack without destroying any registers
1487   // as they all may be live.
1488 
1489   // push address of "next"
1490   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1491   __ bind(next);
1492   // adjust it so it matches "the_pc"
1493   __ subptr(Address(rsp, 0), __ offset() - offset);
1494   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1495   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1496   __ end_a_stub();
1497   return offset;
1498 }
1499 
1500 int Matcher::regnum_to_fpu_offset(int regnum)
1501 {
1502   return regnum - 32; // The FP registers are in the second chunk
1503 }
1504 
1505 // This is UltraSparc specific, true just means we have fast l2f conversion
1506 const bool Matcher::convL2FSupported(void) {
1507   return true;
1508 }
1509 
1510 // Is this branch offset short enough that a short branch can be used?
1511 //
1512 // NOTE: If the platform does not provide any short branch variants, then
1513 //       this method should return false for offset 0.
1514 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1515   // The passed offset is relative to address of the branch.
1516   // On 86 a branch displacement is calculated relative to address
1517   // of a next instruction.
1518   offset -= br_size;
1519 
1520   // the short version of jmpConUCF2 contains multiple branches,
1521   // making the reach slightly less
1522   if (rule == jmpConUCF2_rule)
1523     return (-126 <= offset && offset <= 125);
1524   return (-128 <= offset && offset <= 127);
1525 }
1526 
1527 const bool Matcher::isSimpleConstant64(jlong value) {
1528   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1529   //return value == (int) value;  // Cf. storeImmL and immL32.
1530 
1531   // Probably always true, even if a temp register is required.
1532   return true;
1533 }
1534 
1535 // The ecx parameter to rep stosq for the ClearArray node is in words.
1536 const bool Matcher::init_array_count_is_in_bytes = false;
1537 
1538 // Threshold size for cleararray.
1539 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1540 
1541 // No additional cost for CMOVL.
1542 const int Matcher::long_cmove_cost() { return 0; }
1543 
1544 // No CMOVF/CMOVD with SSE2
1545 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1546 
1547 // Should the Matcher clone shifts on addressing modes, expecting them
1548 // to be subsumed into complex addressing expressions or compute them
1549 // into registers?  True for Intel but false for most RISCs
1550 const bool Matcher::clone_shift_expressions = true;
1551 
1552 // Do we need to mask the count passed to shift instructions or does
1553 // the cpu only look at the lower 5/6 bits anyway?
1554 const bool Matcher::need_masked_shift_count = false;
1555 
1556 bool Matcher::narrow_oop_use_complex_address() {
1557   assert(UseCompressedOops, "only for compressed oops code");
1558   return (LogMinObjAlignmentInBytes <= 3);
1559 }
1560 
1561 bool Matcher::narrow_klass_use_complex_address() {
1562   assert(UseCompressedKlassPointers, "only for compressed klass code");
1563   return (LogKlassAlignmentInBytes <= 3);
1564 }
1565 
1566 // Is it better to copy float constants, or load them directly from
1567 // memory?  Intel can load a float constant from a direct address,
1568 // requiring no extra registers.  Most RISCs will have to materialize
1569 // an address into a register first, so they would do better to copy
1570 // the constant from stack.
1571 const bool Matcher::rematerialize_float_constants = true; // XXX
1572 
1573 // If CPU can load and store mis-aligned doubles directly then no
1574 // fixup is needed.  Else we split the double into 2 integer pieces
1575 // and move it piece-by-piece.  Only happens when passing doubles into
1576 // C code as the Java calling convention forces doubles to be aligned.
1577 const bool Matcher::misaligned_doubles_ok = true;
1578 
1579 // No-op on amd64
1580 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1581 
1582 // Advertise here if the CPU requires explicit rounding operations to
1583 // implement the UseStrictFP mode.
1584 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1585 
1586 // Are floats conerted to double when stored to stack during deoptimization?
1587 // On x64 it is stored without convertion so we can use normal access.
1588 bool Matcher::float_in_double() { return false; }
1589 
1590 // Do ints take an entire long register or just half?
1591 const bool Matcher::int_in_long = true;
1592 
1593 // Return whether or not this register is ever used as an argument.
1594 // This function is used on startup to build the trampoline stubs in
1595 // generateOptoStub.  Registers not mentioned will be killed by the VM
1596 // call in the trampoline, and arguments in those registers will not be
1597 // available to the callee.
1598 bool Matcher::can_be_java_arg(int reg)
1599 {
1600   return
1601     reg ==  RDI_num || reg == RDI_H_num ||  // general registers: each one is
1602     reg ==  RSI_num || reg == RSI_H_num ||  // accepted under both its _num and
1603     reg ==  RDX_num || reg == RDX_H_num ||  // its _H_num register number
1604     reg ==  RCX_num || reg == RCX_H_num ||
1605     reg ==   R8_num || reg ==  R8_H_num ||
1606     reg ==   R9_num || reg ==  R9_H_num ||
1607     reg ==  R12_num || reg == R12_H_num ||
1608     reg == XMM0_num || reg == XMM0b_num ||  // XMM0..XMM7: each one is accepted
1609     reg == XMM1_num || reg == XMM1b_num ||  // under both its _num and its b_num
1610     reg == XMM2_num || reg == XMM2b_num ||  // register number
1611     reg == XMM3_num || reg == XMM3b_num ||
1612     reg == XMM4_num || reg == XMM4b_num ||
1613     reg == XMM5_num || reg == XMM5b_num ||
1614     reg == XMM6_num || reg == XMM6b_num ||
1615     reg == XMM7_num || reg == XMM7b_num;
1616 }
1617 
1618 bool Matcher::is_spillable_arg(int reg)
1619 {
1620   return can_be_java_arg(reg);  // spillable exactly when it can be a Java argument register
1621 }
1622 
1623 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1624   // In 64-bit mode, code that uses a multiply when the
1625   // divisor is constant is faster than the hardware
1626   // DIV instruction (it uses MulHiL), so always answer false.
1627   return false;
1628 }
1629 
1630 // Register mask for the DIVI projection of divmodI: the INT_RAX_REG class
1631 RegMask Matcher::divI_proj_mask() {
1632   return INT_RAX_REG_mask();
1633 }
1634 
1635 // Register mask for the MODI projection of divmodI: the INT_RDX_REG class
1636 RegMask Matcher::modI_proj_mask() {
1637   return INT_RDX_REG_mask();
1638 }
1639 
1640 // Register mask for the DIVL projection of divmodL: the LONG_RAX_REG class
1641 RegMask Matcher::divL_proj_mask() {
1642   return LONG_RAX_REG_mask();
1643 }
1644 
1645 // Register mask for the MODL projection of divmodL: the LONG_RDX_REG class
1646 RegMask Matcher::modL_proj_mask() {
1647   return LONG_RDX_REG_mask();
1648 }
1649 
1650 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1651   return PTR_RBP_REG_mask();  // the PTR_RBP_REG class; presumably where SP is saved around method handle invokes
1652 }
1653 
1654 %}
1655 
1656 //----------ENCODING BLOCK-----------------------------------------------------
1657 // This block specifies the encoding classes used by the compiler to
1658 // output byte streams.  Encoding classes are parameterized macros
1659 // used by Machine Instruction Nodes in order to generate the bit
1660 // encoding of the instruction.  Operands specify their base encoding
1661 // interface with the interface keyword.  Four interfaces are
1662 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
1663 // COND_INTER.  REG_INTER causes an operand to generate a function
1664 // which returns its register number when queried.  CONST_INTER causes
1665 // an operand to generate a function which returns the value of the
1666 // constant when queried.  MEMORY_INTER causes an operand to generate
1667 // four functions which return the Base Register, the Index Register,
1668 // the Scale Value, and the Offset Value of the operand when queried.
1669 // COND_INTER causes an operand to generate six functions which return
1670 // the encoding code (ie - encoding bits for the instruction)
1671 // associated with each basic boolean condition for a conditional
1672 // instruction.
1673 //
1674 // Instructions specify two basic values for encoding.  Again, a
1675 // function is available to check if the constant displacement is an
1676 // oop. They use the ins_encode keyword to specify their encoding
1677 // classes (which must be a sequence of enc_class names, and their
1678 // parameters, specified in the encoding block), and they use the
1679 // opcode keyword to specify, in order, their primary, secondary, and
1680 // tertiary opcode.  Only the opcode sections which a particular
1681 // instruction needs for encoding need to be specified.
1682 encode %{
1683   // Build emit functions for each basic byte or larger field in the
1684   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1685   // from C++ code in the enc_class source block.  Emit functions will
1686   // live in the main source block for now.  In future, we can
1687   // generalize this by adding a syntax that specifies the sizes of
1688   // fields in an order, so that the adlc can build the emit functions
1689   // automagically
1690 
1691   // Emit the primary opcode (first entry of the instruction's opcode list)
1692   enc_class OpcP
1693   %{
1694     emit_opcode(cbuf, $primary);
1695   %}
1696 
1697   // Emit the secondary opcode (second entry of the instruction's opcode list)
1698   enc_class OpcS
1699   %{
1700     emit_opcode(cbuf, $secondary);
1701   %}
1702 
1703   // Emit the tertiary opcode (third entry of the instruction's opcode list)
1704   enc_class OpcT
1705   %{
1706     emit_opcode(cbuf, $tertiary);
1707   %}
1708 
1709   // Emit an opcode given directly as the constant argument d8
1710   enc_class Opcode(immI d8)
1711   %{
1712     emit_opcode(cbuf, $d8$$constant);
1713   %}
1714 
1715   // Emit the size prefix byte 0x66
1716   enc_class SizePrefix
1717   %{
1718     emit_opcode(cbuf, 0x66);
1719   %}
1720 
1721   enc_class reg(rRegI reg)
1722   %{ // r/m byte only: mode 0x3, middle field 0, low three bits of the register encoding
1723     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);  // no prefix is emitted here for encodings 8 and above
1724   %}
1725 
1726   enc_class reg_reg(rRegI dst, rRegI src)
1727   %{ // r/m byte only: mode 0x3 with the low three bits of dst and src
1728     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);  // no prefix is emitted here for encodings 8 and above
1729   %}
1730 
1731   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1732   %{ // the given opcode constant followed by a mode-0x3 r/m byte for dst, src
1733     emit_opcode(cbuf, $opcode$$constant);
1734     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);  // low three bits only; no prefix emitted here
1735   %}
1736 
1737   enc_class cdql_enc(no_rax_rdx_RegI div)
1738   %{
1739     // Full implementation of Java idiv and irem; checks for
1740     // special case as described in JVM spec., p.243 & p.271.
1741     //
1742     //         normal case                           special case
1743     //
1744     // input : rax: dividend                         min_int
1745     //         reg: divisor                          -1
1746     //
1747     // output: rax: quotient  (= rax idiv reg)       min_int
1748     //         rdx: remainder (= rax irem reg)       0
1749     //
1750     //  Code sequence:
1751     //
1752     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1753     //    5:   75 07/08                jne    e <normal>
1754     //    7:   33 d2                   xor    %edx,%edx
1755     //  [div >= 8 -> offset + 1]
1756     //  [REX_B]
1757     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1758     //    c:   74 03/04                je     11 <done>
1759     // 000000000000000e <normal>:
1760     //    e:   99                      cltd
1761     //  [div >= 8 -> offset + 1]
1762     //  [REX_B]
1763     //    f:   f7 f9                   idiv   $div
1764     // 0000000000000011 <done>:
1765 
1766     // cmp    $0x80000000,%eax
1767     emit_opcode(cbuf, 0x3d);
1768     emit_d8(cbuf, 0x00);  // imm32 0x80000000, emitted low byte first
1769     emit_d8(cbuf, 0x00);
1770     emit_d8(cbuf, 0x00);
1771     emit_d8(cbuf, 0x80);
1772 
1773     // jne    e <normal>
1774     emit_opcode(cbuf, 0x75);
1775     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);  // skips xor (2) + cmp (3, or 4 with REX_B) + je (2)
1776 
1777     // xor    %edx,%edx
1778     emit_opcode(cbuf, 0x33);
1779     emit_d8(cbuf, 0xD2);
1780 
1781     // cmp    $0xffffffffffffffff,$div
1782     if ($div$$reg >= 8) {
1783       emit_opcode(cbuf, Assembler::REX_B);
1784     }
1785     emit_opcode(cbuf, 0x83);
1786     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1787     emit_d8(cbuf, 0xFF);
1788 
1789     // je     11 <done>
1790     emit_opcode(cbuf, 0x74);
1791     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);  // skips cltd (1) + the caller's idiv (2, or 3 with REX_B)
1792 
1793     // <normal>
1794     // cltd
1795     emit_opcode(cbuf, 0x99);
1796 
1797     // idivl (note: must be emitted by the user of this rule)
1798     // <done>
1799   %}
1800 
1801   enc_class cdqq_enc(no_rax_rdx_RegL div)
1802   %{
1803     // Full implementation of Java ldiv and lrem; checks for
1804     // special case as described in JVM spec., p.243 & p.271.
1805     //
1806     //         normal case                           special case
1807     //
1808     // input : rax: dividend                         min_long
1809     //         reg: divisor                          -1
1810     //
1811     // output: rax: quotient  (= rax idiv reg)       min_long
1812     //         rdx: remainder (= rax irem reg)       0
1813     //
1814     //  Code sequence:
1815     //
1816     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1817     //    7:   00 00 80
1818     //    a:   48 39 d0                cmp    %rdx,%rax
1819     //    d:   75 08                   jne    17 <normal>
1820     //    f:   33 d2                   xor    %edx,%edx
1821     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1822     //   15:   74 05                   je     1c <done>
1823     // 0000000000000017 <normal>:
1824     //   17:   48 99                   cqto
1825     //   19:   48 f7 f9                idiv   $div
1826     // 000000000000001c <done>:
1827 
1828     // mov    $0x8000000000000000,%rdx
1829     emit_opcode(cbuf, Assembler::REX_W);
1830     emit_opcode(cbuf, 0xBA);
1831     emit_d8(cbuf, 0x00);  // imm64 0x8000000000000000, emitted low byte first
1832     emit_d8(cbuf, 0x00);
1833     emit_d8(cbuf, 0x00);
1834     emit_d8(cbuf, 0x00);
1835     emit_d8(cbuf, 0x00);
1836     emit_d8(cbuf, 0x00);
1837     emit_d8(cbuf, 0x00);
1838     emit_d8(cbuf, 0x80);
1839 
1840     // cmp    %rdx,%rax
1841     emit_opcode(cbuf, Assembler::REX_W);
1842     emit_opcode(cbuf, 0x39);
1843     emit_d8(cbuf, 0xD0);
1844 
1845     // jne    17 <normal>
1846     emit_opcode(cbuf, 0x75);
1847     emit_d8(cbuf, 0x08);  // skips xor (2) + cmp (4, always prefixed) + je (2)
1848 
1849     // xor    %edx,%edx
1850     emit_opcode(cbuf, 0x33);
1851     emit_d8(cbuf, 0xD2);
1852 
1853     // cmp    $0xffffffffffffffff,$div
1854     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1855     emit_opcode(cbuf, 0x83);
1856     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1857     emit_d8(cbuf, 0xFF);
1858 
1859     // je     1c <done>
1860     emit_opcode(cbuf, 0x74);
1861     emit_d8(cbuf, 0x05);  // skips cqto (2) + the caller's idiv (3, always prefixed)
1862 
1863     // <normal>
1864     // cqto
1865     emit_opcode(cbuf, Assembler::REX_W);
1866     emit_opcode(cbuf, 0x99);
1867 
1868     // idivq (note: must be emitted by the user of this rule)
1869     // <done>
1870   %}
1871 
1872   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1873   enc_class OpcSE(immI imm)
1874   %{
1875     // Emit primary opcode and set sign-extend bit
1876     // Check for 8-bit immediate, and set sign extend bit in opcode
1877     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1878       emit_opcode(cbuf, $primary | 0x02);  // immediate fits in a signed byte: set bit 0x02
1879     } else {
1880       // 32-bit immediate
1881       emit_opcode(cbuf, $primary);
1882     }
1883   %}
1884 
1885   enc_class OpcSErm(rRegI dst, immI imm)
1886   %{
1887     // OpcSEr/m: optional REX_B, sign-extend-aware primary opcode, then r/m byte
1888     int dstenc = $dst$$reg;
1889     if (dstenc >= 8) {
1890       emit_opcode(cbuf, Assembler::REX_B);
1891       dstenc -= 8;  // only the low three bits go into the r/m byte
1892     }
1893     // Emit primary opcode and set sign-extend bit
1894     // Check for 8-bit immediate, and set sign extend bit in opcode
1895     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1896       emit_opcode(cbuf, $primary | 0x02);
1897     } else {
1898       // 32-bit immediate
1899       emit_opcode(cbuf, $primary);
1900     }
1901     // Emit r/m byte with secondary opcode, after primary opcode.
1902     emit_rm(cbuf, 0x3, $secondary, dstenc);
1903   %}
1904 
1905   enc_class OpcSErm_wide(rRegL dst, immI imm)
1906   %{
1907     // OpcSEr/m, wide form: a REX_W or REX_WB prefix is always emitted first
1908     int dstenc = $dst$$reg;
1909     if (dstenc < 8) {
1910       emit_opcode(cbuf, Assembler::REX_W);
1911     } else {
1912       emit_opcode(cbuf, Assembler::REX_WB);
1913       dstenc -= 8;  // only the low three bits go into the r/m byte
1914     }
1915     // Emit primary opcode and set sign-extend bit
1916     // Check for 8-bit immediate, and set sign extend bit in opcode
1917     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1918       emit_opcode(cbuf, $primary | 0x02);
1919     } else {
1920       // 32-bit immediate
1921       emit_opcode(cbuf, $primary);
1922     }
1923     // Emit r/m byte with secondary opcode, after primary opcode.
1924     emit_rm(cbuf, 0x3, $secondary, dstenc);
1925   %}
1926 
1927   enc_class Con8or32(immI imm)
1928   %{
1929     // Emit the immediate itself: one byte if it fits in a signed byte (same range test as OpcSE)
1930     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
1931       $$$emit8$imm$$constant;
1932     } else {
1933       // 32-bit immediate
1934       $$$emit32$imm$$constant;
1935     }
1936   %}
1937 
1938   enc_class opc2_reg(rRegI dst)
1939   %{
1940     // BSWAP: secondary opcode combined with the register encoding by emit_cc
1941     emit_cc(cbuf, $secondary, $dst$$reg);
1942   %}
1943 
1944   enc_class opc3_reg(rRegI dst)
1945   %{
1946     // BSWAP: tertiary opcode combined with the register encoding by emit_cc
1947     emit_cc(cbuf, $tertiary, $dst$$reg);
1948   %}
1949 
1950   enc_class reg_opc(rRegI div)
1951   %{
1952     // INC, DEC, IDIV, IMOD, JMP indirect, ...: r/m byte carrying the secondary opcode
1953     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);  // low three bits only; no prefix emitted here
1954   %}
1955 
1956   enc_class enc_cmov(cmpOp cop)
1957   %{
1958     // CMOV: primary opcode byte, then the secondary opcode combined with the condition code
1959     $$$emit8$primary;
1960     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1961   %}
1962 
1963   enc_class enc_PartialSubtypeCheck()
1964   %{ // Slow-path subtype check on fixed registers; the roles are listed on the locals below
1965     Register Rrdi = as_Register(RDI_enc); // result register
1966     Register Rrax = as_Register(RAX_enc); // super class
1967     Register Rrcx = as_Register(RCX_enc); // killed
1968     Register Rrsi = as_Register(RSI_enc); // sub class
1969     Label miss;
1970     const bool set_cond_codes = true;  // named so the call below is self-describing
1971 
1972     MacroAssembler _masm(&cbuf);
1973     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
1974                                      NULL, &miss,
1975                                      set_cond_codes);
1976     if ($primary) {  // only when the primary opcode is nonzero: zero the result register
1977       __ xorptr(Rrdi, Rrdi);
1978     }
1979     __ bind(miss);  // miss is bound after the xor, so a jump to it skips the zeroing
1980   %}
1981 
1982   enc_class clear_avx %{
1983     debug_only(int off0 = cbuf.insts_size());  // code size before, for the size check below
1984     if (ra_->C->max_vector_size() > 16) {
1985       // Clear upper bits of YMM registers when current compiled code uses
1986       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1987       MacroAssembler _masm(&cbuf);
1988       __ vzeroupper();
1989     }
1990     debug_only(int off1 = cbuf.insts_size());  // code size after
1991     assert(off1 - off0 == clear_avx_size(), "correct size prediction");  // emitted bytes must match clear_avx_size()
1992   %}
1993 
1994   enc_class Java_To_Runtime(method meth) %{
1995     // No relocation needed: the target is loaded as a 64-bit constant
1996     MacroAssembler _masm(&cbuf);
1997     __ mov64(r10, (int64_t) $meth$$method);  // target address into r10
1998     __ call(r10);                            // call through r10
1999   %}
2000 
2001   enc_class Java_To_Interpreter(method meth)
2002   %{
2003     // CALL Java_To_Interpreter
2004     // This is the instruction starting address for relocation info.
2005     cbuf.set_insts_mark();
2006     $$$emit8$primary;
2007     // CALL directly to the runtime; the displacement is target - (insts_end + 4)
2008     emit_d32_reloc(cbuf,
2009                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2010                    runtime_call_Relocation::spec(),
2011                    RELOC_DISP32);
2012   %}
2013 
2014   enc_class Java_Static_Call(method meth)
2015   %{
2016     // JAVA STATIC CALL
2017     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2018     // determine who we intended to call.
2019     cbuf.set_insts_mark();
2020     $$$emit8$primary;
2021 
2022     if (!_method) {  // no method attached: runtime call relocation
2023       emit_d32_reloc(cbuf,
2024                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2025                      runtime_call_Relocation::spec(),
2026                      RELOC_DISP32);
2027     } else if (_optimized_virtual) {  // optimized virtual call relocation
2028       emit_d32_reloc(cbuf,
2029                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2030                      opt_virtual_call_Relocation::spec(),
2031                      RELOC_DISP32);
2032     } else {  // ordinary static call relocation
2033       emit_d32_reloc(cbuf,
2034                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2035                      static_call_Relocation::spec(),
2036                      RELOC_DISP32);
2037     }
2038     if (_method) {  // both method-attached cases above also get a stub
2039       // Emit stub for static call.
2040       CompiledStaticCall::emit_to_interp_stub(cbuf);
2041     }
2042   %}
2043 
2044   enc_class Java_Dynamic_Call(method meth) %{
2045     MacroAssembler _masm(&cbuf);
2046     __ ic_call((address)$meth$$method);  // whole call sequence is delegated to MacroAssembler::ic_call
2047   %}
2048 
2049   enc_class Java_Compiled_Call(method meth)
2050   %{
2051     // JAVA COMPILED CALL: indirect call through rax at Method::from_compiled_offset()
2052     int disp = in_bytes(Method:: from_compiled_offset());
2053 
2054     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2055     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2056 
2057     // callq *disp(%rax)
2058     cbuf.set_insts_mark();
2059     $$$emit8$primary;
2060     if (-0x80 <= disp && disp < 0x80) {  // one-byte displacement is signed: check both bounds
2061       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2062       emit_d8(cbuf, disp); // Displacement
2063     } else {
2064       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2065       emit_d32(cbuf, disp); // Displacement
2066     }
2067   %}
2068 
2069   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2070   %{
2071     // SAL, SAR, SHR: optional REX_B, primary opcode, r/m byte with secondary opcode, shift count byte
2072     int dstenc = $dst$$reg;
2073     if (dstenc >= 8) {
2074       emit_opcode(cbuf, Assembler::REX_B);
2075       dstenc -= 8;  // only the low three bits go into the r/m byte
2076     }
2077     $$$emit8$primary;
2078     emit_rm(cbuf, 0x3, $secondary, dstenc);
2079     $$$emit8$shift$$constant;
2080   %}
2081 
2082   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2083   %{
2084     // SAL, SAR, SHR, wide form: a REX_W or REX_WB prefix is always emitted first
2085     int dstenc = $dst$$reg;
2086     if (dstenc < 8) {
2087       emit_opcode(cbuf, Assembler::REX_W);
2088     } else {
2089       emit_opcode(cbuf, Assembler::REX_WB);
2090       dstenc -= 8;  // only the low three bits go into the r/m byte
2091     }
2092     $$$emit8$primary;
2093     emit_rm(cbuf, 0x3, $secondary, dstenc);
2094     $$$emit8$shift$$constant;
2095   %}
2096 
2097   enc_class load_immI(rRegI dst, immI src)
2098   %{ // load a 32-bit immediate: optional REX_B, opcode 0xB8 with the register folded in, then the constant
2099     int dstenc = $dst$$reg;
2100     if (dstenc >= 8) {
2101       emit_opcode(cbuf, Assembler::REX_B);
2102       dstenc -= 8;
2103     }
2104     emit_opcode(cbuf, 0xB8 | dstenc);  // low three bits of the register live in the opcode byte
2105     $$$emit32$src$$constant;
2106   %}
2107 
2108   enc_class load_immL(rRegL dst, immL src)
2109   %{ // load a 64-bit immediate: REX_W or REX_WB, opcode 0xB8 with the register folded in, then 8 constant bytes
2110     int dstenc = $dst$$reg;
2111     if (dstenc < 8) {
2112       emit_opcode(cbuf, Assembler::REX_W);
2113     } else {
2114       emit_opcode(cbuf, Assembler::REX_WB);
2115       dstenc -= 8;
2116     }
2117     emit_opcode(cbuf, 0xB8 | dstenc);  // low three bits of the register live in the opcode byte
2118     emit_d64(cbuf, $src$$constant);
2119   %}
2120 
2121   enc_class load_immUL32(rRegL dst, immUL32 src)
2122   %{
2123     // same byte sequence as load_immI, but this time we care about zeroes in the high word
2124     int dstenc = $dst$$reg;
2125     if (dstenc >= 8) {
2126       emit_opcode(cbuf, Assembler::REX_B);
2127       dstenc -= 8;
2128     }
2129     emit_opcode(cbuf, 0xB8 | dstenc);  // no REX_W: only a 32-bit constant follows
2130     $$$emit32$src$$constant;
2131   %}
2132 
2133   enc_class load_immL32(rRegL dst, immL32 src)
2134   %{ // long destination, 32-bit constant: REX_W or REX_WB, opcode 0xC7, r/m byte, then 4 constant bytes
2135     int dstenc = $dst$$reg;
2136     if (dstenc < 8) {
2137       emit_opcode(cbuf, Assembler::REX_W);
2138     } else {
2139       emit_opcode(cbuf, Assembler::REX_WB);
2140       dstenc -= 8;
2141     }
2142     emit_opcode(cbuf, 0xC7);
2143     emit_rm(cbuf, 0x03, 0x00, dstenc);  // mode 0x3, middle field 0, destination register
2144     $$$emit32$src$$constant;
2145   %}
2146 
2147   enc_class load_immP31(rRegP dst, immP32 src)
2148   %{
2149     // same byte sequence as load_immI, but this time we care about zeroes in the high word
2150     int dstenc = $dst$$reg;
2151     if (dstenc >= 8) {
2152       emit_opcode(cbuf, Assembler::REX_B);
2153       dstenc -= 8;
2154     }
2155     emit_opcode(cbuf, 0xB8 | dstenc);  // no REX_W: only a 32-bit constant follows
2156     $$$emit32$src$$constant;
2157   %}
2158 
2159   enc_class load_immP(rRegP dst, immP src)
2160   %{ // load a 64-bit pointer constant, with relocation info when the constant has any
2161     int dstenc = $dst$$reg;
2162     if (dstenc < 8) {
2163       emit_opcode(cbuf, Assembler::REX_W);
2164     } else {
2165       emit_opcode(cbuf, Assembler::REX_WB);
2166       dstenc -= 8;
2167     }
2168     emit_opcode(cbuf, 0xB8 | dstenc);
2169     // This next line should be generated from ADLC
2170     if ($src->constant_reloc() != relocInfo::none) {
2171       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2172     } else {
2173       emit_d64(cbuf, $src$$constant);  // plain constant: no relocation record
2174     }
2175   %}
2176 
2177   enc_class Con32(immI src)
2178   %{
2179     // Output the constant as a 32-bit immediate
2180     $$$emit32$src$$constant;
2181   %}
2182 
2183   enc_class Con32F_as_bits(immF src)
2184   %{
2185     // Output Float immediate bits: the float constant is reinterpreted as a jint, not converted
2186     jfloat jf = $src$$constant;
2187     jint jf_as_bits = jint_cast(jf);
2188     emit_d32(cbuf, jf_as_bits);
2189   %}
2190 
2191   enc_class Con16(immI src)
2192   %{
2193     // Output the constant as a 16-bit immediate
2194     $$$emit16$src$$constant;
2195   %}
2196 
2197   // How is this different from Con32??? XXX  (this one calls emit_d32 directly)
2198   enc_class Con_d32(immI src)
2199   %{
2200     emit_d32(cbuf,$src$$constant);
2201   %}
2202 
2203   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2204     // Output immediate memory reference: r/m byte (mode 0x00, last field 0x05) plus a zero 32-bit word
2205     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );  // NOTE(review): register encoding is not masked to three bits here
2206     emit_d32(cbuf, 0x00);
2207   %}
2208 
2209   enc_class lock_prefix()
2210   %{ // the prefix byte is emitted only when os::is_MP() reports true
2211     if (os::is_MP()) {
2212       emit_opcode(cbuf, 0xF0); // lock
2213     }
2214   %}
2215 
2216   enc_class REX_mem(memory mem)
2217   %{ // prefix for a memory operand: B when base >= 8, X when index >= 8, nothing when both are below 8
2218     if ($mem$$base >= 8) {
2219       if ($mem$$index < 8) {
2220         emit_opcode(cbuf, Assembler::REX_B);
2221       } else {
2222         emit_opcode(cbuf, Assembler::REX_XB);
2223       }
2224     } else {
2225       if ($mem$$index >= 8) {
2226         emit_opcode(cbuf, Assembler::REX_X);
2227       }
2228     }
2229   %}
2230 
2231   enc_class REX_mem_wide(memory mem)
2232   %{ // like REX_mem but always emits a prefix, since every variant includes W
2233     if ($mem$$base >= 8) {
2234       if ($mem$$index < 8) {
2235         emit_opcode(cbuf, Assembler::REX_WB);
2236       } else {
2237         emit_opcode(cbuf, Assembler::REX_WXB);
2238       }
2239     } else {
2240       if ($mem$$index < 8) {
2241         emit_opcode(cbuf, Assembler::REX_W);
2242       } else {
2243         emit_opcode(cbuf, Assembler::REX_WX);
2244       }
2245     }
2246   %}
2247 
2248   // for byte regs: encodings 4-7 get a plain REX, 8 and above get REX_B, 0-3 get no prefix
2249   enc_class REX_breg(rRegI reg)
2250   %{
2251     if ($reg$$reg >= 4) {
2252       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2253     }
2254   %}
2255 
2256   // for byte regs: src is the byte register (plain REX for 4-7), dst contributes the R bit
2257   enc_class REX_reg_breg(rRegI dst, rRegI src)
2258   %{
2259     if ($dst$$reg < 8) {
2260       if ($src$$reg >= 4) {
2261         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2262       }
2263     } else {
2264       if ($src$$reg < 8) {
2265         emit_opcode(cbuf, Assembler::REX_R);
2266       } else {
2267         emit_opcode(cbuf, Assembler::REX_RB);
2268       }
2269     }
2270   %}
2271 
2272   // for byte regs: reg gives R, base gives B, index gives X; plain REX when only reg in 4-7 needs it
2273   enc_class REX_breg_mem(rRegI reg, memory mem)
2274   %{
2275     if ($reg$$reg < 8) {
2276       if ($mem$$base < 8) {
2277         if ($mem$$index >= 8) {
2278           emit_opcode(cbuf, Assembler::REX_X);
2279         } else if ($reg$$reg >= 4) {  // no other bit needed, but byte encodings 4-7 still get a prefix
2280           emit_opcode(cbuf, Assembler::REX);
2281         }
2282       } else {
2283         if ($mem$$index < 8) {
2284           emit_opcode(cbuf, Assembler::REX_B);
2285         } else {
2286           emit_opcode(cbuf, Assembler::REX_XB);
2287         }
2288       }
2289     } else {
2290       if ($mem$$base < 8) {
2291         if ($mem$$index < 8) {
2292           emit_opcode(cbuf, Assembler::REX_R);
2293         } else {
2294           emit_opcode(cbuf, Assembler::REX_RX);
2295         }
2296       } else {
2297         if ($mem$$index < 8) {
2298           emit_opcode(cbuf, Assembler::REX_RB);
2299         } else {
2300           emit_opcode(cbuf, Assembler::REX_RXB);
2301         }
2302       }
2303     }
2304   %}
2305 
2306   enc_class REX_reg(rRegI reg)
2307   %{ // REX_B only when the register encoding is 8 or above; otherwise nothing is emitted
2308     if ($reg$$reg >= 8) {
2309       emit_opcode(cbuf, Assembler::REX_B);
2310     }
2311   %}
2312 
2313   enc_class REX_reg_wide(rRegI reg)
2314   %{ // always emits a prefix: REX_W, or REX_WB when the register encoding is 8 or above
2315     if ($reg$$reg < 8) {
2316       emit_opcode(cbuf, Assembler::REX_W);
2317     } else {
2318       emit_opcode(cbuf, Assembler::REX_WB);
2319     }
2320   %}
2321 
2322   enc_class REX_reg_reg(rRegI dst, rRegI src)
2323   %{ // dst >= 8 gives the R bit, src >= 8 gives the B bit; nothing when both are below 8
2324     if ($dst$$reg < 8) {
2325       if ($src$$reg >= 8) {
2326         emit_opcode(cbuf, Assembler::REX_B);
2327       }
2328     } else {
2329       if ($src$$reg < 8) {
2330         emit_opcode(cbuf, Assembler::REX_R);
2331       } else {
2332         emit_opcode(cbuf, Assembler::REX_RB);
2333       }
2334     }
2335   %}
2336 
2337   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2338   %{ // like REX_reg_reg but always emits a prefix, since every variant includes W
2339     if ($dst$$reg < 8) {
2340       if ($src$$reg < 8) {
2341         emit_opcode(cbuf, Assembler::REX_W);
2342       } else {
2343         emit_opcode(cbuf, Assembler::REX_WB);
2344       }
2345     } else {
2346       if ($src$$reg < 8) {
2347         emit_opcode(cbuf, Assembler::REX_WR);
2348       } else {
2349         emit_opcode(cbuf, Assembler::REX_WRB);
2350       }
2351     }
2352   %}
2353 
2354   enc_class REX_reg_mem(rRegI reg, memory mem)
2355   %{ // reg >= 8 gives R, base >= 8 gives B, index >= 8 gives X; nothing when all three are below 8
2356     if ($reg$$reg < 8) {
2357       if ($mem$$base < 8) {
2358         if ($mem$$index >= 8) {
2359           emit_opcode(cbuf, Assembler::REX_X);
2360         }
2361       } else {
2362         if ($mem$$index < 8) {
2363           emit_opcode(cbuf, Assembler::REX_B);
2364         } else {
2365           emit_opcode(cbuf, Assembler::REX_XB);
2366         }
2367       }
2368     } else {
2369       if ($mem$$base < 8) {
2370         if ($mem$$index < 8) {
2371           emit_opcode(cbuf, Assembler::REX_R);
2372         } else {
2373           emit_opcode(cbuf, Assembler::REX_RX);
2374         }
2375       } else {
2376         if ($mem$$index < 8) {
2377           emit_opcode(cbuf, Assembler::REX_RB);
2378         } else {
2379           emit_opcode(cbuf, Assembler::REX_RXB);
2380         }
2381       }
2382     }
2383   %}
2384 
2385   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2386   %{ // like REX_reg_mem but always emits a prefix, since every variant includes W
2387     if ($reg$$reg < 8) {
2388       if ($mem$$base < 8) {
2389         if ($mem$$index < 8) {
2390           emit_opcode(cbuf, Assembler::REX_W);
2391         } else {
2392           emit_opcode(cbuf, Assembler::REX_WX);
2393         }
2394       } else {
2395         if ($mem$$index < 8) {
2396           emit_opcode(cbuf, Assembler::REX_WB);
2397         } else {
2398           emit_opcode(cbuf, Assembler::REX_WXB);
2399         }
2400       }
2401     } else {
2402       if ($mem$$base < 8) {
2403         if ($mem$$index < 8) {
2404           emit_opcode(cbuf, Assembler::REX_WR);
2405         } else {
2406           emit_opcode(cbuf, Assembler::REX_WRX);
2407         }
2408       } else {
2409         if ($mem$$index < 8) {
2410           emit_opcode(cbuf, Assembler::REX_WRB);
2411         } else {
2412           emit_opcode(cbuf, Assembler::REX_WRXB);
2413         }
2414       }
2415     }
2416   %}
2417 
2418   enc_class reg_mem(rRegI ereg, memory mem)
2419   %{
2420     // High registers are handled in encode_RegMem; this only unpacks the operands and forwards them
2421     int reg = $ereg$$reg;
2422     int base = $mem$$base;
2423     int index = $mem$$index;
2424     int scale = $mem$$scale;
2425     int disp = $mem$$disp;
2426     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2427 
2428     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2429   %}
2430 
2431   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2432   %{
2433     int rm_byte_opcode = $rm_opcode$$constant;  // passed to encode_RegMem in the register position
2434 
2435     // High registers are handled in encode_RegMem
2436     int base = $mem$$base;
2437     int index = $mem$$index;
2438     int scale = $mem$$scale;
2439     int displace = $mem$$disp;
2440 
2441     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2442                                             // working with static
2443                                             // globals
2444     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2445                   disp_reloc);
2446   %}
2447 
2448   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2449   %{ // base-plus-displacement operand (src0 + src1) for dst, forwarded to encode_RegMem; no opcode emitted here
2450     int reg_encoding = $dst$$reg;
2451     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2452     int index        = 0x04;            // 0x04 indicates no index
2453     int scale        = 0x00;            // 0x00 indicates no scale
2454     int displace     = $src1$$constant; // 0x00 indicates no displacement
2455     relocInfo::relocType disp_reloc = relocInfo::none;
2456     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2457                   disp_reloc);
2458   %}
2459 
2460   enc_class neg_reg(rRegI dst)
2461   %{ // complete instruction: optional REX_B, opcode 0xF7, r/m byte with middle field 0x03
2462     int dstenc = $dst$$reg;
2463     if (dstenc >= 8) {
2464       emit_opcode(cbuf, Assembler::REX_B);
2465       dstenc -= 8;
2466     }
2467     // NEG $dst
2468     emit_opcode(cbuf, 0xF7);
2469     emit_rm(cbuf, 0x3, 0x03, dstenc);
2470   %}
2471 
2472   enc_class neg_reg_wide(rRegI dst)
2473   %{ // same as neg_reg, but a REX_W or REX_WB prefix is always emitted first
2474     int dstenc = $dst$$reg;
2475     if (dstenc < 8) {
2476       emit_opcode(cbuf, Assembler::REX_W);
2477     } else {
2478       emit_opcode(cbuf, Assembler::REX_WB);
2479       dstenc -= 8;
2480     }
2481     // NEG $dst
2482     emit_opcode(cbuf, 0xF7);
2483     emit_rm(cbuf, 0x3, 0x03, dstenc);
2484   %}
2485 
2486   enc_class setLT_reg(rRegI dst)
2487   %{ // byte-register destination: REX_B for encodings 8 and above, plain REX for 4-7
2488     int dstenc = $dst$$reg;
2489     if (dstenc >= 8) {
2490       emit_opcode(cbuf, Assembler::REX_B);
2491       dstenc -= 8;
2492     } else if (dstenc >= 4) {
2493       emit_opcode(cbuf, Assembler::REX);
2494     }
2495     // SETLT $dst
2496     emit_opcode(cbuf, 0x0F);
2497     emit_opcode(cbuf, 0x9C);
2498     emit_rm(cbuf, 0x3, 0x0, dstenc);
2499   %}
2500 
2501   enc_class setNZ_reg(rRegI dst)
2502   %{ // byte-register destination: REX_B for encodings 8 and above, plain REX for 4-7
2503     int dstenc = $dst$$reg;
2504     if (dstenc >= 8) {
2505       emit_opcode(cbuf, Assembler::REX_B);
2506       dstenc -= 8;
2507     } else if (dstenc >= 4) {
2508       emit_opcode(cbuf, Assembler::REX);
2509     }
2510     // SETNZ $dst
2511     emit_opcode(cbuf, 0x0F);
2512     emit_opcode(cbuf, 0x95);
2513     emit_rm(cbuf, 0x3, 0x0, dstenc);
2514   %}
2515 
2516 
2517   // Compare the longs and set -1, 0, or 1 into dst
2518   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2519   %{
2520     int src1enc = $src1$$reg;
2521     int src2enc = $src2$$reg;
2522     int dstenc = $dst$$reg;
2523 
2524     // cmpq $src1, $src2
2525     if (src1enc < 8) {
2526       if (src2enc < 8) {
2527         emit_opcode(cbuf, Assembler::REX_W);
2528       } else {
2529         emit_opcode(cbuf, Assembler::REX_WB);
2530       }
2531     } else {
2532       if (src2enc < 8) {
2533         emit_opcode(cbuf, Assembler::REX_WR);
2534       } else {
2535         emit_opcode(cbuf, Assembler::REX_WRB);
2536       }
2537     }
2538     emit_opcode(cbuf, 0x3B);
2539     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2540 
2541     // movl $dst, -1
2542     if (dstenc >= 8) {
2543       emit_opcode(cbuf, Assembler::REX_B);
2544     }
2545     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2546     emit_d32(cbuf, -1);
2547 
2548     // jl,s done
2549     emit_opcode(cbuf, 0x7C);
2550     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);  // skips setne (3) + movzbl (3), each one byte longer when prefixed
2551 
2552     // setne $dst
2553     if (dstenc >= 4) {  // byte-register destination: plain REX for 4-7, REX_B for 8 and above
2554       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2555     }
2556     emit_opcode(cbuf, 0x0F);
2557     emit_opcode(cbuf, 0x95);
2558     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2559 
2560     // movzbl $dst, $dst
2561     if (dstenc >= 4) {  // dst fills both register fields, hence REX_RB for 8 and above
2562       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2563     }
2564     emit_opcode(cbuf, 0x0F);
2565     emit_opcode(cbuf, 0xB6);
2566     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2567   %}
2568 
2569   enc_class Push_ResultXD(regD dst) %{  // NOTE(review): assumes 8 bytes at rsp were reserved earlier -- confirm at use sites
2570     MacroAssembler _masm(&cbuf);
2571     __ fstp_d(Address(rsp, 0));                     // store through the stack slot at rsp
2572     __ movdbl($dst$$XMMRegister, Address(rsp, 0));  // reload the same slot into the XMM destination
2573     __ addptr(rsp, 8);                              // release the 8-byte slot
2574   %}
2575 
2576   enc_class Push_SrcXD(regD src) %{  // leaves rsp lowered by 8; nothing here restores it
2577     MacroAssembler _masm(&cbuf);
2578     __ subptr(rsp, 8);                              // reserve an 8-byte slot
2579     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // spill the XMM source into the slot
2580     __ fld_d(Address(rsp, 0));                      // load the slot with fld_d
2581   %}
2582 
2583 
2584   // obj: object to lock
2585   // box: box address (header location) -- killed
2586   // tmp: rax -- killed
2587   // scr: rbx -- killed
2588   //
2589   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2590   // from i486.ad.  See that file for comments.
2591   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2592   // use the shorter encoding.  (Movl clears the high-order 32-bits).
       //
       // Fast_Lock emits one of three code shapes, selected by EmitSync:
       //   EmitSync & 1 : store unused_mark() into the box and compare rsp with
       //                  NULL_WORD; no locking attempt is emitted.
       //   EmitSync & 2 : optional biased_locking_enter (when UseBiasedLocking),
       //                  then a CAS of the box address into the word at
       //                  [obj+0], with a recursive-lock computation on failure.
       //   otherwise    : default shape, which first tests bit 0x02 of the word
       //                  at [obj+0] to separate inflated monitors from the
       //                  stack-locked / neutral / biased cases.
       // When _counters is non-NULL the total-entry counter is incremented first.
       // NOTE(review): the signature declares scr as a plain rRegP, so the
       // "scr: rbx" binding above is not enforced by this enc_class itself;
       // presumably the instruct that uses it pins the register -- confirm.
       // NOTE(review): the caller presumably consumes the condition flags left
       // by the last emitted instruction -- confirm against the using instruct.
2593
2594
2595   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2596   %{
2597     Register objReg = as_Register((int)$obj$$reg);
2598     Register boxReg = as_Register((int)$box$$reg);
2599     Register tmpReg = as_Register($tmp$$reg);
2600     Register scrReg = as_Register($scr$$reg);
2601     MacroAssembler masm(&cbuf);
2602
2603     // Verify uniqueness of register assignments -- necessary but not sufficient
         // (boxReg is only compared against objReg, not against tmpReg or scrReg).
2604     assert (objReg != boxReg && objReg != tmpReg &&
2605             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2606
2607     if (_counters != NULL) {
2608       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2609     }
2610     if (EmitSync & 1) {
2611         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2612         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2613         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
2614     } else
2615     if (EmitSync & 2) {
2616         Label DONE_LABEL;
2617         if (UseBiasedLocking) {
2618            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2619           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2620         }
2621         // QQQ was movl...
             // tmpReg = 1 | [obj+0]; that value is saved into the box and is
             // also the expected value for the CAS below.
2622         masm.movptr(tmpReg, 0x1);
2623         masm.orptr(tmpReg, Address(objReg, 0));
2624         masm.movptr(Address(boxReg, 0), tmpReg);
2625         if (os::is_MP()) {
2626           masm.lock();
2627         }
2628         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2629         masm.jcc(Assembler::equal, DONE_LABEL);
2630
2631         // Recursive locking
             // box = (tmpReg - rsp) & (7 - page_size).
             // NOTE(review): presumably zero exactly when the observed word
             // points into the current stack page -- confirm.
2632         masm.subptr(tmpReg, rsp);
2633         masm.andptr(tmpReg, 7 - os::vm_page_size());
2634         masm.movptr(Address(boxReg, 0), tmpReg);
2635
2636         masm.bind(DONE_LABEL);
2637         masm.nop(); // avoid branch to branch
2638     } else {
             // NOTE(review): Egress is declared but never bound or jumped to
             // in this block.
2639         Label DONE_LABEL, IsInflated, Egress;
2640
2641         masm.movptr(tmpReg, Address(objReg, 0)) ;
2642         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2643         masm.jcc   (Assembler::notZero, IsInflated) ;
2644
2645         // it's stack-locked, biased or neutral
2646         // TODO: optimize markword triage order to reduce the number of
2647         // conditional branches in the most common cases.
2648         // Beware -- there's a subtle invariant that fetch of the markword
2649         // at [FETCH], below, will never observe a biased encoding (*101b).
2650         // If this invariant is not held we'll suffer exclusion (safety) failure.
2651
2652         if (UseBiasedLocking && !UseOptoBiasInlining) {
2653           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2654           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2655         }
2656
2657         // was q will it destroy high?
             // tmpReg |= 1; the value is saved into the box and is also the
             // expected value for the CAS that installs the box address.
2658         masm.orl   (tmpReg, 1) ;
2659         masm.movptr(Address(boxReg, 0), tmpReg) ;
2660         if (os::is_MP()) { masm.lock(); }
2661         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2662         if (_counters != NULL) {
2663            masm.cond_inc32(Assembler::equal,
2664                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2665         }
2666         masm.jcc   (Assembler::equal, DONE_LABEL);
2667
2668         // Recursive locking
             // Same computation as in the EmitSync & 2 shape:
             // box = (tmpReg - rsp) & (7 - page_size).
2669         masm.subptr(tmpReg, rsp);
2670         masm.andptr(tmpReg, 7 - os::vm_page_size());
2671         masm.movptr(Address(boxReg, 0), tmpReg);
2672         if (_counters != NULL) {
2673            masm.cond_inc32(Assembler::equal,
2674                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2675         }
2676         masm.jmp   (DONE_LABEL) ;
2677
2678         masm.bind  (IsInflated) ;
2679         // It's inflated
2680
2681         // TODO: someday avoid the ST-before-CAS penalty by
2682         // relocating (deferring) the following ST.
2683         // We should also think about trying a CAS without having
2684         // fetched _owner.  If the CAS is successful we may
2685         // avoid an RTO->RTS upgrade on the $line.
2686         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2687         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2688
             // boxReg is reused from here on to hold the word fetched from
             // [obj+0], which still has bit 0x02 set (tested above).
             // NOTE(review): the "-2" in the displacements below presumably
             // cancels that tag bit so the access lands on the owner field --
             // confirm against the mark word layout.
2689         masm.mov    (boxReg, tmpReg) ;
2690         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2691         masm.testptr(tmpReg, tmpReg) ;
2692         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
2693
2694         // It's inflated and appears unlocked
             // tmpReg is zero here (the branch above was not taken), so the CAS
             // replaces a zero owner field with r15_thread.
2695         if (os::is_MP()) { masm.lock(); }
2696         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2697         // Intentional fall-through into DONE_LABEL ...
2698
2699         masm.bind  (DONE_LABEL) ;
2700         masm.nop   () ;                 // avoid jmp to jmp
2701     }
2702   %}
2703 
2704   // obj: object to unlock
2705   // box: box address (displaced header location), killed
2706   // RBX: killed tmp; cannot be obj nor box
       //
       // Fast_Unlock emits one of three code shapes, selected by EmitSync:
       //   EmitSync & 4 : only a compare of rsp with 0; no unlock is attempted.
       //   EmitSync & 8 : optional biased_locking_exit (when UseBiasedLocking),
       //                  then, unless the word saved in the box is zero, a CAS
       //                  that writes it back to [obj+0].
       //   otherwise    : default shape handling the recursive, inflated and
       //                  stack-locked cases separately.
       // In the default shape, EmitSync & 65536 removes the CheckSucc sequence
       // (CheckSucc is then bound right before DONE_LABEL), and EmitSync & 32768
       // adds a trailing nop.
2707   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2708   %{
2709
2710     Register objReg = as_Register($obj$$reg);
2711     Register boxReg = as_Register($box$$reg);
2712     Register tmpReg = as_Register($tmp$$reg);
2713     MacroAssembler masm(&cbuf);
2714
2715     if (EmitSync & 4) {
2716        masm.cmpptr(rsp, 0) ;
2717     } else
2718     if (EmitSync & 8) {
2719        Label DONE_LABEL;
2720        if (UseBiasedLocking) {
2721          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2722        }
2723
2724        // Check whether the displaced header is 0
2725        //(=> recursive unlock)
2726        masm.movptr(tmpReg, Address(boxReg, 0));
2727        masm.testptr(tmpReg, tmpReg);
2728        masm.jcc(Assembler::zero, DONE_LABEL);
2729
2730        // If not recursive lock, reset the header to displaced header
2731        if (os::is_MP()) {
2732          masm.lock();
2733        }
2734        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2735        masm.bind(DONE_LABEL);
2736        masm.nop(); // avoid branch to branch
2737     } else {
2738        Label DONE_LABEL, Stacked, CheckSucc ;
2739
2740        if (UseBiasedLocking && !UseOptoBiasInlining) {
2741          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2742        }
2743
            // tmpReg = word at [obj+0].  A zero word in the box goes straight
            // to DONE_LABEL; otherwise bit 0x02 of tmpReg selects the inflated
            // path (set) or the Stacked path (clear).
2744        masm.movptr(tmpReg, Address(objReg, 0)) ;
2745        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
2746        masm.jcc   (Assembler::zero, DONE_LABEL) ;
2747        masm.testl (tmpReg, 0x02) ;
2748        masm.jcc   (Assembler::zero, Stacked) ;
2749
2750        // It's inflated
            // NOTE(review): tmpReg still has bit 0x02 set; the "-2" in each
            // displacement below presumably cancels that tag bit -- confirm
            // against the mark word layout.
            // boxReg = (owner ^ r15_thread) | recursions; non-zero exits to
            // DONE_LABEL.  Then boxReg = cxq | EntryList; non-zero goes to
            // CheckSucc, otherwise the owner field is cleared.
2751        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2752        masm.xorptr(boxReg, r15_thread) ;
2753        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2754        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
2755        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2756        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2757        masm.jcc   (Assembler::notZero, CheckSucc) ;
2758        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2759        masm.jmp   (DONE_LABEL) ;
2760
2761        if ((EmitSync & 65536) == 0) {
2762          Label LSuccess, LGoSlowPath ;
2763          masm.bind  (CheckSucc) ;
2764          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2765          masm.jcc   (Assembler::zero, LGoSlowPath) ;
2766
2767          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
2768          // explicit ST;MEMBAR combination, but masm doesn't currently support
2769          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
2770          // are all faster when the write buffer is populated.
2771          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2772          if (os::is_MP()) {
2773             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
2774          }
2775          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2776          masm.jcc   (Assembler::notZero, LSuccess) ;
2777
              // succ is zero: try to CAS the owner field from zero (boxReg)
              // back to r15_thread; if the CAS fails, go to LSuccess.
2778          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX
2779          if (os::is_MP()) { masm.lock(); }
2780          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
2781          masm.jcc   (Assembler::notEqual, LSuccess) ;
2782          // Intentional fall-through into slow-path
2783
2784          masm.bind  (LGoSlowPath) ;
2785          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
2786          masm.jmp   (DONE_LABEL) ;
2787
2788          masm.bind  (LSuccess) ;
2789          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
2790          masm.jmp   (DONE_LABEL) ;
2791        }
2792
2793        masm.bind  (Stacked) ;
2794        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
2795        if (os::is_MP()) { masm.lock(); }
2796        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2797
2798        if (EmitSync & 65536) {
2799           masm.bind (CheckSucc) ;
2800        }
2801        masm.bind(DONE_LABEL);
2802        if (EmitSync & 32768) {
2803           masm.nop();                      // avoid branch to branch
2804        }
2805     }
2806   %}
2807 
2808 
2809   enc_class enc_rethrow()
2810   %{
         // Emits a jump to OptoRuntime::rethrow_stub(): opcode byte 0xE9
         // followed by a 32-bit displacement.  The displacement is computed
         // from cbuf.insts_end() after the opcode byte has been emitted, minus
         // the 4 bytes of the displacement itself.  It is emitted with a
         // runtime_call_Relocation in RELOC_DISP32 format.
2811     cbuf.set_insts_mark();
2812     emit_opcode(cbuf, 0xE9); // jmp entry
2813     emit_d32_reloc(cbuf,
2814                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
2815                    runtime_call_Relocation::spec(),
2816                    RELOC_DISP32);
2817   %}
2818 
2819 %}
2820 
2821 
2822 
2823 //----------FRAME--------------------------------------------------------------
2824 // Definition of frame structure and management information.
2825 //
2826 //  S T A C K   L A Y O U T    Allocators stack-slot number
2827 //                             |   (to get allocators register number
2828 //  G  Owned by    |        |  v    add OptoReg::stack0())
2829 //  r   CALLER     |        |
2830 //  o     |        +--------+      pad to even-align allocators stack-slot
2831 //  w     V        |  pad0  |        numbers; owned by CALLER
2832 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2833 //  h     ^        |   in   |  5
2834 //        |        |  args  |  4   Holes in incoming args owned by SELF
2835 //  |     |        |        |  3
2836 //  |     |        +--------+
2837 //  V     |        | old out|      Empty on Intel, window on Sparc
2838 //        |    old |preserve|      Must be even aligned.
2839 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2840 //        |        |   in   |  3   area for Intel ret address
2841 //     Owned by    |preserve|      Empty on Sparc.
2842 //       SELF      +--------+
2843 //        |        |  pad2  |  2   pad to align old SP
2844 //        |        +--------+  1
2845 //        |        | locks  |  0
2846 //        |        +--------+----> OptoReg::stack0(), even aligned
2847 //        |        |  pad1  | 11   pad to align new SP
2848 //        |        +--------+
2849 //        |        |        | 10
2850 //        |        | spills |  9   spills
2851 //        V        |        |  8   (pad0 slot for callee)
2852 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2853 //        ^        |  out   |  7
2854 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2855 //     Owned by    +--------+
2856 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2857 //        |    new |preserve|      Must be even-aligned.
2858 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2859 //        |        |        |
2860 //
2861 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2862 //         known from SELF's arguments and the Java calling convention.
2863 //         Region 6-7 is determined per call site.
2864 // Note 2: If the calling convention leaves holes in the incoming argument
2865 //         area, those holes are owned by SELF.  Holes in the outgoing area
2866 //         are owned by the CALLEE.  Holes should not be necessary in the
2867 //         incoming area, as the Java calling convention is completely under
2868 //         the control of the AD file.  Doubles can be sorted and packed to
2869 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2870 //         varargs C calling conventions.
2871 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2872 //         even aligned with pad0 as needed.
2873 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2874 //         region 6-11 is even aligned; it may be padded out more so that
2875 //         the region from SP to FP meets the minimum stack alignment.
2876 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2877 //         alignment.  Region 11, pad1, may be dynamically extended so that
2878 //         SP meets the minimum alignment.
2879 
2880 frame
2881 %{
       // Frame layout and calling-convention parameters for this port: stack
       // growth direction, the registers shared with the interpreter, slot
       // counts for locks and preserved areas, the return-address location,
       // and the register pairs used for return values.
2882   // What direction does stack grow in (assumed to be same for C & Java)
2883   stack_direction(TOWARDS_LOW);
2884
2885   // These three registers define part of the calling convention
2886   // between compiled code and the interpreter.
2887   inline_cache_reg(RAX);                // Inline Cache Register
2888   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2889                                         // calling interpreter
2890
2891   // Optional: name the operand used by cisc-spilling to access
2892   // [stack_pointer + offset]
2893   cisc_spilling_operand_name(indOffset32);
2894
2895   // Number of stack slots consumed by locking an object
2896   sync_stack_slots(2);
2897
2898   // Compiled code's Frame Pointer
2899   frame_pointer(RSP);
2900
2901   // Interpreter stores its frame pointer in a register which is
2902   // stored to the stack by I2CAdaptors.
2903   // I2CAdaptors convert from interpreted java to compiled java.
2904   interpreter_frame_pointer(RBP);
2905
2906   // Stack alignment requirement
2907   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2908
2909   // Number of stack slots between incoming argument block and the start of
2910   // a new frame.  The PROLOG must add this many slots to the stack.  The
2911   // EPILOG must remove this many slots.  amd64 needs two slots for
2912   // return address.
       // NOTE(review): the expression below yields 4 slots (plus 2 when
       // VerifyStackAtCalls is set), not 2; the extra slots beyond the return
       // address are not explained by the comment above -- confirm what they
       // hold.
2913   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
2914
2915   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2916   // for calls to C.  Supports the var-args backing area for register parms.
2917   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
2918
2919   // The after-PROLOG location of the return address.  Location of
2920   // return address specifies a type (REG or STACK) and a number
2921   // representing the register number (i.e. - use a register name) or
2922   // stack slot.
2923   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2924   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot is computed as: the sum of in_preserve_stack_slots() and
       // fixed_slots(), rounded up to stack_alignment_in_slots(), minus 2.
2925   return_addr(STACK - 2 +
2926               round_to((Compile::current()->in_preserve_stack_slots() +
2927                         Compile::current()->fixed_slots()),
2928                        stack_alignment_in_slots()));
2929
2930   // Body of function which returns an integer array locating
2931   // arguments either in registers or in stack slots.  Passed an array
2932   // of ideal registers called "sig" and a "length" count.  Stack-slot
2933   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2934   // arguments for a CALLEE.  Incoming stack arguments are
2935   // automatically biased by the preserve_stack_slots field above.
2936
2937   calling_convention
2938   %{
2939     // No difference between ingoing/outgoing just pass false
2940     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2941   %}
2942
2943   c_calling_convention
2944   %{
2945     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
2946     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
2947   %}
2948
2949   // Location of compiled Java return values.  Same as C for now.
       // lo[] and hi[] are both indexed by ideal_reg and hold the low and high
       // halves of the returned register pair; OptoReg::Bad in hi[] marks a
       // value with no high half.  The two leading 0 entries fill the indices
       // below Op_RegN (per the per-entry comments).
       // NOTE(review): the first assert requires ideal_reg >= Op_RegI, while
       // the tables place Op_RegN one index below Op_RegI, so the Op_RegN
       // entries look unreachable when asserts are enabled -- confirm intent.
2950   return_value
2951   %{
2952     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2953            "only return normal values");
2954
2955     static const int lo[Op_RegL + 1] = {
2956       0,
2957       0,
2958       RAX_num,  // Op_RegN
2959       RAX_num,  // Op_RegI
2960       RAX_num,  // Op_RegP
2961       XMM0_num, // Op_RegF
2962       XMM0_num, // Op_RegD
2963       RAX_num   // Op_RegL
2964     };
2965     static const int hi[Op_RegL + 1] = {
2966       0,
2967       0,
2968       OptoReg::Bad, // Op_RegN
2969       OptoReg::Bad, // Op_RegI
2970       RAX_H_num,    // Op_RegP
2971       OptoReg::Bad, // Op_RegF
2972       XMM0b_num,    // Op_RegD
2973       RAX_H_num     // Op_RegL
2974     };
2975     // Excluded flags and vector registers.
2976     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
2977     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
2978   %}
2979 %}
2980 
2981 //----------ATTRIBUTES---------------------------------------------------------
     // Each attribute is declared with a value in parentheses.
     // NOTE(review): that value is presumably the default used when a
     // definition does not set the attribute itself (for example, operand
     // immI2 declares no op_cost) -- confirm against the ADLC documentation.
2982 //----------Operand Attributes-------------------------------------------------
2983 op_attrib op_cost(0);        // Required cost attribute
2984
2985 //----------Instruction Attributes---------------------------------------------
2986 ins_attrib ins_cost(100);       // Required cost attribute
2987 ins_attrib ins_size(8);         // Required size attribute (in bits)
2988 ins_attrib ins_short_branch(0); // Required flag: is this instruction
2989                                 // a non-matching short branch variant
2990                                 // of some long branch?
2991 ins_attrib ins_alignment(1);    // Required alignment attribute (must
2992                                 // be a power of 2) specifies the
2993                                 // alignment that some part of the
2994                                 // instruction (not necessarily the
2995                                 // start) requires.  If > 1, a
2996                                 // compute_padding() function must be
2997                                 // provided for the instruction
2998 
2999 //----------OPERANDS-----------------------------------------------------------
3000 // Operand definitions must precede instruction definitions for correct parsing
3001 // in the ADLC because operands constitute user defined types which are used in
3002 // instruction definitions.
3003 
3004 //----------Simple Operands----------------------------------------------------
3005 // Immediate Operands
3006 // Integer Immediate
     // Matches any ConI node (no predicate restricts the value); op_cost 10.
3007 operand immI()
3008 %{
3009   match(ConI);
3010
3011   op_cost(10);
3012   format %{ %}
3013   interface(CONST_INTER);
3014 %}
3015 
3016 // Constant for test vs zero
     // Matches a ConI only when n->get_int() == 0; op_cost 0.
3017 operand immI0()
3018 %{
3019   predicate(n->get_int() == 0);
3020   match(ConI);
3021
3022   op_cost(0);
3023   format %{ %}
3024   interface(CONST_INTER);
3025 %}
3026 
3027 // Constant for increment
     // Matches a ConI only when n->get_int() == 1; op_cost 0.
3028 operand immI1()
3029 %{
3030   predicate(n->get_int() == 1);
3031   match(ConI);
3032
3033   op_cost(0);
3034   format %{ %}
3035   interface(CONST_INTER);
3036 %}
3037 
3038 // Constant for decrement
     // Matches a ConI only when n->get_int() == -1; op_cost 0.
3039 operand immI_M1()
3040 %{
3041   predicate(n->get_int() == -1);
3042   match(ConI);
3043
3044   op_cost(0);
3045   format %{ %}
3046   interface(CONST_INTER);
3047 %}
3048 
3049 // Valid scale values for addressing modes
     // Matches a ConI whose value is in the range 0..3 inclusive.  No op_cost
     // is declared in this operand.
3050 operand immI2()
3051 %{
3052   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3053   match(ConI);
3054
3055   format %{ %}
3056   interface(CONST_INTER);
3057 %}
3058 
3059 operand immI8()
3060 %{
       // Matches a ConI whose value is in [-0x80, 0x7F], i.e. fits in a
       // signed 8-bit field; op_cost 5.
3061   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
3062   match(ConI);
3063
3064   op_cost(5);
3065   format %{ %}
3066   interface(CONST_INTER);
3067 %}
3068 
3069 operand immI16()
3070 %{
       // Matches a ConI whose value is in [-32768, 32767], i.e. fits in a
       // signed 16-bit field; op_cost 10.
3071   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3072   match(ConI);
3073
3074   op_cost(10);
3075   format %{ %}
3076   interface(CONST_INTER);
3077 %}
3078 
3079 // Constant for long shifts
     // Matches a ConI only when n->get_int() == 32; op_cost 0.
3080 operand immI_32()
3081 %{
3082   predicate( n->get_int() == 32 );
3083   match(ConI);
3084
3085   op_cost(0);
3086   format %{ %}
3087   interface(CONST_INTER);
3088 %}
3089 
3090 // Constant for long shifts
     // Matches a ConI only when n->get_int() == 64; op_cost 0.
3091 operand immI_64()
3092 %{
3093   predicate( n->get_int() == 64 );
3094   match(ConI);
3095
3096   op_cost(0);
3097   format %{ %}
3098   interface(CONST_INTER);
3099 %}
3100 
3101 // Pointer Immediate
     // Matches any ConP node (no predicate restricts the value); op_cost 10.
3102 operand immP()
3103 %{
3104   match(ConP);
3105
3106   op_cost(10);
3107   format %{ %}
3108   interface(CONST_INTER);
3109 %}
3110 
3111 // NULL Pointer Immediate
     // Matches a ConP only when n->get_ptr() == 0; op_cost 5.
3112 operand immP0()
3113 %{
3114   predicate(n->get_ptr() == 0);
3115   match(ConP);
3116
3117   op_cost(5);
3118   format %{ %}
3119   interface(CONST_INTER);
3120 %}
3121 
3122 // Compressed (narrow) Pointer Immediate
     // Matches any ConN node (no predicate restricts the value); op_cost 10.
3123 operand immN() %{
3124   match(ConN);
3125
3126   op_cost(10);
3127   format %{ %}
3128   interface(CONST_INTER);
3129 %}
3130 
3131 operand immNKlass() %{
       // Matches any ConNKlass node (no predicate restricts the value);
       // op_cost 10.
3132   match(ConNKlass);
3133
3134   op_cost(10);
3135   format %{ %}
3136   interface(CONST_INTER);
3137 %}
3138 
3139 // NULL Compressed (narrow) Pointer Immediate
     // Matches a ConN only when n->get_narrowcon() == 0; op_cost 5.
3140 operand immN0() %{
3141   predicate(n->get_narrowcon() == 0);
3142   match(ConN);
3143
3144   op_cost(5);
3145   format %{ %}
3146   interface(CONST_INTER);
3147 %}
3148 
3149 operand immP31()
3150 %{
       // Matches a ConP that needs no relocation (reloc() == relocInfo::none)
       // and whose value shifted right by 31 is zero, i.e. no bit at or above
       // bit 31 is set; op_cost 5.
3151   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3152             && (n->get_ptr() >> 31) == 0);
3153   match(ConP);
3154
3155   op_cost(5);
3156   format %{ %}
3157   interface(CONST_INTER);
3158 %}
3159 
3160 
3161 // Long Immediate
     // Matches any ConL node (no predicate restricts the value); op_cost 20.
3162 operand immL()
3163 %{
3164   match(ConL);
3165
3166   op_cost(20);
3167   format %{ %}
3168   interface(CONST_INTER);
3169 %}
3170 
3171 // Long Immediate 8-bit
     // Matches a ConL whose value is in [-0x80, 0x7F]; op_cost 5.
3172 operand immL8()
3173 %{
3174   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3175   match(ConL);
3176
3177   op_cost(5);
3178   format %{ %}
3179   interface(CONST_INTER);
3180 %}
3181 
3182 // Long Immediate 32-bit unsigned
     // Matches a ConL whose value is unchanged by a cast to unsigned int,
     // i.e. lies in 0 .. 0xFFFFFFFF; op_cost 10.
3183 operand immUL32()
3184 %{
3185   predicate(n->get_long() == (unsigned int) (n->get_long()));
3186   match(ConL);
3187
3188   op_cost(10);
3189   format %{ %}
3190   interface(CONST_INTER);
3191 %}
3192 
3193 // Long Immediate 32-bit signed
     // Matches a ConL whose value is unchanged by a cast to int, i.e. is
     // representable as a signed 32-bit value; op_cost 15.
3194 operand immL32()
3195 %{
3196   predicate(n->get_long() == (int) (n->get_long()));
3197   match(ConL);
3198
3199   op_cost(15);
3200   format %{ %}
3201   interface(CONST_INTER);
3202 %}
3203 
3204 // Long Immediate zero
     // Matches a ConL only when n->get_long() == 0; op_cost 10.
3205 operand immL0()
3206 %{
3207   predicate(n->get_long() == 0L);
3208   match(ConL);
3209
3210   op_cost(10);
3211   format %{ %}
3212   interface(CONST_INTER);
3213 %}
3214 
3215 // Constant for increment
     // Matches a ConL only when n->get_long() == 1.  No op_cost is declared
     // in this operand.
3216 operand immL1()
3217 %{
3218   predicate(n->get_long() == 1);
3219   match(ConL);
3220
3221   format %{ %}
3222   interface(CONST_INTER);
3223 %}
3224 
3225 // Constant for decrement
     // Matches a ConL only when n->get_long() == -1.  No op_cost is declared
     // in this operand.
3226 operand immL_M1()
3227 %{
3228   predicate(n->get_long() == -1);
3229   match(ConL);
3230
3231   format %{ %}
3232   interface(CONST_INTER);
3233 %}
3234 
3235 // Long Immediate: the value 10
     // Matches a ConL only when n->get_long() == 10.  No op_cost is declared
     // in this operand.
3236 operand immL10()
3237 %{
3238   predicate(n->get_long() == 10);
3239   match(ConL);
3240
3241   format %{ %}
3242   interface(CONST_INTER);
3243 %}
3244 
3245 // Long immediate from 0 to 127.
3246 // Used for a shorter form of long mul by 10.
     // Matches a ConL whose value is in [0, 0x7F]; op_cost 10.
3247 operand immL_127()
3248 %{
3249   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3250   match(ConL);
3251
3252   op_cost(10);
3253   format %{ %}
3254   interface(CONST_INTER);
3255 %}
3256 
3257 // Long Immediate: low 32-bit mask
     // Matches a ConL only when n->get_long() == 0xFFFFFFFF; op_cost 20.
3258 operand immL_32bits()
3259 %{
3260   predicate(n->get_long() == 0xFFFFFFFFL);
3261   match(ConL);
3262   op_cost(20);
3263
3264   format %{ %}
3265   interface(CONST_INTER);
3266 %}
3267 
3268 // Float Immediate zero
     // Matches a ConF only when jint_cast(n->getf()) == 0; op_cost 5.
     // NOTE(review): jint_cast presumably reinterprets the float's bits, in
     // which case only +0.0f matches and -0.0f does not -- confirm.
3269 operand immF0()
3270 %{
3271   predicate(jint_cast(n->getf()) == 0);
3272   match(ConF);
3273
3274   op_cost(5);
3275   format %{ %}
3276   interface(CONST_INTER);
3277 %}
3278 
3279 // Float Immediate
     // Matches any ConF node (no predicate restricts the value); op_cost 15.
3280 operand immF()
3281 %{
3282   match(ConF);
3283
3284   op_cost(15);
3285   format %{ %}
3286   interface(CONST_INTER);
3287 %}
3288 
3289 // Double Immediate zero
     // Matches a ConD only when jlong_cast(n->getd()) == 0; op_cost 5.
     // NOTE(review): jlong_cast presumably reinterprets the double's bits, in
     // which case only +0.0 matches and -0.0 does not -- confirm.
3290 operand immD0()
3291 %{
3292   predicate(jlong_cast(n->getd()) == 0);
3293   match(ConD);
3294
3295   op_cost(5);
3296   format %{ %}
3297   interface(CONST_INTER);
3298 %}
3299 
3300 // Double Immediate
     // Matches any ConD node (no predicate restricts the value); op_cost 15.
3301 operand immD()
3302 %{
3303   match(ConD);
3304
3305   op_cost(15);
3306   format %{ %}
3307   interface(CONST_INTER);
3308 %}
3309 
3310 // Immediates for special shifts (sign extend)
3311
3312 // Integer constant 16
     // Matches a ConI only when n->get_int() == 16.  No op_cost is declared
     // in this operand.
3313 operand immI_16()
3314 %{
3315   predicate(n->get_int() == 16);
3316   match(ConI);
3317
3318   format %{ %}
3319   interface(CONST_INTER);
3320 %}
3321 
3322 operand immI_24()
3323 %{
       // Matches a ConI only when n->get_int() == 24.  No op_cost is declared
       // in this operand.
3324   predicate(n->get_int() == 24);
3325   match(ConI);
3326
3327   format %{ %}
3328   interface(CONST_INTER);
3329 %}
3330 
3331 // Constant for byte-wide masking
     // Matches a ConI only when n->get_int() == 255 (0xFF).  No op_cost is
     // declared in this operand.
3332 operand immI_255()
3333 %{
3334   predicate(n->get_int() == 255);
3335   match(ConI);
3336
3337   format %{ %}
3338   interface(CONST_INTER);
3339 %}
3340 
3341 // Constant for short-wide masking
     // Matches a ConI only when n->get_int() == 65535 (0xFFFF).  No op_cost is
     // declared in this operand.
3342 operand immI_65535()
3343 %{
3344   predicate(n->get_int() == 65535);
3345   match(ConI);
3346
3347   format %{ %}
3348   interface(CONST_INTER);
3349 %}
3350 
3351 // Constant for byte-wide masking
     // Matches a ConL only when n->get_long() == 255 (0xFF).  No op_cost is
     // declared in this operand.
3352 operand immL_255()
3353 %{
3354   predicate(n->get_long() == 255);
3355   match(ConL);
3356
3357   format %{ %}
3358   interface(CONST_INTER);
3359 %}
3360 
3361 // Constant for short-wide masking
     // Matches a ConL only when n->get_long() == 65535 (0xFFFF).  No op_cost is
     // declared in this operand.
3362 operand immL_65535()
3363 %{
3364   predicate(n->get_long() == 65535);
3365   match(ConL);
3366
3367   format %{ %}
3368   interface(CONST_INTER);
3369 %}
3370 
3371 // Register Operands
3372 // Integer Register
     // Allocated from register class int_reg.  Matches RegI and also the
     // fixed-register operands rax_RegI, rbx_RegI, rcx_RegI, rdx_RegI and
     // rdi_RegI.
3373 operand rRegI()
3374 %{
3375   constraint(ALLOC_IN_RC(int_reg));
3376   match(RegI);
3377
3378   match(rax_RegI);
3379   match(rbx_RegI);
3380   match(rcx_RegI);
3381   match(rdx_RegI);
3382   match(rdi_RegI);
3383
3384   format %{ %}
3385   interface(REG_INTER);
3386 %}
3387 
3388 // Special Registers
     // Integer operand allocated from register class int_rax_reg.  Matches
     // RegI and rRegI; its format string is "RAX".
3389 operand rax_RegI()
3390 %{
3391   constraint(ALLOC_IN_RC(int_rax_reg));
3392   match(RegI);
3393   match(rRegI);
3394
3395   format %{ "RAX" %}
3396   interface(REG_INTER);
3397 %}
3398 
3399 // Special Registers
     // Integer operand allocated from register class int_rbx_reg.  Matches
     // RegI and rRegI; its format string is "RBX".
3400 operand rbx_RegI()
3401 %{
3402   constraint(ALLOC_IN_RC(int_rbx_reg));
3403   match(RegI);
3404   match(rRegI);
3405
3406   format %{ "RBX" %}
3407   interface(REG_INTER);
3408 %}
3409 
3410 operand rcx_RegI()
3411 %{
       // Integer operand allocated from register class int_rcx_reg.  Matches
       // RegI and rRegI; its format string is "RCX".
3412   constraint(ALLOC_IN_RC(int_rcx_reg));
3413   match(RegI);
3414   match(rRegI);
3415
3416   format %{ "RCX" %}
3417   interface(REG_INTER);
3418 %}
3419 
3420 operand rdx_RegI()
3421 %{
       // Integer operand allocated from register class int_rdx_reg.  Matches
       // RegI and rRegI; its format string is "RDX".
3422   constraint(ALLOC_IN_RC(int_rdx_reg));
3423   match(RegI);
3424   match(rRegI);
3425
3426   format %{ "RDX" %}
3427   interface(REG_INTER);
3428 %}
3429 
3430 operand rdi_RegI()
3431 %{
       // Integer operand allocated from register class int_rdi_reg.  Matches
       // RegI and rRegI; its format string is "RDI".
3432   constraint(ALLOC_IN_RC(int_rdi_reg));
3433   match(RegI);
3434   match(rRegI);
3435
3436   format %{ "RDI" %}
3437   interface(REG_INTER);
3438 %}
3439 
3440 operand no_rcx_RegI()
3441 %{
       // Integer operand allocated from register class int_no_rcx_reg.
       // Matches RegI plus rax_RegI, rbx_RegI, rdx_RegI and rdi_RegI
       // (rcx_RegI is not listed).
3442   constraint(ALLOC_IN_RC(int_no_rcx_reg));
3443   match(RegI);
3444   match(rax_RegI);
3445   match(rbx_RegI);
3446   match(rdx_RegI);
3447   match(rdi_RegI);
3448
3449   format %{ %}
3450   interface(REG_INTER);
3451 %}
3452 
3453 operand no_rax_rdx_RegI()
3454 %{
       // Integer operand allocated from register class int_no_rax_rdx_reg.
       // Matches RegI plus rbx_RegI, rcx_RegI and rdi_RegI (rax_RegI and
       // rdx_RegI are not listed).
3455   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
3456   match(RegI);
3457   match(rbx_RegI);
3458   match(rcx_RegI);
3459   match(rdi_RegI);
3460
3461   format %{ %}
3462   interface(REG_INTER);
3463 %}
3464 
3465 // Pointer Register
     // Allocated from register class any_reg.  Matches RegP, the
     // fixed-register pointer operands listed below (including r15_RegP),
     // and rRegP.
3466 operand any_RegP()
3467 %{
3468   constraint(ALLOC_IN_RC(any_reg));
3469   match(RegP);
3470   match(rax_RegP);
3471   match(rbx_RegP);
3472   match(rdi_RegP);
3473   match(rsi_RegP);
3474   match(rbp_RegP);
3475   match(r15_RegP);
3476   match(rRegP);
3477
3478   format %{ %}
3479   interface(REG_INTER);
3480 %}
3481 
3482 operand rRegP()
3483 %{
       // General pointer operand allocated from register class ptr_reg.
       // Matches RegP and the fixed-register pointer operands listed below.
3484   constraint(ALLOC_IN_RC(ptr_reg));
3485   match(RegP);
3486   match(rax_RegP);
3487   match(rbx_RegP);
3488   match(rdi_RegP);
3489   match(rsi_RegP);
3490   match(rbp_RegP);
3491   match(r15_RegP);  // See Q&A below about r15_RegP.
3492
3493   format %{ %}
3494   interface(REG_INTER);
3495 %}
3496 
3497 operand rRegN() %{
       // Operand for RegN values; allocated from register class int_reg, the
       // same class rRegI uses.
3498   constraint(ALLOC_IN_RC(int_reg));
3499   match(RegN);
3500
3501   format %{ %}
3502   interface(REG_INTER);
3503 %}
3504 
3505 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3506 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3507 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3508 // The output of an instruction is controlled by the allocator, which respects
3509 // register class masks, not match rules.  Unless an instruction mentions
3510 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3511 // by the allocator as an input.
3512 
3513 operand no_rax_RegP()
3514 %{
       // Pointer operand allocated from register class ptr_no_rax_reg.
       // Matches RegP plus rbx_RegP, rsi_RegP and rdi_RegP.
3515   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
3516   match(RegP);
3517   match(rbx_RegP);
3518   match(rsi_RegP);
3519   match(rdi_RegP);
3520
3521   format %{ %}
3522   interface(REG_INTER);
3523 %}
3524 
3525 operand no_rbp_RegP()
3526 %{
       // Pointer operand allocated from register class ptr_no_rbp_reg.
       // Matches RegP plus rbx_RegP, rsi_RegP and rdi_RegP.
3527   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
3528   match(RegP);
3529   match(rbx_RegP);
3530   match(rsi_RegP);
3531   match(rdi_RegP);
3532
3533   format %{ %}
3534   interface(REG_INTER);
3535 %}
3536 
3537 operand no_rax_rbx_RegP()
3538 %{
       // Pointer operand allocated from register class ptr_no_rax_rbx_reg.
       // Matches RegP plus rsi_RegP and rdi_RegP.
3539   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
3540   match(RegP);
3541   match(rsi_RegP);
3542   match(rdi_RegP);
3543
3544   format %{ %}
3545   interface(REG_INTER);
3546 %}
3547 
3548 // Special Registers
3549 // Return a pointer value
     // Pointer operand allocated from register class ptr_rax_reg.  Matches
     // RegP and rRegP.
3550 operand rax_RegP()
3551 %{
3552   constraint(ALLOC_IN_RC(ptr_rax_reg));
3553   match(RegP);
3554   match(rRegP);
3555
3556   format %{ %}
3557   interface(REG_INTER);
3558 %}
3559 
3560 // Special Registers
3561 // Return a compressed pointer value
     // RegN operand allocated from register class int_rax_reg.  Matches RegN
     // and rRegN.
3562 operand rax_RegN()
3563 %{
3564   constraint(ALLOC_IN_RC(int_rax_reg));
3565   match(RegN);
3566   match(rRegN);
3567
3568   format %{ %}
3569   interface(REG_INTER);
3570 %}
3571 
3572 // Used in AtomicAdd
     // Pointer operand allocated from register class ptr_rbx_reg.  Matches
     // RegP and rRegP.
3573 operand rbx_RegP()
3574 %{
3575   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3576   match(RegP);
3577   match(rRegP);
3578
3579   format %{ %}
3580   interface(REG_INTER);
3581 %}
3582 
3583 operand rsi_RegP()
3584 %{
       // Pointer operand allocated from register class ptr_rsi_reg.  Matches
       // RegP and rRegP.
3585   constraint(ALLOC_IN_RC(ptr_rsi_reg));
3586   match(RegP);
3587   match(rRegP);
3588
3589   format %{ %}
3590   interface(REG_INTER);
3591 %}
3592 
3593 // Used in rep stosq
     // Pointer operand allocated from register class ptr_rdi_reg.  Matches
     // RegP and rRegP.
3594 operand rdi_RegP()
3595 %{
3596   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3597   match(RegP);
3598   match(rRegP);
3599
3600   format %{ %}
3601   interface(REG_INTER);
3602 %}
3603 
3604 operand rbp_RegP()
3605 %{
       // Pointer operand allocated from register class ptr_rbp_reg.  Matches
       // RegP and rRegP.
3606   constraint(ALLOC_IN_RC(ptr_rbp_reg));
3607   match(RegP);
3608   match(rRegP);
3609
3610   format %{ %}
3611   interface(REG_INTER);
3612 %}
3613 
3614 operand r15_RegP()
3615 %{
       // Pointer operand allocated from register class ptr_r15_reg.  Matches
       // RegP and rRegP.  The Q&A comment preceding no_rax_RegP in this file
       // describes r15 as the read-only TLS register.
3616   constraint(ALLOC_IN_RC(ptr_r15_reg));
3617   match(RegP);
3618   match(rRegP);
3619
3620   format %{ %}
3621   interface(REG_INTER);
3622 %}
3623 
3624 operand rRegL()
3625 %{
       // General long operand allocated from register class long_reg.
       // Matches RegL plus rax_RegL and rdx_RegL.
3626   constraint(ALLOC_IN_RC(long_reg));
3627   match(RegL);
3628   match(rax_RegL);
3629   match(rdx_RegL);
3630
3631   format %{ %}
3632   interface(REG_INTER);
3633 %}
3634 
3635 // Special Registers
     // Long operand allocated from register class long_no_rax_rdx_reg.
     // Matches RegL and rRegL.
3636 operand no_rax_rdx_RegL()
3637 %{
3638   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3639   match(RegL);
3640   match(rRegL);
3641
3642   format %{ %}
3643   interface(REG_INTER);
3644 %}
3645 
3646 operand no_rax_RegL()  // long register operand; see the note on the constraint below
3647 %{
3648   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although rdx_RegL is matched below - confirm this is intended
3649   match(RegL);
3650   match(rRegL);
3651   match(rdx_RegL);
3652 
3653   format %{ %}
3654   interface(REG_INTER);  // plain register interface
3655 %}
3656 
3657 operand no_rcx_RegL()  // long register operand allocated from the long_no_rcx_reg class
3658 %{
3659   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3660   match(RegL);
3661   match(rRegL);  // also matches the general long operand
3662 
3663   format %{ %}
3664   interface(REG_INTER);  // plain register interface
3665 %}
3666 
3667 operand rax_RegL()  // long register operand confined to the long_rax_reg class
3668 %{
3669   constraint(ALLOC_IN_RC(long_rax_reg));
3670   match(RegL);
3671   match(rRegL);  // also matches the general long operand
3672 
3673   format %{ "RAX" %}
3674   interface(REG_INTER);  // plain register interface
3675 %}
3676 
3677 operand rcx_RegL()  // long register operand confined to the long_rcx_reg class
3678 %{
3679   constraint(ALLOC_IN_RC(long_rcx_reg));
3680   match(RegL);
3681   match(rRegL);  // also matches the general long operand
3682 
3683   format %{ %}
3684   interface(REG_INTER);  // plain register interface
3685 %}
3686 
3687 operand rdx_RegL()  // long register operand confined to the long_rdx_reg class
3688 %{
3689   constraint(ALLOC_IN_RC(long_rdx_reg));
3690   match(RegL);
3691   match(rRegL);  // also matches the general long operand
3692 
3693   format %{ %}
3694   interface(REG_INTER);  // plain register interface
3695 %}
3696 
3697 // Flags register operand, used as output of compare instructions
3698 operand rFlagsReg()
3699 %{
3700   constraint(ALLOC_IN_RC(int_flags));  // allocated from the int_flags class
3701   match(RegFlags);
3702 
3703   format %{ "RFLAGS" %}
3704   interface(REG_INTER);  // plain register interface
3705 %}
3706 
3707 // Flags register operand, used as output of FLOATING POINT compare instructions
3708 operand rFlagsRegU()
3709 %{
3710   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg; only the operand type differs
3711   match(RegFlags);
3712 
3713   format %{ "RFLAGS_U" %}
3714   interface(REG_INTER);  // plain register interface
3715 %}
3716 
3717 operand rFlagsRegUCF() %{
3718   constraint(ALLOC_IN_RC(int_flags));  // flags operand variant, allocated from int_flags
3719   match(RegFlags);
3720   predicate(false);  // constant-false predicate: this match rule itself is never selected
3721 
3722   format %{ "RFLAGS_U_CF" %}
3723   interface(REG_INTER);  // plain register interface
3724 %}
3725 
3726 // Float register operand, allocated from the float_reg class
3727 operand regF()
3728 %{
3729   constraint(ALLOC_IN_RC(float_reg));
3730   match(RegF);
3731 
3732   format %{ %}
3733   interface(REG_INTER);  // plain register interface
3734 %}
3735 
3736 // Double register operand, allocated from the double_reg class
3737 operand regD()
3738 %{
3739   constraint(ALLOC_IN_RC(double_reg));
3740   match(RegD);
3741 
3742   format %{ %}
3743   interface(REG_INTER);  // plain register interface
3744 %}
3745 
3746 //----------Memory Operands----------------------------------------------------
3747 // Direct Memory Operand
3748 // operand direct(immP addr)
3749 // %{
3750 //   match(addr);
3751 
3752 //   format %{ "[$addr]" %}
3753 //   interface(MEMORY_INTER) %{
3754 //     base(0xFFFFFFFF);
3755 //     index(0x4);
3756 //     scale(0x0);
3757 //     disp($addr);
3758 //   %}
3759 // %}
3760 
3761 // Indirect Memory Operand: address is the pointer register alone, [reg]
3762 operand indirect(any_RegP reg)
3763 %{
3764   constraint(ALLOC_IN_RC(ptr_reg));
3765   match(reg);
3766 
3767   format %{ "[$reg]" %}
3768   interface(MEMORY_INTER) %{
3769     base($reg);
3770     index(0x4);  // 0x4 = no index
3771     scale(0x0);  // no scale
3772     disp(0x0);   // no displacement
3773   %}
3774 %}
3775 
3776 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8
3777 operand indOffset8(any_RegP reg, immL8 off)
3778 %{
3779   constraint(ALLOC_IN_RC(ptr_reg));
3780   match(AddP reg off);
3781 
3782   format %{ "[$reg + $off (8-bit)]" %}
3783   interface(MEMORY_INTER) %{
3784     base($reg);
3785     index(0x4);  // 0x4 = no index
3786     scale(0x0);  // no scale
3787     disp($off);
3788   %}
3789 %}
3790 
3791 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32
3792 operand indOffset32(any_RegP reg, immL32 off)
3793 %{
3794   constraint(ALLOC_IN_RC(ptr_reg));
3795   match(AddP reg off);
3796 
3797   format %{ "[$reg + $off (32-bit)]" %}
3798   interface(MEMORY_INTER) %{
3799     base($reg);
3800     index(0x4);  // 0x4 = no index
3801     scale(0x0);  // no scale
3802     disp($off);
3803   %}
3804 %}
3805 
3806 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
3807 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3808 %{
3809   constraint(ALLOC_IN_RC(ptr_reg));
3810   match(AddP (AddP reg lreg) off);
3811 
3812   op_cost(10);
3813   format %{"[$reg + $off + $lreg]" %}
3814   interface(MEMORY_INTER) %{
3815     base($reg);
3816     index($lreg);
3817     scale(0x0);  // index is not scaled
3818     disp($off);
3819   %}
3820 %}
3821 
3822 // Indirect Memory Plus Index Register Operand (no offset): [reg + lreg]
3823 operand indIndex(any_RegP reg, rRegL lreg)
3824 %{
3825   constraint(ALLOC_IN_RC(ptr_reg));
3826   match(AddP reg lreg);
3827 
3828   op_cost(10);
3829   format %{"[$reg + $lreg]" %}
3830   interface(MEMORY_INTER) %{
3831     base($reg);
3832     index($lreg);
3833     scale(0x0);  // index is not scaled
3834     disp(0x0);   // no displacement
3835   %}
3836 %}
3837 
3838 // Indirect Memory Plus Scaled Index Register: [reg + (lreg << scale)]
3839 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3840 %{
3841   constraint(ALLOC_IN_RC(ptr_reg));
3842   match(AddP reg (LShiftL lreg scale));
3843 
3844   op_cost(10);
3845   format %{"[$reg + $lreg << $scale]" %}
3846   interface(MEMORY_INTER) %{
3847     base($reg);
3848     index($lreg);
3849     scale($scale);  // shift amount taken from the matched LShiftL
3850     disp(0x0);      // no displacement
3851   %}
3852 %}
3853 
3854 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
3855 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3856 %{
3857   constraint(ALLOC_IN_RC(ptr_reg));
3858   match(AddP (AddP reg (LShiftL lreg scale)) off);
3859 
3860   op_cost(10);
3861   format %{"[$reg + $off + $lreg << $scale]" %}
3862   interface(MEMORY_INTER) %{
3863     base($reg);
3864     index($lreg);
3865     scale($scale);  // shift amount taken from the matched LShiftL
3866     disp($off);
3867   %}
3868 %}
3869 
3870 // Indirect Memory Plus Scaled Positive (int, converted to long) Index Register Plus Offset Operand
3871 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3872 %{
3873   constraint(ALLOC_IN_RC(ptr_reg));
3874   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound must be >= 0; presumably the ConvI2L node - verify input layout
3875   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3876 
3877   op_cost(10);
3878   format %{"[$reg + $off + $idx << $scale]" %}
3879   interface(MEMORY_INTER) %{
3880     base($reg);
3881     index($idx);    // the int register itself is used as the index
3882     scale($scale);
3883     disp($off);
3884   %}
3885 %}
3886 
3887 // Indirect Narrow Oop Plus Offset Operand: [R12 + (reg << 3) + off]
3888 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
3889 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
3890 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3891   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only when the narrow-oop shift equals times_8
3892   constraint(ALLOC_IN_RC(ptr_reg));
3893   match(AddP (DecodeN reg) off);
3894 
3895   op_cost(10);
3896   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3897   interface(MEMORY_INTER) %{
3898     base(0xc); // R12
3899     index($reg);  // the narrow register is the index
3900     scale(0x3);   // matches the "<< 3" shown in the format string
3901     disp($off);
3902   %}
3903 %}
3904 
3905 // Indirect Memory Operand through a decoded narrow oop: [reg]
3906 operand indirectNarrow(rRegN reg)
3907 %{
3908   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3909   constraint(ALLOC_IN_RC(ptr_reg));
3910   match(DecodeN reg);
3911 
3912   format %{ "[$reg]" %}
3913   interface(MEMORY_INTER) %{
3914     base($reg);  // the narrow register itself serves as base
3915     index(0x4);  // 0x4 = no index
3916     scale(0x0);  // no scale
3917     disp(0x0);   // no displacement
3918   %}
3919 %}
3920 
3921 // Indirect Memory Plus Short Offset Operand through a decoded narrow oop: [reg + off]
3922 operand indOffset8Narrow(rRegN reg, immL8 off)
3923 %{
3924   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3925   constraint(ALLOC_IN_RC(ptr_reg));
3926   match(AddP (DecodeN reg) off);
3927 
3928   format %{ "[$reg + $off (8-bit)]" %}
3929   interface(MEMORY_INTER) %{
3930     base($reg);  // the narrow register itself serves as base
3931     index(0x4);  // 0x4 = no index
3932     scale(0x0);  // no scale
3933     disp($off);
3934   %}
3935 %}
3936 
3937 // Indirect Memory Plus Long Offset Operand through a decoded narrow oop: [reg + off]
3938 operand indOffset32Narrow(rRegN reg, immL32 off)
3939 %{
3940   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3941   constraint(ALLOC_IN_RC(ptr_reg));
3942   match(AddP (DecodeN reg) off);
3943 
3944   format %{ "[$reg + $off (32-bit)]" %}
3945   interface(MEMORY_INTER) %{
3946     base($reg);  // the narrow register itself serves as base
3947     index(0x4);  // 0x4 = no index
3948     scale(0x0);  // no scale
3949     disp($off);
3950   %}
3951 %}
3952 
3953 // Indirect Memory Plus Index Register Plus Offset Operand through a decoded narrow oop: [reg + lreg + off]
3954 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3955 %{
3956   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3957   constraint(ALLOC_IN_RC(ptr_reg));
3958   match(AddP (AddP (DecodeN reg) lreg) off);
3959 
3960   op_cost(10);
3961   format %{"[$reg + $off + $lreg]" %}
3962   interface(MEMORY_INTER) %{
3963     base($reg);  // the narrow register itself serves as base
3964     index($lreg);
3965     scale(0x0);  // index is not scaled
3966     disp($off);
3967   %}
3968 %}
3969 
3970 // Indirect Memory Plus Index Register Operand (no offset) through a decoded narrow oop: [reg + lreg]
3971 operand indIndexNarrow(rRegN reg, rRegL lreg)
3972 %{
3973   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3974   constraint(ALLOC_IN_RC(ptr_reg));
3975   match(AddP (DecodeN reg) lreg);
3976 
3977   op_cost(10);
3978   format %{"[$reg + $lreg]" %}
3979   interface(MEMORY_INTER) %{
3980     base($reg);  // the narrow register itself serves as base
3981     index($lreg);
3982     scale(0x0);  // index is not scaled
3983     disp(0x0);   // no displacement
3984   %}
3985 %}
3986 
3987 // Indirect Memory Plus Scaled Index Register through a decoded narrow oop: [reg + (lreg << scale)]
3988 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
3989 %{
3990   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
3991   constraint(ALLOC_IN_RC(ptr_reg));
3992   match(AddP (DecodeN reg) (LShiftL lreg scale));
3993 
3994   op_cost(10);
3995   format %{"[$reg + $lreg << $scale]" %}
3996   interface(MEMORY_INTER) %{
3997     base($reg);  // the narrow register itself serves as base
3998     index($lreg);
3999     scale($scale);
4000     disp(0x0);   // no displacement
4001   %}
4002 %}
4003 
4004 // Indirect Memory Plus Scaled Index Register Plus Offset Operand through a decoded narrow oop
4005 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4006 %{
4007   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow-oop shift is 0
4008   constraint(ALLOC_IN_RC(ptr_reg));
4009   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4010 
4011   op_cost(10);
4012   format %{"[$reg + $off + $lreg << $scale]" %}
4013   interface(MEMORY_INTER) %{
4014     base($reg);  // the narrow register itself serves as base
4015     index($lreg);
4016     scale($scale);
4017     disp($off);
4018   %}
4019 %}
4020 
4021 // Indirect Memory Plus Scaled Positive (int, converted to long) Index Plus Offset, through a decoded narrow oop
4022 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4023 %{
4024   constraint(ALLOC_IN_RC(ptr_reg));
4025   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // shift must be 0 and the long type's lower bound >= 0 (presumably the ConvI2L node - verify)
4026   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4027 
4028   op_cost(10);
4029   format %{"[$reg + $off + $idx << $scale]" %}
4030   interface(MEMORY_INTER) %{
4031     base($reg);   // the narrow register itself serves as base
4032     index($idx);  // the int register itself is used as the index
4033     scale($scale);
4034     disp($off);
4035   %}
4036 %}
4037 
4038 operand indirectNarrowKlass(rRegN reg)  // indirect memory operand through a decoded narrow klass: [reg]
4039 %{
4040   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4041   constraint(ALLOC_IN_RC(ptr_reg));
4042   match(DecodeNKlass reg);
4043 
4044   format %{ "[$reg]" %}
4045   interface(MEMORY_INTER) %{
4046     base($reg);  // the narrow register itself serves as base
4047     index(0x4);  // 0x4 = no index
4048     scale(0x0);  // no scale
4049     disp(0x0);   // no displacement
4050   %}
4051 %}
4052 
4053 operand indOffset8NarrowKlass(rRegN reg, immL8 off)  // decoded narrow klass plus short offset: [reg + off]
4054 %{
4055   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4056   constraint(ALLOC_IN_RC(ptr_reg));
4057   match(AddP (DecodeNKlass reg) off);
4058 
4059   format %{ "[$reg + $off (8-bit)]" %}
4060   interface(MEMORY_INTER) %{
4061     base($reg);  // the narrow register itself serves as base
4062     index(0x4);  // 0x4 = no index
4063     scale(0x0);  // no scale
4064     disp($off);
4065   %}
4066 %}
4067 
4068 operand indOffset32NarrowKlass(rRegN reg, immL32 off)  // decoded narrow klass plus long offset: [reg + off]
4069 %{
4070   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4071   constraint(ALLOC_IN_RC(ptr_reg));
4072   match(AddP (DecodeNKlass reg) off);
4073 
4074   format %{ "[$reg + $off (32-bit)]" %}
4075   interface(MEMORY_INTER) %{
4076     base($reg);  // the narrow register itself serves as base
4077     index(0x4);  // 0x4 = no index
4078     scale(0x0);  // no scale
4079     disp($off);
4080   %}
4081 %}
4082 
4083 operand indIndexOffsetNarrowKlass(rRegN reg, rRegL lreg, immL32 off)  // decoded narrow klass plus index plus offset
4084 %{
4085   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4086   constraint(ALLOC_IN_RC(ptr_reg));
4087   match(AddP (AddP (DecodeNKlass reg) lreg) off);
4088 
4089   op_cost(10);
4090   format %{"[$reg + $off + $lreg]" %}
4091   interface(MEMORY_INTER) %{
4092     base($reg);  // the narrow register itself serves as base
4093     index($lreg);
4094     scale(0x0);  // index is not scaled
4095     disp($off);
4096   %}
4097 %}
4098 
4099 operand indIndexNarrowKlass(rRegN reg, rRegL lreg)  // decoded narrow klass plus index: [reg + lreg]
4100 %{
4101   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4102   constraint(ALLOC_IN_RC(ptr_reg));
4103   match(AddP (DecodeNKlass reg) lreg);
4104 
4105   op_cost(10);
4106   format %{"[$reg + $lreg]" %}
4107   interface(MEMORY_INTER) %{
4108     base($reg);  // the narrow register itself serves as base
4109     index($lreg);
4110     scale(0x0);  // index is not scaled
4111     disp(0x0);   // no displacement
4112   %}
4113 %}
4114 
4115 operand indIndexScaleNarrowKlass(rRegN reg, rRegL lreg, immI2 scale)  // decoded narrow klass plus scaled index
4116 %{
4117   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4118   constraint(ALLOC_IN_RC(ptr_reg));
4119   match(AddP (DecodeNKlass reg) (LShiftL lreg scale));
4120 
4121   op_cost(10);
4122   format %{"[$reg + $lreg << $scale]" %}
4123   interface(MEMORY_INTER) %{
4124     base($reg);  // the narrow register itself serves as base
4125     index($lreg);
4126     scale($scale);
4127     disp(0x0);   // no displacement
4128   %}
4129 %}
4130 
4131 operand indIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegL lreg, immI2 scale)  // decoded narrow klass plus scaled index plus offset
4132 %{
4133   predicate(Universe::narrow_klass_shift() == 0);  // only when the narrow-klass shift is 0
4134   constraint(ALLOC_IN_RC(ptr_reg));
4135   match(AddP (AddP (DecodeNKlass reg) (LShiftL lreg scale)) off);
4136 
4137   op_cost(10);
4138   format %{"[$reg + $off + $lreg << $scale]" %}
4139   interface(MEMORY_INTER) %{
4140     base($reg);  // the narrow register itself serves as base
4141     index($lreg);
4142     scale($scale);
4143     disp($off);
4144   %}
4145 %}
4146 
4147 operand indCompressedKlassOffset(rRegN reg, immL32 off) %{
4148   predicate(UseCompressedKlassPointers && (Universe::narrow_klass_shift() == Address::times_8));  // narrow klass plus offset, only when the klass shift equals times_8
4149   constraint(ALLOC_IN_RC(ptr_reg));
4150   match(AddP (DecodeNKlass reg) off);
4151 
4152   op_cost(10);
4153   format %{"[R12 + $reg << 3 + $off] (compressed klass addressing)" %}
4154   interface(MEMORY_INTER) %{
4155     base(0xc); // R12
4156     index($reg);  // the narrow register is the index
4157     scale(0x3);   // matches the "<< 3" shown in the format string
4158     disp($off);
4159   %}
4160 %}
4161 
4162 operand indPosIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegI idx, immI2 scale)  // decoded narrow klass plus scaled non-negative int index plus offset
4163 %{
4164   constraint(ALLOC_IN_RC(ptr_reg));
4165   predicate(Universe::narrow_klass_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // shift must be 0 and the long type's lower bound >= 0 (presumably the ConvI2L node - verify)
4166   match(AddP (AddP (DecodeNKlass reg) (LShiftL (ConvI2L idx) scale)) off);
4167 
4168   op_cost(10);
4169   format %{"[$reg + $off + $idx << $scale]" %}
4170   interface(MEMORY_INTER) %{
4171     base($reg);   // the narrow register itself serves as base
4172     index($idx);  // the int register itself is used as the index
4173     scale($scale);
4174     disp($off);
4175   %}
4176 %}
4177 
4178 //----------Special Memory Operands--------------------------------------------
4179 // Stack Slot Operand - This operand is used for loading and storing temporary
4180 //                      values on the stack where a match requires a value to
4181 //                      flow through memory.
4182 operand stackSlotP(sRegP reg)  // pointer-typed stack slot addressed as [RSP + slot offset]
4183 %{
4184   constraint(ALLOC_IN_RC(stack_slots));
4185   // No match rule because this operand is only generated during matching
4186 
4187   format %{ "[$reg]" %}
4188   interface(MEMORY_INTER) %{
4189     base(0x4);   // RSP
4190     index(0x4);  // No Index
4191     scale(0x0);  // No Scale
4192     disp($reg);  // Stack Offset
4193   %}
4194 %}
4195 
4196 operand stackSlotI(sRegI reg)  // int-typed stack slot addressed as [RSP + slot offset]
4197 %{
4198   constraint(ALLOC_IN_RC(stack_slots));
4199   // No match rule because this operand is only generated during matching
4200 
4201   format %{ "[$reg]" %}
4202   interface(MEMORY_INTER) %{
4203     base(0x4);   // RSP
4204     index(0x4);  // No Index
4205     scale(0x0);  // No Scale
4206     disp($reg);  // Stack Offset
4207   %}
4208 %}
4209 
4210 operand stackSlotF(sRegF reg)  // float-typed stack slot addressed as [RSP + slot offset]
4211 %{
4212   constraint(ALLOC_IN_RC(stack_slots));
4213   // No match rule because this operand is only generated during matching
4214 
4215   format %{ "[$reg]" %}
4216   interface(MEMORY_INTER) %{
4217     base(0x4);   // RSP
4218     index(0x4);  // No Index
4219     scale(0x0);  // No Scale
4220     disp($reg);  // Stack Offset
4221   %}
4222 %}
4223 
4224 operand stackSlotD(sRegD reg)  // double-typed stack slot addressed as [RSP + slot offset]
4225 %{
4226   constraint(ALLOC_IN_RC(stack_slots));
4227   // No match rule because this operand is only generated during matching
4228 
4229   format %{ "[$reg]" %}
4230   interface(MEMORY_INTER) %{
4231     base(0x4);   // RSP
4232     index(0x4);  // No Index
4233     scale(0x0);  // No Scale
4234     disp($reg);  // Stack Offset
4235   %}
4236 %}
4237 operand stackSlotL(sRegL reg)  // long-typed stack slot addressed as [RSP + slot offset]
4238 %{
4239   constraint(ALLOC_IN_RC(stack_slots));
4240   // No match rule because this operand is only generated during matching
4241 
4242   format %{ "[$reg]" %}
4243   interface(MEMORY_INTER) %{
4244     base(0x4);   // RSP
4245     index(0x4);  // No Index
4246     scale(0x0);  // No Scale
4247     disp($reg);  // Stack Offset
4248   %}
4249 %}
4250 
4251 //----------Conditional Branch Operands----------------------------------------
4252 // Comparison Op  - This is the operation of the comparison, and is limited to
4253 //                  the following set of codes:
4254 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4255 //
4256 // Other attributes of the comparison, such as unsignedness, are specified
4257 // by the comparison instruction that sets a condition code flags register.
4258 // That result is represented by a flags operand whose subtype is appropriate
4259 // to the unsignedness (etc.) of the comparison.
4260 //
4261 // Later, the instruction which matches both the Comparison Op (a Bool) and
4262 // the flags (produced by the Cmp) specifies the coding of the comparison op
4263 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4264 
4265 // Comparison Code (signed): maps each Bool test to an encoding value and mnemonic suffix
4266 operand cmpOp()
4267 %{
4268   match(Bool);
4269 
4270   format %{ "" %}
4271   interface(COND_INTER) %{
4272     equal(0x4, "e");
4273     not_equal(0x5, "ne");
4274     less(0xC, "l");            // signed suffixes: l / ge / le / g
4275     greater_equal(0xD, "ge");
4276     less_equal(0xE, "le");
4277     greater(0xF, "g");
4278   %}
4279 %}
4280 
4281 // Comparison Code, unsigned compare.  Used by FP also, with
4282 // C2 (unordered) turned into GT or LT already.  The other bits
4283 // C0 and C3 are turned into Carry & Zero flags.
4284 operand cmpOpU()
4285 %{
4286   match(Bool);
4287 
4288   format %{ "" %}
4289   interface(COND_INTER) %{
4290     equal(0x4, "e");
4291     not_equal(0x5, "ne");
4292     less(0x2, "b");            // unsigned suffixes: b / nb / be / nbe
4293     greater_equal(0x3, "nb");
4294     less_equal(0x6, "be");
4295     greater(0x7, "nbe");
4296   %}
4297 %}
4298 
4299 
4300 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
4301 operand cmpOpUCF() %{
4302   match(Bool);
4303   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4304             n->as_Bool()->_test._test == BoolTest::ge ||
4305             n->as_Bool()->_test._test == BoolTest::le ||
4306             n->as_Bool()->_test._test == BoolTest::gt);  // eq and ne are excluded here
4307   format %{ "" %}
4308   interface(COND_INTER) %{
4309     equal(0x4, "e");
4310     not_equal(0x5, "ne");
4311     less(0x2, "b");            // same encoding table as cmpOpU
4312     greater_equal(0x3, "nb");
4313     less_equal(0x6, "be");
4314     greater(0x7, "nbe");
4315   %}
4316 %}
4317 
4318 
4319 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
4320 operand cmpOpUCF2() %{
4321   match(Bool);
4322   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4323             n->as_Bool()->_test._test == BoolTest::eq);  // the two tests excluded by cmpOpUCF
4324   format %{ "" %}
4325   interface(COND_INTER) %{
4326     equal(0x4, "e");
4327     not_equal(0x5, "ne");
4328     less(0x2, "b");            // same encoding table as cmpOpU
4329     greater_equal(0x3, "nb");
4330     less_equal(0x6, "be");
4331     greater(0x7, "nbe");
4332   %}
4333 %}
4334 
4335 
4336 //----------OPERAND CLASSES----------------------------------------------------
4337 // Operand Classes are groups of operands that are used to simplify
4338 // instruction definitions by not requiring the AD writer to specify separate
4339 // instructions for every form of operand when the instruction accepts
4340 // multiple operand types with the same basic encoding and format.  The classic
4341 // case of this is memory operands.
4342 
4343 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // plain pointer-based forms
4344                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4345                indCompressedOopOffset,  // narrow-oop forms follow
4346                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4347                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4348                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow,
4349                indCompressedKlassOffset,  // narrow-klass forms follow
4350                indirectNarrowKlass, indOffset8NarrowKlass, indOffset32NarrowKlass,
4351                indIndexOffsetNarrowKlass, indIndexNarrowKlass, indIndexScaleNarrowKlass,
4352                indIndexScaleOffsetNarrowKlass, indPosIndexScaleOffsetNarrowKlass);
4353 
4354 //----------PIPELINE-----------------------------------------------------------
4355 // Rules which define the behavior of the target architecture's pipeline.
4356 pipeline %{
4357 
4358 //----------ATTRIBUTES---------------------------------------------------------
4359 attributes %{
4360   variable_size_instructions;        // Instructions are variable size
4361   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4362   instruction_unit_size = 1;         // Instruction unit size is 1 byte
4363   instruction_fetch_unit_size = 16;  // The processor fetches one line
4364   instruction_fetch_units = 1;       // of 16 bytes
4365 
4366   // List of nop instructions
4367   nops( MachNop );
4368 %}
4369 
4370 //----------RESOURCES----------------------------------------------------------
4371 // Resources are the functional units available to the machine
4372 
4373 // Generic P2/P3 pipeline
4374 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4375 // 3 instructions decoded per cycle.
4376 // 2 load/store ops per cycle, 1 branch, 1 FPU,  (NOTE(review): three MS units, MS0-MS2, are declared below - confirm)
4377 // 3 ALU ops, only ALU0 handles mul instructions.  DECODE, MEM and ALU are "any of" unions of their units.
4378 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4379            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4380            BR, FPU,
4381            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4382 
4383 //----------PIPELINE DESCRIPTION-----------------------------------------------
4384 // Pipeline Description specifies the stages in the machine's pipeline
4385 
4386 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes
4387 pipe_desc(S0, S1, S2, S3, S4, S5);
4388 
4389 //----------PIPELINE CLASSES---------------------------------------------------
4390 // Pipeline Classes describe the stages in which input and output are
4391 // referenced by the hardware pipeline.
4392 
4393 // Naming convention: ialu or fpu
4394 // Then: _reg
4395 // Then: _reg if there is a 2nd register
4396 // Then: _long if it's a pair of instructions implementing a long
4397 // Then: _fat if it requires the big decoder
4398 //   Or: _mem if it requires the big decoder and a memory unit.
4399 
4400 // Integer ALU reg operation: dst is both read (S3) and written (S4)
4401 pipe_class ialu_reg(rRegI dst)
4402 %{
4403     single_instruction;
4404     dst    : S4(write);
4405     dst    : S3(read);
4406     DECODE : S0;        // any decoder
4407     ALU    : S3;        // any alu
4408 %}
4409 
4410 // Long ALU reg operation, modelled as two instructions; dst is read (S3) and written (S4)
4411 pipe_class ialu_reg_long(rRegL dst)
4412 %{
4413     instruction_count(2);
4414     dst    : S4(write);
4415     dst    : S3(read);
4416     DECODE : S0(2);     // any 2 decoders
4417     ALU    : S3(2);     // both alus
4418 %}
4419 
4420 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
4421 pipe_class ialu_reg_fat(rRegI dst)
4422 %{
4423     single_instruction;
4424     dst    : S4(write);
4425     dst    : S3(read);
4426     D0     : S0;        // big decoder only
4427     ALU    : S3;        // any alu
4428 %}
4429 
4430 // Long ALU reg operation using big decoder, modelled as two instructions
4431 pipe_class ialu_reg_long_fat(rRegL dst)
4432 %{
4433     instruction_count(2);
4434     dst    : S4(write);
4435     dst    : S3(read);
4436     D0     : S0(2);     // big decoder only; twice
4437     ALU    : S3(2);     // any 2 alus
4438 %}
4439 
4440 // Integer ALU reg-reg operation: src read in S3, dst written in S4
4441 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4442 %{
4443     single_instruction;
4444     dst    : S4(write);
4445     src    : S3(read);
4446     DECODE : S0;        // any decoder
4447     ALU    : S3;        // any alu
4448 %}
4449 
4450 // Long ALU reg-reg operation, modelled as two instructions
4451 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4452 %{
4453     instruction_count(2);
4454     dst    : S4(write);
4455     src    : S3(read);
4456     DECODE : S0(2);     // any 2 decoders
4457     ALU    : S3(2);     // both alus
4458 %}
4459 
4460 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
4461 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4462 %{
4463     single_instruction;
4464     dst    : S4(write);
4465     src    : S3(read);
4466     D0     : S0;        // big decoder only
4467     ALU    : S3;        // any alu
4468 %}
4469 
4470 // Long ALU reg-reg operation using big decoder, modelled as two instructions
4471 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4472 %{
4473     instruction_count(2);
4474     dst    : S4(write);
4475     src    : S3(read);
4476     D0     : S0(2);     // big decoder only; twice
4477     ALU    : S3(2);     // both alus
4478 %}
4479 
4480 // Integer ALU reg-mem operation: mem read in S3, ALU in S4, dst written in S5
4481 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4482 %{
4483     single_instruction;
4484     dst    : S5(write);
4485     mem    : S3(read);
4486     D0     : S0;        // big decoder only
4487     ALU    : S4;        // any alu
4488     MEM    : S3;        // any mem
4489 %}
4490 
4491 // Integer mem operation (prefetch): uses a memory unit but no ALU and writes no register
4492 pipe_class ialu_mem(memory mem)
4493 %{
4494     single_instruction;
4495     mem    : S3(read);
4496     D0     : S0;        // big decoder only
4497     MEM    : S3;        // any mem
4498 %}
4499 
4500 // Integer Store to Memory from a register: mem read in S3, src read in S5
4501 pipe_class ialu_mem_reg(memory mem, rRegI src)
4502 %{
4503     single_instruction;
4504     mem    : S3(read);
4505     src    : S5(read);
4506     D0     : S0;        // big decoder only
4507     ALU    : S4;        // any alu
4508     MEM    : S3;        // any mem
4509 %}
4510 
4511 // // Long Store to Memory
4512 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4513 // %{
4514 //     instruction_count(2);
4515 //     mem    : S3(read);
4516 //     src    : S5(read);
4517 //     D0     : S0(2);          // big decoder only; twice
4518 //     ALU    : S4(2);     // any 2 alus
4519 //     MEM    : S3(2);  // Both mems
4520 // %}
4521 
4522 // Integer Store to Memory, immediate form: only the memory operand is tracked
4523 pipe_class ialu_mem_imm(memory mem)
4524 %{
4525     single_instruction;
4526     mem    : S3(read);
4527     D0     : S0;        // big decoder only
4528     ALU    : S4;        // any alu
4529     MEM    : S3;        // any mem
4530 %}
4531 
4532 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
4533 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4534 %{
4535     single_instruction;
4536     dst    : S4(write);
4537     src    : S3(read);
4538     D0     : S0;        // Big decoder only
4539     ALU0   : S3;        // only alu0
4540 %}
4541 
4542 // Integer ALU0 reg-mem operation: mem read in S3, ALU0 in S4, dst written in S5
4543 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4544 %{
4545     single_instruction;
4546     dst    : S5(write);
4547     mem    : S3(read);
4548     D0     : S0;        // big decoder only
4549     ALU0   : S4;        // ALU0 only
4550     MEM    : S3;        // any mem
4551 %}
4552 
4553 // Integer ALU reg-reg operation that writes the flags operand cr
4554 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4555 %{
4556     single_instruction;
4557     cr     : S4(write);
4558     src1   : S3(read);
4559     src2   : S3(read);
4560     DECODE : S0;        // any decoder
4561     ALU    : S3;        // any alu
4562 %}
4563 
4564 // Integer ALU reg-imm operation that writes the flags operand cr
4565 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4566 %{
4567     single_instruction;
4568     cr     : S4(write);
4569     src1   : S3(read);
4570     DECODE : S0;        // any decoder
4571     ALU    : S3;        // any alu
4572 %}
4573 
4574 // Integer ALU reg-mem operation that writes the flags operand cr
4575 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4576 %{
4577     single_instruction;
4578     cr     : S4(write);
4579     src1   : S3(read);
4580     src2   : S3(read);
4581     D0     : S0;        // big decoder only
4582     ALU    : S4;        // any alu
4583     MEM    : S3;        // any mem
4584 %}
4585 
4586 // Conditional move reg-reg, modelled as a four-instruction sequence; only reads are declared
4587 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4588 %{
4589     instruction_count(4);
4590     y      : S4(read);
4591     q      : S3(read);
4592     p      : S3(read);
4593     DECODE : S0(4);     // any decoder
4594 %}
4595 
4596 // Conditional move reg-reg: reads src and the flags cr in S3, writes dst in S4; no ALU is claimed
4597 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4598 %{
4599     single_instruction;
4600     dst    : S4(write);
4601     src    : S3(read);
4602     cr     : S3(read);
4603     DECODE : S0;        // any decoder
4604 %}
4605 
4606 // Conditional move reg-mem: as pipe_cmov_reg but the source is memory and a memory unit is used
4607 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4608 %{
4609     single_instruction;
4610     dst    : S4(write);
4611     src    : S3(read);
4612     cr     : S3(read);
4613     DECODE : S0;        // any decoder
4614     MEM    : S3;        // any mem
4615 %}
4616 
4617 // Conditional move reg-reg long (NOTE(review): single_instruction yet DECODE is S0(2) - confirm intent)
4618 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4619 %{
4620     single_instruction;
4621     dst    : S4(write);
4622     src    : S3(read);
4623     cr     : S3(read);
4624     DECODE : S0(2);     // any 2 decoders
4625 %}
4626 
4627 // XXX
4628 // // Conditional move double reg-reg
4629 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4630 // %{
4631 //     single_instruction;
4632 //     dst    : S4(write);
4633 //     src    : S3(read);
4634 //     cr     : S3(read);
4635 //     DECODE : S0;     // any decoder
4636 // %}
4637 
4638 // Float reg operation: single operand dst, declared as read only; modelled as two instructions
4639 pipe_class fpu_reg(regD dst)
4640 %{
4641     instruction_count(2);
4642     dst    : S3(read);
4643     DECODE : S0(2);     // any 2 decoders
4644     FPU    : S3;
4645 %}
4646 
4647 // Float reg-reg operation: src read in S3, dst written in S4; modelled as two instructions
4648 pipe_class fpu_reg_reg(regD dst, regD src)
4649 %{
4650     instruction_count(2);
4651     dst    : S4(write);
4652     src    : S3(read);
4653     DECODE : S0(2);     // any 2 decoders
4654     FPU    : S3;
4655 %}
4656 
4657 // Float reg-reg-reg operation: two sources, modelled as three instructions
4658 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4659 %{
4660     instruction_count(3);
4661     dst    : S4(write);
4662     src1   : S3(read);
4663     src2   : S3(read);
4664     DECODE : S0(3);     // any 3 decoders
4665     FPU    : S3(2);
4666 %}
4667 
4668 // Float reg-reg-reg-reg operation: three sources, modelled as four instructions
4669 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4670 %{
4671     instruction_count(4);
4672     dst    : S4(write);
4673     src1   : S3(read);
4674     src2   : S3(read);
4675     src3   : S3(read);
4676     DECODE : S0(4);     // any 4 decoders
4677     FPU    : S3(2);
4678 %}
4679 
4680 // Float reg-mem-reg-reg operation: src1 is a memory operand; modelled as four instructions
4681 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4682 %{
4683     instruction_count(4);
4684     dst    : S4(write);
4685     src1   : S3(read);
4686     src2   : S3(read);
4687     src3   : S3(read);
4688     DECODE : S1(3);     // any 3 decoders
4689     D0     : S0;        // Big decoder only
4690     FPU    : S3(2);
4691     MEM    : S3;        // any mem
4692 %}
4693 
4694 // Float reg-mem operation: mem read in S3, FPU in S4, dst written in S5; modelled as two instructions
4695 pipe_class fpu_reg_mem(regD dst, memory mem)
4696 %{
4697     instruction_count(2);
4698     dst    : S5(write);
4699     mem    : S3(read);
4700     D0     : S0;        // big decoder only
4701     DECODE : S1;        // any decoder for FPU POP
4702     FPU    : S4;
4703     MEM    : S3;        // any mem
4704 %}
4705 
4706 // Float reg-reg-mem operation: register and memory sources; modelled as three instructions
4707 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4708 %{
4709     instruction_count(3);
4710     dst    : S5(write);
4711     src1   : S3(read);
4712     mem    : S3(read);
4713     D0     : S0;        // big decoder only
4714     DECODE : S1(2);     // any decoder for FPU POP
4715     FPU    : S4;
4716     MEM    : S3;        // any mem
4717 %}
4718 
4719 // Float mem-reg operation: register source read in S5; two instructions.
     // NOTE(review): mem is declared S3(read) even though it is the first
     // (destination-position) operand -- confirm this is intended.
4720 pipe_class fpu_mem_reg(memory mem, regD src)
4721 %{
4722     instruction_count(2);
4723     src    : S5(read);
4724     mem    : S3(read);
4725     DECODE : S0;        // any decoder for FPU PUSH
4726     D0     : S1;        // big decoder only
4727     FPU    : S4;
4728     MEM    : S3;        // any mem
4729 %}
4730 
4731 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4732 %{
         // Float mem-reg-reg operation: both register sources and the memory
         // operand are referenced in S3; counted as three instructions.
4733     instruction_count(3);
4734     src1   : S3(read);
4735     src2   : S3(read);
4736     mem    : S3(read);
4737     DECODE : S0(2);     // any decoder for FPU PUSH
4738     D0     : S1;        // big decoder only
4739     FPU    : S4;
4740     MEM    : S3;        // any mem
4741 %}
4742 
4743 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4744 %{
         // Float mem-reg-mem operation: src1 and src2 are read in S3, mem is
         // referenced in S4; two MEM units are used, three instructions.
4745     instruction_count(3);
4746     src1   : S3(read);
4747     src2   : S3(read);
4748     mem    : S4(read);
4749     DECODE : S0;        // any decoder for FPU PUSH
4750     D0     : S0(2);     // big decoder only
4751     FPU    : S4;
4752     MEM    : S3(2);     // any mem
4753 %}
4754 
4755 pipe_class fpu_mem_mem(memory dst, memory src1)
4756 %{
         // Float mem-mem operation: two instructions using two MEM units and
         // no FPU resource.
         // NOTE(review): dst is declared S4(read), not write -- confirm.
4757     instruction_count(2);
4758     src1   : S3(read);
4759     dst    : S4(read);
4760     D0     : S0(2);     // big decoder only
4761     MEM    : S3(2);     // any mem
4762 %}
4763 
4764 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4765 %{
         // Float mem-mem-mem operation: three instructions using three MEM
         // units and the FPU in S4.
         // NOTE(review): dst is declared S4(read), not write -- confirm.
4766     instruction_count(3);
4767     src1   : S3(read);
4768     src2   : S3(read);
4769     dst    : S4(read);
4770     D0     : S0(3);     // big decoder only
4771     FPU    : S4;
4772     MEM    : S3(3);     // any mem
4773 %}
4774 
4775 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4776 %{
         // Float mem-reg operation with a constant: src1 and mem are both
         // referenced in S4; two MEM units are used, three instructions.
4777     instruction_count(3);
4778     src1   : S4(read);
4779     mem    : S4(read);
4780     DECODE : S0;        // any decoder for FPU PUSH
4781     D0     : S0(2);     // big decoder only
4782     FPU    : S4;
4783     MEM    : S3(2);     // any mem
4784 %}
4785 
4786 // Float load constant: register result written in S5; one MEM unit is
     // used for the load, counted as two instructions.
4787 pipe_class fpu_reg_con(regD dst)
4788 %{
4789     instruction_count(2);
4790     dst    : S5(write);
4791     D0     : S0;        // big decoder only for the load
4792     DECODE : S1;        // any decoder for FPU POP
4793     FPU    : S4;
4794     MEM    : S3;        // any mem
4795 %}
4796 
4797 // Float load constant combined with a register source: src read in S3,
     // register result written in S5; counted as three instructions.
4798 pipe_class fpu_reg_reg_con(regD dst, regD src)
4799 %{
4800     instruction_count(3);
4801     dst    : S5(write);
4802     src    : S3(read);
4803     D0     : S0;        // big decoder only for the load
4804     DECODE : S1(2);     // any decoder for FPU POP
4805     FPU    : S4;
4806     MEM    : S3;        // any mem
4807 %}
4808 
4809 // Unconditional branch: a single instruction that occupies the BR
     // resource in stage S3.
4810 pipe_class pipe_jmp(label labl)
4811 %{
4812     single_instruction;
4813     BR   : S3;
4814 %}
4815 
4816 // Conditional branch: a single instruction that reads the flags operand
     // cr in stage S1 and occupies the BR resource in S3.
4817 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4818 %{
4819     single_instruction;
4820     cr    : S1(read);
4821     BR    : S3;
4822 %}
4823 
4824 // Allocation idiom: one serializing instruction with a fixed latency of 6.
     // heap_ptr is read in S3 and dst is written in S5.
4825 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4826 %{
4827     instruction_count(1); force_serialization;
4828     fixed_latency(6);
4829     heap_ptr : S3(read);
4830     DECODE   : S0(3);
4831     D0       : S2;
4832     MEM      : S3;
4833     ALU      : S3(2);
4834     dst      : S5(write);
4835     BR       : S5;
4836 %}
4837 
4838 // Generic big/slow expanded idiom: modeled as 10 serializing instructions
     // spanning multiple bundles with a fixed latency of 100.
4839 pipe_class pipe_slow()
4840 %{
4841     instruction_count(10); multiple_bundles; force_serialization;
4842     fixed_latency(100);
4843     D0  : S0(2);
4844     MEM : S3(2);
4845 %}
4846 
4847 // The real do-nothing guy: zero instructions, no operands, no resources.
4848 pipe_class empty()
4849 %{
4850     instruction_count(0);
4851 %}
4852 
4853 // Define the class for the Nop node: MachNop uses the zero-instruction
     // pipe class `empty`.
4854 define
4855 %{
4856    MachNop = empty;
4857 %}
4858 
4859 %}
4860 
4861 //----------INSTRUCTIONS-------------------------------------------------------
4862 //
4863 // match      -- States which machine-independent subtree may be replaced
4864 //               by this instruction.
4865 // ins_cost   -- The estimated cost of this instruction is used by instruction
4866 //               selection to identify a minimum cost tree of machine
4867 //               instructions that matches a tree of machine-independent
4868 //               instructions.
4869 // format     -- A string providing the disassembly for this instruction.
4870 //               The value of an instruction's operand may be inserted
4871 //               by referring to it with a '$' prefix.
4872 // opcode     -- Three instruction opcodes may be provided.  These are referred
4873 //               to within an encode class as $primary, $secondary, and $tertiary
4874 //               respectively.  The primary opcode is commonly used to
4875 //               indicate the type of machine instruction, while secondary
4876 //               and tertiary are often used for prefix options or addressing
4877 //               modes.
4878 // ins_encode -- A list of encode classes with parameters. The encode class
4879 //               name must have been defined in an 'enc_class' specification
4880 //               in the encode section of the architecture description.
4881 
4882 
4883 //----------Load/Store/Move Instructions---------------------------------------
4884 //----------Load Instructions--------------------------------------------------
4885 
4886 // Load Byte (8 bit signed)
     // Matches LoadB from memory and emits movsbl into the int register dst.
4887 instruct loadB(rRegI dst, memory mem)
4888 %{
4889   match(Set dst (LoadB mem));
4890 
4891   ins_cost(125);
4892   format %{ "movsbl  $dst, $mem\t# byte" %}
4893 
4894   ins_encode %{
4895     __ movsbl($dst$$Register, $mem$$Address);
4896   %}
4897 
4898   ins_pipe(ialu_reg_mem);
4899 %}
4900 
4901 // Load Byte (8 bit signed) into Long Register
     // Folds ConvI2L(LoadB) into a single movsbq into the long register dst.
4902 instruct loadB2L(rRegL dst, memory mem)
4903 %{
4904   match(Set dst (ConvI2L (LoadB mem)));
4905 
4906   ins_cost(125);
4907   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4908 
4909   ins_encode %{
4910     __ movsbq($dst$$Register, $mem$$Address);
4911   %}
4912 
4913   ins_pipe(ialu_reg_mem);
4914 %}
4915 
4916 // Load Unsigned Byte (8 bit UNsigned)
     // Matches LoadUB from memory and emits movzbl into the int register dst.
4917 instruct loadUB(rRegI dst, memory mem)
4918 %{
4919   match(Set dst (LoadUB mem));
4920 
4921   ins_cost(125);
4922   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4923 
4924   ins_encode %{
4925     __ movzbl($dst$$Register, $mem$$Address);
4926   %}
4927 
4928   ins_pipe(ialu_reg_mem);
4929 %}
4930 
4931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUB) into a single movzbq into the long register dst.
4932 instruct loadUB2L(rRegL dst, memory mem)
4933 %{
4934   match(Set dst (ConvI2L (LoadUB mem)));
4935 
4936   ins_cost(125);
4937   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4938 
4939   ins_encode %{
4940     __ movzbq($dst$$Register, $mem$$Address);
4941   %}
4942 
4943   ins_pipe(ialu_reg_mem);
4944 %}
4945 
4946 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
     // Folds ConvI2L(AndI(LoadUB, mask)) into movzbq followed by andl with the
     // constant mask.  The flags register cr is declared killed.
4947 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
4948   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
4949   effect(KILL cr);
4950 
4951   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
4952             "andl    $dst, $mask" %}
4953   ins_encode %{
4954     Register Rdst = $dst$$Register;
4955     __ movzbq(Rdst, $mem$$Address);
4956     __ andl(Rdst, $mask$$constant);
4957   %}
4958   ins_pipe(ialu_reg_mem);
4959 %}
4960 
4961 // Load Short (16 bit signed)
     // Matches LoadS from memory and emits movswl into the int register dst.
4962 instruct loadS(rRegI dst, memory mem)
4963 %{
4964   match(Set dst (LoadS mem));
4965 
4966   ins_cost(125);
4967   format %{ "movswl $dst, $mem\t# short" %}
4968 
4969   ins_encode %{
4970     __ movswl($dst$$Register, $mem$$Address);
4971   %}
4972 
4973   ins_pipe(ialu_reg_mem);
4974 %}
4975 
4976 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Replaces the shift-left-24 / arithmetic-shift-right-24 pair applied to
     // a LoadS with a single movsbl from the same address.
4977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4978   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
4979 
4980   ins_cost(125);
4981   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4982   ins_encode %{
4983     __ movsbl($dst$$Register, $mem$$Address);
4984   %}
4985   ins_pipe(ialu_reg_mem);
4986 %}
4987 
4988 // Load Short (16 bit signed) into Long Register
     // Folds ConvI2L(LoadS) into a single movswq into the long register dst.
4989 instruct loadS2L(rRegL dst, memory mem)
4990 %{
4991   match(Set dst (ConvI2L (LoadS mem)));
4992 
4993   ins_cost(125);
4994   format %{ "movswq $dst, $mem\t# short -> long" %}
4995 
4996   ins_encode %{
4997     __ movswq($dst$$Register, $mem$$Address);
4998   %}
4999 
5000   ins_pipe(ialu_reg_mem);
5001 %}
5002 
5003 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches LoadUS from memory and emits movzwl into the int register dst.
5004 instruct loadUS(rRegI dst, memory mem)
5005 %{
5006   match(Set dst (LoadUS mem));
5007 
5008   ins_cost(125);
5009   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5010 
5011   ins_encode %{
5012     __ movzwl($dst$$Register, $mem$$Address);
5013   %}
5014 
5015   ins_pipe(ialu_reg_mem);
5016 %}
5017 
5018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Replaces the shift-left-24 / arithmetic-shift-right-24 pair applied to
     // a LoadUS with a single movsbl from the same address.
5019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5020   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5021 
5022   ins_cost(125);
5023   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5024   ins_encode %{
5025     __ movsbl($dst$$Register, $mem$$Address);
5026   %}
5027   ins_pipe(ialu_reg_mem);
5028 %}
5029 
5030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUS) into a single movzwq into the long register dst.
5031 instruct loadUS2L(rRegL dst, memory mem)
5032 %{
5033   match(Set dst (ConvI2L (LoadUS mem)));
5034 
5035   ins_cost(125);
5036   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5037 
5038   ins_encode %{
5039     __ movzwq($dst$$Register, $mem$$Address);
5040   %}
5041 
5042   ins_pipe(ialu_reg_mem);
5043 %}
5044 
5045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // The AND with 0xFF is folded away by loading only one byte with movzbq.
5046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5047   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5048 
5049   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5050   ins_encode %{
5051     __ movzbq($dst$$Register, $mem$$Address);
5052   %}
5053   ins_pipe(ialu_reg_mem);
5054 %}
5055 
5056 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Folds ConvI2L(AndI(LoadUS, mask)) into movzwq followed by andl with the
     // constant mask.  The flags register cr is declared killed.
5057 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5058   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5059   effect(KILL cr);
5060 
5061   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5062             "andl    $dst, $mask" %}
5063   ins_encode %{
5064     Register Rdst = $dst$$Register;
5065     __ movzwq(Rdst, $mem$$Address);
5066     __ andl(Rdst, $mask$$constant);
5067   %}
5068   ins_pipe(ialu_reg_mem);
5069 %}
5070 
5071 // Load Integer
     // Matches LoadI from memory and emits movl into the int register dst.
5072 instruct loadI(rRegI dst, memory mem)
5073 %{
5074   match(Set dst (LoadI mem));
5075 
5076   ins_cost(125);
5077   format %{ "movl    $dst, $mem\t# int" %}
5078 
5079   ins_encode %{
5080     __ movl($dst$$Register, $mem$$Address);
5081   %}
5082 
5083   ins_pipe(ialu_reg_mem);
5084 %}
5085 
5086 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Replaces the shift-left-24 / arithmetic-shift-right-24 pair applied to
     // a LoadI with a single movsbl from the same address.
5087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5088   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5089 
5090   ins_cost(125);
5091   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5092   ins_encode %{
5093     __ movsbl($dst$$Register, $mem$$Address);
5094   %}
5095   ins_pipe(ialu_reg_mem);
5096 %}
5097 
5098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Replaces AndI(LoadI, 0xFF) with a single movzbl from the same address.
5099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5100   match(Set dst (AndI (LoadI mem) mask));
5101 
5102   ins_cost(125);
5103   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5104   ins_encode %{
5105     __ movzbl($dst$$Register, $mem$$Address);
5106   %}
5107   ins_pipe(ialu_reg_mem);
5108 %}
5109 
5110 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Replaces the shift-left-16 / arithmetic-shift-right-16 pair applied to
     // a LoadI with a single movswl from the same address.
5111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5112   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5113 
5114   ins_cost(125);
5115   format %{ "movswl  $dst, $mem\t# int -> short" %}
5116   ins_encode %{
5117     __ movswl($dst$$Register, $mem$$Address);
5118   %}
5119   ins_pipe(ialu_reg_mem);
5120 %}
5121 
5122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Replaces AndI(LoadI, 0xFFFF) with a single movzwl from the same address.
5123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5124   match(Set dst (AndI (LoadI mem) mask));
5125 
5126   ins_cost(125);
5127   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5128   ins_encode %{
5129     __ movzwl($dst$$Register, $mem$$Address);
5130   %}
5131   ins_pipe(ialu_reg_mem);
5132 %}
5133 
5134 // Load Integer into Long Register
     // Folds ConvI2L(LoadI) into a single movslq into the long register dst.
5135 instruct loadI2L(rRegL dst, memory mem)
5136 %{
5137   match(Set dst (ConvI2L (LoadI mem)));
5138 
5139   ins_cost(125);
5140   format %{ "movslq  $dst, $mem\t# int -> long" %}
5141 
5142   ins_encode %{
5143     __ movslq($dst$$Register, $mem$$Address);
5144   %}
5145 
5146   ins_pipe(ialu_reg_mem);
5147 %}
5148 
5149 // Load Integer with mask 0xFF into Long Register
     // The AND with 0xFF is folded away by loading only one byte with movzbq.
5150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5151   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5152 
5153   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5154   ins_encode %{
5155     __ movzbq($dst$$Register, $mem$$Address);
5156   %}
5157   ins_pipe(ialu_reg_mem);
5158 %}
5159 
5160 // Load Integer with mask 0xFFFF into Long Register
     // The AND with 0xFFFF is folded away by loading only 16 bits with movzwq.
5161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5162   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5163 
5164   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5165   ins_encode %{
5166     __ movzwq($dst$$Register, $mem$$Address);
5167   %}
5168   ins_pipe(ialu_reg_mem);
5169 %}
5170 
5171 // Load Integer with a non-negative 32-bit mask into Long Register
     // The 32-bit movl/andl pair zero-extends its result into the upper half
     // of $dst.  That equals ConvI2L(AndI(LoadI, mask)) only when the AND result
     // cannot be negative, i.e. when the mask has its sign bit clear.  With a
     // negative mask and a negative loaded value the ideal result must be
     // sign-extended, so the rule is restricted by a predicate; other masks
     // are handled by the separate load / and / convert rules.
5172 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
       // n is the ConvI2L node; in(1) is the AndI; its in(2) is the constant mask.
       predicate(n->in(1)->in(2)->get_int() >= 0);
5173   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5174   effect(KILL cr);
5175 
5176   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
5177             "andl    $dst, $mask" %}
5178   ins_encode %{
5179     Register Rdst = $dst$$Register;
5180     __ movl(Rdst, $mem$$Address);
5181     __ andl(Rdst, $mask$$constant);
5182   %}
5183   ins_pipe(ialu_reg_mem);
5184 %}
5185 
5186 // Load Unsigned Integer into Long Register
     // Folds AndL(ConvI2L(LoadI), 32-bit mask) into a single movl.
5187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
5188 %{
5189   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5190 
5191   ins_cost(125);
5192   format %{ "movl    $dst, $mem\t# uint -> long" %}
5193 
5194   ins_encode %{
5195     __ movl($dst$$Register, $mem$$Address);
5196   %}
5197 
5198   ins_pipe(ialu_reg_mem);
5199 %}
5200 
5201 // Load Long
     // Matches LoadL from memory and emits movq into the long register dst.
5202 instruct loadL(rRegL dst, memory mem)
5203 %{
5204   match(Set dst (LoadL mem));
5205 
5206   ins_cost(125);
5207   format %{ "movq    $dst, $mem\t# long" %}
5208 
5209   ins_encode %{
5210     __ movq($dst$$Register, $mem$$Address);
5211   %}
5212 
5213   ins_pipe(ialu_reg_mem); // XXX
5214 %}
5215 
5216 // Load Range
     // Matches LoadRange from memory into an int register.  Encoded as
     // opcode 0x8B via the REX_reg_mem and reg_mem encode classes.
5217 instruct loadRange(rRegI dst, memory mem)
5218 %{
5219   match(Set dst (LoadRange mem));
5220 
5221   ins_cost(125); // XXX
5222   format %{ "movl    $dst, $mem\t# range" %}
5223   opcode(0x8B);
5224   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5225   ins_pipe(ialu_reg_mem);
5226 %}
5227 
5228 // Load Pointer
     // Matches LoadP from memory into a pointer register.  Encoded as
     // opcode 0x8B via the REX_reg_mem_wide and reg_mem encode classes.
5229 instruct loadP(rRegP dst, memory mem)
5230 %{
5231   match(Set dst (LoadP mem));
5232 
5233   ins_cost(125); // XXX
5234   format %{ "movq    $dst, $mem\t# ptr" %}
5235   opcode(0x8B);
5236   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5237   ins_pipe(ialu_reg_mem); // XXX
5238 %}
5239 
5240 // Load Compressed Pointer
     // Matches LoadN from memory and emits movl into the narrow register dst.
5241 instruct loadN(rRegN dst, memory mem)
5242 %{
5243    match(Set dst (LoadN mem));
5244 
5245    ins_cost(125); // XXX
5246    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5247    ins_encode %{
5248      __ movl($dst$$Register, $mem$$Address);
5249    %}
5250    ins_pipe(ialu_reg_mem); // XXX
5251 %}
5252 
5253 
5254 // Load Klass Pointer
     // Matches LoadKlass from memory into a pointer register.  Encoded as
     // opcode 0x8B via the REX_reg_mem_wide and reg_mem encode classes.
5255 instruct loadKlass(rRegP dst, memory mem)
5256 %{
5257   match(Set dst (LoadKlass mem));
5258 
5259   ins_cost(125); // XXX
5260   format %{ "movq    $dst, $mem\t# class" %}
5261   opcode(0x8B);
5262   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5263   ins_pipe(ialu_reg_mem); // XXX
5264 %}
5265 
5266 // Load narrow Klass Pointer
     // Matches LoadNKlass from memory and emits movl into the narrow register dst.
5267 instruct loadNKlass(rRegN dst, memory mem)
5268 %{
5269   match(Set dst (LoadNKlass mem));
5270 
5271   ins_cost(125); // XXX
5272   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5273   ins_encode %{
5274     __ movl($dst$$Register, $mem$$Address);
5275   %}
5276   ins_pipe(ialu_reg_mem); // XXX
5277 %}
5278 
5279 // Load Float
     // Matches LoadF from memory and emits it through the movflt
     // macro-assembler call into the XMM register dst.
5280 instruct loadF(regF dst, memory mem)
5281 %{
5282   match(Set dst (LoadF mem));
5283 
5284   ins_cost(145); // XXX
5285   format %{ "movss   $dst, $mem\t# float" %}
5286   ins_encode %{
5287     __ movflt($dst$$XMMRegister, $mem$$Address);
5288   %}
5289   ins_pipe(pipe_slow); // XXX
5290 %}
5291 
5292 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is false.  The load is
     // emitted through the movdbl macro-assembler call; the format string
     // shows movlpd for this configuration.
5293 instruct loadD_partial(regD dst, memory mem)
5294 %{
5295   predicate(!UseXmmLoadAndClearUpper);
5296   match(Set dst (LoadD mem));
5297 
5298   ins_cost(145); // XXX
5299   format %{ "movlpd  $dst, $mem\t# double" %}
5300   ins_encode %{
5301     __ movdbl($dst$$XMMRegister, $mem$$Address);
5302   %}
5303   ins_pipe(pipe_slow); // XXX
5304 %}
5305 
5306 instruct loadD(regD dst, memory mem)
5307 %{
       // Load Double variant selected when UseXmmLoadAndClearUpper is true.
       // Emitted through the same movdbl macro-assembler call as loadD_partial.
5308   predicate(UseXmmLoadAndClearUpper);
5309   match(Set dst (LoadD mem));
5310 
5311   ins_cost(145); // XXX
5312   format %{ "movsd   $dst, $mem\t# double" %}
5313   ins_encode %{
5314     __ movdbl($dst$$XMMRegister, $mem$$Address);
5315   %}
5316   ins_pipe(pipe_slow); // XXX
5317 %}
5318 
5319 // Load Effective Address
     // Materializes an indOffset8 address operand into a pointer register.
     // Encoded as opcode 0x8D via REX_reg_mem_wide and reg_mem.
5320 instruct leaP8(rRegP dst, indOffset8 mem)
5321 %{
5322   match(Set dst mem);
5323 
5324   ins_cost(110); // XXX
5325   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5326   opcode(0x8D);
5327   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5328   ins_pipe(ialu_reg_reg_fat);
5329 %}
5330 
5331 instruct leaP32(rRegP dst, indOffset32 mem)
5332 %{
       // Materializes an indOffset32 address operand into a pointer register
       // (leaq, opcode 0x8D).
5333   match(Set dst mem);
5334 
5335   ins_cost(110);
5336   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5337   opcode(0x8D);
5338   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5339   ins_pipe(ialu_reg_reg_fat);
5340 %}
5341 
5342 // instruct leaPIdx(rRegP dst, indIndex mem)
5343 // %{
5344 //   match(Set dst mem);
5345 
5346 //   ins_cost(110);
5347 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5348 //   opcode(0x8D);
5349 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5350 //   ins_pipe(ialu_reg_reg_fat);
5351 // %}
5352 
5353 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5354 %{
       // Materializes an indIndexOffset address operand into a pointer
       // register (leaq, opcode 0x8D).
5355   match(Set dst mem);
5356 
5357   ins_cost(110);
5358   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5359   opcode(0x8D);
5360   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5361   ins_pipe(ialu_reg_reg_fat);
5362 %}
5363 
5364 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5365 %{
       // Materializes an indIndexScale address operand into a pointer
       // register (leaq, opcode 0x8D).
5366   match(Set dst mem);
5367 
5368   ins_cost(110);
5369   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5370   opcode(0x8D);
5371   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5372   ins_pipe(ialu_reg_reg_fat);
5373 %}
5374 
5375 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5376 %{
       // Materializes an indIndexScaleOffset address operand into a pointer
       // register (leaq, opcode 0x8D).
5377   match(Set dst mem);
5378 
5379   ins_cost(110);
5380   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5381   opcode(0x8D);
5382   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5383   ins_pipe(ialu_reg_reg_fat);
5384 %}
5385 
5386 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5387 %{
       // Materializes an indPosIndexScaleOffset address operand into a
       // pointer register (leaq, opcode 0x8D).
5388   match(Set dst mem);
5389 
5390   ins_cost(110);
5391   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5392   opcode(0x8D);
5393   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5394   ins_pipe(ialu_reg_reg_fat);
5395 %}
5396 
5397 // Load Effective Address which uses Narrow (32-bits) oop
     // Only selected when compressed oops are in use with a non-zero shift.
5398 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5399 %{
5400   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5401   match(Set dst mem);
5402 
5403   ins_cost(110);
5404   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5405   opcode(0x8D);
5406   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5407   ins_pipe(ialu_reg_reg_fat);
5408 %}
5409 
5410 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5411 %{
       // leaq for an indOffset8Narrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5412   predicate(Universe::narrow_oop_shift() == 0);
5413   match(Set dst mem);
5414 
5415   ins_cost(110); // XXX
5416   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5417   opcode(0x8D);
5418   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5419   ins_pipe(ialu_reg_reg_fat);
5420 %}
5421 
5422 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5423 %{
       // leaq for an indOffset32Narrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5424   predicate(Universe::narrow_oop_shift() == 0);
5425   match(Set dst mem);
5426 
5427   ins_cost(110);
5428   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5429   opcode(0x8D);
5430   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5431   ins_pipe(ialu_reg_reg_fat);
5432 %}
5433 
5434 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5435 %{
       // leaq for an indIndexOffsetNarrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5436   predicate(Universe::narrow_oop_shift() == 0);
5437   match(Set dst mem);
5438 
5439   ins_cost(110);
5440   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5441   opcode(0x8D);
5442   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5443   ins_pipe(ialu_reg_reg_fat);
5444 %}
5445 
5446 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5447 %{
       // leaq for an indIndexScaleNarrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5448   predicate(Universe::narrow_oop_shift() == 0);
5449   match(Set dst mem);
5450 
5451   ins_cost(110);
5452   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5453   opcode(0x8D);
5454   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5455   ins_pipe(ialu_reg_reg_fat);
5456 %}
5457 
5458 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5459 %{
       // leaq for an indIndexScaleOffsetNarrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5460   predicate(Universe::narrow_oop_shift() == 0);
5461   match(Set dst mem);
5462 
5463   ins_cost(110);
5464   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5465   opcode(0x8D);
5466   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5467   ins_pipe(ialu_reg_reg_fat);
5468 %}
5469 
5470 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5471 %{
       // leaq for an indPosIndexScaleOffsetNarrow operand; only selected when
       // Universe::narrow_oop_shift() is zero.
5472   predicate(Universe::narrow_oop_shift() == 0);
5473   match(Set dst mem);
5474 
5475   ins_cost(110);
5476   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5477   opcode(0x8D);
5478   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5479   ins_pipe(ialu_reg_reg_fat);
5480 %}
5481 
5482 instruct loadConI(rRegI dst, immI src)
5483 %{
       // Load an arbitrary int constant into a register; encoded by the
       // load_immI encode class.
5484   match(Set dst src);
5485 
5486   format %{ "movl    $dst, $src\t# int" %}
5487   ins_encode(load_immI(dst, src));
5488   ins_pipe(ialu_reg_fat); // XXX
5489 %}
5490 
5491 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5492 %{
       // Load int constant zero by xor-ing dst with itself (opcode 0x33);
       // the flags register cr is declared killed.
5493   match(Set dst src);
5494   effect(KILL cr);
5495 
5496   ins_cost(50);
5497   format %{ "xorl    $dst, $dst\t# int" %}
5498   opcode(0x33); /* + rd */
5499   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5500   ins_pipe(ialu_reg);
5501 %}
5502 
5503 instruct loadConL(rRegL dst, immL src)
5504 %{
       // Load an arbitrary long constant; the most expensive (cost 150) of
       // the long-constant rules in this section.
5505   match(Set dst src);
5506 
5507   ins_cost(150);
5508   format %{ "movq    $dst, $src\t# long" %}
5509   ins_encode(load_immL(dst, src));
5510   ins_pipe(ialu_reg);
5511 %}
5512 
5513 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5514 %{
       // Load long constant zero with a 32-bit xor of dst with itself
       // (opcode 0x33); the flags register cr is declared killed.
5515   match(Set dst src);
5516   effect(KILL cr);
5517 
5518   ins_cost(50);
5519   format %{ "xorl    $dst, $dst\t# long" %}
5520   opcode(0x33); /* + rd */
5521   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5522   ins_pipe(ialu_reg); // XXX
5523 %}
5524 
5525 instruct loadConUL32(rRegL dst, immUL32 src)
5526 %{
       // Load a long constant that fits the immUL32 operand (unsigned 32-bit
       // per the format string) using a movl; cost 60.
5527   match(Set dst src);
5528 
5529   ins_cost(60);
5530   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5531   ins_encode(load_immUL32(dst, src));
5532   ins_pipe(ialu_reg);
5533 %}
5534 
5535 instruct loadConL32(rRegL dst, immL32 src)
5536 %{
       // Load a long constant that fits the immL32 operand (32-bit per the
       // format string) using a movq; cost 70.
5537   match(Set dst src);
5538 
5539   ins_cost(70);
5540   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5541   ins_encode(load_immL32(dst, src));
5542   ins_pipe(ialu_reg);
5543 %}
5544 
5545 instruct loadConP(rRegP dst, immP con) %{
       // Load an arbitrary pointer constant; encoded by the load_immP
       // encode class.
5546   match(Set dst con);
5547 
5548   format %{ "movq    $dst, $con\t# ptr" %}
5549   ins_encode(load_immP(dst, con));
5550   ins_pipe(ialu_reg_fat); // XXX
5551 %}
5552 
5553 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5554 %{
       // Load the null pointer constant with a 32-bit xor of dst with itself
       // (opcode 0x33); the flags register cr is declared killed.
5555   match(Set dst src);
5556   effect(KILL cr);
5557 
5558   ins_cost(50);
5559   format %{ "xorl    $dst, $dst\t# ptr" %}
5560   opcode(0x33); /* + rd */
5561   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5562   ins_pipe(ialu_reg);
5563 %}
5564 
5565 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5566 %{
       // Load a pointer constant that fits the immP31 operand (positive
       // 32-bit per the format string) using a movl; cost 60.
       // NOTE(review): cr is declared killed although the format shows a
       // plain movl -- confirm whether load_immP31 really touches flags.
5567   match(Set dst src);
5568   effect(KILL cr);
5569 
5570   ins_cost(60);
5571   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5572   ins_encode(load_immP31(dst, src));
5573   ins_pipe(ialu_reg);
5574 %}
5575 
5576 instruct loadConF(regF dst, immF con) %{
       // Load a float constant from the constant table via movflt.
5577   match(Set dst con);
5578   ins_cost(125);
5579   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5580   ins_encode %{
5581     __ movflt($dst$$XMMRegister, $constantaddress($con));
5582   %}
5583   ins_pipe(pipe_slow);
5584 %}
5585 
5586 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL constant by xor-ing dst with itself; the
       // flags register cr is declared killed.  The format string names $dst
       // for both operands so the printed disassembly matches the emitted
       // instruction (it previously printed $src as the second operand).
5587   match(Set dst src);
5588   effect(KILL cr);
5589   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5590   ins_encode %{
5591     __ xorq($dst$$Register, $dst$$Register);
5592   %}
5593   ins_pipe(ialu_reg);
5594 %}
5595 
5596 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-null compressed oop constant via set_narrow_oop.
5597   match(Set dst src);
5598 
5599   ins_cost(125);
5600   format %{ "movl    $dst, $src\t# compressed ptr" %}
5601   ins_encode %{
5602     address con = (address)$src$$constant;
5603     if (con == NULL) {
           // A NULL constant is not expected to reach this rule.
5604       ShouldNotReachHere();
5605     } else {
5606       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5607     }
5608   %}
5609   ins_pipe(ialu_reg_fat); // XXX
5610 %}
5611 
5612 instruct loadConNKlass(rRegN dst, immNKlass src) %{
       // Load a non-null compressed klass constant via set_narrow_klass.
5613   match(Set dst src);
5614 
5615   ins_cost(125);
5616   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5617   ins_encode %{
5618     address con = (address)$src$$constant;
5619     if (con == NULL) {
           // A NULL constant is not expected to reach this rule.
5620       ShouldNotReachHere();
5621     } else {
5622       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5623     }
5624   %}
5625   ins_pipe(ialu_reg_fat); // XXX
5626 %}
5627 
5628 instruct loadConF0(regF dst, immF0 src)
5629 %{
       // Load float 0.0 by xor-ing the XMM register with itself (xorps).
5630   match(Set dst src);
5631   ins_cost(100);
5632 
5633   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5634   ins_encode %{
5635     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5636   %}
5637   ins_pipe(pipe_slow);
5638 %}
5639 
5640 // Use the same format since predicate() can not be used here.
     // Load a double constant from the constant table via movdbl.
5641 instruct loadConD(regD dst, immD con) %{
5642   match(Set dst con);
5643   ins_cost(125);
5644   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5645   ins_encode %{
5646     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5647   %}
5648   ins_pipe(pipe_slow);
5649 %}
5650 
5651 instruct loadConD0(regD dst, immD0 src)
5652 %{
       // Load double 0.0 by xor-ing the XMM register with itself (xorpd).
5653   match(Set dst src);
5654   ins_cost(100);
5655 
5656   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5657   ins_encode %{
5658     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5659   %}
5660   ins_pipe(pipe_slow);
5661 %}
5662 
5663 instruct loadSSI(rRegI dst, stackSlotI src)
5664 %{
       // Load an int from a stack slot into a register (opcode 0x8B via
       // REX_reg_mem and reg_mem).
5665   match(Set dst src);
5666 
5667   ins_cost(125);
5668   format %{ "movl    $dst, $src\t# int stk" %}
5669   opcode(0x8B);
5670   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5671   ins_pipe(ialu_reg_mem);
5672 %}
5673 
5674 instruct loadSSL(rRegL dst, stackSlotL src)
5675 %{
       // Load a long from a stack slot into a register (opcode 0x8B via
       // REX_reg_mem_wide and reg_mem).
5676   match(Set dst src);
5677 
5678   ins_cost(125);
5679   format %{ "movq    $dst, $src\t# long stk" %}
5680   opcode(0x8B);
5681   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5682   ins_pipe(ialu_reg_mem);
5683 %}
5684 
5685 instruct loadSSP(rRegP dst, stackSlotP src)
5686 %{
       // Load a pointer from a stack slot into a register (opcode 0x8B via
       // REX_reg_mem_wide and reg_mem).
5687   match(Set dst src);
5688 
5689   ins_cost(125);
5690   format %{ "movq    $dst, $src\t# ptr stk" %}
5691   opcode(0x8B);
5692   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5693   ins_pipe(ialu_reg_mem);
5694 %}
5695 
5696 instruct loadSSF(regF dst, stackSlotF src)
5697 %{
       // Load a float from a stack slot; the slot is addressed as rsp plus
       // the operand's displacement.
5698   match(Set dst src);
5699 
5700   ins_cost(125);
5701   format %{ "movss   $dst, $src\t# float stk" %}
5702   ins_encode %{
5703     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5704   %}
5705   ins_pipe(pipe_slow); // XXX
5706 %}
5707 
5708 // Use the same format since predicate() can not be used here.
     // Load a double from a stack slot; the slot is addressed as rsp plus
     // the operand's displacement.
5709 instruct loadSSD(regD dst, stackSlotD src)
5710 %{
5711   match(Set dst src);
5712 
5713   ins_cost(125);
5714   format %{ "movsd   $dst, $src\t# double stk" %}
5715   ins_encode  %{
5716     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5717   %}
5718   ins_pipe(pipe_slow); // XXX
5719 %}
5720 
5721 // Prefetch instructions.
5722 // Must be safe to execute with invalid address (cannot fault).
5723 
5724 instruct prefetchr( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 3.
5725   predicate(ReadPrefetchInstr==3);
5726   match(PrefetchRead mem);
5727   ins_cost(125);
5728 
5729   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5730   ins_encode %{
5731     __ prefetchr($mem$$Address);
5732   %}
5733   ins_pipe(ialu_mem);
5734 %}
5735 
5736 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 0.
5737   predicate(ReadPrefetchInstr==0);
5738   match(PrefetchRead mem);
5739   ins_cost(125);
5740 
5741   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5742   ins_encode %{
5743     __ prefetchnta($mem$$Address);
5744   %}
5745   ins_pipe(ialu_mem);
5746 %}
5747 
5748 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 1.
5749   predicate(ReadPrefetchInstr==1);
5750   match(PrefetchRead mem);
5751   ins_cost(125);
5752 
5753   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5754   ins_encode %{
5755     __ prefetcht0($mem$$Address);
5756   %}
5757   ins_pipe(ialu_mem);
5758 %}
5759 
5760 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 2.
5761   predicate(ReadPrefetchInstr==2);
5762   match(PrefetchRead mem);
5763   ins_cost(125);
5764 
5765   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5766   ins_encode %{
5767     __ prefetcht2($mem$$Address);
5768   %}
5769   ins_pipe(ialu_mem);
5770 %}
5771 
5772 instruct prefetchwNTA( memory mem ) %{
       // The only PrefetchWrite rule in this section; it has no predicate
       // and always emits prefetchnta.
5773   match(PrefetchWrite mem);
5774   ins_cost(125);
5775 
5776   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5777   ins_encode %{
5778     __ prefetchnta($mem$$Address);
5779   %}
5780   ins_pipe(ialu_mem);
5781 %}
5782 
5783 // Prefetch instructions for allocation.
5784 
5785 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 3.
5786   predicate(AllocatePrefetchInstr==3);
5787   match(PrefetchAllocation mem);
5788   ins_cost(125);
5789 
5790   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5791   ins_encode %{
5792     __ prefetchw($mem$$Address);
5793   %}
5794   ins_pipe(ialu_mem);
5795 %}
5796 
5797 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 0.
5798   predicate(AllocatePrefetchInstr==0);
5799   match(PrefetchAllocation mem);
5800   ins_cost(125);
5801 
5802   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5803   ins_encode %{
5804     __ prefetchnta($mem$$Address);
5805   %}
5806   ins_pipe(ialu_mem);
5807 %}
5808 
5809 instruct prefetchAllocT0( memory mem ) %{
       // Allocation prefetch selected when AllocatePrefetchInstr == 1: matches a
       // PrefetchAllocation node and emits PREFETCHT0 on the address from $mem.
5810   predicate(AllocatePrefetchInstr==1);
5811   match(PrefetchAllocation mem);
5812   ins_cost(125);
5813 
5814   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5815   ins_encode %{
5816     __ prefetcht0($mem$$Address);
5817   %}
5818   ins_pipe(ialu_mem);
5819 %}
5820 
5821 instruct prefetchAllocT2( memory mem ) %{
       // Allocation prefetch selected when AllocatePrefetchInstr == 2: matches a
       // PrefetchAllocation node and emits PREFETCHT2 on the address from $mem.
5822   predicate(AllocatePrefetchInstr==2);
5823   match(PrefetchAllocation mem);
5824   ins_cost(125);
5825 
5826   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5827   ins_encode %{
5828     __ prefetcht2($mem$$Address);
5829   %}
5830   ins_pipe(ialu_mem);
5831 %}
5832 
5833 //----------Store Instructions-------------------------------------------------
5834 
5835 // Store Byte
5836 instruct storeB(memory mem, rRegI src)
5837 %{
       // Matches StoreB of an int register into memory; printed as "movb".
       // Encoded with opcode 0x88 through the REX_breg_mem / OpcP / reg_mem
       // encoding classes (defined outside this section).
5838   match(Set mem (StoreB mem src));
5839 
5840   ins_cost(125); // XXX
5841   format %{ "movb    $mem, $src\t# byte" %}
5842   opcode(0x88);
5843   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5844   ins_pipe(ialu_mem_reg);
5845 %}
5846 
5847 // Store Char/Short
5848 instruct storeC(memory mem, rRegI src)
5849 %{
       // Matches StoreC (char/short) of an int register into memory; printed as
       // "movw". Uses opcode 0x89 preceded by SizePrefix (presumably the 16-bit
       // operand-size prefix -- confirm against the enc_class definition).
5850   match(Set mem (StoreC mem src));
5851 
5852   ins_cost(125); // XXX
5853   format %{ "movw    $mem, $src\t# char/short" %}
5854   opcode(0x89);
5855   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5856   ins_pipe(ialu_mem_reg);
5857 %}
5858 
5859 // Store Integer
5860 instruct storeI(memory mem, rRegI src)
5861 %{
       // Matches StoreI of an int register into memory; printed as "movl".
       // Encoded with opcode 0x89 and the non-wide REX_reg_mem prefix class.
5862   match(Set mem (StoreI mem src));
5863 
5864   ins_cost(125); // XXX
5865   format %{ "movl    $mem, $src\t# int" %}
5866   opcode(0x89);
5867   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5868   ins_pipe(ialu_mem_reg);
5869 %}
5870 
5871 // Store Long
5872 instruct storeL(memory mem, rRegL src)
5873 %{
       // Matches StoreL of a long register into memory; printed as "movq".
       // Same opcode (0x89) as storeI but with the REX_reg_mem_wide prefix class.
5874   match(Set mem (StoreL mem src));
5875 
5876   ins_cost(125); // XXX
5877   format %{ "movq    $mem, $src\t# long" %}
5878   opcode(0x89);
5879   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5880   ins_pipe(ialu_mem_reg); // XXX
5881 %}
5882 
5883 // Store Pointer
5884 instruct storeP(memory mem, any_RegP src)
5885 %{
       // Matches StoreP of a pointer register (operand class any_RegP) into
       // memory; printed as "movq" and encoded like storeL (0x89, wide REX).
5886   match(Set mem (StoreP mem src));
5887 
5888   ins_cost(125); // XXX
5889   format %{ "movq    $mem, $src\t# ptr" %}
5890   opcode(0x89);
5891   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5892   ins_pipe(ialu_mem_reg);
5893 %}
5894 
5895 instruct storeImmP0(memory mem, immP0 zero)
5896 %{
       // Stores a zero pointer constant by writing R12 instead of an immediate.
       // Only enabled with compressed oops when both the narrow oop base and the
       // narrow klass base are NULL; the format string notes R12_heapbase==0 in
       // that configuration.
5897   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5898   match(Set mem (StoreP mem zero));
5899 
5900   ins_cost(125); // XXX
5901   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5902   ins_encode %{
5903     __ movq($mem$$Address, r12);
5904   %}
5905   ins_pipe(ialu_mem_reg);
5906 %}
5907 
5908 // Store NULL Pointer, mark word, or other simple pointer constant.
5909 instruct storeImmP(memory mem, immP31 src)
5910 %{
       // Stores a pointer constant of operand type immP31 as an immediate;
       // printed as "movq". Encoded as C7 /0 with a wide REX prefix and a
       // 32-bit constant (Con32). Costlier (150) than the register stores.
5911   match(Set mem (StoreP mem src));
5912 
5913   ins_cost(150); // XXX
5914   format %{ "movq    $mem, $src\t# ptr" %}
5915   opcode(0xC7); /* C7 /0 */
5916   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5917   ins_pipe(ialu_mem_imm);
5918 %}
5919 
5920 // Store Compressed Pointer
5921 instruct storeN(memory mem, rRegN src)
5922 %{
       // Matches StoreN: writes a compressed pointer register to memory with a
       // 32-bit movl.
5923   match(Set mem (StoreN mem src));
5924 
5925   ins_cost(125); // XXX
5926   format %{ "movl    $mem, $src\t# compressed ptr" %}
5927   ins_encode %{
5928     __ movl($mem$$Address, $src$$Register);
5929   %}
5930   ins_pipe(ialu_mem_reg);
5931 %}
5932 
5933 instruct storeNKlass(memory mem, rRegN src)
5934 %{
       // Matches StoreNKlass: writes a compressed klass pointer register to
       // memory with a 32-bit movl (same encoding as storeN).
5935   match(Set mem (StoreNKlass mem src));
5936 
5937   ins_cost(125); // XXX
5938   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5939   ins_encode %{
5940     __ movl($mem$$Address, $src$$Register);
5941   %}
5942   ins_pipe(ialu_mem_reg);
5943 %}
5944 
5945 instruct storeImmN0(memory mem, immN0 zero)
5946 %{
       // Stores a zero compressed-pointer constant by writing the low 32 bits of
       // R12. Enabled only when the narrow oop base and narrow klass base are
       // both NULL; the format string notes R12_heapbase==0 in that case.
5947   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5948   match(Set mem (StoreN mem zero));
5949 
5950   ins_cost(125); // XXX
5951   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5952   ins_encode %{
5953     __ movl($mem$$Address, r12);
5954   %}
5955   ins_pipe(ialu_mem_reg);
5956 %}
5957 
5958 instruct storeImmN(memory mem, immN src)
5959 %{
       // Stores a compressed-pointer constant. A NULL constant is written as a
       // plain 32-bit zero; any other constant goes through set_narrow_oop,
       // which receives the constant as a jobject.
5960   match(Set mem (StoreN mem src));
5961 
5962   ins_cost(150); // XXX
5963   format %{ "movl    $mem, $src\t# compressed ptr" %}
5964   ins_encode %{
5965     address con = (address)$src$$constant;
5966     if (con == NULL) {
5967       __ movl($mem$$Address, (int32_t)0);
5968     } else {
5969       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5970     }
5971   %}
5972   ins_pipe(ialu_mem_imm);
5973 %}
5974 
5975 instruct storeImmNKlass(memory mem, immNKlass src)
5976 %{
       // Stores a compressed klass constant via set_narrow_klass, passing the
       // constant as a Klass*. Unlike storeImmN there is no NULL special case.
5977   match(Set mem (StoreNKlass mem src));
5978 
5979   ins_cost(150); // XXX
5980   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5981   ins_encode %{
5982     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5983   %}
5984   ins_pipe(ialu_mem_imm);
5985 %}
5986 
5987 // Store Integer Immediate
5988 instruct storeImmI0(memory mem, immI0 zero)
5989 %{
       // Stores int zero by writing the low 32 bits of R12 rather than an
       // immediate. Enabled only with compressed oops and NULL narrow oop/klass
       // bases (format string: R12_heapbase==0). Cheaper (125) than storeImmI.
5990   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5991   match(Set mem (StoreI mem zero));
5992 
5993   ins_cost(125); // XXX
5994   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
5995   ins_encode %{
5996     __ movl($mem$$Address, r12);
5997   %}
5998   ins_pipe(ialu_mem_reg);
5999 %}
6000 
6001 instruct storeImmI(memory mem, immI src)
6002 %{
       // Stores an int constant as an immediate; printed as "movl". Encoded as
       // C7 /0 with a non-wide REX prefix followed by a 32-bit constant (Con32).
6003   match(Set mem (StoreI mem src));
6004 
6005   ins_cost(150);
6006   format %{ "movl    $mem, $src\t# int" %}
6007   opcode(0xC7); /* C7 /0 */
6008   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6009   ins_pipe(ialu_mem_imm);
6010 %}
6011 
6012 // Store Long Immediate
6013 instruct storeImmL0(memory mem, immL0 zero)
6014 %{
       // Stores long zero by writing all 64 bits of R12 rather than an
       // immediate. Enabled only with compressed oops and NULL narrow oop/klass
       // bases (format string: R12_heapbase==0).
6015   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6016   match(Set mem (StoreL mem zero));
6017 
6018   ins_cost(125); // XXX
6019   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6020   ins_encode %{
6021     __ movq($mem$$Address, r12);
6022   %}
6023   ins_pipe(ialu_mem_reg);
6024 %}
6025 
6026 instruct storeImmL(memory mem, immL32 src)
6027 %{
       // Stores a long constant of operand type immL32 as an immediate; printed
       // as "movq". Encoded as C7 /0 with a wide REX prefix and a 32-bit
       // constant (Con32).
6028   match(Set mem (StoreL mem src));
6029 
6030   ins_cost(150);
6031   format %{ "movq    $mem, $src\t# long" %}
6032   opcode(0xC7); /* C7 /0 */
6033   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6034   ins_pipe(ialu_mem_imm);
6035 %}
6036 
6037 // Store Short/Char Immediate
6038 instruct storeImmC0(memory mem, immI0 zero)
6039 %{
       // Stores a zero short/char by writing the low 16 bits of R12 (movw).
       // Enabled only with compressed oops and NULL narrow oop/klass bases
       // (format string: R12_heapbase==0).
6040   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6041   match(Set mem (StoreC mem zero));
6042 
6043   ins_cost(125); // XXX
6044   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6045   ins_encode %{
6046     __ movw($mem$$Address, r12);
6047   %}
6048   ins_pipe(ialu_mem_reg);
6049 %}
6050 
6051 instruct storeImmI16(memory mem, immI16 src)
6052 %{
       // Stores a 16-bit constant for StoreC; only available when the
       // UseStoreImmI16 flag is set. Encoded as SizePrefix + C7 /0 with a
       // 16-bit constant (Con16), per the opcode comment below.
6053   predicate(UseStoreImmI16);
6054   match(Set mem (StoreC mem src));
6055 
6056   ins_cost(150);
6057   format %{ "movw    $mem, $src\t# short/char" %}
6058   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6059   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6060   ins_pipe(ialu_mem_imm);
6061 %}
6062 
6063 // Store Byte Immediate
6064 instruct storeImmB0(memory mem, immI0 zero)
6065 %{
       // Stores a zero byte by writing the low 8 bits of R12 (movb) rather than
       // an immediate. Enabled only with compressed oops and NULL narrow
       // oop/klass bases (format string: R12_heapbase==0).
       // The format comment previously said "short/char" (copied from
       // storeImmC0); it now says "byte" to match the StoreB node and the movb.
6066   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6067   match(Set mem (StoreB mem zero));
6068 
6069   ins_cost(125); // XXX
6070   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6071   ins_encode %{
6072     __ movb($mem$$Address, r12);
6073   %}
6074   ins_pipe(ialu_mem_reg);
6075 %}
6076 
6077 instruct storeImmB(memory mem, immI8 src)
6078 %{
       // Stores an 8-bit constant for StoreB; printed as "movb". Encoded as
       // C6 /0 with the constant emitted by the Con8or32 encoding class.
6079   match(Set mem (StoreB mem src));
6080 
6081   ins_cost(150); // XXX
6082   format %{ "movb    $mem, $src\t# byte" %}
6083   opcode(0xC6); /* C6 /0 */
6084   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6085   ins_pipe(ialu_mem_imm);
6086 %}
6087 
6088 // Store CMS card-mark Immediate
6089 instruct storeImmCM0_reg(memory mem, immI0 zero)
6090 %{
       // Card-mark store of zero (StoreCM) done by writing the low byte of R12.
       // Enabled only with compressed oops and NULL narrow oop/klass bases
       // (format string: R12_heapbase==0). Cheaper (125) than storeImmCM0.
6091   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6092   match(Set mem (StoreCM mem zero));
6093 
6094   ins_cost(125); // XXX
6095   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6096   ins_encode %{
6097     __ movb($mem$$Address, r12);
6098   %}
6099   ins_pipe(ialu_mem_reg);
6100 %}
6101 
6102 instruct storeImmCM0(memory mem, immI0 src)
6103 %{
       // Card-mark store of zero (StoreCM) using an immediate byte store
       // (C6 /0). Has no predicate, so it is available whenever the R12-based
       // variant above is not.
6104   match(Set mem (StoreCM mem src));
6105 
6106   ins_cost(150); // XXX
6107   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6108   opcode(0xC6); /* C6 /0 */
6109   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6110   ins_pipe(ialu_mem_imm);
6111 %}
6112 
6113 // Store Float
6114 instruct storeF(memory mem, regF src)
6115 %{
       // Matches StoreF of a float register; emitted through the movflt
       // macro-assembler helper from the XMM register (printed as "movss").
6116   match(Set mem (StoreF mem src));
6117 
6118   ins_cost(95); // XXX
6119   format %{ "movss   $mem, $src\t# float" %}
6120   ins_encode %{
6121     __ movflt($mem$$Address, $src$$XMMRegister);
6122   %}
6123   ins_pipe(pipe_slow); // XXX
6124 %}
6125 
6126 // Store immediate Float value (it is faster than store from XMM register)
6127 instruct storeF0(memory mem, immF0 zero)
6128 %{
       // Stores float 0.0 by writing the low 32 bits of R12 with an integer
       // movl. Enabled only with compressed oops and NULL narrow oop/klass bases
       // (format string: R12_heapbase==0). Lowest cost (25) of the StoreF rules
       // in this section.
6129   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6130   match(Set mem (StoreF mem zero));
6131 
6132   ins_cost(25); // XXX
6133   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6134   ins_encode %{
6135     __ movl($mem$$Address, r12);
6136   %}
6137   ins_pipe(ialu_mem_reg);
6138 %}
6139 
6140 instruct storeF_imm(memory mem, immF src)
6141 %{
       // Stores a float constant with an integer immediate store (C7 /0); the
       // constant is emitted by Con32F_as_bits (by its name, the float's raw
       // 32-bit pattern -- confirm against the enc_class definition).
6142   match(Set mem (StoreF mem src));
6143 
6144   ins_cost(50);
6145   format %{ "movl    $mem, $src\t# float" %}
6146   opcode(0xC7); /* C7 /0 */
6147   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6148   ins_pipe(ialu_mem_imm);
6149 %}
6150 
6151 // Store Double
6152 instruct storeD(memory mem, regD src)
6153 %{
       // Matches StoreD of a double register; emitted through the movdbl
       // macro-assembler helper from the XMM register (printed as "movsd").
6154   match(Set mem (StoreD mem src));
6155 
6156   ins_cost(95); // XXX
6157   format %{ "movsd   $mem, $src\t# double" %}
6158   ins_encode %{
6159     __ movdbl($mem$$Address, $src$$XMMRegister);
6160   %}
6161   ins_pipe(pipe_slow); // XXX
6162 %}
6163 
6164 // Store immediate double 0.0 (it is faster than store from XMM register)
6165 instruct storeD0_imm(memory mem, immD0 src)
6166 %{
       // Stores double 0.0 with an integer immediate store (C7 /0, wide REX).
       // The predicate is the exact complement of storeD0's, so one of the two
       // zero-store rules always applies. Previously the narrow_klass_base term
       // was missing: with compressed oops, a NULL oop base and a non-NULL klass
       // base, neither rule matched and the store fell back to the XMM-register
       // form (storeD).
6167   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL) || (Universe::narrow_klass_base() != NULL));
6168   match(Set mem (StoreD mem src));
6169 
6170   ins_cost(50);
6171   format %{ "movq    $mem, $src\t# double 0." %}
6172   opcode(0xC7); /* C7 /0 */
6173   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6174   ins_pipe(ialu_mem_imm);
6175 %}
6176 
6177 instruct storeD0(memory mem, immD0 zero)
6178 %{
       // Stores double 0.0 by writing all 64 bits of R12 with an integer movq.
       // Enabled only with compressed oops and NULL narrow oop/klass bases
       // (format string: R12_heapbase==0). Cheaper (25) than storeD0_imm (50).
6179   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6180   match(Set mem (StoreD mem zero));
6181 
6182   ins_cost(25); // XXX
6183   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6184   ins_encode %{
6185     __ movq($mem$$Address, r12);
6186   %}
6187   ins_pipe(ialu_mem_reg);
6188 %}
6189 
6190 instruct storeSSI(stackSlotI dst, rRegI src)
6191 %{
       // Copies an int register into an int stack slot (plain "Set dst src"
       // match); printed as "movl" and encoded with opcode 0x89.
6192   match(Set dst src);
6193 
6194   ins_cost(100);
6195   format %{ "movl    $dst, $src\t# int stk" %}
6196   opcode(0x89);
6197   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6198   ins_pipe( ialu_mem_reg );
6199 %}
6200 
6201 instruct storeSSL(stackSlotL dst, rRegL src)
6202 %{
       // Copies a long register into a long stack slot; printed as "movq" and
       // encoded with opcode 0x89 plus the wide REX prefix class.
6203   match(Set dst src);
6204 
6205   ins_cost(100);
6206   format %{ "movq    $dst, $src\t# long stk" %}
6207   opcode(0x89);
6208   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6209   ins_pipe(ialu_mem_reg);
6210 %}
6211 
6212 instruct storeSSP(stackSlotP dst, rRegP src)
6213 %{
       // Copies a pointer register into a pointer stack slot; printed as "movq"
       // and encoded identically to storeSSL.
6214   match(Set dst src);
6215 
6216   ins_cost(100);
6217   format %{ "movq    $dst, $src\t# ptr stk" %}
6218   opcode(0x89);
6219   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6220   ins_pipe(ialu_mem_reg);
6221 %}
6222 
6223 instruct storeSSF(stackSlotF dst, regF src)
6224 %{
       // Copies a float XMM register into a float stack slot. The slot is
       // addressed as rsp plus the operand's displacement.
6225   match(Set dst src);
6226 
6227   ins_cost(95); // XXX
6228   format %{ "movss   $dst, $src\t# float stk" %}
6229   ins_encode %{
6230     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6231   %}
6232   ins_pipe(pipe_slow); // XXX
6233 %}
6234 
6235 instruct storeSSD(stackSlotD dst, regD src)
6236 %{
       // Copies a double XMM register into a double stack slot. The slot is
       // addressed as rsp plus the operand's displacement.
6237   match(Set dst src);
6238 
6239   ins_cost(95); // XXX
6240   format %{ "movsd   $dst, $src\t# double stk" %}
6241   ins_encode %{
6242     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6243   %}
6244   ins_pipe(pipe_slow); // XXX
6245 %}
6246 
6247 //----------BSWAP Instructions-------------------------------------------------
6248 instruct bytes_reverse_int(rRegI dst) %{
       // In-place ReverseBytesI: dst is both source and result; printed as
       // "bswapl". Two-byte opcode 0F C8 with a non-wide REX prefix class.
6249   match(Set dst (ReverseBytesI dst));
6250 
6251   format %{ "bswapl  $dst" %}
6252   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6253   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6254   ins_pipe( ialu_reg );
6255 %}
6256 
6257 instruct bytes_reverse_long(rRegL dst) %{
       // In-place ReverseBytesL: dst is both source and result; printed as
       // "bswapq". Same opcode as the int form but with the wide REX class.
6258   match(Set dst (ReverseBytesL dst));
6259 
6260   format %{ "bswapq  $dst" %}
6261   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6262   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6263   ins_pipe( ialu_reg);
6264 %}
6265 
6266 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // In-place ReverseBytesUS: swaps all four bytes of dst, then shifts right
       // logically by 16 so the swapped halfword lands in the low bits with the
       // upper bits zero. The flags register is declared killed.
6267   match(Set dst (ReverseBytesUS dst));
6268   effect(KILL cr);
6269 
6270   format %{ "bswapl  $dst\n\t"
6271             "shrl    $dst,16\n\t" %}
6272   ins_encode %{
6273     __ bswapl($dst$$Register);
6274     __ shrl($dst$$Register, 16);
6275   %}
6276   ins_pipe( ialu_reg );
6277 %}
6278 
6279 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // In-place ReverseBytesS: swaps all four bytes of dst, then shifts right
       // arithmetically by 16 so the swapped halfword is sign-extended into the
       // int result. The flags register is declared killed.
       // The format now prints "sarl", matching the emitted instruction and the
       // "shrl" shown by the unsigned variant (it previously printed "sar").
6280   match(Set dst (ReverseBytesS dst));
6281   effect(KILL cr);
6282 
6283   format %{ "bswapl  $dst\n\t"
6284             "sarl    $dst,16\n\t" %}
6285   ins_encode %{
6286     __ bswapl($dst$$Register);
6287     __ sarl($dst$$Register, 16);
6288   %}
6289   ins_pipe( ialu_reg );
6290 %}
6291 
6292 //---------- Zeros Count Instructions ------------------------------------------
6293 
6294 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI using a single LZCNT (32-bit form); only selected
       // when UseCountLeadingZerosInstruction is set. Flags are declared killed.
6295   predicate(UseCountLeadingZerosInstruction);
6296   match(Set dst (CountLeadingZerosI src));
6297   effect(KILL cr);
6298 
6299   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6300   ins_encode %{
6301     __ lzcntl($dst$$Register, $src$$Register);
6302   %}
6303   ins_pipe(ialu_reg);
6304 %}
6305 
6306 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosI for when UseCountLeadingZerosInstruction is
       // off. Computes (BitsPerInt - 1) - bsr(src). If the jnz after BSR is not
       // taken (zero source), dst is forced to -1 so the result is BitsPerInt.
6307   predicate(!UseCountLeadingZerosInstruction);
6308   match(Set dst (CountLeadingZerosI src));
6309   effect(KILL cr);
6310 
6311   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6312             "jnz     skip\n\t"
6313             "movl    $dst, -1\n"
6314       "skip:\n\t"
6315             "negl    $dst\n\t"
6316             "addl    $dst, 31" %}
6317   ins_encode %{
6318     Register Rdst = $dst$$Register;
6319     Register Rsrc = $src$$Register;
6320     Label skip;
6321     __ bsrl(Rdst, Rsrc);
6322     __ jccb(Assembler::notZero, skip);
6323     __ movl(Rdst, -1);
6324     __ bind(skip);
6325     __ negl(Rdst);
6326     __ addl(Rdst, BitsPerInt - 1);
6327   %}
6328   ins_pipe(ialu_reg);
6329 %}
6330 
6331 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL using a single LZCNT (64-bit form); only selected
       // when UseCountLeadingZerosInstruction is set. The result operand is an
       // int register even though the source is a long.
6332   predicate(UseCountLeadingZerosInstruction);
6333   match(Set dst (CountLeadingZerosL src));
6334   effect(KILL cr);
6335 
6336   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6337   ins_encode %{
6338     __ lzcntq($dst$$Register, $src$$Register);
6339   %}
6340   ins_pipe(ialu_reg);
6341 %}
6342 
6343 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosL for when UseCountLeadingZerosInstruction is
       // off. Computes (BitsPerLong - 1) - bsr(src) using a 64-bit BSR followed
       // by 32-bit neg/add. If the jnz is not taken (zero source), dst is forced
       // to -1 so the result is BitsPerLong.
6344   predicate(!UseCountLeadingZerosInstruction);
6345   match(Set dst (CountLeadingZerosL src));
6346   effect(KILL cr);
6347 
6348   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6349             "jnz     skip\n\t"
6350             "movl    $dst, -1\n"
6351       "skip:\n\t"
6352             "negl    $dst\n\t"
6353             "addl    $dst, 63" %}
6354   ins_encode %{
6355     Register Rdst = $dst$$Register;
6356     Register Rsrc = $src$$Register;
6357     Label skip;
6358     __ bsrq(Rdst, Rsrc);
6359     __ jccb(Assembler::notZero, skip);
6360     __ movl(Rdst, -1);
6361     __ bind(skip);
6362     __ negl(Rdst);
6363     __ addl(Rdst, BitsPerLong - 1);
6364   %}
6365   ins_pipe(ialu_reg);
6366 %}
6367 
6368 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via BSF. When the jnz after BSF is not taken (zero
       // source), dst is set to BitsPerInt instead of the BSF result.
6369   match(Set dst (CountTrailingZerosI src));
6370   effect(KILL cr);
6371 
6372   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6373             "jnz     done\n\t"
6374             "movl    $dst, 32\n"
6375       "done:" %}
6376   ins_encode %{
6377     Register Rdst = $dst$$Register;
6378     Label done;
6379     __ bsfl(Rdst, $src$$Register);
6380     __ jccb(Assembler::notZero, done);
6381     __ movl(Rdst, BitsPerInt);
6382     __ bind(done);
6383   %}
6384   ins_pipe(ialu_reg);
6385 %}
6386 
6387 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via 64-bit BSF. When the jnz after BSF is not taken
       // (zero source), dst is set to BitsPerLong instead of the BSF result.
6388   match(Set dst (CountTrailingZerosL src));
6389   effect(KILL cr);
6390 
6391   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6392             "jnz     done\n\t"
6393             "movl    $dst, 64\n"
6394       "done:" %}
6395   ins_encode %{
6396     Register Rdst = $dst$$Register;
6397     Label done;
6398     __ bsfq(Rdst, $src$$Register);
6399     __ jccb(Assembler::notZero, done);
6400     __ movl(Rdst, BitsPerLong);
6401     __ bind(done);
6402   %}
6403   ins_pipe(ialu_reg);
6404 %}
6405 
6406 
6407 //---------- Population Count Instructions -------------------------------------
6408 
6409 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI from a register using popcntl; only selected when
       // UsePopCountInstruction is set. Flags are declared killed.
6410   predicate(UsePopCountInstruction);
6411   match(Set dst (PopCountI src));
6412   effect(KILL cr);
6413 
6414   format %{ "popcnt  $dst, $src" %}
6415   ins_encode %{
6416     __ popcntl($dst$$Register, $src$$Register);
6417   %}
6418   ins_pipe(ialu_reg);
6419 %}
6420 
6421 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI with the LoadI folded into the instruction: popcntl reads its
       // source directly from memory. Requires UsePopCountInstruction.
6422   predicate(UsePopCountInstruction);
6423   match(Set dst (PopCountI (LoadI mem)));
6424   effect(KILL cr);
6425 
6426   format %{ "popcnt  $dst, $mem" %}
6427   ins_encode %{
6428     __ popcntl($dst$$Register, $mem$$Address);
6429   %}
6430   ins_pipe(ialu_reg);
6431 %}
6432 
6433 // Note: Long.bitCount(long) returns an int.
6434 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL from a long register using popcntq, producing its result in
       // an int register. Requires UsePopCountInstruction.
6435   predicate(UsePopCountInstruction);
6436   match(Set dst (PopCountL src));
6437   effect(KILL cr);
6438 
6439   format %{ "popcnt  $dst, $src" %}
6440   ins_encode %{
6441     __ popcntq($dst$$Register, $src$$Register);
6442   %}
6443   ins_pipe(ialu_reg);
6444 %}
6445 
6446 // Note: Long.bitCount(long) returns an int.
6447 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL with the LoadL folded into the instruction: popcntq reads its
       // source directly from memory. Requires UsePopCountInstruction.
6448   predicate(UsePopCountInstruction);
6449   match(Set dst (PopCountL (LoadL mem)));
6450   effect(KILL cr);
6451 
6452   format %{ "popcnt  $dst, $mem" %}
6453   ins_encode %{
6454     __ popcntq($dst$$Register, $mem$$Address);
6455   %}
6456   ins_pipe(ialu_reg);
6457 %}
6458 
6459 
6460 //----------MemBar Instructions-----------------------------------------------
6461 // Memory barrier flavors
6462 
6463 instruct membar_acquire()
6464 %{
       // MemBarAcquire emits no code: zero cost, declared size 0 and an empty
       // encoding.
6465   match(MemBarAcquire);
6466   ins_cost(0);
6467 
6468   size(0);
6469   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6470   ins_encode();
6471   ins_pipe(empty);
6472 %}
6473 
6474 instruct membar_acquire_lock()
6475 %{
       // MemBarAcquireLock emits no code; per the format string, the CMPXCHG
       // already issued by FastLock makes a separate barrier unnecessary.
6476   match(MemBarAcquireLock);
6477   ins_cost(0);
6478 
6479   size(0);
6480   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6481   ins_encode();
6482   ins_pipe(empty);
6483 %}
6484 
6485 instruct membar_release()
6486 %{
       // MemBarRelease emits no code: zero cost, declared size 0 and an empty
       // encoding.
6487   match(MemBarRelease);
6488   ins_cost(0);
6489 
6490   size(0);
6491   format %{ "MEMBAR-release ! (empty encoding)" %}
6492   ins_encode();
6493   ins_pipe(empty);
6494 %}
6495 
6496 instruct membar_release_lock()
6497 %{
       // MemBarReleaseLock emits no code; per the format string, the FastUnlock
       // that follows makes a separate barrier unnecessary.
6498   match(MemBarReleaseLock);
6499   ins_cost(0);
6500 
6501   size(0);
6502   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6503   ins_encode();
6504   ins_pipe(empty);
6505 %}
6506 
6507 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile: requests a StoreLoad barrier from the macro assembler.
       // The format template prints a "lock addl [rsp + #0], 0" when
       // os::is_MP() is true and an empty barrier otherwise. Flags are declared
       // killed, and the high cost (400) lets the zero-cost
       // unnecessary_membar_volatile rule win when its predicate holds.
6508   match(MemBarVolatile);
6509   effect(KILL cr);
6510   ins_cost(400);
6511 
6512   format %{
6513     $$template
6514     if (os::is_MP()) {
6515       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6516     } else {
6517       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6518     }
6519   %}
6520   ins_encode %{
6521     __ membar(Assembler::StoreLoad);
6522   %}
6523   ins_pipe(pipe_slow);
6524 %}
6525 
6526 instruct unnecessary_membar_volatile()
6527 %{
       // Zero-cost, zero-size MemBarVolatile used when
       // Matcher::post_store_load_barrier(n) reports the barrier is not needed
       // for this node; nothing is emitted.
6528   match(MemBarVolatile);
6529   predicate(Matcher::post_store_load_barrier(n));
6530   ins_cost(0);
6531 
6532   size(0);
6533   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6534   ins_encode();
6535   ins_pipe(empty);
6536 %}
6537 
6538 instruct membar_storestore() %{
       // MemBarStoreStore emits no code: zero cost, declared size 0 and an
       // empty encoding.
6539   match(MemBarStoreStore);
6540   ins_cost(0);
6541 
6542   size(0);
6543   format %{ "MEMBAR-storestore (empty encoding)" %}
6544   ins_encode( );
6545   ins_pipe(empty);
6546 %}
6547 
6548 //----------Move Instructions--------------------------------------------------
6549 
6550 instruct castX2P(rRegP dst, rRegL src)
6551 %{
       // CastX2P (long -> pointer): a plain register copy, skipped entirely when
       // the allocator assigned dst and src the same register.
6552   match(Set dst (CastX2P src));
6553 
6554   format %{ "movq    $dst, $src\t# long->ptr" %}
6555   ins_encode %{
6556     if ($dst$$reg != $src$$reg) {
6557       __ movptr($dst$$Register, $src$$Register);
6558     }
6559   %}
6560   ins_pipe(ialu_reg_reg); // XXX
6561 %}
6562 
6563 instruct castP2X(rRegL dst, rRegP src)
6564 %{
       // CastP2X (pointer -> long): a plain register copy, skipped entirely when
       // the allocator assigned dst and src the same register.
6565   match(Set dst (CastP2X src));
6566 
6567   format %{ "movq    $dst, $src\t# ptr -> long" %}
6568   ins_encode %{
6569     if ($dst$$reg != $src$$reg) {
6570       __ movptr($dst$$Register, $src$$Register);
6571     }
6572   %}
6573   ins_pipe(ialu_reg_reg); // XXX
6574 %}
6575 
6576 // Convert oop into int for vectors alignment masking
6577 instruct convP2I(rRegI dst, rRegP src)
6578 %{
       // Folds ConvL2I(CastP2X ptr) into a single 32-bit movl. The move is
       // emitted unconditionally, even if dst and src are the same register.
6579   match(Set dst (ConvL2I (CastP2X src)));
6580 
6581   format %{ "movl    $dst, $src\t# ptr -> int" %}
6582   ins_encode %{
6583     __ movl($dst$$Register, $src$$Register);
6584   %}
6585   ins_pipe(ialu_reg_reg); // XXX
6586 %}
6587 
6588 // Convert compressed oop into int for vectors alignment masking
6589 // in case of 32bit oops (heap < 4Gb).
6590 instruct convN2I(rRegI dst, rRegN src)
6591 %{
       // Folds ConvL2I(CastP2X(DecodeN n)) into a single 32-bit movl of the
       // compressed value, skipping the decode. Only valid (and only enabled)
       // when the narrow oop shift is 0.
6592   predicate(Universe::narrow_oop_shift() == 0);
6593   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6594 
6595   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6596   ins_encode %{
6597     __ movl($dst$$Register, $src$$Register);
6598   %}
6599   ins_pipe(ialu_reg_reg); // XXX
6600 %}
6601 
6602 // Convert oop pointer into compressed form
6603 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values whose type is not known to be non-null (bottom type
       // ptr() != NotNull). Copies src into dst if they differ, then encodes dst
       // in place with encode_heap_oop. Flags are declared killed.
6604   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6605   match(Set dst (EncodeP src));
6606   effect(KILL cr);
6607   format %{ "encode_heap_oop $dst,$src" %}
6608   ins_encode %{
6609     Register s = $src$$Register;
6610     Register d = $dst$$Register;
6611     if (s != d) {
6612       __ movq(d, s);
6613     }
6614     __ encode_heap_oop(d);
6615   %}
6616   ins_pipe(ialu_reg_long);
6617 %}
6618 
6619 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values typed NotNull: uses the two-register
       // encode_heap_oop_not_null helper directly. Flags are declared killed.
6620   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6621   match(Set dst (EncodeP src));
6622   effect(KILL cr);
6623   format %{ "encode_heap_oop_not_null $dst,$src" %}
6624   ins_encode %{
6625     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6626   %}
6627   ins_pipe(ialu_reg_long);
6628 %}
6629 
6630 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for values whose type is neither NotNull nor Constant. Copies
       // src into dst if they differ, then decodes dst in place with
       // decode_heap_oop. Flags are declared killed.
6631   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6632             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6633   match(Set dst (DecodeN src));
6634   effect(KILL cr);
6635   format %{ "decode_heap_oop $dst,$src" %}
6636   ins_encode %{
6637     Register s = $src$$Register;
6638     Register d = $dst$$Register;
6639     if (s != d) {
6640       __ movq(d, s);
6641     }
6642     __ decode_heap_oop(d);
6643   %}
6644   ins_pipe(ialu_reg_long);
6645 %}
6646 
6647 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for values typed NotNull or Constant (the complement of
       // decodeHeapOop's predicate). Uses the two-register helper when dst and
       // src differ and the in-place one-register helper otherwise.
6648   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6649             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6650   match(Set dst (DecodeN src));
6651   effect(KILL cr);
6652   format %{ "decode_heap_oop_not_null $dst,$src" %}
6653   ins_encode %{
6654     Register s = $src$$Register;
6655     Register d = $dst$$Register;
6656     if (s != d) {
6657       __ decode_heap_oop_not_null(d, s);
6658     } else {
6659       __ decode_heap_oop_not_null(d);
6660     }
6661   %}
6662   ins_pipe(ialu_reg_long);
6663 %}
6664 
6665 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodePKlass: compresses a klass pointer with the two-register
       // encode_klass_not_null helper. Flags are declared killed.
       // The format now names the klass helper that is actually emitted; it
       // previously printed "encode_heap_oop_not_null", which made disassembly
       // listings indistinguishable from encodeHeapOop_not_null.
6666   match(Set dst (EncodePKlass src));
6667   effect(KILL cr);
6668   format %{ "encode_klass_not_null $dst,$src" %}
6669   ins_encode %{
6670     __ encode_klass_not_null($dst$$Register, $src$$Register);
6671   %}
6672   ins_pipe(ialu_reg_long);
6673 %}
6674 
6675 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeNKlass: expands a compressed klass pointer. Uses the two-register
       // decode_klass_not_null helper when dst and src differ and the in-place
       // one-register form otherwise. Flags are declared killed.
       // The format now names the klass helper that is actually emitted; it
       // previously printed "decode_heap_oop_not_null", which made disassembly
       // listings indistinguishable from decodeHeapOop_not_null.
6676   match(Set dst (DecodeNKlass src));
6677   effect(KILL cr);
6678   format %{ "decode_klass_not_null $dst,$src" %}
6679   ins_encode %{
6680     Register s = $src$$Register;
6681     Register d = $dst$$Register;
6682     if (s != d) {
6683       __ decode_klass_not_null(d, s);
6684     } else {
6685       __ decode_klass_not_null(d);
6686     }
6687   %}
6688   ins_pipe(ialu_reg_long);
6689 %}
6690 
6691 
6692 //----------Conditional Move---------------------------------------------------
6693 // Jump
6694 // dummy instruction for generating temp registers
6695 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Jump on (switch_val << shift). predicate(false) means the matcher never
       // selects this rule. If it were used, it would load $constantaddress into
       // the TEMP register dest and jump indirectly through
       // [dest + switch_val * scale].
6696   match(Jump (LShiftL switch_val shift));
6697   ins_cost(350);
6698   predicate(false);
6699   effect(TEMP dest);
6700 
6701   format %{ "leaq    $dest, [$constantaddress]\n\t"
6702             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6703   ins_encode %{
6704     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6705     // to do that and the compiler is using that register as one it can allocate.
6706     // So we build it all by hand.
6707     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6708     // ArrayAddress dispatch(table, index);
6709     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6710     __ lea($dest$$Register, $constantaddress);
6711     __ jmp(dispatch);
6712   %}
6713   ins_pipe(pipe_jmp);
6714 %}
6715 
6716 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump on ((switch_val << shift) + offset): folds both the scale and the
       // constant offset into the addressing mode. Loads $constantaddress into
       // the TEMP register dest, then jumps indirectly through
       // [dest + switch_val * scale + offset]. The offset is narrowed to int.
6717   match(Jump (AddL (LShiftL switch_val shift) offset));
6718   ins_cost(350);
6719   effect(TEMP dest);
6720 
6721   format %{ "leaq    $dest, [$constantaddress]\n\t"
6722             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6723   ins_encode %{
6724     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6725     // to do that and the compiler is using that register as one it can allocate.
6726     // So we build it all by hand.
6727     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6728     // ArrayAddress dispatch(table, index);
6729     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6730     __ lea($dest$$Register, $constantaddress);
6731     __ jmp(dispatch);
6732   %}
6733   ins_pipe(pipe_jmp);
6734 %}
6735 
6736 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // General Jump on an already-computed switch_val: loads $constantaddress
       // into the TEMP register dest, then jumps indirectly through
       // [dest + switch_val] (scale factor times_1).
6737   match(Jump switch_val);
6738   ins_cost(350);
6739   effect(TEMP dest);
6740 
6741   format %{ "leaq    $dest, [$constantaddress]\n\t"
6742             "jmp     [$dest + $switch_val]\n\t" %}
6743   ins_encode %{
6744     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6745     // to do that and the compiler is using that register as one it can allocate.
6746     // So we build it all by hand.
6747     // Address index(noreg, switch_reg, Address::times_1);
6748     // ArrayAddress dispatch(table, index);
6749     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6750     __ lea($dest$$Register, $constantaddress);
6751     __ jmp(dispatch);
6752   %}
6753   ins_pipe(pipe_jmp);
6754 %}
6755 
6756 // Conditional move
6757 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6758 %{
       // CMoveI, signed condition, register source: dst keeps its value or
       // takes src depending on cop evaluated against cr. Base opcode 0F 40,
       // with the condition folded in by the enc_cmov encoding class.
6759   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6760 
6761   ins_cost(200); // XXX
6762   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6763   opcode(0x0F, 0x40);
6764   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6765   ins_pipe(pipe_cmov_reg);
6766 %}
6767 
6768 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // CMoveI, unsigned condition (cmpOpU / rFlagsRegU), register source.
       // Encoding is identical to cmovI_reg; only the operand classes differ.
6769   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6770 
6771   ins_cost(200); // XXX
6772   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6773   opcode(0x0F, 0x40);
6774   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6775   ins_pipe(pipe_cmov_reg);
6776 %}
6777 
6778 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // CMoveI for the cmpOpUCF / rFlagsRegUCF operand classes: has no
       // encoding of its own and simply expands to cmovI_regU.
6779   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6780   ins_cost(200);
6781   expand %{
6782     cmovI_regU(cop, cr, dst, src);
6783   %}
6784 %}
6785 
6786 // Conditional move
6787 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // CMoveI, signed condition, with the LoadI of the second value folded
       // into the cmov's memory operand. Costlier (250) than the register form.
6788   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6789 
6790   ins_cost(250); // XXX
6791   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6792   opcode(0x0F, 0x40);
6793   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6794   ins_pipe(pipe_cmov_mem);
6795 %}
6796 
6797 // Conditional move
6798 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6799 %{
       // CMoveI, unsigned condition, with the LoadI folded into the cmov's
       // memory operand. Encoding is identical to cmovI_mem.
6800   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6801 
6802   ins_cost(250); // XXX
6803   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6804   opcode(0x0F, 0x40);
6805   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6806   ins_pipe(pipe_cmov_mem);
6807 %}
6808 
6809 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Memory-source CMoveI for the cmpOpUCF / rFlagsRegUCF operand classes:
       // has no encoding of its own and simply expands to cmovI_memU.
6810   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6811   ins_cost(250);
6812   expand %{
6813     cmovI_memU(cop, cr, dst, src);
6814   %}
6815 %}
6816 
6817 // Conditional move
6818 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6819 %{
       // CMoveN (compressed pointer), signed condition, register source.
       // Printed as a 32-bit "cmovl" and encoded like cmovI_reg.
6820   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6821 
6822   ins_cost(200); // XXX
6823   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6824   opcode(0x0F, 0x40);
6825   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6826   ins_pipe(pipe_cmov_reg);
6827 %}
6828 
6829 // Conditional move
// Conditional move between two rRegN (compressed pointer) registers, for
// conditions typed cmpOpU on rFlagsRegU.
6830 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6831 %{
6832   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6833
6834   ins_cost(200); // XXX
6835   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6836   opcode(0x0F, 0x40);
6837   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6838   ins_pipe(pipe_cmov_reg);
6839 %}
6840 
// cmpOpUCF / rFlagsRegUCF form of the compressed-pointer conditional move.
// Has no encoding of its own: it expands to cmovN_regU.
6841 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
6842   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6843   ins_cost(200);
6844   expand %{
6845     cmovN_regU(cop, cr, dst, src);
6846   %}
6847 %}
6848 
6849 // Conditional move
// Conditional move between two rRegP (pointer) registers under a cmpOp /
// rFlagsReg condition.  Uses the wide prefix (REX_reg_reg_wide) and is
// printed as cmovq.
6850 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6851 %{
6852   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6853
6854   ins_cost(200); // XXX
6855   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6856   opcode(0x0F, 0x40);
6857   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6858   ins_pipe(pipe_cmov_reg);  // XXX
6859 %}
6860 
6861 // Conditional move
// Conditional move between two rRegP (pointer) registers, for conditions
// typed cmpOpU on rFlagsRegU.  Wide (REX_reg_reg_wide) encoding, printed as
// cmovq.
6862 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6863 %{
6864   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6865
6866   ins_cost(200); // XXX
6867   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6868   opcode(0x0F, 0x40);
6869   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6870   ins_pipe(pipe_cmov_reg); // XXX
6871 %}
6872 
// cmpOpUCF / rFlagsRegUCF form of the pointer conditional move.
// Has no encoding of its own: it expands to cmovP_regU.
6873 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
6874   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6875   ins_cost(200);
6876   expand %{
6877     cmovP_regU(cop, cr, dst, src);
6878   %}
6879 %}
6880 
6881 // DISABLED: Requires the ADLC to emit a bottom_type call that
6882 // correctly meets the two pointer arguments; one is an incoming
6883 // register but the other is a memory operand.  ALSO appears to
6884 // be buggy with implicit null checks.
6885 //
6886 //// Conditional move
6887 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6888 //%{
6889 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6890 //  ins_cost(250);
6891 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6892 //  opcode(0x0F,0x40);
6893 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6894 //  ins_pipe( pipe_cmov_mem );
6895 //%}
6896 //
6897 //// Conditional move
6898 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6899 //%{
6900 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6901 //  ins_cost(250);
6902 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6903 //  opcode(0x0F,0x40);
6904 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6905 //  ins_pipe( pipe_cmov_mem );
6906 //%}
6907 
// Long conditional move with a register source under a cmpOp / rFlagsReg
// condition.  Wide (REX_reg_reg_wide) encoding, printed as cmovq.
6908 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6909 %{
6910   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6911
6912   ins_cost(200); // XXX
6913   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6914   opcode(0x0F, 0x40);
6915   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6916   ins_pipe(pipe_cmov_reg);  // XXX
6917 %}
6918 
// Long conditional move whose source is a LoadL folded in as a memory
// operand (cmpOp / rFlagsReg condition).
// NOTE(review): cost is 200, the same as the register form, whereas the int
// memory forms (cmovI_mem / cmovI_memU) use 250 -- confirm this is intended.
6919 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6920 %{
6921   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6922
6923   ins_cost(200); // XXX
6924   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6925   opcode(0x0F, 0x40);
6926   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6927   ins_pipe(pipe_cmov_mem);  // XXX
6928 %}
6929 
// Long conditional move with a register source, for conditions typed cmpOpU
// on rFlagsRegU.  Wide (REX_reg_reg_wide) encoding, printed as cmovq.
6930 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6931 %{
6932   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6933
6934   ins_cost(200); // XXX
6935   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6936   opcode(0x0F, 0x40);
6937   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6938   ins_pipe(pipe_cmov_reg); // XXX
6939 %}
6940 
// cmpOpUCF / rFlagsRegUCF form of the long register conditional move.
// Has no encoding of its own: it expands to cmovL_regU.
6941 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
6942   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6943   ins_cost(200);
6944   expand %{
6945     cmovL_regU(cop, cr, dst, src);
6946   %}
6947 %}
6948 
// Long conditional move from a folded LoadL memory operand, for conditions
// typed cmpOpU on rFlagsRegU.
// NOTE(review): cost is 200, equal to the register form; the int memory
// forms use 250 -- confirm this is intended.
6949 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6950 %{
6951   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6952
6953   ins_cost(200); // XXX
6954   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6955   opcode(0x0F, 0x40);
6956   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6957   ins_pipe(pipe_cmov_mem); // XXX
6958 %}
6959 
// cmpOpUCF / rFlagsRegUCF form of the long memory-source conditional move.
// Has no encoding of its own: it expands to cmovL_memU.
6960 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
6961   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6962   ins_cost(200);
6963   expand %{
6964     cmovL_memU(cop, cr, dst, src);
6965   %}
6966 %}
6967 
// Float conditional move between XMM registers (cmpOp / rFlagsReg
// condition).  Emitted as a short branch around a register move rather than
// a single instruction: a jccb on the inverted condition skips the movflt.
6968 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6969 %{
6970   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6971
6972   ins_cost(200); // XXX
6973   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6974             "movss     $dst, $src\n"
6975     "skip:" %}
6976   ins_encode %{
6977     Label Lskip;
6978     // Invert sense of branch from sense of CMOV
6979     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6980     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6981     __ bind(Lskip);
6982   %}
6983   ins_pipe(pipe_slow);
6984 %}
6985 
6986 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
6987 // %{
6988 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
6989 
6990 //   ins_cost(200); // XXX
6991 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6992 //             "movss     $dst, $src\n"
6993 //     "skip:" %}
6994 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
6995 //   ins_pipe(pipe_slow);
6996 // %}
6997 
// Float conditional move between XMM registers, for conditions typed cmpOpU
// on rFlagsRegU.  Emitted as a jccb on the inverted condition that skips a
// movflt from src to dst.
6998 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
6999 %{
7000   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7001
7002   ins_cost(200); // XXX
7003   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7004             "movss     $dst, $src\n"
7005     "skip:" %}
7006   ins_encode %{
7007     Label Lskip;
7008     // Invert sense of branch from sense of CMOV
7009     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7010     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7011     __ bind(Lskip);
7012   %}
7013   ins_pipe(pipe_slow);
7014 %}
7015 
// cmpOpUCF / rFlagsRegUCF form of the float conditional move.
// Has no encoding of its own: it expands to cmovF_regU.
7016 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7017   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7018   ins_cost(200);
7019   expand %{
7020     cmovF_regU(cop, cr, dst, src);
7021   %}
7022 %}
7023 
// Double conditional move between XMM registers (cmpOp / rFlagsReg
// condition).  Emitted as a jccb on the inverted condition that skips a
// movdbl from src to dst.
7024 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7025 %{
7026   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7027
7028   ins_cost(200); // XXX
7029   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7030             "movsd     $dst, $src\n"
7031     "skip:" %}
7032   ins_encode %{
7033     Label Lskip;
7034     // Invert sense of branch from sense of CMOV
7035     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7036     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7037     __ bind(Lskip);
7038   %}
7039   ins_pipe(pipe_slow);
7040 %}
7041 
// Double conditional move between XMM registers, for conditions typed
// cmpOpU on rFlagsRegU.  Emitted as a jccb on the inverted condition that
// skips a movdbl from src to dst.
7042 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7043 %{
7044   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7045
7046   ins_cost(200); // XXX
7047   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7048             "movsd     $dst, $src\n"
7049     "skip:" %}
7050   ins_encode %{
7051     Label Lskip;
7052     // Invert sense of branch from sense of CMOV
7053     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7054     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7055     __ bind(Lskip);
7056   %}
7057   ins_pipe(pipe_slow);
7058 %}
7059 
// cmpOpUCF / rFlagsRegUCF form of the double conditional move.
// Has no encoding of its own: it expands to cmovD_regU.
7060 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7061   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7062   ins_cost(200);
7063   expand %{
7064     cmovD_regU(cop, cr, dst, src);
7065   %}
7066 %}
7067 
7068 //----------Arithmetic Instructions--------------------------------------------
7069 //----------Addition Instructions----------------------------------------------
7070 
// Int add, register += register (two-address form: dst is input and
// result).  Declares the flags register as killed.
7071 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7072 %{
7073   match(Set dst (AddI dst src));
7074   effect(KILL cr);
7075
7076   format %{ "addl    $dst, $src\t# int" %}
7077   opcode(0x03);
7078   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7079   ins_pipe(ialu_reg_reg);
7080 %}
7081 
// Int add, register += immediate (immI).  The immediate is emitted through
// Con8or32; the opcode comment gives the 32-bit form as 81 /0 id.
// Declares the flags register as killed.
7082 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7083 %{
7084   match(Set dst (AddI dst src));
7085   effect(KILL cr);
7086
7087   format %{ "addl    $dst, $src\t# int" %}
7088   opcode(0x81, 0x00); /* /0 id */
7089   ins_encode(OpcSErm(dst, src), Con8or32(src));
7090   ins_pipe( ialu_reg );
7091 %}
7092 
// Int add, register += memory: folds the LoadI of src into the add as a
// memory operand.  Declares the flags register as killed.
7093 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7094 %{
7095   match(Set dst (AddI dst (LoadI src)));
7096   effect(KILL cr);
7097
7098   ins_cost(125); // XXX
7099   format %{ "addl    $dst, $src\t# int" %}
7100   opcode(0x03);
7101   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7102   ins_pipe(ialu_reg_mem);
7103 %}
7104 
// Int read-modify-write add, memory += register: matches a StoreI to dst of
// (LoadI dst) + src.  The encode classes take the register first and the
// memory operand second (src, dst).  Declares the flags register as killed.
7105 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7106 %{
7107   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7108   effect(KILL cr);
7109
7110   ins_cost(150); // XXX
7111   format %{ "addl    $dst, $src\t# int" %}
7112   opcode(0x01); /* Opcode 01 /r */
7113   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7114   ins_pipe(ialu_mem_reg);
7115 %}
7116 
// Int read-modify-write add, memory += immediate: matches a StoreI to dst
// of (LoadI dst) + src.  RM_opc_mem(0x00, dst) supplies the /0 opcode
// extension named in the opcode comment.  Declares the flags register as
// killed.
7117 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7118 %{
7119   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7120   effect(KILL cr);
7121
7122   ins_cost(125); // XXX
7123   format %{ "addl    $dst, $src\t# int" %}
7124   opcode(0x81); /* Opcode 81 /0 id */
7125   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7126   ins_pipe(ialu_mem_imm);
7127 %}
7128 
// Int increment of a register: matches AddI with an immI1 operand and is
// only eligible when UseIncDec is set.  The immediate is not encoded; the
// instruction is FF /0.  Declares the flags register as killed.
7129 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7130 %{
7131   predicate(UseIncDec);
7132   match(Set dst (AddI dst src));
7133   effect(KILL cr);
7134
7135   format %{ "incl    $dst\t# int" %}
7136   opcode(0xFF, 0x00); // FF /0
7137   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7138   ins_pipe(ialu_reg);
7139 %}
7140 
// Int increment of a memory location: matches a StoreI to dst of
// (LoadI dst) + immI1, only when UseIncDec is set.  Encoded as FF /0 on the
// memory operand.  Declares the flags register as killed.
7141 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7142 %{
7143   predicate(UseIncDec);
7144   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7145   effect(KILL cr);
7146
7147   ins_cost(125); // XXX
7148   format %{ "incl    $dst\t# int" %}
7149   opcode(0xFF); /* Opcode FF /0 */
7150   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7151   ins_pipe(ialu_mem_imm);
7152 %}
7153 
7154 // XXX why does that use AddI
// Int decrement of a register, expressed as AddI with an immI_M1 operand.
// Only eligible when UseIncDec is set.  Encoded as FF /1; the immediate is
// not emitted.  Declares the flags register as killed.
7155 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7156 %{
7157   predicate(UseIncDec);
7158   match(Set dst (AddI dst src));
7159   effect(KILL cr);
7160
7161   format %{ "decl    $dst\t# int" %}
7162   opcode(0xFF, 0x01); // FF /1
7163   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7164   ins_pipe(ialu_reg);
7165 %}
7166 
7167 // XXX why does that use AddI
// Int decrement of a memory location: matches a StoreI to dst of
// (LoadI dst) + immI_M1, only when UseIncDec is set.  Encoded as FF /1 on
// the memory operand.  Declares the flags register as killed.
7168 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7169 %{
7170   predicate(UseIncDec);
7171   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7172   effect(KILL cr);
7173
7174   ins_cost(125); // XXX
7175   format %{ "decl    $dst\t# int" %}
7176   opcode(0xFF); /* Opcode FF /1 */
7177   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7178   ins_pipe(ialu_mem_imm);
7179 %}
7180 
// Three-operand int add of a register and an immediate via LEA: dst is
// distinct from src0 and, unlike the addl rules, no KILL of the flags
// register is declared.  A 0x67 prefix byte is emitted first (shown as
// "addr32" in the format).
7181 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7182 %{
7183   match(Set dst (AddI src0 src1));
7184
7185   ins_cost(110);
7186   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7187   opcode(0x8D); /* 0x8D /r */
7188   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7189   ins_pipe(ialu_reg_reg);
7190 %}
7191 
// Long add, register += register (dst is input and result).  Uses the wide
// prefix (REX_reg_reg_wide).  Declares the flags register as killed.
7192 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7193 %{
7194   match(Set dst (AddL dst src));
7195   effect(KILL cr);
7196
7197   format %{ "addq    $dst, $src\t# long" %}
7198   opcode(0x03);
7199   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7200   ins_pipe(ialu_reg_reg);
7201 %}
7202 
// Long add, register += immediate.  The immediate operand class is immL32
// and it is emitted through Con8or32.  Declares the flags register as
// killed.
7203 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7204 %{
7205   match(Set dst (AddL dst src));
7206   effect(KILL cr);
7207
7208   format %{ "addq    $dst, $src\t# long" %}
7209   opcode(0x81, 0x00); /* /0 id */
7210   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7211   ins_pipe( ialu_reg );
7212 %}
7213 
// Long add, register += memory: folds the LoadL of src into the add as a
// memory operand.  Declares the flags register as killed.
7214 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7215 %{
7216   match(Set dst (AddL dst (LoadL src)));
7217   effect(KILL cr);
7218
7219   ins_cost(125); // XXX
7220   format %{ "addq    $dst, $src\t# long" %}
7221   opcode(0x03);
7222   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7223   ins_pipe(ialu_reg_mem);
7224 %}
7225 
// Long read-modify-write add, memory += register: matches a StoreL to dst
// of (LoadL dst) + src.  The encode classes take (src, dst), register
// first.  Declares the flags register as killed.
7226 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7227 %{
7228   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7229   effect(KILL cr);
7230
7231   ins_cost(150); // XXX
7232   format %{ "addq    $dst, $src\t# long" %}
7233   opcode(0x01); /* Opcode 01 /r */
7234   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7235   ins_pipe(ialu_mem_reg);
7236 %}
7237 
// Long read-modify-write add, memory += immL32 immediate: matches a StoreL
// to dst of (LoadL dst) + src.  RM_opc_mem(0x00, dst) supplies the /0
// opcode extension.  Declares the flags register as killed.
7238 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7239 %{
7240   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7241   effect(KILL cr);
7242
7243   ins_cost(125); // XXX
7244   format %{ "addq    $dst, $src\t# long" %}
7245   opcode(0x81); /* Opcode 81 /0 id */
7246   ins_encode(REX_mem_wide(dst),
7247              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7248   ins_pipe(ialu_mem_imm);
7249 %}
7250 
// Long increment of a register: matches AddL with an immL1 operand and is
// only eligible when UseIncDec is set.  Encoded as the wide form of FF /0
// (REX_reg_wide), printed as incq; the immediate is not emitted.
// dst is declared rRegL because the rule defines the result of an AddL;
// it was previously rRegI, which disagreed with the match rule, the 64-bit
// encoding, and the sibling decL_rReg.  Declares the flags register as
// killed.
7251 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7252 %{
7253   predicate(UseIncDec);
7254   match(Set dst (AddL dst src));
7255   effect(KILL cr);
7256
7257   format %{ "incq    $dst\t# long" %}
7258   opcode(0xFF, 0x00); // FF /0
7259   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7260   ins_pipe(ialu_reg);
7261 %}
7262 
// Long increment of a memory location: matches a StoreL to dst of
// (LoadL dst) + immL1, only when UseIncDec is set.  Encoded as the wide
// form of FF /0 on the memory operand.  Declares the flags register as
// killed.
7263 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7264 %{
7265   predicate(UseIncDec);
7266   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7267   effect(KILL cr);
7268
7269   ins_cost(125); // XXX
7270   format %{ "incq    $dst\t# long" %}
7271   opcode(0xFF); /* Opcode FF /0 */
7272   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7273   ins_pipe(ialu_mem_imm);
7274 %}
7275 
7276 // XXX why does that use AddL
// Long decrement of a register, expressed as AddL with an immL_M1 operand.
// Only eligible when UseIncDec is set.  Encoded as the wide form of FF /1.
// Declares the flags register as killed.
7277 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7278 %{
7279   predicate(UseIncDec);
7280   match(Set dst (AddL dst src));
7281   effect(KILL cr);
7282
7283   format %{ "decq    $dst\t# long" %}
7284   opcode(0xFF, 0x01); // FF /1
7285   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7286   ins_pipe(ialu_reg);
7287 %}
7288 
7289 // XXX why does that use AddL
// Long decrement of a memory location: matches a StoreL to dst of
// (LoadL dst) + immL_M1, only when UseIncDec is set.  Encoded as the wide
// form of FF /1 on the memory operand.  Declares the flags register as
// killed.
7290 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7291 %{
7292   predicate(UseIncDec);
7293   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7294   effect(KILL cr);
7295
7296   ins_cost(125); // XXX
7297   format %{ "decq    $dst\t# long" %}
7298   opcode(0xFF); /* Opcode FF /1 */
7299   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7300   ins_pipe(ialu_mem_imm);
7301 %}
7302 
// Three-operand long add of a register and an immL32 immediate via LEA:
// dst is distinct from src0 and no KILL of the flags register is declared.
7303 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7304 %{
7305   match(Set dst (AddL src0 src1));
7306
7307   ins_cost(110);
7308   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7309   opcode(0x8D); /* 0x8D /r */
7310   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7311   ins_pipe(ialu_reg_reg);
7312 %}
7313 
// Pointer add, pointer register += long register (AddP).  Same opcode and
// wide encoding as the long register add.  Declares the flags register as
// killed.
7314 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7315 %{
7316   match(Set dst (AddP dst src));
7317   effect(KILL cr);
7318
7319   format %{ "addq    $dst, $src\t# ptr" %}
7320   opcode(0x03);
7321   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7322   ins_pipe(ialu_reg_reg);
7323 %}
7324 
// Pointer add, pointer register += immL32 immediate (AddP).  The immediate
// is emitted through Con8or32.  Declares the flags register as killed.
7325 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7326 %{
7327   match(Set dst (AddP dst src));
7328   effect(KILL cr);
7329
7330   format %{ "addq    $dst, $src\t# ptr" %}
7331   opcode(0x81, 0x00); /* /0 id */
7332   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7333   ins_pipe( ialu_reg );
7334 %}
7335 
7336 // XXX addP mem ops ????
7337 
// Three-operand pointer add of a pointer register and an immL32 immediate
// via LEA: dst is distinct from src0 and no KILL of the flags register is
// declared.
7338 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7339 %{
7340   match(Set dst (AddP src0 src1));
7341
7342   ins_cost(110);
7343   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7344   opcode(0x8D); /* 0x8D /r */
7345   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7346   ins_pipe(ialu_reg_reg);
7347 %}
7348 
// CheckCastPP on a pointer register.  Emits no machine code: the declared
// size is 0 and the encoding is empty; dst is both input and result.
7349 instruct checkCastPP(rRegP dst)
7350 %{
7351   match(Set dst (CheckCastPP dst));
7352
7353   size(0);
7354   format %{ "# checkcastPP of $dst" %}
7355   ins_encode(/* empty encoding */);
7356   ins_pipe(empty);
7357 %}
7358 
// CastPP on a pointer register.  Emits no machine code: the declared size
// is 0 and the encoding is empty; dst is both input and result.
7359 instruct castPP(rRegP dst)
7360 %{
7361   match(Set dst (CastPP dst));
7362
7363   size(0);
7364   format %{ "# castPP of $dst" %}
7365   ins_encode(/* empty encoding */);
7366   ins_pipe(empty);
7367 %}
7368 
// CastII on an int register.  Emits no machine code: the declared size is
// 0, the encoding is empty, and the rule is given an explicit cost of 0.
7369 instruct castII(rRegI dst)
7370 %{
7371   match(Set dst (CastII dst));
7372
7373   size(0);
7374   format %{ "# castII of $dst" %}
7375   ins_encode(/* empty encoding */);
7376   ins_cost(0);
7377   ins_pipe(empty);
7378 %}
7379 
7380 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked of a pointer from memory, emitted as a plain 64-bit move
// (opcode 0x8B with the wide REX prefix, printed as movq).
7381 instruct loadPLocked(rRegP dst, memory mem)
7382 %{
7383   match(Set dst (LoadPLocked mem));
7384
7385   ins_cost(125); // XXX
7386   format %{ "movq    $dst, $mem\t# ptr locked" %}
7387   opcode(0x8B);
7388   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7389   ins_pipe(ialu_reg_mem); // XXX
7390 %}
7391 
7392 // Conditional-store of the updated heap-top.
7393 // Used during allocation of the shared heap.
7394 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7395 
// StorePConditional: the result of the rule is the flags register itself
// (Set cr ...).  oldval is pinned to the rax pointer operand class; the
// encoding is lock_prefix followed by the wide form of 0F B1 (cmpxchgq)
// with newval as the register and heap_top_ptr as the memory operand.
// NOTE(review): unlike storeIConditional / storeLConditional, no
// effect(KILL oldval) is declared here -- confirm that is intentional.
7396 instruct storePConditional(memory heap_top_ptr,
7397                            rax_RegP oldval, rRegP newval,
7398                            rFlagsReg cr)
7399 %{
7400   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7401
7402   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7403             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7404   opcode(0x0F, 0xB1);
7405   ins_encode(lock_prefix,
7406              REX_reg_mem_wide(newval, heap_top_ptr),
7407              OpcP, OpcS,
7408              reg_mem(newval, heap_top_ptr));
7409   ins_pipe(pipe_cmpxchg);
7410 %}
7411 
7412 // Conditional-store of an int value.
7413 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: the result of the rule is the flags register.  oldval
// is pinned to the rax int operand class and declared killed.  The encoding
// is lock_prefix followed by the non-wide form of 0F B1 (cmpxchgl).
7414 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7415 %{
7416   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7417   effect(KILL oldval);
7418
7419   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7420   opcode(0x0F, 0xB1);
7421   ins_encode(lock_prefix,
7422              REX_reg_mem(newval, mem),
7423              OpcP, OpcS,
7424              reg_mem(newval, mem));
7425   ins_pipe(pipe_cmpxchg);
7426 %}
7427 
7428 // Conditional-store of a long value.
7429 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: the result of the rule is the flags register.  oldval
// is pinned to the rax long operand class and declared killed.  The
// encoding is lock_prefix followed by the wide form of 0F B1 (cmpxchgq).
7430 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7431 %{
7432   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7433   effect(KILL oldval);
7434
7435   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7436   opcode(0x0F, 0xB1);
7437   ins_encode(lock_prefix,
7438              REX_reg_mem_wide(newval, mem),
7439              OpcP, OpcS,
7440              reg_mem(newval, mem));
7441   ins_pipe(pipe_cmpxchg);
7442 %}
7443 
7444 
7445 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Guarded by
// VM_Version::supports_cx8().  Three instructions are emitted in order:
// a lock-prefixed wide cmpxchg (0F B1) of newval against mem_ptr, then
// sete into res (0F 94), then movzbl res, res (0F B6).  oldval is pinned to
// the rax pointer operand class; both it and the flags register are
// declared killed.
7446 instruct compareAndSwapP(rRegI res,
7447                          memory mem_ptr,
7448                          rax_RegP oldval, rRegP newval,
7449                          rFlagsReg cr)
7450 %{
7451   predicate(VM_Version::supports_cx8());
7452   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7453   effect(KILL cr, KILL oldval);
7454
7455   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7456             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7457             "sete    $res\n\t"
7458             "movzbl  $res, $res" %}
7459   opcode(0x0F, 0xB1);
7460   ins_encode(lock_prefix,
7461              REX_reg_mem_wide(newval, mem_ptr),
7462              OpcP, OpcS,
7463              reg_mem(newval, mem_ptr),
7464              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7465              REX_reg_breg(res, res), // movzbl
7466              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
// CompareAndSwapL producing an int result in res.  Guarded by
// VM_Version::supports_cx8().  Emits a lock-prefixed wide cmpxchg (0F B1)
// of newval against mem_ptr, then sete into res (0F 94), then
// movzbl res, res (0F B6).  oldval is pinned to the rax long operand class;
// both it and the flags register are declared killed.
7470 instruct compareAndSwapL(rRegI res,
7471                          memory mem_ptr,
7472                          rax_RegL oldval, rRegL newval,
7473                          rFlagsReg cr)
7474 %{
7475   predicate(VM_Version::supports_cx8());
7476   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7477   effect(KILL cr, KILL oldval);
7478
7479   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7480             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7481             "sete    $res\n\t"
7482             "movzbl  $res, $res" %}
7483   opcode(0x0F, 0xB1);
7484   ins_encode(lock_prefix,
7485              REX_reg_mem_wide(newval, mem_ptr),
7486              OpcP, OpcS,
7487              reg_mem(newval, mem_ptr),
7488              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7489              REX_reg_breg(res, res), // movzbl
7490              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7491   ins_pipe( pipe_cmpxchg );
7492 %}
7493 
// CompareAndSwapI producing an int result in res.  No predicate.  Emits a
// lock-prefixed non-wide cmpxchg (0F B1, printed as cmpxchgl) of newval
// against mem_ptr, then sete into res (0F 94), then movzbl res, res
// (0F B6).  oldval is pinned to the rax int operand class; both it and the
// flags register are declared killed.
7494 instruct compareAndSwapI(rRegI res,
7495                          memory mem_ptr,
7496                          rax_RegI oldval, rRegI newval,
7497                          rFlagsReg cr)
7498 %{
7499   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7500   effect(KILL cr, KILL oldval);
7501
7502   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7503             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7504             "sete    $res\n\t"
7505             "movzbl  $res, $res" %}
7506   opcode(0x0F, 0xB1);
7507   ins_encode(lock_prefix,
7508              REX_reg_mem(newval, mem_ptr),
7509              OpcP, OpcS,
7510              reg_mem(newval, mem_ptr),
7511              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7512              REX_reg_breg(res, res), // movzbl
7513              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7514   ins_pipe( pipe_cmpxchg );
7515 %}
7516 
7517 
// CompareAndSwapN (rRegN compressed-pointer operands) producing an int
// result in res.  Emits a lock-prefixed non-wide cmpxchg (0F B1, printed as
// cmpxchgl) of newval against mem_ptr, then sete into res (0F 94), then
// movzbl res, res (0F B6).  oldval is pinned to the rax_RegN operand class;
// both it and the flags register are declared killed.
7518 instruct compareAndSwapN(rRegI res,
7519                           memory mem_ptr,
7520                           rax_RegN oldval, rRegN newval,
7521                           rFlagsReg cr) %{
7522   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7523   effect(KILL cr, KILL oldval);
7524
7525   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7526             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7527             "sete    $res\n\t"
7528             "movzbl  $res, $res" %}
7529   opcode(0x0F, 0xB1);
7530   ins_encode(lock_prefix,
7531              REX_reg_mem(newval, mem_ptr),
7532              OpcP, OpcS,
7533              reg_mem(newval, mem_ptr),
7534              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7535              REX_reg_breg(res, res), // movzbl
7536              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7537   ins_pipe( pipe_cmpxchg );
7538 %}
7539 
// GetAndAddI whose result is unused (predicate: result_not_used()).  The
// result operand is a Universe placeholder, and the add amount is an
// immediate.  Emits addl to memory with the constant, preceded by a lock
// prefix when os::is_MP() is true.  Declares the flags register as killed.
7540 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7541   predicate(n->as_LoadStore()->result_not_used());
7542   match(Set dummy (GetAndAddI mem add));
7543   effect(KILL cr);
7544   format %{ "ADDL  [$mem],$add" %}
7545   ins_encode %{
7546     if (os::is_MP()) { __ lock(); }
7547     __ addl($mem$$Address, $add$$constant);
7548   %}
7549   ins_pipe( pipe_cmpxchg );
7550 %}
7551 
// GetAndAddI with the result used: newval is both the addend and the
// result register.  Emits xaddl on the memory operand, preceded by a lock
// prefix when os::is_MP() is true.  Declares the flags register as killed.
7552 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
7553   match(Set newval (GetAndAddI mem newval));
7554   effect(KILL cr);
7555   format %{ "XADDL  [$mem],$newval" %}
7556   ins_encode %{
7557     if (os::is_MP()) { __ lock(); }
7558     __ xaddl($mem$$Address, $newval$$Register);
7559   %}
7560   ins_pipe( pipe_cmpxchg );
7561 %}
7562 
// GetAndAddL whose result is unused (predicate: result_not_used()).  The
// result operand is a Universe placeholder, and the add amount is an immL32
// immediate.  Emits addq to memory with the constant, preceded by a lock
// prefix when os::is_MP() is true.  Declares the flags register as killed.
7563 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
7564   predicate(n->as_LoadStore()->result_not_used());
7565   match(Set dummy (GetAndAddL mem add));
7566   effect(KILL cr);
7567   format %{ "ADDQ  [$mem],$add" %}
7568   ins_encode %{
7569     if (os::is_MP()) { __ lock(); }
7570     __ addq($mem$$Address, $add$$constant);
7571   %}
7572   ins_pipe( pipe_cmpxchg );
7573 %}
7574 
// GetAndAddL with the result used: newval is both the addend and the
// result register.  Emits xaddq on the memory operand, preceded by a lock
// prefix when os::is_MP() is true.  Declares the flags register as killed.
7575 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
7576   match(Set newval (GetAndAddL mem newval));
7577   effect(KILL cr);
7578   format %{ "XADDQ  [$mem],$newval" %}
7579   ins_encode %{
7580     if (os::is_MP()) { __ lock(); }
7581     __ xaddq($mem$$Address, $newval$$Register);
7582   %}
7583   ins_pipe( pipe_cmpxchg );
7584 %}
7585 
// GetAndSetI: newval is both the value to store and the result register.
// Emits xchgl between the register and the memory operand.  No lock prefix
// is emitted here and no flags effect is declared.
7586 instruct xchgI( memory mem, rRegI newval) %{
7587   match(Set newval (GetAndSetI mem newval));
7588   format %{ "XCHGL  $newval,[$mem]" %}
7589   ins_encode %{
7590     __ xchgl($newval$$Register, $mem$$Address);
7591   %}
7592   ins_pipe( pipe_cmpxchg );
7593 %}
7594 
// GetAndSetL: newval is both the value to store and the result register.
// Emits xchgq between the register and the memory operand.  No lock prefix
// is emitted here and no flags effect is declared.
// The format string prints XCHGQ so that the listing agrees with the 64-bit
// xchgq actually emitted (it previously printed XCHGL).
7595 instruct xchgL( memory mem, rRegL newval) %{
7596   match(Set newval (GetAndSetL mem newval));
7597   format %{ "XCHGQ  $newval,[$mem]" %}
7598   ins_encode %{
7599     __ xchgq($newval$$Register, $mem$$Address);
7600   %}
7601   ins_pipe( pipe_cmpxchg );
7602 %}
7603 
// GetAndSetP: newval is both the pointer to store and the result register.
// Emits xchgq between the register and the memory operand.  No lock prefix
// is emitted here and no flags effect is declared.
7604 instruct xchgP( memory mem, rRegP newval) %{
7605   match(Set newval (GetAndSetP mem newval));
7606   format %{ "XCHGQ  $newval,[$mem]" %}
7607   ins_encode %{
7608     __ xchgq($newval$$Register, $mem$$Address);
7609   %}
7610   ins_pipe( pipe_cmpxchg );
7611 %}
7612 
// GetAndSetN: newval (an rRegN compressed pointer) is both the value to
// store and the result register.  Emits xchgl between the register and the
// memory operand.  No lock prefix is emitted here and no flags effect is
// declared.
// The format string brackets the memory operand as "[$mem]", matching the
// other xchg rules (the opening bracket was previously missing).
7613 instruct xchgN( memory mem, rRegN newval) %{
7614   match(Set newval (GetAndSetN mem newval));
7615   format %{ "XCHGL  $newval,[$mem]" %}
7616   ins_encode %{
7617     __ xchgl($newval$$Register, $mem$$Address);
7618   %}
7619   ins_pipe( pipe_cmpxchg );
7620 %}
7621 
7622 //----------Subtraction Instructions-------------------------------------------
7623 
7624 // Integer Subtraction Instructions
// Int subtract, register -= register (dst is input and result).
// Declares the flags register as killed.
7625 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7626 %{
7627   match(Set dst (SubI dst src));
7628   effect(KILL cr);
7629
7630   format %{ "subl    $dst, $src\t# int" %}
7631   opcode(0x2B);
7632   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7633   ins_pipe(ialu_reg_reg);
7634 %}
7635 
// Int subtract, register -= immediate (immI).  Opcode 81 with the /5
// extension; the immediate is emitted through Con8or32.  Declares the flags
// register as killed.
7636 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7637 %{
7638   match(Set dst (SubI dst src));
7639   effect(KILL cr);
7640
7641   format %{ "subl    $dst, $src\t# int" %}
7642   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7643   ins_encode(OpcSErm(dst, src), Con8or32(src));
7644   ins_pipe(ialu_reg);
7645 %}
7646 
// Int subtract, register -= memory: folds the LoadI of src into the
// subtract as a memory operand.  Declares the flags register as killed.
7647 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7648 %{
7649   match(Set dst (SubI dst (LoadI src)));
7650   effect(KILL cr);
7651
7652   ins_cost(125);
7653   format %{ "subl    $dst, $src\t# int" %}
7654   opcode(0x2B);
7655   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7656   ins_pipe(ialu_reg_mem);
7657 %}
7658 
// Int read-modify-write subtract, memory -= register: matches a StoreI to
// dst of (LoadI dst) - src.  The encode classes take (src, dst), register
// first.  Declares the flags register as killed.
7659 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7660 %{
7661   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7662   effect(KILL cr);
7663
7664   ins_cost(150);
7665   format %{ "subl    $dst, $src\t# int" %}
7666   opcode(0x29); /* Opcode 29 /r */
7667   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7668   ins_pipe(ialu_mem_reg);
7669 %}
7670 
// Int read-modify-write subtract, memory -= immediate: matches a StoreI to
// dst of (LoadI dst) - src.  RM_opc_mem(0x05, dst) supplies the /5 opcode
// extension.  Declares the flags register as killed.
7671 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7672 %{
7673   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7674   effect(KILL cr);
7675
7676   ins_cost(125); // XXX
7677   format %{ "subl    $dst, $src\t# int" %}
7678   opcode(0x81); /* Opcode 81 /5 id */
7679   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7680   ins_pipe(ialu_mem_imm);
7681 %}
7682 
// Long subtract, register -= register (dst is input and result).  Uses the
// wide prefix (REX_reg_reg_wide).  Declares the flags register as killed.
7683 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7684 %{
7685   match(Set dst (SubL dst src));
7686   effect(KILL cr);
7687
7688   format %{ "subq    $dst, $src\t# long" %}
7689   opcode(0x2B);
7690   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7691   ins_pipe(ialu_reg_reg);
7692 %}
7693 
// Long subtract, register -= immL32 immediate.  Opcode 81 with the /5
// extension in its wide form (OpcSErm_wide); the immediate is emitted
// through Con8or32.
// dst is declared rRegL because the rule defines the result of a SubL; it
// was previously rRegI, which disagreed with the match rule, the 64-bit
// encoding, and the sibling addL_rReg_imm.  Declares the flags register as
// killed.
7694 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7695 %{
7696   match(Set dst (SubL dst src));
7697   effect(KILL cr);
7698
7699   format %{ "subq    $dst, $src\t# long" %}
7700   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7701   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7702   ins_pipe(ialu_reg);
7703 %}
7704 
// Long subtract, register -= memory: folds the LoadL of src into the
// subtract as a memory operand.  Declares the flags register as killed.
7705 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7706 %{
7707   match(Set dst (SubL dst (LoadL src)));
7708   effect(KILL cr);
7709
7710   ins_cost(125);
7711   format %{ "subq    $dst, $src\t# long" %}
7712   opcode(0x2B);
7713   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7714   ins_pipe(ialu_reg_mem);
7715 %}
7716 
// Long read-modify-write subtract, memory -= register: matches a StoreL to
// dst of (LoadL dst) - src.  The encode classes take (src, dst), register
// first.  Declares the flags register as killed.
7717 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7718 %{
7719   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7720   effect(KILL cr);
7721
7722   ins_cost(150);
7723   format %{ "subq    $dst, $src\t# long" %}
7724   opcode(0x29); /* Opcode 29 /r */
7725   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7726   ins_pipe(ialu_mem_reg);
7727 %}
7728 
// Long read-modify-write subtract, memory -= immL32 immediate: matches a
// StoreL to dst of (LoadL dst) - src.  RM_opc_mem(0x05, dst) supplies the
// /5 opcode extension.  Declares the flags register as killed.
7729 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7730 %{
7731   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7732   effect(KILL cr);
7733
7734   ins_cost(125); // XXX
7735   format %{ "subq    $dst, $src\t# long" %}
7736   opcode(0x81); /* Opcode 81 /5 id */
7737   ins_encode(REX_mem_wide(dst),
7738              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7739   ins_pipe(ialu_mem_imm);
7740 %}
7741 
7742 // Subtract from a pointer
7743 // XXX hmpf???
// Matches a pointer add whose offset is (0 - src), i.e. AddP dst (SubI zero
// src), and emits a single wide subtract of src from dst.
// NOTE(review): src is an rRegI operand, yet the emitted instruction is the
// 64-bit subq (REX_reg_reg_wide), and the AddP offset here is an int SubI
// whereas the other AddP rules in this section take long offsets -- confirm
// this rule is still reachable and correct.
7744 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7745 %{
7746   match(Set dst (AddP dst (SubI zero src)));
7747   effect(KILL cr);
7748
7749   format %{ "subq    $dst, $src\t# ptr - int" %}
7750   opcode(0x2B);
7751   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7752   ins_pipe(ialu_reg_reg);
7753 %}
7754 
// Int negate of a register, matched as (SubI zero dst) with an immI0
// operand.  Encoded as F7 /3.  Declares the flags register as killed.
7755 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7756 %{
7757   match(Set dst (SubI zero dst));
7758   effect(KILL cr);
7759
7760   format %{ "negl    $dst\t# int" %}
7761   opcode(0xF7, 0x03);  // Opcode F7 /3
7762   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7763   ins_pipe(ialu_reg);
7764 %}
7765 
// Int negate of a memory location: matches a StoreI to dst of
// (0 - LoadI dst).  Encoded as F7 with the secondary opcode (/3) as the
// extension on the memory operand.  Declares the flags register as killed.
// NOTE(review): no ins_cost is given and the pipe class is ialu_reg even
// though dst is a memory operand; incI_mem / decI_mem use ialu_mem_imm --
// confirm this is intended.
7766 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7767 %{
7768   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7769   effect(KILL cr);
7770
7771   format %{ "negl    $dst\t# int" %}
7772   opcode(0xF7, 0x03);  // Opcode F7 /3
7773   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
7774   ins_pipe(ialu_reg);
7775 %}
7776 
7777 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7778 %{ // Long negate in place, recognized as the subtraction (zero - dst).
7779   match(Set dst (SubL zero dst));
7780   effect(KILL cr); // flags register is declared clobbered
7781 
7782   format %{ "negq    $dst\t# long" %}
7783   opcode(0xF7, 0x03);  // Opcode F7 /3
7784   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as negI_rReg; differs by the *_wide REX enc_class
7785   ins_pipe(ialu_reg);
7786 %}
7787 
7788 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7789 %{ // Long negate of a memory location: load, (zero - value), store back to the same $dst.
7790   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7791   effect(KILL cr); // flags register is declared clobbered
7792 
7793   format %{ "negq    $dst\t# long" %}
7794   opcode(0xF7, 0x03);  // Opcode F7 /3
7795   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x03)
7796   ins_pipe(ialu_reg); // NOTE(review): memory form uses a register pipe class; other memory rules nearby use ialu_mem_* - confirm intent
7797 %}
7798 
7799 
7800 //----------Multiplication/Division Instructions-------------------------------
7801 // Integer Multiplication Instructions
7802 // Multiply Register
7803 
7804 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7805 %{ // Int multiply, two-operand form: dst = dst * src.
7806   match(Set dst (MulI dst src));
7807   effect(KILL cr); // flags register is declared clobbered
7808 
7809   ins_cost(300);
7810   format %{ "imull   $dst, $src\t# int" %}
7811   opcode(0x0F, 0xAF);
7812   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // OpcP/OpcS: both opcode() values are listed for emission
7813   ins_pipe(ialu_reg_reg_alu0);
7814 %}
7815 
7816 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7817 %{ // Int multiply by immediate, three-operand form: dst = src * imm.
7818   match(Set dst (MulI src imm)); // dst is not an input of the match, unlike mulI_rReg
7819   effect(KILL cr); // flags register is declared clobbered
7820 
7821   ins_cost(300);
7822   format %{ "imull   $dst, $src, $imm\t# int" %}
7823   opcode(0x69); /* 69 /r id */
7824   ins_encode(REX_reg_reg(dst, src),
7825              OpcSE(imm), reg_reg(dst, src), Con8or32(imm)); // immediate is passed to both OpcSE and Con8or32
7826   ins_pipe(ialu_reg_reg_alu0);
7827 %}
7828 
7829 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7830 %{ // Int multiply of a register by a value loaded from memory: dst = dst * [src].
7831   match(Set dst (MulI dst (LoadI src))); // the LoadI is folded into the multiply
7832   effect(KILL cr); // flags register is declared clobbered
7833 
7834   ins_cost(350);
7835   format %{ "imull   $dst, $src\t# int" %}
7836   opcode(0x0F, 0xAF);
7837   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7838   ins_pipe(ialu_reg_mem_alu0);
7839 %}
7840 
7841 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7842 %{ // Int multiply of a loaded value by an immediate: dst = [src] * imm.
7843   match(Set dst (MulI (LoadI src) imm)); // the LoadI is folded into the multiply
7844   effect(KILL cr); // flags register is declared clobbered
7845 
7846   ins_cost(300);
7847   format %{ "imull   $dst, $src, $imm\t# int" %}
7848   opcode(0x69); /* 69 /r id */
7849   ins_encode(REX_reg_mem(dst, src),
7850              OpcSE(imm), reg_mem(dst, src), Con8or32(imm)); // immediate is passed to both OpcSE and Con8or32
7851   ins_pipe(ialu_reg_mem_alu0);
7852 %}
7853 
7854 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7855 %{ // Long multiply, two-operand form: dst = dst * src.
7856   match(Set dst (MulL dst src));
7857   effect(KILL cr); // flags register is declared clobbered
7858 
7859   ins_cost(300);
7860   format %{ "imulq   $dst, $src\t# long" %}
7861   opcode(0x0F, 0xAF);
7862   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src)); // same opcodes as mulI_rReg; differs by the *_wide REX enc_class
7863   ins_pipe(ialu_reg_reg_alu0);
7864 %}
7865 
7866 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7867 %{ // Long multiply by an immL32 immediate, three-operand form: dst = src * imm.
7868   match(Set dst (MulL src imm)); // dst is not an input of the match, unlike mulL_rReg
7869   effect(KILL cr); // flags register is declared clobbered
7870 
7871   ins_cost(300);
7872   format %{ "imulq   $dst, $src, $imm\t# long" %}
7873   opcode(0x69); /* 69 /r id */
7874   ins_encode(REX_reg_reg_wide(dst, src),
7875              OpcSE(imm), reg_reg(dst, src), Con8or32(imm)); // immediate is passed to both OpcSE and Con8or32
7876   ins_pipe(ialu_reg_reg_alu0);
7877 %}
7878 
7879 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7880 %{ // Long multiply of a register by a value loaded from memory: dst = dst * [src].
7881   match(Set dst (MulL dst (LoadL src))); // the LoadL is folded into the multiply
7882   effect(KILL cr); // flags register is declared clobbered
7883 
7884   ins_cost(350);
7885   format %{ "imulq   $dst, $src\t# long" %}
7886   opcode(0x0F, 0xAF);
7887   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7888   ins_pipe(ialu_reg_mem_alu0);
7889 %}
7890 
7891 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7892 %{ // Long multiply of a loaded value by an immL32 immediate: dst = [src] * imm.
7893   match(Set dst (MulL (LoadL src) imm)); // the LoadL is folded into the multiply
7894   effect(KILL cr); // flags register is declared clobbered
7895 
7896   ins_cost(300);
7897   format %{ "imulq   $dst, $src, $imm\t# long" %}
7898   opcode(0x69); /* 69 /r id */
7899   ins_encode(REX_reg_mem_wide(dst, src),
7900              OpcSE(imm), reg_mem(dst, src), Con8or32(imm)); // immediate is passed to both OpcSE and Con8or32
7901   ins_pipe(ialu_reg_mem_alu0);
7902 %}
7903 
7904 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7905 %{ // High half of a long multiply (MulHiL): result is bound to an rdx_RegL operand, one input to rax_RegL.
7906   match(Set dst (MulHiL src rax));
7907   effect(USE_KILL rax, KILL cr); // rax is consumed as an input and declared clobbered afterwards
7908 
7909   ins_cost(300);
7910   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
7911   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7912   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // only $src is encoded; the other operands are fixed by their register classes
7913   ins_pipe(ialu_reg_reg_alu0);
7914 %}
7915 
7916 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7917                    rFlagsReg cr)
7918 %{ // Int divide: dividend and quotient are both the rax operand; the divisor class excludes rax and rdx.
7919   match(Set rax (DivI rax div));
7920   effect(KILL rdx, KILL cr); // rdx is declared clobbered along with the flags
7921 
7922   ins_cost(30*100+10*100); // XXX
7923   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7924             "jne,s   normal\n\t"
7925             "xorl    rdx, rdx\n\t"
7926             "cmpl    $div, -1\n\t"
7927             "je,s    done\n"
7928     "normal: cdql\n\t"
7929             "idivl   $div\n"
7930     "done:"        %} // per this text, idivl is bypassed (rdx zeroed) when rax == 0x80000000 and $div == -1
7931   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7932   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // cdql_enc is defined elsewhere; presumably emits the guard and cdql shown above - confirm
7933   ins_pipe(ialu_reg_reg_alu0);
7934 %}
7935 
7936 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7937                    rFlagsReg cr)
7938 %{ // Long divide: dividend and quotient are both the rax operand; the divisor class excludes rax and rdx.
7939   match(Set rax (DivL rax div));
7940   effect(KILL rdx, KILL cr); // rdx is declared clobbered along with the flags
7941 
7942   ins_cost(30*100+10*100); // XXX
7943   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7944             "cmpq    rax, rdx\n\t"
7945             "jne,s   normal\n\t"
7946             "xorl    rdx, rdx\n\t"
7947             "cmpq    $div, -1\n\t"
7948             "je,s    done\n"
7949     "normal: cdqq\n\t"
7950             "idivq   $div\n"
7951     "done:"        %} // per this text, idivq is bypassed (rdx zeroed) when rax == 0x8000000000000000 and $div == -1
7952   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7953   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // cdqq_enc is defined elsewhere; presumably emits the guard and cdqq shown above - confirm
7954   ins_pipe(ialu_reg_reg_alu0);
7955 %}
7956 
7957 // Integer DIVMOD with Register, both quotient and mod results
7958 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7959                              rFlagsReg cr)
7960 %{ // Combined int divide/remainder (DivModI); uses the same format text and encoding as divI_rReg.
7961   match(DivModI rax div); // no "Set": the rule matches the DivModI node itself
7962   effect(KILL cr); // unlike divI_rReg, rdx is not listed as killed here
7963 
7964   ins_cost(30*100+10*100); // XXX
7965   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7966             "jne,s   normal\n\t"
7967             "xorl    rdx, rdx\n\t"
7968             "cmpl    $div, -1\n\t"
7969             "je,s    done\n"
7970     "normal: cdql\n\t"
7971             "idivl   $div\n"
7972     "done:"        %} // per this text, idivl is bypassed (rdx zeroed) when rax == 0x80000000 and $div == -1
7973   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7974   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7975   ins_pipe(pipe_slow); // the plain div/mod rules in this section use ialu_reg_reg_alu0 instead
7976 %}
7977 
7978 // Long DIVMOD with Register, both quotient and mod results
7979 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7980                              rFlagsReg cr)
7981 %{ // Combined long divide/remainder (DivModL); uses the same format text and encoding as divL_rReg.
7982   match(DivModL rax div); // no "Set": the rule matches the DivModL node itself
7983   effect(KILL cr); // unlike divL_rReg, rdx is not listed as killed here
7984 
7985   ins_cost(30*100+10*100); // XXX
7986   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7987             "cmpq    rax, rdx\n\t"
7988             "jne,s   normal\n\t"
7989             "xorl    rdx, rdx\n\t"
7990             "cmpq    $div, -1\n\t"
7991             "je,s    done\n"
7992     "normal: cdqq\n\t"
7993             "idivq   $div\n"
7994     "done:"        %} // per this text, idivq is bypassed (rdx zeroed) when rax == 0x8000000000000000 and $div == -1
7995   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7996   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7997   ins_pipe(pipe_slow); // the plain div/mod rules in this section use ialu_reg_reg_alu0 instead
7998 %}
7999 
8000 //----------- DivL-By-Constant-Expansions--------------------------------------
8001 // DivI cases are handled by the compiler
8002 
8003 // Magic constant, reciprocal of 10
8004 instruct loadConL_0x6666666666666667(rRegL dst)
8005 %{ // Expand-only helper (no match rule): loads the constant 0x6666666666666667 into $dst; used by divL_10.
8006   effect(DEF dst); // $dst is purely defined, never read
8007 
8008   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %} // must print the same 16-digit constant that ins_encode loads
8009   ins_encode(load_immL(dst, 0x6666666666666667));
8010   ins_pipe(ialu_reg);
8011 %}
8012 
8013 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8014 %{ // Expand-only helper (no match rule): same opcode and encoding as mulHiL_rReg; used by divL_10.
8015   effect(DEF dst, USE src, USE_KILL rax, KILL cr); // operand roles are spelled out here because there is no match tree
8016 
8017   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8018   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8019   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // only $src is encoded; the other operands are fixed by their register classes
8020   ins_pipe(ialu_reg_reg_alu0);
8021 %}
8022 
8023 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8024 %{ // Expand-only helper (no match rule): arithmetic right shift of $dst by the fixed count 63; used by divL_10.
8025   effect(USE_DEF dst, KILL cr); // $dst is both read and written
8026 
8027   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8028   opcode(0xC1, 0x7); /* C1 /7 ib */
8029   ins_encode(reg_opc_imm_wide(dst, 0x3F)); // 0x3F == 63, the count shown in the format
8030   ins_pipe(ialu_reg);
8031 %}
8032 
8033 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8034 %{ // Expand-only helper (no match rule): arithmetic right shift of $dst by the fixed count 2; used by divL_10.
8035   effect(USE_DEF dst, KILL cr); // $dst is both read and written
8036 
8037   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8038   opcode(0xC1, 0x7); /* C1 /7 ib */
8039   ins_encode(reg_opc_imm_wide(dst, 0x2)); // hard-coded shift count, matching the format
8040   ins_pipe(ialu_reg);
8041 %}
8042 
8043 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8044 %{ // Long divide by the immL10 constant, expanded into a multiply-high / shift / subtract sequence instead of idivq.
8045   match(Set dst (DivL src div));
8046 
8047   ins_cost((5+8)*100); // lower than the 30*100+10*100 cost of divL_rReg
8048   expand %{
8049     rax_RegL rax;                     // Killed temp
8050     rFlagsReg cr;                     // Killed
8051     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8052     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src (high half lands in dst)
8053     sarL_rReg_63(src, cr);            // sarq  src, 63 (note: overwrites src)
8054     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8055     subL_rReg(dst, src, cr);          // subq  rdx, src
8056   %}
8057 %}
8058 
8059 //-----------------------------------------------------------------------------
8060 
8061 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8062                    rFlagsReg cr)
8063 %{ // Int remainder: result is the rdx operand, dividend is the rax operand; same encoding as divI_rReg.
8064   match(Set rdx (ModI rax div));
8065   effect(KILL rax, KILL cr); // here rax (not rdx) is the clobbered register
8066 
8067   ins_cost(300); // XXX
8068   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8069             "jne,s   normal\n\t"
8070             "xorl    rdx, rdx\n\t"
8071             "cmpl    $div, -1\n\t"
8072             "je,s    done\n"
8073     "normal: cdql\n\t"
8074             "idivl   $div\n"
8075     "done:"        %} // per this text, rdx is zeroed and idivl bypassed when rax == 0x80000000 and $div == -1
8076   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8077   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8078   ins_pipe(ialu_reg_reg_alu0);
8079 %}
8080 
8081 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8082                    rFlagsReg cr)
8083 %{ // Long remainder: result is the rdx operand, dividend is the rax operand; same encoding as divL_rReg.
8084   match(Set rdx (ModL rax div));
8085   effect(KILL rax, KILL cr); // here rax (not rdx) is the clobbered register
8086 
8087   ins_cost(300); // XXX
8088   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8089             "cmpq    rax, rdx\n\t"
8090             "jne,s   normal\n\t"
8091             "xorl    rdx, rdx\n\t"
8092             "cmpq    $div, -1\n\t"
8093             "je,s    done\n"
8094     "normal: cdqq\n\t"
8095             "idivq   $div\n"
8096     "done:"        %} // per this text, rdx is zeroed and idivq bypassed when rax == 0x8000000000000000 and $div == -1
8097   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8098   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8099   ins_pipe(ialu_reg_reg_alu0);
8100 %}
8101 
8102 // Integer Shift Instructions
8103 // Shift Left by one
8104 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8105 %{ // Int left shift of a register by an immI1 count (shift-by-one form).
8106   match(Set dst (LShiftI dst shift));
8107   effect(KILL cr); // flags register is declared clobbered
8108 
8109   format %{ "sall    $dst, $shift" %}
8110   opcode(0xD1, 0x4); /* D1 /4 */
8111   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not passed to the encoding
8112   ins_pipe(ialu_reg);
8113 %}
8114 
8115 // Shift Left by one
8116 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8117 %{ // Int left shift of a memory location by an immI1 count (read-modify-write on $dst).
8118   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8119   effect(KILL cr); // flags register is declared clobbered
8120 
8121   format %{ "sall    $dst, $shift\t" %}
8122   opcode(0xD1, 0x4); /* D1 /4 */
8123   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x4)
8124   ins_pipe(ialu_mem_imm);
8125 %}
8126 
8127 // Shift Left by 8-bit immediate
8128 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8129 %{ // Int left shift of a register by an immI8 count.
8130   match(Set dst (LShiftI dst shift));
8131   effect(KILL cr); // flags register is declared clobbered
8132 
8133   format %{ "sall    $dst, $shift" %}
8134   opcode(0xC1, 0x4); /* C1 /4 ib */
8135   ins_encode(reg_opc_imm(dst, shift)); // a single enc_class receives both the register and the count
8136   ins_pipe(ialu_reg);
8137 %}
8138 
8139 // Shift Left by 8-bit immediate
8140 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8141 %{ // Int left shift of a memory location by an immI8 count (read-modify-write on $dst).
8142   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8143   effect(KILL cr); // flags register is declared clobbered
8144 
8145   format %{ "sall    $dst, $shift" %}
8146   opcode(0xC1, 0x4); /* C1 /4 ib */
8147   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8148   ins_pipe(ialu_mem_imm);
8149 %}
8150 
8151 // Shift Left by variable
8152 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8153 %{ // Int left shift of a register by a variable count; the count operand is constrained to rcx_RegI.
8154   match(Set dst (LShiftI dst shift));
8155   effect(KILL cr); // flags register is declared clobbered
8156 
8157   format %{ "sall    $dst, $shift" %}
8158   opcode(0xD3, 0x4); /* D3 /4 */
8159   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8160   ins_pipe(ialu_reg_reg);
8161 %}
8162 
8163 // Shift Left by variable
8164 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8165 %{ // Int left shift of a memory location by a variable count held in an rcx_RegI operand.
8166   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8167   effect(KILL cr); // flags register is declared clobbered
8168 
8169   format %{ "sall    $dst, $shift" %}
8170   opcode(0xD3, 0x4); /* D3 /4 */
8171   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8172   ins_pipe(ialu_mem_reg);
8173 %}
8174 
8175 // Arithmetic shift right by one
8176 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8177 %{ // Int arithmetic right shift of a register by an immI1 count (shift-by-one form).
8178   match(Set dst (RShiftI dst shift));
8179   effect(KILL cr); // flags register is declared clobbered
8180 
8181   format %{ "sarl    $dst, $shift" %}
8182   opcode(0xD1, 0x7); /* D1 /7 */
8183   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not passed to the encoding
8184   ins_pipe(ialu_reg);
8185 %}
8186 
8187 // Arithmetic shift right by one
8188 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8189 %{ // Int arithmetic right shift of a memory location by an immI1 count (read-modify-write on $dst).
8190   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8191   effect(KILL cr); // flags register is declared clobbered
8192 
8193   format %{ "sarl    $dst, $shift" %}
8194   opcode(0xD1, 0x7); /* D1 /7 */
8195   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x7)
8196   ins_pipe(ialu_mem_imm);
8197 %}
8198 
8199 // Arithmetic Shift Right by 8-bit immediate
8200 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8201 %{ // Int arithmetic right shift of a register by an immI8 count.
8202   match(Set dst (RShiftI dst shift));
8203   effect(KILL cr); // flags register is declared clobbered
8204 
8205   format %{ "sarl    $dst, $shift" %}
8206   opcode(0xC1, 0x7); /* C1 /7 ib */
8207   ins_encode(reg_opc_imm(dst, shift)); // a single enc_class receives both the register and the count
8208   ins_pipe(ialu_reg); // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
8209 %}
8210 
8211 // Arithmetic Shift Right by 8-bit immediate
8212 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8213 %{ // Int arithmetic right shift of a memory location by an immI8 count (read-modify-write on $dst).
8214   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8215   effect(KILL cr); // flags register is declared clobbered
8216 
8217   format %{ "sarl    $dst, $shift" %}
8218   opcode(0xC1, 0x7); /* C1 /7 ib */
8219   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8220   ins_pipe(ialu_mem_imm);
8221 %}
8222 
8223 // Arithmetic Shift Right by variable
8224 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8225 %{ // Int arithmetic right shift of a register by a variable count held in an rcx_RegI operand.
8226   match(Set dst (RShiftI dst shift));
8227   effect(KILL cr); // flags register is declared clobbered
8228 
8229   format %{ "sarl    $dst, $shift" %}
8230   opcode(0xD3, 0x7); /* D3 /7 */
8231   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8232   ins_pipe(ialu_reg_reg);
8233 %}
8234 
8235 // Arithmetic Shift Right by variable
8236 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8237 %{ // Int arithmetic right shift of a memory location by a variable count held in an rcx_RegI operand.
8238   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8239   effect(KILL cr); // flags register is declared clobbered
8240 
8241   format %{ "sarl    $dst, $shift" %}
8242   opcode(0xD3, 0x7); /* D3 /7 */
8243   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8244   ins_pipe(ialu_mem_reg);
8245 %}
8246 
8247 // Logical shift right by one
8248 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8249 %{ // Int logical (unsigned) right shift of a register by an immI1 count (shift-by-one form).
8250   match(Set dst (URShiftI dst shift));
8251   effect(KILL cr); // flags register is declared clobbered
8252 
8253   format %{ "shrl    $dst, $shift" %}
8254   opcode(0xD1, 0x5); /* D1 /5 */
8255   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not passed to the encoding
8256   ins_pipe(ialu_reg);
8257 %}
8258 
8259 // Logical shift right by one
8260 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8261 %{ // Int logical right shift of a memory location by an immI1 count (read-modify-write on $dst).
8262   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8263   effect(KILL cr); // flags register is declared clobbered
8264 
8265   format %{ "shrl    $dst, $shift" %}
8266   opcode(0xD1, 0x5); /* D1 /5 */
8267   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x5)
8268   ins_pipe(ialu_mem_imm);
8269 %}
8270 
8271 // Logical Shift Right by 8-bit immediate
8272 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8273 %{ // Int logical right shift of a register by an immI8 count.
8274   match(Set dst (URShiftI dst shift));
8275   effect(KILL cr); // flags register is declared clobbered
8276 
8277   format %{ "shrl    $dst, $shift" %}
8278   opcode(0xC1, 0x5); /* C1 /5 ib */
8279   ins_encode(reg_opc_imm(dst, shift)); // a single enc_class receives both the register and the count
8280   ins_pipe(ialu_reg);
8281 %}
8282 
8283 // Logical Shift Right by 8-bit immediate
8284 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8285 %{ // Int logical right shift of a memory location by an immI8 count (read-modify-write on $dst).
8286   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8287   effect(KILL cr); // flags register is declared clobbered
8288 
8289   format %{ "shrl    $dst, $shift" %}
8290   opcode(0xC1, 0x5); /* C1 /5 ib */
8291   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8292   ins_pipe(ialu_mem_imm);
8293 %}
8294 
8295 // Logical Shift Right by variable
8296 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8297 %{ // Int logical right shift of a register by a variable count held in an rcx_RegI operand.
8298   match(Set dst (URShiftI dst shift));
8299   effect(KILL cr); // flags register is declared clobbered
8300 
8301   format %{ "shrl    $dst, $shift" %}
8302   opcode(0xD3, 0x5); /* D3 /5 */
8303   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8304   ins_pipe(ialu_reg_reg);
8305 %}
8306 
8307 // Logical Shift Right by variable
8308 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8309 %{ // Int logical right shift of a memory location by a variable count held in an rcx_RegI operand.
8310   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8311   effect(KILL cr); // flags register is declared clobbered
8312 
8313   format %{ "shrl    $dst, $shift" %}
8314   opcode(0xD3, 0x5); /* D3 /5 */
8315   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8316   ins_pipe(ialu_mem_reg);
8317 %}
8318 
8319 // Long Shift Instructions
8320 // Shift Left by one
8321 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8322 %{ // Long left shift of a register by an immI1 count (shift-by-one form); the count operand is an int immediate.
8323   match(Set dst (LShiftL dst shift));
8324   effect(KILL cr); // flags register is declared clobbered
8325 
8326   format %{ "salq    $dst, $shift" %}
8327   opcode(0xD1, 0x4); /* D1 /4 */
8328   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as salI_rReg_1; differs by the *_wide REX enc_class
8329   ins_pipe(ialu_reg);
8330 %}
8331 
8332 // Shift Left by one
8333 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8334 %{ // Long left shift of a memory location by an immI1 count (read-modify-write on $dst).
8335   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8336   effect(KILL cr); // flags register is declared clobbered
8337 
8338   format %{ "salq    $dst, $shift" %}
8339   opcode(0xD1, 0x4); /* D1 /4 */
8340   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x4)
8341   ins_pipe(ialu_mem_imm);
8342 %}
8343 
8344 // Shift Left by 8-bit immediate
8345 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8346 %{ // Long left shift of a register by an immI8 count.
8347   match(Set dst (LShiftL dst shift));
8348   effect(KILL cr); // flags register is declared clobbered
8349 
8350   format %{ "salq    $dst, $shift" %}
8351   opcode(0xC1, 0x4); /* C1 /4 ib */
8352   ins_encode(reg_opc_imm_wide(dst, shift)); // wide counterpart of the reg_opc_imm enc_class used by salI_rReg_imm
8353   ins_pipe(ialu_reg);
8354 %}
8355 
8356 // Shift Left by 8-bit immediate
8357 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8358 %{ // Long left shift of a memory location by an immI8 count (read-modify-write on $dst).
8359   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8360   effect(KILL cr); // flags register is declared clobbered
8361 
8362   format %{ "salq    $dst, $shift" %}
8363   opcode(0xC1, 0x4); /* C1 /4 ib */
8364   ins_encode(REX_mem_wide(dst), OpcP,
8365              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8366   ins_pipe(ialu_mem_imm);
8367 %}
8368 
8369 // Shift Left by variable
8370 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8371 %{ // Long left shift of a register by a variable count held in an rcx_RegI (int) operand.
8372   match(Set dst (LShiftL dst shift));
8373   effect(KILL cr); // flags register is declared clobbered
8374 
8375   format %{ "salq    $dst, $shift" %}
8376   opcode(0xD3, 0x4); /* D3 /4 */
8377   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8378   ins_pipe(ialu_reg_reg);
8379 %}
8380 
8381 // Shift Left by variable
8382 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8383 %{ // Long left shift of a memory location by a variable count held in an rcx_RegI operand.
8384   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8385   effect(KILL cr); // flags register is declared clobbered
8386 
8387   format %{ "salq    $dst, $shift" %}
8388   opcode(0xD3, 0x4); /* D3 /4 */
8389   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8390   ins_pipe(ialu_mem_reg);
8391 %}
8392 
8393 // Arithmetic shift right by one
8394 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8395 %{ // Long arithmetic right shift of a register by an immI1 count (shift-by-one form).
8396   match(Set dst (RShiftL dst shift));
8397   effect(KILL cr); // flags register is declared clobbered
8398 
8399   format %{ "sarq    $dst, $shift" %}
8400   opcode(0xD1, 0x7); /* D1 /7 */
8401   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // $shift is not passed to the encoding
8402   ins_pipe(ialu_reg);
8403 %}
8404 
8405 // Arithmetic shift right by one
8406 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8407 %{ // Long arithmetic right shift of a memory location by an immI1 count (read-modify-write on $dst).
8408   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8409   effect(KILL cr); // flags register is declared clobbered
8410 
8411   format %{ "sarq    $dst, $shift" %}
8412   opcode(0xD1, 0x7); /* D1 /7 */
8413   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x7)
8414   ins_pipe(ialu_mem_imm);
8415 %}
8416 
8417 // Arithmetic Shift Right by 8-bit immediate
8418 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8419 %{ // Long arithmetic right shift of a register by an immI8 count.
8420   match(Set dst (RShiftL dst shift));
8421   effect(KILL cr); // flags register is declared clobbered
8422 
8423   format %{ "sarq    $dst, $shift" %}
8424   opcode(0xC1, 0x7); /* C1 /7 ib */
8425   ins_encode(reg_opc_imm_wide(dst, shift)); // a single enc_class receives both the register and the count
8426   ins_pipe(ialu_reg); // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
8427 %}
8428 
8429 // Arithmetic Shift Right by 8-bit immediate
8430 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8431 %{ // Long arithmetic right shift of a memory location by an immI8 count (read-modify-write on $dst).
8432   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8433   effect(KILL cr); // flags register is declared clobbered
8434 
8435   format %{ "sarq    $dst, $shift" %}
8436   opcode(0xC1, 0x7); /* C1 /7 ib */
8437   ins_encode(REX_mem_wide(dst), OpcP,
8438              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8439   ins_pipe(ialu_mem_imm);
8440 %}
8441 
8442 // Arithmetic Shift Right by variable
8443 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8444 %{ // Long arithmetic right shift of a register by a variable count held in an rcx_RegI operand.
8445   match(Set dst (RShiftL dst shift));
8446   effect(KILL cr); // flags register is declared clobbered
8447 
8448   format %{ "sarq    $dst, $shift" %}
8449   opcode(0xD3, 0x7); /* D3 /7 */
8450   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8451   ins_pipe(ialu_reg_reg);
8452 %}
8453 
8454 // Arithmetic Shift Right by variable
8455 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8456 %{ // Long arithmetic right shift of a memory location by a variable count held in an rcx_RegI operand.
8457   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8458   effect(KILL cr); // flags register is declared clobbered
8459 
8460   format %{ "sarq    $dst, $shift" %}
8461   opcode(0xD3, 0x7); /* D3 /7 */
8462   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8463   ins_pipe(ialu_mem_reg);
8464 %}
8465 
8466 // Logical shift right by one
8467 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8468 %{ // Long logical (unsigned) right shift of a register by an immI1 count (shift-by-one form).
8469   match(Set dst (URShiftL dst shift));
8470   effect(KILL cr); // flags register is declared clobbered
8471 
8472   format %{ "shrq    $dst, $shift" %}
8473   opcode(0xD1, 0x5); /* D1 /5 */
8474   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst )); // $shift is not passed to the encoding
8475   ins_pipe(ialu_reg);
8476 %}
8477 
8478 // Logical shift right by one
8479 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8480 %{ // Long logical right shift of a memory location by an immI1 count (read-modify-write on $dst).
8481   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8482   effect(KILL cr); // flags register is declared clobbered
8483 
8484   format %{ "shrq    $dst, $shift" %}
8485   opcode(0xD1, 0x5); /* D1 /5 */
8486   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // 'secondary' refers to the second opcode() value (0x5)
8487   ins_pipe(ialu_mem_imm);
8488 %}
8489 
8490 // Logical Shift Right by 8-bit immediate
8491 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8492 %{ // Long logical right shift of a register by an immI8 count.
8493   match(Set dst (URShiftL dst shift));
8494   effect(KILL cr); // flags register is declared clobbered
8495 
8496   format %{ "shrq    $dst, $shift" %}
8497   opcode(0xC1, 0x5); /* C1 /5 ib */
8498   ins_encode(reg_opc_imm_wide(dst, shift)); // a single enc_class receives both the register and the count
8499   ins_pipe(ialu_reg);
8500 %}
8501 
8502 
8503 // Logical Shift Right by 8-bit immediate
8504 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8505 %{ // Long logical right shift of a memory location by an immI8 count (read-modify-write on $dst).
8506   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8507   effect(KILL cr); // flags register is declared clobbered
8508 
8509   format %{ "shrq    $dst, $shift" %}
8510   opcode(0xC1, 0x5); /* C1 /5 ib */
8511   ins_encode(REX_mem_wide(dst), OpcP,
8512              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is passed last, via Con8or32
8513   ins_pipe(ialu_mem_imm);
8514 %}
8515 
8516 // Logical Shift Right by variable
8517 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8518 %{ // Long logical right shift of a register by a variable count held in an rcx_RegI operand.
8519   match(Set dst (URShiftL dst shift));
8520   effect(KILL cr); // flags register is declared clobbered
8521 
8522   format %{ "shrq    $dst, $shift" %}
8523   opcode(0xD3, 0x5); /* D3 /5 */
8524   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8525   ins_pipe(ialu_reg_reg);
8526 %}
8527 
8528 // Logical Shift Right by variable
8529 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8530 %{ // Long logical right shift of a memory location by a variable count held in an rcx_RegI operand.
8531   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8532   effect(KILL cr); // flags register is declared clobbered
8533 
8534   format %{ "shrq    $dst, $shift" %}
8535   opcode(0xD3, 0x5); /* D3 /5 */
8536   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // $shift is not encoded; its register class fixes it
8537   ins_pipe(ialu_mem_reg);
8538 %}
8539 
8540 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8541 // This idiom is used by the compiler for the i2b bytecode.
8542 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8543 %{ // Replaces the pair of shifts (src << 24) >> 24 with a single movsbl; no flags operand is declared.
8544   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // inner LShiftI, outer arithmetic RShiftI, same constant operand
8545 
8546   format %{ "movsbl  $dst, $src\t# i2b" %}
8547   opcode(0x0F, 0xBE);
8548   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // uses the *_breg REX enc_class, unlike i2s
8549   ins_pipe(ialu_reg_reg);
8550 %}
8551 
8552 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8553 // This idiom is used by the compiler for the i2s bytecode.
8554 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8555 %{ // Replaces the pair of shifts (src << 16) >> 16 with a single movswl; no flags operand is declared.
8556   match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // inner LShiftI, outer arithmetic RShiftI, same constant operand
8557 
8558   format %{ "movswl  $dst, $src\t# i2s" %}
8559   opcode(0x0F, 0xBF);
8560   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8561   ins_pipe(ialu_reg_reg);
8562 %}
8563 
8564 // ROL/ROR instructions
8565 
8566 // ROL expand
8567 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{ // Expand-only helper (no match rule): int rotate left with no count operand.
8568   effect(KILL cr, USE_DEF dst); // $dst is both read and written; flags are clobbered
8569 
8570   format %{ "roll    $dst" %}
8571   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8572   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8573   ins_pipe(ialu_reg);
8574 %}
8575 
8576 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{ // Expand-only helper (no match rule): int rotate left by an immI8 count.
8577   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8578 
8579   format %{ "roll    $dst, $shift" %}
8580   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8581   ins_encode( reg_opc_imm(dst, shift) );
8582   ins_pipe(ialu_reg);
8583 %}
8584 
8585 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8586 %{ // Expand-only helper (no match rule): int rotate left by a variable count; dst class excludes rcx, count class is rcx.
8587   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8588 
8589   format %{ "roll    $dst, $shift" %}
8590   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8591   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8592   ins_pipe(ialu_reg_reg);
8593 %}
8594 // end of ROL expand
8595 
8596 // Rotate Left by one
8597 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8598 %{ // Recognizes (dst << lshift) | (dst >>> rshift) with immI1 / immI_M1 counts and expands it to rolI_rReg_imm1.
8599   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); // presumably relies on int shift counts being masked so that -1 acts as 31 - confirm
8600 
8601   expand %{
8602     rolI_rReg_imm1(dst, cr); // neither count is forwarded; the helper takes none
8603   %}
8604 %}
8605 
8606 // Rotate Left by 8-bit immediate
8607 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8608 %{ // Recognizes (dst << lshift) | (dst >>> rshift) with constant counts and expands it to a rotate left by lshift.
8609   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // accept only when the two shift counts sum to a multiple of 32
8610   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8611 
8612   expand %{
8613     rolI_rReg_imm8(dst, lshift, cr); // only the left-shift count is forwarded
8614   %}
8615 %}
8616 
8617 // Rotate Left by variable
8618 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8619 %{ // Recognizes (dst << shift) | (dst >>> (0 - shift)) with a variable count and expands it to rolI_rReg_CL.
8620   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8621 
8622   expand %{
8623     rolI_rReg_CL(dst, shift, cr); // $zero takes part in the match only
8624   %}
8625 %}
8626 
8627 // Rotate Left by variable
8628 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8629 %{ // Recognizes (dst << shift) | (dst >>> (32 - shift)) with a variable count and expands it to rolI_rReg_CL.
8630   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8631 
8632   expand %{
8633     rolI_rReg_CL(dst, shift, cr); // $c32 takes part in the match only
8634   %}
8635 %}
8636 
8637 // ROR expand
8638 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8639 %{ // Expand-only helper (no match rule): int rotate right with no count operand.
8640   effect(USE_DEF dst, KILL cr); // $dst is both read and written; flags are clobbered
8641 
8642   format %{ "rorl    $dst" %}
8643   opcode(0xD1, 0x1); /* D1 /1 */
8644   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8645   ins_pipe(ialu_reg);
8646 %}
8647 
8648 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8649 %{ // Expand-only helper (no match rule): int rotate right by an immI8 count.
8650   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8651 
8652   format %{ "rorl    $dst, $shift" %}
8653   opcode(0xC1, 0x1); /* C1 /1 ib */
8654   ins_encode(reg_opc_imm(dst, shift));
8655   ins_pipe(ialu_reg);
8656 %}
8657 
8658 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8659 %{ // Expand-only helper (no match rule): int rotate right by a variable count; dst class excludes rcx, count class is rcx.
8660   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8661 
8662   format %{ "rorl    $dst, $shift" %}
8663   opcode(0xD3, 0x1); /* D3 /1 */
8664   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8665   ins_pipe(ialu_reg_reg);
8666 %}
8667 // end of ROR expand
8668 
8669 // Rotate Right by one
8670 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8671 %{ // Recognizes (dst >>> rshift) | (dst << lshift) with immI1 / immI_M1 counts and expands it to rorI_rReg_imm1.
8672   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); // presumably relies on int shift counts being masked so that -1 acts as 31 - confirm
8673 
8674   expand %{
8675     rorI_rReg_imm1(dst, cr); // neither count is forwarded; the helper takes none
8676   %}
8677 %}
8678 
8679 // Rotate Right by 8-bit immediate
8680 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8681 %{ // Recognizes (dst >>> rshift) | (dst << lshift) with constant counts and expands it to a rotate right by rshift.
8682   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // accept only when the two shift counts sum to a multiple of 32
8683   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8684 
8685   expand %{
8686     rorI_rReg_imm8(dst, rshift, cr); // only the right-shift count is forwarded
8687   %}
8688 %}
8689 
8690 // Rotate Right by variable
8691 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8692 %{ // Recognizes (dst >>> shift) | (dst << (0 - shift)) with a variable count and expands it to rorI_rReg_CL.
8693   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8694 
8695   expand %{
8696     rorI_rReg_CL(dst, shift, cr); // $zero takes part in the match only
8697   %}
8698 %}
8699 
8700 // Rotate Right by variable
8701 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8702 %{ // Recognizes (dst >>> shift) | (dst << (32 - shift)) with a variable count and expands it to rorI_rReg_CL.
8703   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8704 
8705   expand %{
8706     rorI_rReg_CL(dst, shift, cr); // $c32 takes part in the match only
8707   %}
8708 %}
8709 
8710 // for long rotate
8711 // ROL expand
8712 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{ // Expand-only helper (no match rule): long rotate left with no count operand.
8713   effect(USE_DEF dst, KILL cr); // $dst is both read and written; flags are clobbered
8714 
8715   format %{ "rolq    $dst" %}
8716   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8717   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as rolI_rReg_imm1; differs by the *_wide REX enc_class
8718   ins_pipe(ialu_reg);
8719 %}
8720 
8721 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{ // Expand-only helper (no match rule): long rotate left by an immI8 count.
8722   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8723 
8724   format %{ "rolq    $dst, $shift" %}
8725   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8726   ins_encode( reg_opc_imm_wide(dst, shift) );
8727   ins_pipe(ialu_reg);
8728 %}
8729 
8730 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8731 %{ // Expand-only helper (no match rule): long rotate left by a variable count; dst class excludes rcx, count class is rcx.
8732   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are spelled out because there is no match tree
8733 
8734   format %{ "rolq    $dst, $shift" %}
8735   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8736   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // $shift is not encoded; its register class fixes it
8737   ins_pipe(ialu_reg_reg);
8738 %}
8739 // end of ROL expand
8740 
8741 // Rotate Left by one
8742 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8743 %{ // Recognizes (dst << lshift) | (dst >>> rshift) on longs with immI1 / immI_M1 counts and expands it to rolL_rReg_imm1.
8744   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); // presumably relies on long shift counts being masked so that -1 acts as 63 - confirm
8745 
8746   expand %{
8747     rolL_rReg_imm1(dst, cr); // neither count is forwarded; the helper takes none
8748   %}
8749 %}
8750 
8751 // Rotate Left by 8-bit immediate
8752 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8753 %{ // Recognizes (dst << lshift) | (dst >>> rshift) on longs with constant counts and expands it to a rotate left by lshift.
8754   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // accept only when the two shift counts sum to a multiple of 64
8755   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8756 
8757   expand %{
8758     rolL_rReg_imm8(dst, lshift, cr); // only the left-shift count is forwarded
8759   %}
8760 %}
8761 
8762 // Rotate Left by variable (long), "0 - shift" form: matches
     // (dst << shift) | (dst >>> (zero - shift)) and expands to rolL_rReg_CL.
8763 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8764 %{
8765   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8766 
8767   expand %{
8768     rolL_rReg_CL(dst, shift, cr);
8769   %}
8770 %}
8771 
8772 // Rotate Left by variable (long), "c64 - shift" form: matches
     // (dst << shift) | (dst >>> (c64 - shift)) and expands to rolL_rReg_CL.
8773 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8774 %{
8775   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8776 
8777   expand %{
8778     rolL_rReg_CL(dst, shift, cr);
8779   %}
8780 %}
8781 
8782 // ROR expand
8783 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8784 %{
     // Expand-only building block (it has no match rule): 64-bit rotate-right
     // of $dst with no explicit count operand. rorL_rReg_i1 expands to it.
8785   effect(USE_DEF dst, KILL cr);  // dst is both read and written; cr is killed
8786 
8787   format %{ "rorq    $dst" %}
8788   opcode(0xD1, 0x1); /* D1 /1 */
8789   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8790   ins_pipe(ialu_reg);
8791 %}
8792 
8793 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8794 %{
     // Expand-only building block (it has no match rule): 64-bit rotate-right
     // of $dst by the 8-bit immediate $shift. rorL_rReg_i8 expands to it.
8795   effect(USE_DEF dst, USE shift, KILL cr);
8796 
8797   format %{ "rorq    $dst, $shift" %}
8798   opcode(0xC1, 0x1); /* C1 /1 ib */
8799   ins_encode(reg_opc_imm_wide(dst, shift));
8800   ins_pipe(ialu_reg);
8801 %}
8802 
8803 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8804 %{
     // Expand-only building block (it has no match rule): 64-bit rotate-right
     // of $dst by a variable count. The encoding below only names dst, so the
     // count is not encoded explicitly; presumably it is taken implicitly from
     // CL, which is why shift is pinned to the rcx class and dst excludes rcx.
8805   effect(USE_DEF dst, USE shift, KILL cr);
8806 
8807   format %{ "rorq    $dst, $shift" %}
8808   opcode(0xD3, 0x1); /* D3 /1 */
8809   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8810   ins_pipe(ialu_reg_reg);
8811 %}
8812 // end of ROR expand
8813 
8814 // Rotate Right by one (long): matches (dst >>> rshift) | (dst << lshift)
     // where rshift is of operand class immI1 and lshift of class immI_M1
     // (presumably the constants 1 and -1 -- confirm against the operand
     // definitions), and expands to rorL_rReg_imm1.
8815 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8816 %{
8817   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8818 
8819   expand %{
8820     rorL_rReg_imm1(dst, cr);
8821   %}
8822 %}
8823 
8824 // Rotate Right by 8-bit immediate (long): matches
     // (dst >>> rshift) | (dst << lshift) and expands to rorL_rReg_imm8 using
     // rshift as the rotate count.
8825 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8826 %{
     // Only applies when the two constant shift counts sum to a value whose
     // low 6 bits are zero, i.e. a multiple of 64. n is presumably the OrL
     // node, with in(1)/in(2) the two shift nodes and their in(2) the counts.
8827   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8828   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8829 
8830   expand %{
8831     rorL_rReg_imm8(dst, rshift, cr);
8832   %}
8833 %}
8834 
8835 // Rotate Right by variable (long), "0 - shift" form: matches
     // (dst >>> shift) | (dst << (zero - shift)) and expands to rorL_rReg_CL.
8836 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8837 %{
8838   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8839 
8840   expand %{
8841     rorL_rReg_CL(dst, shift, cr);
8842   %}
8843 %}
8844 
8845 // Rotate Right by variable (long), "c64 - shift" form: matches
     // (dst >>> shift) | (dst << (c64 - shift)) and expands to rorL_rReg_CL.
8846 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8847 %{
8848   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8849 
8850   expand %{
8851     rorL_rReg_CL(dst, shift, cr);
8852   %}
8853 %}
8854 
8855 // Logical Instructions
8856 
8857 // Integer Logical Instructions
8858 
8859 // And Instructions
8860 // And Register with Register (int): dst = dst & src, printed as "andl".
8861 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8862 %{
8863   match(Set dst (AndI dst src));
8864   effect(KILL cr);  // the flags operand is declared killed
8865 
8866   format %{ "andl    $dst, $src\t# int" %}
8867   opcode(0x23);
8868   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8869   ins_pipe(ialu_reg_reg);
8870 %}
8871 
8872 // And Register with Immediate 255 (int): dst & 0xFF is implemented as a
     // zero-extending byte move of dst onto itself ("movzbl") instead of an
     // AND. No flags operand or KILL effect is declared for this form.
8873 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8874 %{
8875   match(Set dst (AndI dst src));
8876 
8877   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8878   opcode(0x0F, 0xB6);
     // NOTE(review): REX_reg_breg is used here where the 16-bit variant uses
     // REX_reg_reg; presumably because the source is read as a byte register.
8879   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8880   ins_pipe(ialu_reg);
8881 %}
8882 
8883 // And Register with Immediate 255 and promote to long: folds
     // ConvI2L(src & 0xFF) into a single "movzbl" from src into the long dst.
     // No flags operand or KILL effect is declared for this form.
8884 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8885 %{
8886   match(Set dst (ConvI2L (AndI src mask)));
8887 
8888   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8889   opcode(0x0F, 0xB6);
8890   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8891   ins_pipe(ialu_reg);
8892 %}
8893 
8894 // And Register with Immediate 65535 (int): dst & 0xFFFF is implemented as
     // a zero-extending 16-bit move of dst onto itself ("movzwl") instead of
     // an AND. No flags operand or KILL effect is declared for this form.
8895 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8896 %{
8897   match(Set dst (AndI dst src));
8898 
8899   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8900   opcode(0x0F, 0xB7);
8901   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8902   ins_pipe(ialu_reg);
8903 %}
8904 
8905 // And Register with Immediate 65535 and promote to long: folds
     // ConvI2L(src & 0xFFFF) into a single "movzwl" from src into the long
     // dst. No flags operand or KILL effect is declared for this form.
8906 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8907 %{
8908   match(Set dst (ConvI2L (AndI src mask)));
8909 
8910   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8911   opcode(0x0F, 0xB7);
8912   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8913   ins_pipe(ialu_reg);
8914 %}
8915 
8916 // And Register with Immediate (int): dst = dst & src for a general int
     // constant, printed as "andl".
8917 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8918 %{
8919   match(Set dst (AndI dst src));
8920   effect(KILL cr);  // the flags operand is declared killed
8921 
8922   format %{ "andl    $dst, $src\t# int" %}
8923   opcode(0x81, 0x04); /* Opcode 81 /4 */
     // NOTE(review): OpcSErm/Con8or32 presumably choose between an 8-bit
     // sign-extended and a 32-bit immediate form -- confirm in the enc_classes.
8924   ins_encode(OpcSErm(dst, src), Con8or32(src));
8925   ins_pipe(ialu_reg);
8926 %}
8927 
8928 // And Register with Memory (int): dst = dst & LoadI(src), folding the
     // load into the "andl" as a memory source operand.
8929 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8930 %{
8931   match(Set dst (AndI dst (LoadI src)));
8932   effect(KILL cr);  // the flags operand is declared killed
8933 
8934   ins_cost(125);
8935   format %{ "andl    $dst, $src\t# int" %}
8936   opcode(0x23);
8937   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8938   ins_pipe(ialu_reg_mem);
8939 %}
8940 
8941 // And Memory with Register (int): matches a load / and / store back to
     // the same memory operand and emits one "andl" with a memory destination.
8942 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8943 %{
8944   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8945   effect(KILL cr);  // the flags operand is declared killed
8946 
8947   ins_cost(150);
8948   format %{ "andl    $dst, $src\t# int" %}
8949   opcode(0x21); /* Opcode 21 /r */
     // The register operand (src) is passed first to the encoders here.
8950   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8951   ins_pipe(ialu_mem_reg);
8952 %}
8953 
8954 // And Memory with Immediate (int): matches a load / and-with-constant /
     // store back to the same memory operand and emits one "andl".
8955 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
8956 %{
8957   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8958   effect(KILL cr);  // the flags operand is declared killed
8959 
8960   ins_cost(125);
8961   format %{ "andl    $dst, $src\t# int" %}
8962   opcode(0x81, 0x4); /* Opcode 81 /4 id */
     // "secondary" refers to the second opcode() value (the /4 extension).
8963   ins_encode(REX_mem(dst), OpcSE(src),
8964              RM_opc_mem(secondary, dst), Con8or32(src));
8965   ins_pipe(ialu_mem_imm);
8966 %}
8967 
8968 // Or Instructions
8969 // Or Register with Register (int): dst = dst | src, printed as "orl".
8970 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8971 %{
8972   match(Set dst (OrI dst src));
8973   effect(KILL cr);  // the flags operand is declared killed
8974 
8975   format %{ "orl     $dst, $src\t# int" %}
8976   opcode(0x0B);
8977   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8978   ins_pipe(ialu_reg_reg);
8979 %}
8980 
8981 // Or Register with Immediate (int): dst = dst | src for an int constant,
     // printed as "orl".
8982 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8983 %{
8984   match(Set dst (OrI dst src));
8985   effect(KILL cr);  // the flags operand is declared killed
8986 
8987   format %{ "orl     $dst, $src\t# int" %}
8988   opcode(0x81, 0x01); /* Opcode 81 /1 id */
     // NOTE(review): OpcSErm/Con8or32 presumably choose between an 8-bit
     // sign-extended and a 32-bit immediate form -- confirm in the enc_classes.
8989   ins_encode(OpcSErm(dst, src), Con8or32(src));
8990   ins_pipe(ialu_reg);
8991 %}
8992 
8993 // Or Register with Memory (int): dst = dst | LoadI(src), folding the load
     // into the "orl" as a memory source operand.
8994 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8995 %{
8996   match(Set dst (OrI dst (LoadI src)));
8997   effect(KILL cr);  // the flags operand is declared killed
8998 
8999   ins_cost(125);
9000   format %{ "orl     $dst, $src\t# int" %}
9001   opcode(0x0B);
9002   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9003   ins_pipe(ialu_reg_mem);
9004 %}
9005 
9006 // Or Memory with Register (int): matches a load / or / store back to the
     // same memory operand and emits one "orl" with a memory destination.
9007 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9008 %{
9009   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9010   effect(KILL cr);  // the flags operand is declared killed
9011 
9012   ins_cost(150);
9013   format %{ "orl     $dst, $src\t# int" %}
9014   opcode(0x09); /* Opcode 09 /r */
     // The register operand (src) is passed first to the encoders here.
9015   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9016   ins_pipe(ialu_mem_reg);
9017 %}
9018 
9019 // Or Memory with Immediate (int): matches a load / or-with-constant /
     // store back to the same memory operand and emits one "orl".
9020 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9021 %{
9022   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9023   effect(KILL cr);  // the flags operand is declared killed
9024 
9025   ins_cost(125);
9026   format %{ "orl     $dst, $src\t# int" %}
9027   opcode(0x81, 0x1); /* Opcode 81 /1 id */
     // "secondary" refers to the second opcode() value (the /1 extension).
9028   ins_encode(REX_mem(dst), OpcSE(src),
9029              RM_opc_mem(secondary, dst), Con8or32(src));
9030   ins_pipe(ialu_mem_imm);
9031 %}
9032 
9033 // Xor Instructions
9034 // Xor Register with Register (int): dst = dst ^ src, printed as "xorl".
9035 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9036 %{
9037   match(Set dst (XorI dst src));
9038   effect(KILL cr);  // the flags operand is declared killed
9039 
9040   format %{ "xorl    $dst, $src\t# int" %}
9041   opcode(0x33);
9042   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9043   ins_pipe(ialu_reg_reg);
9044 %}
9045 
9046 // Xor Register with Immediate -1 (int): dst ^ imm, with imm of operand
     // class immI_M1, is a bitwise complement and is emitted through the
     // assembler's notl(). No flags operand or KILL effect is declared.
9047 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9048   match(Set dst (XorI dst imm));
9049 
     // Print the mnemonic that is actually emitted (notl), matching the
     // "notq" spelling used by the long variant xorL_rReg_im1.
9050   format %{ "notl   $dst" %}
9051   ins_encode %{
9052      __ notl($dst$$Register);
9053   %}
9054   ins_pipe(ialu_reg);
9055 %}
9056 
9057 // Xor Register with Immediate (int): dst = dst ^ src for an int constant,
     // printed as "xorl".
9058 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9059 %{
9060   match(Set dst (XorI dst src));
9061   effect(KILL cr);  // the flags operand is declared killed
9062 
9063   format %{ "xorl    $dst, $src\t# int" %}
9064   opcode(0x81, 0x06); /* Opcode 81 /6 id */
     // NOTE(review): OpcSErm/Con8or32 presumably choose between an 8-bit
     // sign-extended and a 32-bit immediate form -- confirm in the enc_classes.
9065   ins_encode(OpcSErm(dst, src), Con8or32(src));
9066   ins_pipe(ialu_reg);
9067 %}
9068 
9069 // Xor Register with Memory (int): dst = dst ^ LoadI(src), folding the
     // load into the "xorl" as a memory source operand.
9070 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9071 %{
9072   match(Set dst (XorI dst (LoadI src)));
9073   effect(KILL cr);  // the flags operand is declared killed
9074 
9075   ins_cost(125);
9076   format %{ "xorl    $dst, $src\t# int" %}
9077   opcode(0x33);
9078   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9079   ins_pipe(ialu_reg_mem);
9080 %}
9081 
9082 // Xor Memory with Register (int): matches a load / xor / store back to
     // the same memory operand and emits one "xorl" with a memory destination.
9083 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9084 %{
9085   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9086   effect(KILL cr);  // the flags operand is declared killed
9087 
9088   ins_cost(150);
9089   format %{ "xorl    $dst, $src\t# int" %}
9090   opcode(0x31); /* Opcode 31 /r */
     // The register operand (src) is passed first to the encoders here.
9091   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9092   ins_pipe(ialu_mem_reg);
9093 %}
9094 
9095 // Xor Memory with Immediate (int): matches a load / xor-with-constant /
     // store back to the same memory operand and emits one "xorl".
9096 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9097 %{
9098   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9099   effect(KILL cr);  // the flags operand is declared killed
9100 
9101   ins_cost(125);
9102   format %{ "xorl    $dst, $src\t# int" %}
9103   opcode(0x81, 0x6); /* Opcode 81 /6 id */
     // "secondary" refers to the second opcode() value (the /6 extension).
9104   ins_encode(REX_mem(dst), OpcSE(src),
9105              RM_opc_mem(secondary, dst), Con8or32(src));
9106   ins_pipe(ialu_mem_imm);
9107 %}
9108 
9109 
9110 // Long Logical Instructions
9111 
9112 // And Instructions
9113 // And Register with Register (long): dst = dst & src, printed as "andq".
     // Same opcode as the int form but with the *_wide REX encoder.
9114 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9115 %{
9116   match(Set dst (AndL dst src));
9117   effect(KILL cr);  // the flags operand is declared killed
9118 
9119   format %{ "andq    $dst, $src\t# long" %}
9120   opcode(0x23);
9121   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9122   ins_pipe(ialu_reg_reg);
9123 %}
9124 
9125 // And Register with Immediate 255 (long): dst & 0xFF is implemented as a
     // zero-extending byte move of dst onto itself ("movzbq") instead of an
     // AND. No flags operand or KILL effect is declared for this form.
9126 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9127 %{
9128   match(Set dst (AndL dst src));
9129 
9130   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9131   opcode(0x0F, 0xB6);
9132   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9133   ins_pipe(ialu_reg);
9134 %}
9135 
9136 // And Register with Immediate 65535 (long): dst & 0xFFFF is implemented
     // as a zero-extending 16-bit move of dst onto itself ("movzwq") instead
     // of an AND. No flags operand or KILL effect is declared for this form.
9137 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9138 %{
9139   match(Set dst (AndL dst src));
9140 
9141   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9142   opcode(0x0F, 0xB7);
9143   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9144   ins_pipe(ialu_reg);
9145 %}
9146 
9147 // And Register with Immediate (long): dst = dst & src, printed as "andq".
     // The constant operand is of class immL32 (presumably a long constant
     // that fits a sign-extended 32-bit immediate -- confirm in the operand
     // definition).
9148 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9149 %{
9150   match(Set dst (AndL dst src));
9151   effect(KILL cr);  // the flags operand is declared killed
9152 
9153   format %{ "andq    $dst, $src\t# long" %}
9154   opcode(0x81, 0x04); /* Opcode 81 /4 */
9155   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9156   ins_pipe(ialu_reg);
9157 %}
9158 
9159 // And Register with Memory (long): dst = dst & LoadL(src), folding the
     // load into the "andq" as a memory source operand.
9160 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9161 %{
9162   match(Set dst (AndL dst (LoadL src)));
9163   effect(KILL cr);  // the flags operand is declared killed
9164 
9165   ins_cost(125);
9166   format %{ "andq    $dst, $src\t# long" %}
9167   opcode(0x23);
9168   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9169   ins_pipe(ialu_reg_mem);
9170 %}
9171 
9172 // And Memory with Register (long): matches a load / and / store back to
     // the same memory operand and emits one "andq" with a memory destination.
9173 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9174 %{
9175   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9176   effect(KILL cr);  // the flags operand is declared killed
9177 
9178   ins_cost(150);
9179   format %{ "andq    $dst, $src\t# long" %}
9180   opcode(0x21); /* Opcode 21 /r */
     // The register operand (src) is passed first to the encoders here.
9181   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9182   ins_pipe(ialu_mem_reg);
9183 %}
9184 
9185 // And Memory with Immediate (long): matches a load / and-with-constant /
     // store back to the same memory operand and emits one "andq".
9186 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9187 %{
9188   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9189   effect(KILL cr);  // the flags operand is declared killed
9190 
9191   ins_cost(125);
9192   format %{ "andq    $dst, $src\t# long" %}
9193   opcode(0x81, 0x4); /* Opcode 81 /4 id */
     // "secondary" refers to the second opcode() value (the /4 extension).
9194   ins_encode(REX_mem_wide(dst), OpcSE(src),
9195              RM_opc_mem(secondary, dst), Con8or32(src));
9196   ins_pipe(ialu_mem_imm);
9197 %}
9198 
9199 // Or Instructions
9200 // Or Register with Register (long): dst = dst | src, printed as "orq".
     // Same opcode as the int form but with the *_wide REX encoder.
9201 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9202 %{
9203   match(Set dst (OrL dst src));
9204   effect(KILL cr);  // the flags operand is declared killed
9205 
9206   format %{ "orq     $dst, $src\t# long" %}
9207   opcode(0x0B);
9208   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9209   ins_pipe(ialu_reg_reg);
9210 %}
9211 
9212 // Or a long register with a pointer reinterpreted as an integer:
     // dst = dst | CastP2X(src), emitted with the same encoding as orL_rReg.
     // Use any_RegP to match R15 (TLS register) without spilling.
9213 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9214   match(Set dst (OrL dst (CastP2X src)));
9215   effect(KILL cr);  // the flags operand is declared killed
9216 
9217   format %{ "orq     $dst, $src\t# long" %}
9218   opcode(0x0B);
9219   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9220   ins_pipe(ialu_reg_reg);
9221 %}
9222 
9223 
9224 // Or Register with Immediate (long): dst = dst | src, printed as "orq".
     // The constant operand is of class immL32 (presumably a long constant
     // that fits a sign-extended 32-bit immediate -- confirm in the operand
     // definition).
9225 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9226 %{
9227   match(Set dst (OrL dst src));
9228   effect(KILL cr);  // the flags operand is declared killed
9229 
9230   format %{ "orq     $dst, $src\t# long" %}
9231   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9232   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9233   ins_pipe(ialu_reg);
9234 %}
9235 
9236 // Or Register with Memory (long): dst = dst | LoadL(src), folding the load
     // into the "orq" as a memory source operand.
9237 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9238 %{
9239   match(Set dst (OrL dst (LoadL src)));
9240   effect(KILL cr);  // the flags operand is declared killed
9241 
9242   ins_cost(125);
9243   format %{ "orq     $dst, $src\t# long" %}
9244   opcode(0x0B);
9245   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9246   ins_pipe(ialu_reg_mem);
9247 %}
9248 
9249 // Or Memory with Register (long): matches a load / or / store back to the
     // same memory operand and emits one "orq" with a memory destination.
9250 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9251 %{
9252   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9253   effect(KILL cr);  // the flags operand is declared killed
9254 
9255   ins_cost(150);
9256   format %{ "orq     $dst, $src\t# long" %}
9257   opcode(0x09); /* Opcode 09 /r */
     // The register operand (src) is passed first to the encoders here.
9258   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9259   ins_pipe(ialu_mem_reg);
9260 %}
9261 
9262 // Or Memory with Immediate (long): matches a load / or-with-constant /
     // store back to the same memory operand and emits one "orq".
9263 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9264 %{
9265   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9266   effect(KILL cr);  // the flags operand is declared killed
9267 
9268   ins_cost(125);
9269   format %{ "orq     $dst, $src\t# long" %}
9270   opcode(0x81, 0x1); /* Opcode 81 /1 id */
     // "secondary" refers to the second opcode() value (the /1 extension).
9271   ins_encode(REX_mem_wide(dst), OpcSE(src),
9272              RM_opc_mem(secondary, dst), Con8or32(src));
9273   ins_pipe(ialu_mem_imm);
9274 %}
9275 
9276 // Xor Instructions
9277 // Xor Register with Register (long): dst = dst ^ src, printed as "xorq".
     // Same opcode as the int form but with the *_wide REX encoder.
9278 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9279 %{
9280   match(Set dst (XorL dst src));
9281   effect(KILL cr);  // the flags operand is declared killed
9282 
9283   format %{ "xorq    $dst, $src\t# long" %}
9284   opcode(0x33);
9285   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9286   ins_pipe(ialu_reg_reg);
9287 %}
9288 
9289 // Xor Register with Immediate -1 (long): dst ^ imm, with imm of operand
     // class immL_M1, is a bitwise complement and is emitted through the
     // assembler's notq(). No flags operand or KILL effect is declared.
9290 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9291   match(Set dst (XorL dst imm));
9292 
9293   format %{ "notq   $dst" %}
9294   ins_encode %{
9295      __ notq($dst$$Register);
9296   %}
9297   ins_pipe(ialu_reg);
9298 %}
9299 
9300 // Xor Register with Immediate (long): dst = dst ^ src, printed as "xorq".
     // The constant operand is of class immL32 (presumably a long constant
     // that fits a sign-extended 32-bit immediate -- confirm in the operand
     // definition).
9301 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9302 %{
9303   match(Set dst (XorL dst src));
9304   effect(KILL cr);  // the flags operand is declared killed
9305 
9306   format %{ "xorq    $dst, $src\t# long" %}
9307   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9308   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9309   ins_pipe(ialu_reg);
9310 %}
9311 
9312 // Xor Register with Memory (long): dst = dst ^ LoadL(src), folding the
     // load into the "xorq" as a memory source operand.
9313 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9314 %{
9315   match(Set dst (XorL dst (LoadL src)));
9316   effect(KILL cr);  // the flags operand is declared killed
9317 
9318   ins_cost(125);
9319   format %{ "xorq    $dst, $src\t# long" %}
9320   opcode(0x33);
9321   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9322   ins_pipe(ialu_reg_mem);
9323 %}
9324 
9325 // Xor Memory with Register (long): matches a load / xor / store back to
     // the same memory operand and emits one "xorq" with a memory destination.
9326 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9327 %{
9328   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9329   effect(KILL cr);  // the flags operand is declared killed
9330 
9331   ins_cost(150);
9332   format %{ "xorq    $dst, $src\t# long" %}
9333   opcode(0x31); /* Opcode 31 /r */
     // The register operand (src) is passed first to the encoders here.
9334   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9335   ins_pipe(ialu_mem_reg);
9336 %}
9337 
9338 // Xor Memory with Immediate (long): matches a load / xor-with-constant /
     // store back to the same memory operand and emits one "xorq".
9339 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9340 %{
9341   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9342   effect(KILL cr);  // the flags operand is declared killed
9343 
9344   ins_cost(125);
9345   format %{ "xorq    $dst, $src\t# long" %}
9346   opcode(0x81, 0x6); /* Opcode 81 /6 id */
     // "secondary" refers to the second opcode() value (the /6 extension).
9347   ins_encode(REX_mem_wide(dst), OpcSE(src),
9348              RM_opc_mem(secondary, dst), Con8or32(src));
9349   ins_pipe(ialu_mem_imm);
9350 %}
9351 
9352 // Convert Int to Boolean: dst = Conv2B(src). Per the format below this is
     // a three-instruction sequence: test src against itself, set the low
     // byte of dst on "not zero", then zero-extend that byte into dst.
9353 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9354 %{
9355   match(Set dst (Conv2B src));
9356   effect(KILL cr);  // the flags operand is declared killed
9357 
9358   format %{ "testl   $src, $src\t# ci2b\n\t"
9359             "setnz   $dst\n\t"
9360             "movzbl  $dst, $dst" %}
9361   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9362              setNZ_reg(dst),
9363              REX_reg_breg(dst, dst), // movzbl
9364              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9365   ins_pipe(pipe_slow); // XXX
9366 %}
9367 
9368 // Convert Pointer to Boolean: dst = Conv2B(src) for a pointer source.
     // Same sequence as convI2B except that the test uses the wide REX
     // encoder (printed as "testq").
9369 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9370 %{
9371   match(Set dst (Conv2B src));
9372   effect(KILL cr);  // the flags operand is declared killed
9373 
9374   format %{ "testq   $src, $src\t# cp2b\n\t"
9375             "setnz   $dst\n\t"
9376             "movzbl  $dst, $dst" %}
9377   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9378              setNZ_reg(dst),
9379              REX_reg_breg(dst, dst), // movzbl
9380              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9381   ins_pipe(pipe_slow); // XXX
9382 %}
9383 
9384 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9385 %{
     // General form of dst = CmpLTMask(p, q). Per the format below: compare
     // p with q, set the low byte of dst on "less than", zero-extend it, then
     // negate, so dst ends up as 0 or -1 (all ones).
9386   match(Set dst (CmpLTMask p q));
9387   effect(KILL cr);  // the flags operand is declared killed
9388 
9389   ins_cost(400);
9390   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9391             "setlt   $dst\n\t"
9392             "movzbl  $dst, $dst\n\t"
9393             "negl    $dst" %}
9394   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9395              setLT_reg(dst),
9396              REX_reg_breg(dst, dst), // movzbl
9397              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9398              neg_reg(dst));
9399   ins_pipe(pipe_slow);
9400 %}
9401 
9402 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9403 %{
     // Special case dst = CmpLTMask(dst, zero): a single arithmetic right
     // shift of dst by 31 replicates its sign bit across the register. This
     // form is given a lower cost (100) than the general cmpLTMask (400).
9404   match(Set dst (CmpLTMask dst zero));
9405   effect(KILL cr);  // the flags operand is declared killed
9406 
9407   ins_cost(100);
9408   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9409   ins_encode %{
9410   __ sarl($dst$$Register, 31);
9411   %}
9412   ins_pipe(ialu_reg);
9413 %}
9414 
9415 /* Better to save a register than avoid a branch */
     // Conditional add: matches p = (CmpLTMask(p, q) & y) + (p - q) and
     // implements it without materializing the mask: subtract q from p, then
     // add y only when the subtraction compared as "less than".
9416 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9417 %{
9418   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9419   effect(KILL cr);  // the flags operand is declared killed
9420   ins_cost(300);
9421   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
9422             "jge    done\n\t"
9423             "addl   $p,$y\n"
9424             "done:  " %}
9425   ins_encode %{
9426     Register Rp = $p$$Register;
9427     Register Rq = $q$$Register;
9428     Register Ry = $y$$Register;
9429     Label done;
9430     __ subl(Rp, Rq);                         // p -= q; also sets the flags tested next
9431     __ jccb(Assembler::greaterEqual, done);  // original p >= q: skip the add
9432     __ addl(Rp, Ry);                         // original p < q: p += y
9433     __ bind(done);
9434   %}
9435   ins_pipe(pipe_cmplt);
9436 %}
9437 
9438 /* Better to save a register than avoid a branch */
     // Conditional clear: matches y = CmpLTMask(p, q) & y and implements it
     // without materializing the mask: y is left untouched when p < q and
     // zeroed otherwise.
9439 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9440 %{
9441   match(Set y (AndI (CmpLTMask p q) y));
9442   effect(KILL cr);  // the flags operand is declared killed
9443 
9444   ins_cost(300);
9445 
9446   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
9447             "jlt      done\n\t"
9448             "xorl     $y, $y\n"
9449             "done:  " %}
9450   ins_encode %{
9451     Register Rp = $p$$Register;
9452     Register Rq = $q$$Register;
9453     Register Ry = $y$$Register;
9454     Label done;
9455     __ cmpl(Rp, Rq);
9456     __ jccb(Assembler::less, done);  // p < q: keep y as is
9457     __ xorl(Ry, Ry);                 // otherwise y = 0
9458     __ bind(done);
9459   %}
9460   ins_pipe(pipe_cmplt);
9461 %}
9462 
9463 
9464 //---------- FP Instructions------------------------------------------------
9465 
9466 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9467 %{
     // Float compare of two registers into the flags operand cr (class
     // rFlagsRegU). Emits ucomiss followed by emit_cmpfp_fixup(); the format
     // below describes that fixup as a flags rewrite done through the stack
     // that is skipped when the parity flag is clear (the "saw NaN" path).
9468   match(Set cr (CmpF src1 src2));
9469 
9470   ins_cost(145);
9471   format %{ "ucomiss $src1, $src2\n\t"
9472             "jnp,s   exit\n\t"
9473             "pushfq\t# saw NaN, set CF\n\t"
9474             "andq    [rsp], #0xffffff2b\n\t"
9475             "popfq\n"
9476     "exit:" %}
9477   ins_encode %{
9478     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9479     emit_cmpfp_fixup(_masm);
9480   %}
9481   ins_pipe(pipe_slow);
9482 %}
9483 
9484 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
     // Cheaper variant of cmpF_cc_reg (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomiss with no NaN fixup sequence.
9485   match(Set cr (CmpF src1 src2));
9486 
9487   ins_cost(100);
9488   format %{ "ucomiss $src1, $src2" %}
9489   ins_encode %{
9490     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9491   %}
9492   ins_pipe(pipe_slow);
9493 %}
9494 
9495 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9496 %{
     // Float compare of a register with a value loaded from memory, into the
     // flags operand cr (class rFlagsRegU). The LoadF is folded into ucomiss
     // as a memory operand; emit_cmpfp_fixup() then applies the NaN fixup
     // sequence described in the format.
9497   match(Set cr (CmpF src1 (LoadF src2)));
9498 
9499   ins_cost(145);
9500   format %{ "ucomiss $src1, $src2\n\t"
9501             "jnp,s   exit\n\t"
9502             "pushfq\t# saw NaN, set CF\n\t"
9503             "andq    [rsp], #0xffffff2b\n\t"
9504             "popfq\n"
9505     "exit:" %}
9506   ins_encode %{
9507     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9508     emit_cmpfp_fixup(_masm);
9509   %}
9510   ins_pipe(pipe_slow);
9511 %}
9512 
9513 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
     // Cheaper variant of cmpF_cc_mem (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomiss with a memory operand and no
     // NaN fixup sequence.
9514   match(Set cr (CmpF src1 (LoadF src2)));
9515 
9516   ins_cost(100);
9517   format %{ "ucomiss $src1, $src2" %}
9518   ins_encode %{
9519     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9520   %}
9521   ins_pipe(pipe_slow);
9522 %}
9523 
9524 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
     // Float compare of a register with a float constant, into the flags
     // operand cr (class rFlagsRegU). The constant is addressed through
     // $constantaddress (the constant table, per the format text);
     // emit_cmpfp_fixup() then applies the NaN fixup sequence.
9525   match(Set cr (CmpF src con));
9526 
9527   ins_cost(145);
9528   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9529             "jnp,s   exit\n\t"
9530             "pushfq\t# saw NaN, set CF\n\t"
9531             "andq    [rsp], #0xffffff2b\n\t"
9532             "popfq\n"
9533     "exit:" %}
9534   ins_encode %{
9535     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9536     emit_cmpfp_fixup(_masm);
9537   %}
9538   ins_pipe(pipe_slow);
9539 %}
9540 
9541 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
     // Cheaper variant of cmpF_cc_imm (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomiss against the constant-table
     // entry with no NaN fixup sequence.
9542   match(Set cr (CmpF src con));
9543   ins_cost(100);
9544   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9545   ins_encode %{
9546     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9547   %}
9548   ins_pipe(pipe_slow);
9549 %}
9550 
9551 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9552 %{
     // Double compare of two registers into the flags operand cr (class
     // rFlagsRegU). Emits ucomisd followed by emit_cmpfp_fixup(); the format
     // below describes that fixup as a flags rewrite done through the stack
     // that is skipped when the parity flag is clear (the "saw NaN" path).
9553   match(Set cr (CmpD src1 src2));
9554 
9555   ins_cost(145);
9556   format %{ "ucomisd $src1, $src2\n\t"
9557             "jnp,s   exit\n\t"
9558             "pushfq\t# saw NaN, set CF\n\t"
9559             "andq    [rsp], #0xffffff2b\n\t"
9560             "popfq\n"
9561     "exit:" %}
9562   ins_encode %{
9563     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9564     emit_cmpfp_fixup(_masm);
9565   %}
9566   ins_pipe(pipe_slow);
9567 %}
9568 
9569 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
     // Cheaper variant of cmpD_cc_reg (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomisd with no NaN fixup sequence.
9570   match(Set cr (CmpD src1 src2));
9571 
9572   ins_cost(100);
     // The format previously ended in a stray " test" word, which does not
     // correspond to anything emitted; it now matches the sibling *_CF forms.
9573   format %{ "ucomisd $src1, $src2" %}
9574   ins_encode %{
9575     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9576   %}
9577   ins_pipe(pipe_slow);
9578 %}
9579 
9580 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9581 %{
     // Double compare of a register with a value loaded from memory, into
     // the flags operand cr (class rFlagsRegU). The LoadD is folded into
     // ucomisd as a memory operand; emit_cmpfp_fixup() then applies the NaN
     // fixup sequence described in the format.
9582   match(Set cr (CmpD src1 (LoadD src2)));
9583 
9584   ins_cost(145);
9585   format %{ "ucomisd $src1, $src2\n\t"
9586             "jnp,s   exit\n\t"
9587             "pushfq\t# saw NaN, set CF\n\t"
9588             "andq    [rsp], #0xffffff2b\n\t"
9589             "popfq\n"
9590     "exit:" %}
9591   ins_encode %{
9592     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9593     emit_cmpfp_fixup(_masm);
9594   %}
9595   ins_pipe(pipe_slow);
9596 %}
9597 
9598 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
     // Cheaper variant of cmpD_cc_mem (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomisd with a memory operand and no
     // NaN fixup sequence.
9599   match(Set cr (CmpD src1 (LoadD src2)));
9600 
9601   ins_cost(100);
9602   format %{ "ucomisd $src1, $src2" %}
9603   ins_encode %{
9604     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9605   %}
9606   ins_pipe(pipe_slow);
9607 %}
9608 
9609 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
     // Double compare of a register with a double constant, into the flags
     // operand cr (class rFlagsRegU). The constant is addressed through
     // $constantaddress (the constant table, per the format text);
     // emit_cmpfp_fixup() then applies the NaN fixup sequence.
9610   match(Set cr (CmpD src con));
9611 
9612   ins_cost(145);
9613   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9614             "jnp,s   exit\n\t"
9615             "pushfq\t# saw NaN, set CF\n\t"
9616             "andq    [rsp], #0xffffff2b\n\t"
9617             "popfq\n"
9618     "exit:" %}
9619   ins_encode %{
9620     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9621     emit_cmpfp_fixup(_masm);
9622   %}
9623   ins_pipe(pipe_slow);
9624 %}
9625 
9626 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
     // Cheaper variant of cmpD_cc_imm (cost 100 vs 145) that produces the
     // rFlagsRegUCF flags class: a bare ucomisd against the constant-table
     // entry with no NaN fixup sequence.
9627   match(Set cr (CmpD src con));
9628   ins_cost(100);
9629   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
9630   ins_encode %{
9631     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9632   %}
9633   ins_pipe(pipe_slow);
9634 %}
9635 
9636 // Compare into -1,0,1: three-way float compare (CmpF3) of two registers
     // into an int register. Emits ucomiss, then emit_cmpfp3() materializes the
     // result in dst; per the format, dst starts as -1 and is kept on the
     // "parity" and "below" branches, otherwise it becomes 0 or 1 via setne.
9637 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9638 %{
9639   match(Set dst (CmpF3 src1 src2));
9640   effect(KILL cr);  // the flags operand is declared killed
9641 
9642   ins_cost(275);
9643   format %{ "ucomiss $src1, $src2\n\t"
9644             "movl    $dst, #-1\n\t"
9645             "jp,s    done\n\t"
9646             "jb,s    done\n\t"
9647             "setne   $dst\n\t"
9648             "movzbl  $dst, $dst\n"
9649     "done:" %}
9650   ins_encode %{
9651     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9652     emit_cmpfp3(_masm, $dst$$Register);
9653   %}
9654   ins_pipe(pipe_slow);
9655 %}
9656 
9657 // Compare into -1,0,1: three-way float compare (CmpF3) of a register with
     // a value loaded from memory. The LoadF is folded into ucomiss as a
     // memory operand; emit_cmpfp3() then materializes the result in dst.
9658 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9659 %{
9660   match(Set dst (CmpF3 src1 (LoadF src2)));
9661   effect(KILL cr);  // the flags operand is declared killed
9662 
9663   ins_cost(275);
9664   format %{ "ucomiss $src1, $src2\n\t"
9665             "movl    $dst, #-1\n\t"
9666             "jp,s    done\n\t"
9667             "jb,s    done\n\t"
9668             "setne   $dst\n\t"
9669             "movzbl  $dst, $dst\n"
9670     "done:" %}
9671   ins_encode %{
9672     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9673     emit_cmpfp3(_masm, $dst$$Register);
9674   %}
9675   ins_pipe(pipe_slow);
9676 %}
9677 
9678 // Compare into -1,0,1: three-way float compare (CmpF3) of a register with
     // a float constant addressed through $constantaddress; emit_cmpfp3() then
     // materializes the result in dst.
9679 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
9680   match(Set dst (CmpF3 src con));
9681   effect(KILL cr);  // the flags operand is declared killed
9682 
9683   ins_cost(275);
9684   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9685             "movl    $dst, #-1\n\t"
9686             "jp,s    done\n\t"
9687             "jb,s    done\n\t"
9688             "setne   $dst\n\t"
9689             "movzbl  $dst, $dst\n"
9690     "done:" %}
9691   ins_encode %{
9692     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9693     emit_cmpfp3(_masm, $dst$$Register);
9694   %}
9695   ins_pipe(pipe_slow);
9696 %}
9697 
9698 // Compare into -1,0,1: three-way double compare (CmpD3) of two registers
     // into an int register. Emits ucomisd, then emit_cmpfp3() materializes the
     // result in dst; per the format, dst starts as -1 and is kept on the
     // "parity" and "below" branches, otherwise it becomes 0 or 1 via setne.
9699 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9700 %{
9701   match(Set dst (CmpD3 src1 src2));
9702   effect(KILL cr);  // the flags operand is declared killed
9703 
9704   ins_cost(275);
9705   format %{ "ucomisd $src1, $src2\n\t"
9706             "movl    $dst, #-1\n\t"
9707             "jp,s    done\n\t"
9708             "jb,s    done\n\t"
9709             "setne   $dst\n\t"
9710             "movzbl  $dst, $dst\n"
9711     "done:" %}
9712   ins_encode %{
9713     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9714     emit_cmpfp3(_masm, $dst$$Register);
9715   %}
9716   ins_pipe(pipe_slow);
9717 %}
9718 
9719 // Compare into -1,0,1: three-way double compare (CmpD3) of a register
     // with a value loaded from memory. The LoadD is folded into ucomisd as a
     // memory operand; emit_cmpfp3() then materializes the result in dst.
9720 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9721 %{
9722   match(Set dst (CmpD3 src1 (LoadD src2)));
9723   effect(KILL cr);  // the flags operand is declared killed
9724 
9725   ins_cost(275);
9726   format %{ "ucomisd $src1, $src2\n\t"
9727             "movl    $dst, #-1\n\t"
9728             "jp,s    done\n\t"
9729             "jb,s    done\n\t"
9730             "setne   $dst\n\t"
9731             "movzbl  $dst, $dst\n"
9732     "done:" %}
9733   ins_encode %{
9734     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9735     emit_cmpfp3(_masm, $dst$$Register);
9736   %}
9737   ins_pipe(pipe_slow);
9738 %}
9739 
9740 // Compare into -1,0,1: three-way double compare (CmpD3) of a register
     // with a double constant addressed through $constantaddress;
     // emit_cmpfp3() then materializes the result in dst.
9741 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
9742   match(Set dst (CmpD3 src con));
9743   effect(KILL cr);  // the flags operand is declared killed
9744 
9745   ins_cost(275);
9746   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9747             "movl    $dst, #-1\n\t"
9748             "jp,s    done\n\t"
9749             "jb,s    done\n\t"
9750             "setne   $dst\n\t"
9751             "movzbl  $dst, $dst\n"
9752     "done:" %}
9753   ins_encode %{
9754     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9755     emit_cmpfp3(_masm, $dst$$Register);
9756   %}
9757   ins_pipe(pipe_slow);
9758 %}
9759 
9760 // -----------Trig and Transcendental Instructions-----------------------------
9761 instruct cosD_reg(regD dst) %{
     // In-place cosine of the double in $dst (dst = CosD(dst)). The encoding
     // brackets the two opcode bytes D9 FF with Push_SrcXD/Push_ResultXD,
     // which presumably move the value from the XMM register to the x87
     // stack and back -- confirm in the enc_class definitions.
9762   match(Set dst (CosD dst));
9763 
9764   format %{ "dcos   $dst\n\t" %}
9765   opcode(0xD9, 0xFF);
9766   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9767   ins_pipe( pipe_slow );
9768 %}
9769 
9770 instruct sinD_reg(regD dst) %{
     // In-place sine of the double in $dst (dst = SinD(dst)). The encoding
     // brackets the two opcode bytes D9 FE with Push_SrcXD/Push_ResultXD,
     // which presumably move the value from the XMM register to the x87
     // stack and back -- confirm in the enc_class definitions.
9771   match(Set dst (SinD dst));
9772 
9773   format %{ "dsin   $dst\n\t" %}
9774   opcode(0xD9, 0xFE);
9775   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9776   ins_pipe( pipe_slow );
9777 %}
9778 
9779 instruct tanD_reg(regD dst) %{
     // In-place tangent of the double in $dst (dst = TanD(dst)). Between
     // Push_SrcXD and Push_ResultXD the encoding emits fptan followed by an
     // "fstp st"; the extra pop presumably discards the additional value
     // fptan leaves on the x87 stack -- see the fptan instruction reference.
9780   match(Set dst (TanD dst));
9781 
9782   format %{ "dtan   $dst\n\t" %}
9783   ins_encode( Push_SrcXD(dst),
9784               Opcode(0xD9), Opcode(0xF2),   //fptan
9785               Opcode(0xDD), Opcode(0xD8),   //fstp st
9786               Push_ResultXD(dst) );
9787   ins_pipe( pipe_slow );
9788 %}
9789 
9790 instruct log10D_reg(regD dst) %{
     // In-place base-10 logarithm of the double in $dst. The constant
     // log_10(2) is pushed first, then the source value, and fyl2x combines
     // them; the result is moved back into $dst by Push_ResultXD.
9791   // The source and result Double operands in XMM registers
9792   match(Set dst (Log10D dst));
9793   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9794   // fyl2x        ; compute log_10(2) * log_2(x)
9795   format %{ "fldlg2\t\t\t#Log10\n\t"
9796             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
9797          %}
9798    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
9799               Push_SrcXD(dst),
9800               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9801               Push_ResultXD(dst));
9802 
9803   ins_pipe( pipe_slow );
9804 %}
9805 
// In-place natural logarithm of a double held in an XMM register (LogD).
// Same shape as log10D_reg, but the constant loaded first is log_e(2):
// fyl2x then computes log_e(2) * log_2(x).
9806 instruct logD_reg(regD dst) %{
9807   // The source and result Double operands in XMM registers
9808   match(Set dst (LogD dst));
9809   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9810   // fyl2x        ; compute log_e(2) * log_2(x)
9811   format %{ "fldln2\t\t\t#Log_e\n\t"
9812             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
9813          %}
9814   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9815               Push_SrcXD(dst),
9816               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9817               Push_ResultXD(dst));
9818   ins_pipe( pipe_slow );
9819 %}
9820 
// Double power function (PowD): $dst = $src0 raised to $src1.
// Both operands are bounced through an 8-byte scratch slot on the stack
// onto the x87 stack, fast_pow() (MacroAssembler, defined elsewhere) runs,
// and the result travels back through the same slot into $dst.
// rax, rdx, rcx and the flags are declared killed.
9821 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9822   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9823   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9824   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9825   ins_encode %{
9826     __ subptr(rsp, 8);                               // reserve scratch slot
9827     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9828     __ fld_d(Address(rsp, 0));                       // exponent pushed first
9829     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9830     __ fld_d(Address(rsp, 0));                       // base ends up on top
9831     __ fast_pow();
9832     __ fstp_d(Address(rsp, 0));                      // pop result to the slot
9833     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9834     __ addptr(rsp, 8);                               // release scratch slot
9835   %}
9836   ins_pipe( pipe_slow );
9837 %}
9838 
// Double exponential (ExpD): $dst = exp($src).
// $src is bounced through an 8-byte scratch slot on the stack onto the x87
// stack, fast_exp() (MacroAssembler, defined elsewhere) runs, and the result
// travels back through the same slot into $dst.
// rax, rcx, rdx and the flags are declared killed.
9839 instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9840   match(Set dst (ExpD src));
9841   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
     // Debug text reads input -> output, matching powD_reg.
9842   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
9843   ins_encode %{
9844     __ subptr(rsp, 8);                              // reserve scratch slot
9845     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9846     __ fld_d(Address(rsp, 0));                      // operand onto x87 stack
9847     __ fast_exp();
9848     __ fstp_d(Address(rsp, 0));                     // pop result to the slot
9849     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9850     __ addptr(rsp, 8);                              // release scratch slot
9851   %}
9852   ins_pipe( pipe_slow );
9853 %}
9854 
9855 //----------Arithmetic Conversion Instructions---------------------------------
9856 
// RoundFloat on an XMM float register: matched in place with an empty
// encoding and zero cost, so no machine code is emitted for it.
9857 instruct roundFloat_nop(regF dst)
9858 %{
9859   match(Set dst (RoundFloat dst));
9860 
9861   ins_cost(0);
9862   ins_encode();
9863   ins_pipe(empty);
9864 %}
9865 
// RoundDouble on an XMM double register: matched in place with an empty
// encoding and zero cost, so no machine code is emitted for it.
9866 instruct roundDouble_nop(regD dst)
9867 %{
9868   match(Set dst (RoundDouble dst));
9869 
9870   ins_cost(0);
9871   ins_encode();
9872   ins_pipe(empty);
9873 %}
9874 
// Float -> double conversion (ConvF2D), register to register, via cvtss2sd.
9875 instruct convF2D_reg_reg(regD dst, regF src)
9876 %{
9877   match(Set dst (ConvF2D src));
9878 
9879   format %{ "cvtss2sd $dst, $src" %}
9880   ins_encode %{
9881     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
9882   %}
9883   ins_pipe(pipe_slow); // XXX
9884 %}
9885 
// Float -> double conversion (ConvF2D) with the float load folded in:
// cvtss2sd takes its source operand directly from memory.
9886 instruct convF2D_reg_mem(regD dst, memory src)
9887 %{
9888   match(Set dst (ConvF2D (LoadF src)));
9889 
9890   format %{ "cvtss2sd $dst, $src" %}
9891   ins_encode %{
9892     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
9893   %}
9894   ins_pipe(pipe_slow); // XXX
9895 %}
9896 
// Double -> float conversion (ConvD2F), register to register, via cvtsd2ss.
9897 instruct convD2F_reg_reg(regF dst, regD src)
9898 %{
9899   match(Set dst (ConvD2F src));
9900 
9901   format %{ "cvtsd2ss $dst, $src" %}
9902   ins_encode %{
9903     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
9904   %}
9905   ins_pipe(pipe_slow); // XXX
9906 %}
9907 
// Double -> float conversion (ConvD2F) with the double load folded in:
// cvtsd2ss takes its source operand directly from memory.
9908 instruct convD2F_reg_mem(regF dst, memory src)
9909 %{
9910   match(Set dst (ConvD2F (LoadD src)));
9911 
9912   format %{ "cvtsd2ss $dst, $src" %}
9913   ins_encode %{
9914     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
9915   %}
9916   ins_pipe(pipe_slow); // XXX
9917 %}
9918 
9919 // XXX do mem variants
// Float -> int conversion (ConvF2I) with a slow-path fixup.
// Fast path: truncating cvttss2si. A result of 0x80000000 is what the
// instruction returns for NaN / out-of-range input (Intel SDM "integer
// indefinite"), so only that value takes the slow path: $src is spilled to
// an 8-byte stack slot, the f2i_fixup stub is called, and the slot is popped
// into $dst. Presumably the stub overwrites the slot with the corrected
// result -- confirm in the stub generator.
9920 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
9921 %{
9922   match(Set dst (ConvF2I src));
9923   effect(KILL cr);  // cmpl overwrites the condition flags
9924 
9925   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
9926             "cmpl    $dst, #0x80000000\n\t"
9927             "jne,s   done\n\t"
9928             "subq    rsp, #8\n\t"
9929             "movss   [rsp], $src\n\t"
9930             "call    f2i_fixup\n\t"
9931             "popq    $dst\n"
9932     "done:   "%}
9933   ins_encode %{
9934     Label done;
9935     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
9936     __ cmpl($dst$$Register, 0x80000000);
9937     __ jccb(Assembler::notEqual, done);               // common case: no fixup
9938     __ subptr(rsp, 8);                                // slot for argument/result
9939     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9940     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
9941     __ pop($dst$$Register);                           // also releases the slot
9942     __ bind(done);
9943   %}
9944   ins_pipe(pipe_slow);
9945 %}
9946 
// Float -> long conversion (ConvF2L) with a slow-path fixup.
// Fast path: truncating cvttss2siq. The result is compared against the
// 64-bit value stored at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the format text); only on a match is $src spilled
// to the stack, f2l_fixup called, and the slot popped into $dst.
// Presumably the stub overwrites the slot with the corrected result.
9947 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
9948 %{
9949   match(Set dst (ConvF2L src));
9950   effect(KILL cr);  // the compare overwrites the condition flags
9951 
9952   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
9953             "cmpq    $dst, [0x8000000000000000]\n\t"
9954             "jne,s   done\n\t"
9955             "subq    rsp, #8\n\t"
9956             "movss   [rsp], $src\n\t"
9957             "call    f2l_fixup\n\t"
9958             "popq    $dst\n"
9959     "done:   "%}
9960   ins_encode %{
9961     Label done;
9962     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
9963     __ cmp64($dst$$Register,
9964              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
9965     __ jccb(Assembler::notEqual, done);               // common case: no fixup
9966     __ subptr(rsp, 8);                                // slot for argument/result
9967     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9968     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
9969     __ pop($dst$$Register);                           // also releases the slot
9970     __ bind(done);
9971   %}
9972   ins_pipe(pipe_slow);
9973 %}
9974 
// Double -> int conversion (ConvD2I) with a slow-path fixup.
// Fast path: truncating cvttsd2si. A result of 0x80000000 is what the
// instruction returns for NaN / out-of-range input (Intel SDM "integer
// indefinite"); only then is $src spilled to the stack, d2i_fixup called,
// and the slot popped into $dst. Presumably the stub overwrites the slot
// with the corrected result.
9975 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
9976 %{
9977   match(Set dst (ConvD2I src));
9978   effect(KILL cr);  // cmpl overwrites the condition flags
9979 
9980   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
9981             "cmpl    $dst, #0x80000000\n\t"
9982             "jne,s   done\n\t"
9983             "subq    rsp, #8\n\t"
9984             "movsd   [rsp], $src\n\t"
9985             "call    d2i_fixup\n\t"
9986             "popq    $dst\n"
9987     "done:   "%}
9988   ins_encode %{
9989     Label done;
9990     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
9991     __ cmpl($dst$$Register, 0x80000000);
9992     __ jccb(Assembler::notEqual, done);               // common case: no fixup
9993     __ subptr(rsp, 8);                                // slot for argument/result
9994     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9995     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
9996     __ pop($dst$$Register);                           // also releases the slot
9997     __ bind(done);
9998   %}
9999   ins_pipe(pipe_slow);
10000 %}
10001 
// Double -> long conversion (ConvD2L) with a slow-path fixup.
// Fast path: truncating cvttsd2siq. The result is compared against the
// 64-bit value stored at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the format text); only on a match is $src spilled
// to the stack, d2l_fixup called, and the slot popped into $dst.
// Presumably the stub overwrites the slot with the corrected result.
10002 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10003 %{
10004   match(Set dst (ConvD2L src));
10005   effect(KILL cr);  // the compare overwrites the condition flags
10006 
10007   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10008             "cmpq    $dst, [0x8000000000000000]\n\t"
10009             "jne,s   done\n\t"
10010             "subq    rsp, #8\n\t"
10011             "movsd   [rsp], $src\n\t"
10012             "call    d2l_fixup\n\t"
10013             "popq    $dst\n"
10014     "done:   "%}
10015   ins_encode %{
10016     Label done;
10017     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10018     __ cmp64($dst$$Register,
10019              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10020     __ jccb(Assembler::notEqual, done);               // common case: no fixup
10021     __ subptr(rsp, 8);                                // slot for argument/result
10022     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10023     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10024     __ pop($dst$$Register);                           // also releases the slot
10025     __ bind(done);
10026   %}
10027   ins_pipe(pipe_slow);
10028 %}
10029 
// Int -> float conversion (ConvI2F) from a general register via cvtsi2ssl.
// Only selected when UseXmmI2F is off; convXI2F_reg covers the other case.
10030 instruct convI2F_reg_reg(regF dst, rRegI src)
10031 %{
10032   predicate(!UseXmmI2F);
10033   match(Set dst (ConvI2F src));
10034 
10035   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10036   ins_encode %{
10037     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10038   %}
10039   ins_pipe(pipe_slow); // XXX
10040 %}
10041 
// Int -> float conversion (ConvI2F) with the int load folded in:
// cvtsi2ssl reads its source directly from memory. This rule carries no
// predicate, so it does not depend on UseXmmI2F.
10042 instruct convI2F_reg_mem(regF dst, memory src)
10043 %{
10044   match(Set dst (ConvI2F (LoadI src)));
10045 
10046   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10047   ins_encode %{
10048     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10049   %}
10050   ins_pipe(pipe_slow); // XXX
10051 %}
10052 
// Int -> double conversion (ConvI2D) from a general register via cvtsi2sdl.
// Only selected when UseXmmI2D is off; convXI2D_reg covers the other case.
10053 instruct convI2D_reg_reg(regD dst, rRegI src)
10054 %{
10055   predicate(!UseXmmI2D);
10056   match(Set dst (ConvI2D src));
10057 
10058   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10059   ins_encode %{
10060     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10061   %}
10062   ins_pipe(pipe_slow); // XXX
10063 %}
10064 
// Int -> double conversion (ConvI2D) with the int load folded in:
// cvtsi2sdl reads its source directly from memory. This rule carries no
// predicate, so it does not depend on UseXmmI2D.
10065 instruct convI2D_reg_mem(regD dst, memory src)
10066 %{
10067   match(Set dst (ConvI2D (LoadI src)));
10068 
10069   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10070   ins_encode %{
10071     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10072   %}
10073   ins_pipe(pipe_slow); // XXX
10074 %}
10075 
// Int -> float conversion (ConvI2F) used when UseXmmI2F is on.
// The int is first moved into the XMM register with movdl and then
// converted there with the packed cvtdq2ps, instead of using cvtsi2ssl.
10076 instruct convXI2F_reg(regF dst, rRegI src)
10077 %{
10078   predicate(UseXmmI2F);
10079   match(Set dst (ConvI2F src));
10080 
10081   format %{ "movdl $dst, $src\n\t"
10082             "cvtdq2psl $dst, $dst\t# i2f" %}
10083   ins_encode %{
10084     __ movdl($dst$$XMMRegister, $src$$Register);
10085     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10086   %}
10087   ins_pipe(pipe_slow); // XXX
10088 %}
10089 
// Int -> double conversion (ConvI2D) used when UseXmmI2D is on.
// The int is first moved into the XMM register with movdl and then
// converted there with the packed cvtdq2pd, instead of using cvtsi2sdl.
10090 instruct convXI2D_reg(regD dst, rRegI src)
10091 %{
10092   predicate(UseXmmI2D);
10093   match(Set dst (ConvI2D src));
10094 
10095   format %{ "movdl $dst, $src\n\t"
10096             "cvtdq2pdl $dst, $dst\t# i2d" %}
10097   ins_encode %{
10098     __ movdl($dst$$XMMRegister, $src$$Register);
10099     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10100   %}
10101   ins_pipe(pipe_slow); // XXX
10102 %}
10103 
// Long -> float conversion (ConvL2F) from a general register via cvtsi2ssq.
10104 instruct convL2F_reg_reg(regF dst, rRegL src)
10105 %{
10106   match(Set dst (ConvL2F src));
10107 
10108   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10109   ins_encode %{
10110     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10111   %}
10112   ins_pipe(pipe_slow); // XXX
10113 %}
10114 
// Long -> float conversion (ConvL2F) with the long load folded in:
// cvtsi2ssq reads its source directly from memory.
10115 instruct convL2F_reg_mem(regF dst, memory src)
10116 %{
10117   match(Set dst (ConvL2F (LoadL src)));
10118 
10119   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10120   ins_encode %{
10121     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10122   %}
10123   ins_pipe(pipe_slow); // XXX
10124 %}
10125 
// Long -> double conversion (ConvL2D) from a general register via cvtsi2sdq.
10126 instruct convL2D_reg_reg(regD dst, rRegL src)
10127 %{
10128   match(Set dst (ConvL2D src));
10129 
10130   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10131   ins_encode %{
10132     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10133   %}
10134   ins_pipe(pipe_slow); // XXX
10135 %}
10136 
// Long -> double conversion (ConvL2D) with the long load folded in:
// cvtsi2sdq reads its source directly from memory.
10137 instruct convL2D_reg_mem(regD dst, memory src)
10138 %{
10139   match(Set dst (ConvL2D (LoadL src)));
10140 
10141   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10142   ins_encode %{
10143     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10144   %}
10145   ins_pipe(pipe_slow); // XXX
10146 %}
10147 
// Int -> long conversion (ConvI2L): sign-extends the 32-bit source into the
// 64-bit destination with movslq.
10148 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10149 %{
10150   match(Set dst (ConvI2L src));
10151 
10152   ins_cost(125);
10153   format %{ "movslq  $dst, $src\t# i2l" %}
10154   ins_encode %{
10155     __ movslq($dst$$Register, $src$$Register);
10156   %}
10157   ins_pipe(ialu_reg_reg);
10158 %}
10159 
10160 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10161 // %{
10162 //   match(Set dst (ConvI2L src));
10163 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10164 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10165 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10166 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10167 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10168 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10169 
10170 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10171 //   ins_encode(enc_copy(dst, src));
10172 // //   opcode(0x63); // needs REX.W
10173 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10174 //   ins_pipe(ialu_reg_reg);
10175 // %}
10176 
10177 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) as one 32-bit move: per the Intel SDM a
// 32-bit movl clears the upper half of the 64-bit destination. The
// immL_32bits operand is defined elsewhere; presumably it only matches the
// low-32-bit mask -- confirm at its definition.
// NOTE(review): when $dst and $src are the same register nothing is
// emitted, which relies on the upper 32 bits of an int-typed register
// already being zero -- confirm that invariant.
10178 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10179 %{
10180   match(Set dst (AndL (ConvI2L src) mask));
10181 
10182   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10183   ins_encode %{
10184     if ($dst$$reg != $src$$reg) {
10185       __ movl($dst$$Register, $src$$Register);
10186     }
10187   %}
10188   ins_pipe(ialu_reg_reg);
10189 %}
10190 
10191 // Zero-extend convert int to long
// Memory form: matches (AndL (ConvI2L (LoadI src)) mask) as one 32-bit load.
// Per the Intel SDM a 32-bit movl clears the upper half of the 64-bit
// destination, so load, conversion and mask collapse into one instruction.
10192 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10193 %{
10194   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10195 
10196   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10197   ins_encode %{
10198     __ movl($dst$$Register, $src$$Address);
10199   %}
10200   ins_pipe(ialu_reg_mem);
10201 %}
10202 
// Masks a long with the immL_32bits constant (AndL src mask) using a single
// 32-bit movl instead of an and. Unlike convI2L_reg_reg_zex, the move is
// emitted unconditionally, even when $dst and $src are the same register.
10203 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10204 %{
10205   match(Set dst (AndL src mask));
10206 
10207   format %{ "movl    $dst, $src\t# zero-extend long" %}
10208   ins_encode %{
10209     __ movl($dst$$Register, $src$$Register);
10210   %}
10211   ins_pipe(ialu_reg_reg);
10212 %}
10213 
// Long -> int conversion (ConvL2I): a 32-bit movl copies the low half of
// the source register into the destination.
10214 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10215 %{
10216   match(Set dst (ConvL2I src));
10217 
10218   format %{ "movl    $dst, $src\t# l2i" %}
10219   ins_encode %{
10220     __ movl($dst$$Register, $src$$Register);
10221   %}
10222   ins_pipe(ialu_reg_reg);
10223 %}
10224 
10225 
// MoveF2I, stack slot -> general register: loads 32 bits from the float
// stack slot at rsp + $src$$disp into $dst with a plain movl.
10226 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10227   match(Set dst (MoveF2I src));
10228   effect(DEF dst, USE src);
10229 
10230   ins_cost(125);
10231   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10232   ins_encode %{
10233     __ movl($dst$$Register, Address(rsp, $src$$disp));
10234   %}
10235   ins_pipe(ialu_reg_mem);
10236 %}
10237 
// MoveI2F, stack slot -> XMM register: loads the int stack slot at
// rsp + $src$$disp into $dst with movflt.
10238 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10239   match(Set dst (MoveI2F src));
10240   effect(DEF dst, USE src);
10241 
10242   ins_cost(125);
10243   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10244   ins_encode %{
10245     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
10246   %}
10247   ins_pipe(pipe_slow);
10248 %}
10249 
// MoveD2L, stack slot -> general register: loads 64 bits from the double
// stack slot at rsp + $src$$disp into $dst with movq.
10250 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10251   match(Set dst (MoveD2L src));
10252   effect(DEF dst, USE src);
10253 
10254   ins_cost(125);
10255   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10256   ins_encode %{
10257     __ movq($dst$$Register, Address(rsp, $src$$disp));
10258   %}
10259   ins_pipe(ialu_reg_mem);
10260 %}
10261 
// MoveL2D, stack slot -> XMM register, selected when
// UseXmmLoadAndClearUpper is off. The encoding is the same movdbl call as
// in MoveL2D_stack_reg; only the format text differs (movlpd vs movsd).
// Presumably movdbl itself picks the instruction from the flag -- confirm
// in MacroAssembler.
10262 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10263   predicate(!UseXmmLoadAndClearUpper);
10264   match(Set dst (MoveL2D src));
10265   effect(DEF dst, USE src);
10266 
10267   ins_cost(125);
10268   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10269   ins_encode %{
10270     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10271   %}
10272   ins_pipe(pipe_slow);
10273 %}
10274 
// MoveL2D, stack slot -> XMM register, selected when
// UseXmmLoadAndClearUpper is on. Loads the long stack slot at
// rsp + $src$$disp into $dst with movdbl (shown as movsd in the format).
10275 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10276   predicate(UseXmmLoadAndClearUpper);
10277   match(Set dst (MoveL2D src));
10278   effect(DEF dst, USE src);
10279 
10280   ins_cost(125);
10281   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10282   ins_encode %{
10283     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10284   %}
10285   ins_pipe(pipe_slow);
10286 %}
10287 
10288 
// MoveF2I, XMM register -> stack slot: stores the float register into the
// int stack slot at rsp + $dst$$disp with movflt.
10289 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10290   match(Set dst (MoveF2I src));
10291   effect(DEF dst, USE src);
10292 
10293   ins_cost(95); // XXX
10294   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10295   ins_encode %{
10296     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
10297   %}
10298   ins_pipe(pipe_slow);
10299 %}
10300 
// MoveI2F, general register -> stack slot: stores the int register into the
// float stack slot at rsp + $dst$$disp with movl.
10301 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10302   match(Set dst (MoveI2F src));
10303   effect(DEF dst, USE src);
10304 
10305   ins_cost(100);
10306   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10307   ins_encode %{
10308     __ movl(Address(rsp, $dst$$disp), $src$$Register);
10309   %}
10310   ins_pipe( ialu_mem_reg );
10311 %}
10312 
// MoveD2L, XMM register -> stack slot: stores the double register into the
// long stack slot at rsp + $dst$$disp with movdbl.
// The format tag names this rule (MoveD2L_reg_stack) so debug listings can
// tell it apart from the sibling MoveL2D_reg_stack rule.
10313 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10314   match(Set dst (MoveD2L src));
10315   effect(DEF dst, USE src);
10316 
10317   ins_cost(95); // XXX
10318   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10319   ins_encode %{
10320     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
10321   %}
10322   ins_pipe(pipe_slow);
10323 %}
10324 
// MoveL2D, general register -> stack slot: stores the long register into
// the double stack slot at rsp + $dst$$disp with movq.
10325 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10326   match(Set dst (MoveL2D src));
10327   effect(DEF dst, USE src);
10328 
10329   ins_cost(100);
10330   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10331   ins_encode %{
10332     __ movq(Address(rsp, $dst$$disp), $src$$Register);
10333   %}
10334   ins_pipe(ialu_mem_reg);
10335 %}
10336 
// MoveF2I, XMM register -> general register, directly via movdl.
// Its cost (85) is lower than that of the stack-slot forms (95 / 125).
10337 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10338   match(Set dst (MoveF2I src));
10339   effect(DEF dst, USE src);
10340   ins_cost(85);
10341   format %{ "movd    $dst,$src\t# MoveF2I" %}
10342   ins_encode %{
10343     __ movdl($dst$$Register, $src$$XMMRegister);
10344   %}
10345   ins_pipe( pipe_slow );
10346 %}
10347 
// MoveD2L, XMM register -> general register, directly via movdq
// (the format text prints it as "movd").
10348 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10349   match(Set dst (MoveD2L src));
10350   effect(DEF dst, USE src);
10351   ins_cost(85);
10352   format %{ "movd    $dst,$src\t# MoveD2L" %}
10353   ins_encode %{
10354     __ movdq($dst$$Register, $src$$XMMRegister);
10355   %}
10356   ins_pipe( pipe_slow );
10357 %}
10358 
// MoveI2F, general register -> XMM register, directly via movdl.
10359 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10360   match(Set dst (MoveI2F src));
10361   effect(DEF dst, USE src);
10362   ins_cost(100);
10363   format %{ "movd    $dst,$src\t# MoveI2F" %}
10364   ins_encode %{
10365     __ movdl($dst$$XMMRegister, $src$$Register);
10366   %}
10367   ins_pipe( pipe_slow );
10368 %}
10369 
// MoveL2D, general register -> XMM register, directly via movdq
// (the format text prints it as "movd").
10370 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10371   match(Set dst (MoveL2D src));
10372   effect(DEF dst, USE src);
10373   ins_cost(100);
10374   format %{ "movd    $dst,$src\t# MoveL2D" %}
10375   ins_encode %{
10376      __ movdq($dst$$XMMRegister, $src$$Register);
10377   %}
10378   ins_pipe( pipe_slow );
10379 %}
10380 
10381 
10382 // =======================================================================
10383 // fast clearing of an array
// ClearArray rule selected when UseFastStosb is off. The count and base
// operands are consumed and clobbered (USE_KILL); the zero register and the
// flags are killed. Code generation is delegated to
// MacroAssembler::clear_mem; the format text describes it as
// "xorq rax, rax" followed by "rep stosq".
// The operand class names (rcx_RegL, rdi_RegP, rax_RegI) suggest fixed
// registers, as rep stos requires -- the classes are defined elsewhere.
10384 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10385                   rFlagsReg cr)
10386 %{
10387   predicate(!UseFastStosb);
10388   match(Set dummy (ClearArray cnt base));
10389   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10390 
10391   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10392             "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
10393   ins_encode %{ 
10394     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10395   %}
10396   ins_pipe(pipe_slow);
10397 %}
10398 
// ClearArray rule selected when UseFastStosb is on. Per the format text the
// count is scaled by 8 (shlq rcx,3) and cleared with "rep stosb".
// The encoding is the same clear_mem call as in rep_stos; presumably
// clear_mem itself checks UseFastStosb to choose the sequence -- confirm
// in MacroAssembler.
10399 instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10400                         rFlagsReg cr)
10401 %{
10402   predicate(UseFastStosb);
10403   match(Set dummy (ClearArray cnt base));
10404   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10405   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10406             "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10407             "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
10408   ins_encode %{ 
10409     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10410   %}
10411   ins_pipe( pipe_slow );
10412 %}
10413 
// String compare intrinsic (StrComp): compares (str1, cnt1) with
// (str2, cnt2) and leaves the int result in $result.
// All four inputs are consumed and clobbered (USE_KILL), tmp1 is an XMM
// scratch register, and the flags are killed. The work is delegated to
// MacroAssembler::string_compare.
10414 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10415                         rax_RegI result, regD tmp1, rFlagsReg cr)
10416 %{
10417   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10418   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10419 
10420   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10421   ins_encode %{
10422     __ string_compare($str1$$Register, $str2$$Register,
10423                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10424                       $tmp1$$XMMRegister);
10425   %}
10426   ins_pipe( pipe_slow );
10427 %}
10428 
10429 // fast search of substring with known size.
// StrIndexOf rule for a compile-time constant substring length; only
// available when UseSSE42Intrinsics is set. The constant length picks the
// helper: string_indexofC8 for 8 or more elements, otherwise the general
// string_indexof with the constant passed through.
// cnt2 is not an input here (the length is the immediate); it is a killed
// register that is still passed to the helpers.
10430 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
10431                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
10432 %{
10433   predicate(UseSSE42Intrinsics);
10434   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
10435   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
10436 
10437   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
10438   ins_encode %{
10439     int icnt2 = (int)$int_cnt2$$constant;
10440     if (icnt2 >= 8) {
10441       // IndexOf for constant substrings with size >= 8 elements
10442       // which don't need to be loaded through stack.
10443       __ string_indexofC8($str1$$Register, $str2$$Register,
10444                           $cnt1$$Register, $cnt2$$Register,
10445                           icnt2, $result$$Register,
10446                           $vec$$XMMRegister, $tmp$$Register);
10447     } else {
10448       // Small strings are loaded through stack if they cross page boundary.
10449       __ string_indexof($str1$$Register, $str2$$Register,
10450                         $cnt1$$Register, $cnt2$$Register,
10451                         icnt2, $result$$Register,
10452                         $vec$$XMMRegister, $tmp$$Register);
10453     }
10454   %}
10455   ins_pipe( pipe_slow );
10456 %}
10457 
// StrIndexOf rule for a substring whose length is only known at run time;
// only available when UseSSE42Intrinsics is set. Calls the same
// string_indexof helper as string_indexof_con, passing -1 in place of the
// constant length. Here cnt2 is a real input and is consumed (USE_KILL).
10458 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
10459                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
10460 %{
10461   predicate(UseSSE42Intrinsics);
10462   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
10463   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
10464 
10465   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
10466   ins_encode %{
10467     __ string_indexof($str1$$Register, $str2$$Register,
10468                       $cnt1$$Register, $cnt2$$Register,
10469                       (-1), $result$$Register,
10470                       $vec$$XMMRegister, $tmp$$Register);
10471   %}
10472   ins_pipe( pipe_slow );
10473 %}
10474 
10475 // fast string equals
// StrEquals intrinsic: compares str1 and str2 over cnt elements and leaves
// the result in $result. Delegates to MacroAssembler::char_arrays_equals
// with its first argument false (array_equals passes true). Inputs are
// consumed (USE_KILL); tmp1/tmp2 are XMM scratch, tmp3 and flags are killed.
10476 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
10477                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
10478 %{
10479   match(Set result (StrEquals (Binary str1 str2) cnt));
10480   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
10481 
10482   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
10483   ins_encode %{
10484     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
10485                           $cnt$$Register, $result$$Register, $tmp3$$Register,
10486                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10487   %}
10488   ins_pipe( pipe_slow );
10489 %}
10490 
10491 // fast array equals
// AryEq intrinsic: compares ary1 with ary2 and leaves the result in
// $result. Delegates to MacroAssembler::char_arrays_equals with its first
// argument true. There is no length input: the scratch register tmp3 is
// passed in the position where string_equals passes its count, so the
// helper presumably reads the lengths from the arrays -- confirm there.
10492 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
10493                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
10494 %{
10495   match(Set result (AryEq ary1 ary2));
10496   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
10497   //ins_cost(300);
10498 
10499   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
10500   ins_encode %{
10501     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
10502                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
10503                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10504   %}
10505   ins_pipe( pipe_slow );
10506 %}
10507 
10508 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: encodes len elements from src into dst and
// leaves an int in $result. src, dst and len are consumed (USE_KILL);
// tmp1..tmp4 are XMM scratch registers, tmp5 and the flags are killed.
// The work is delegated to MacroAssembler::encode_iso_array.
10509 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
10510                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
10511                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
10512   match(Set result (EncodeISOArray src (Binary dst len)));
10513   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
10514 
10515   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
10516   ins_encode %{
10517     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
10518                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
10519                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
10520   %}
10521   ins_pipe( pipe_slow );
10522 %}
10523 
10524 
10525 //----------Control Flow Instructions------------------------------------------
10526 // Signed compare Instructions
10527 
10528 // XXX more variants!!
// Signed int compare (CmpI), register with register: sets the flags
// register with a 32-bit cmpl (opcode 3B /r).
10529 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10530 %{
10531   match(Set cr (CmpI op1 op2));
10532   effect(DEF cr, USE op1, USE op2);
10533 
10534   format %{ "cmpl    $op1, $op2" %}
10535   opcode(0x3B);  /* Opcode 3B /r */
10536   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10537   ins_pipe(ialu_cr_reg_reg);
10538 %}
10539 
// Signed int compare (CmpI), register with immediate (opcode 81 /7).
// OpcSErm and Con8or32 are encoding classes defined elsewhere; presumably
// they pick the short sign-extended 8-bit form when the constant fits --
// confirm at their definitions.
10540 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10541 %{
10542   match(Set cr (CmpI op1 op2));
10543 
10544   format %{ "cmpl    $op1, $op2" %}
10545   opcode(0x81, 0x07); /* Opcode 81 /7 */
10546   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10547   ins_pipe(ialu_cr_reg_imm);
10548 %}
10549 
// Signed int compare (CmpI), register with memory: the int load is folded
// into the cmpl (opcode 3B /r with a memory operand).
10550 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10551 %{
10552   match(Set cr (CmpI op1 (LoadI op2)));
10553 
10554   ins_cost(500); // XXX
10555   format %{ "cmpl    $op1, $op2" %}
10556   opcode(0x3B); /* Opcode 3B /r */
10557   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10558   ins_pipe(ialu_cr_reg_mem);
10559 %}
10560 
// Signed int compare against the constant zero (CmpI src 0): emitted as
// "testl src, src" (opcode 85), so no immediate is encoded.
10561 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10562 %{
10563   match(Set cr (CmpI src zero));
10564 
10565   format %{ "testl   $src, $src" %}
10566   opcode(0x85);
10567   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10568   ins_pipe(ialu_cr_reg_imm);
10569 %}
10570 
// Folds (CmpI (AndI src con) 0) into one "testl src, con" (opcode F7 /0
// with a 32-bit immediate): the flags are set without materializing the
// and result.
10571 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10572 %{
10573   match(Set cr (CmpI (AndI src con) zero));
10574 
10575   format %{ "testl   $src, $con" %}
10576   opcode(0xF7, 0x00);
10577   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10578   ins_pipe(ialu_cr_reg_imm);
10579 %}
10580 
// Folds (CmpI (AndI src (LoadI mem)) 0) into one "testl src, mem"
// (opcode 85 with a memory operand): load, and, and compare in one
// instruction.
10581 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10582 %{
10583   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10584 
10585   format %{ "testl   $src, $mem" %}
10586   opcode(0x85);
10587   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10588   ins_pipe(ialu_cr_reg_mem);
10589 %}
10590 
10591 // Unsigned compare Instructions; really, same as signed except they
10592 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare (CmpU), register with register: the same cmpl
// encoding (opcode 3B /r) as compI_rReg, with the result typed rFlagsRegU.
10593 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10594 %{
10595   match(Set cr (CmpU op1 op2));
10596 
10597   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10598   opcode(0x3B); /* Opcode 3B /r */
10599   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10600   ins_pipe(ialu_cr_reg_reg);
10601 %}
10602 
// Unsigned int compare (CmpU), register with immediate: the same encoding
// (opcode 81 /7) as compI_rReg_imm, with the result typed rFlagsRegU.
10603 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10604 %{
10605   match(Set cr (CmpU op1 op2));
10606 
10607   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10608   opcode(0x81,0x07); /* Opcode 81 /7 */
10609   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10610   ins_pipe(ialu_cr_reg_imm);
10611 %}
10612 
// Unsigned int compare (CmpU), register with memory: the int load is folded
// into the cmpl (opcode 3B /r), with the result typed rFlagsRegU.
10613 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10614 %{
10615   match(Set cr (CmpU op1 (LoadI op2)));
10616 
10617   ins_cost(500); // XXX
10618   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10619   opcode(0x3B); /* Opcode 3B /r */
10620   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10621   ins_pipe(ialu_cr_reg_mem);
10622 %}
10623 
10624 // // // Cisc-spilled version of cmpU_rReg
10625 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10626 // //%{
10627 // //  match(Set cr (CmpU (LoadI op1) op2));
10628 // //
10629 // //  format %{ "CMPu   $op1,$op2" %}
10630 // //  ins_cost(500);
10631 // //  opcode(0x39);  /* Opcode 39 /r */
10632 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10633 // //%}
10634 
// Unsigned int compare against the constant zero (CmpU src 0): emitted as
// "testl src, src" (opcode 85), with the result typed rFlagsRegU.
10635 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10636 %{
10637   match(Set cr (CmpU src zero));
10638 
10639   format %{ "testl  $src, $src\t# unsigned" %}
10640   opcode(0x85);
10641   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10642   ins_pipe(ialu_cr_reg_imm);
10643 %}
10644 
// Pointer compare (CmpP), register with register: a 64-bit cmpq (opcode
// 3B /r with the wide REX prefix encoding class); the result is typed as
// unsigned flags.
10645 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10646 %{
10647   match(Set cr (CmpP op1 op2));
10648 
10649   format %{ "cmpq    $op1, $op2\t# ptr" %}
10650   opcode(0x3B); /* Opcode 3B /r */
10651   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10652   ins_pipe(ialu_cr_reg_reg);
10653 %}
10654 
// Pointer compare (CmpP), register with memory: the pointer load is folded
// into a 64-bit cmpq (opcode 3B /r, wide REX prefix encoding class).
10655 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10656 %{
10657   match(Set cr (CmpP op1 (LoadP op2)));
10658 
10659   ins_cost(500); // XXX
10660   format %{ "cmpq    $op1, $op2\t# ptr" %}
10661   opcode(0x3B); /* Opcode 3B /r */
10662   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10663   ins_pipe(ialu_cr_reg_mem);
10664 %}
10665 
10666 // // // Cisc-spilled version of cmpP_rReg
10667 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10668 // //%{
10669 // //  match(Set cr (CmpP (LoadP op1) op2));
10670 // //
10671 // //  format %{ "CMPu   $op1,$op2" %}
10672 // //  ins_cost(500);
10673 // //  opcode(0x39);  /* Opcode 39 /r */
10674 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10675 // //%}
10676 
10677 // XXX this is generalized by compP_rReg_mem???
10678 // Compare raw pointer (used in out-of-heap check).
10679 // Only works because non-oop pointers must be raw pointers
10680 // and raw pointers have no anti-dependencies.
// Same match pattern and encoding as compP_rReg_mem, restricted by the
// predicate: n->in(2) is the LoadP and its in(2) the address, whose type
// must need no relocation (relocInfo::none). No ins_cost is given here,
// whereas compP_rReg_mem declares 500.
10681 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10682 %{
10683   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
10684   match(Set cr (CmpP op1 (LoadP op2)));
10685 
10686   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10687   opcode(0x3B); /* Opcode 3B /r */
10688   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10689   ins_pipe(ialu_cr_reg_mem);
10690 %}
10691 
10692 // This will generate a signed flags result. This should be OK since
10693 // any compare to a zero should be eq/neq.
// Pointer null check (CmpP src NULL) on a register: emitted as a 64-bit
// "testq src, src" (opcode 85, wide REX prefix encoding class).
10694 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10695 %{
10696   match(Set cr (CmpP src zero));
10697 
10698   format %{ "testq   $src, $src\t# ptr" %}
10699   opcode(0x85);
10700   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10701   ins_pipe(ialu_cr_reg_imm);
10702 %}
10703 
10704 // This will generate a signed flags result. This should be OK since
10705 // any compare to a zero should be eq/neq.
// Null test of a pointer loaded from memory: matches (CmpP (LoadP op) zero).
// Selected only when compressed oops are disabled or the narrow-oop base is
// non-NULL (see predicate). Encoded as opcode F7 /0 followed by the 32-bit
// constant 0xFFFFFFFF; the format prints it as a testq against all ones.
10706 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
10707 %{
10708   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
10709   match(Set cr (CmpP (LoadP op) zero));
10710 
10711   ins_cost(500); // XXX
10712   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
10713   opcode(0xF7); /* Opcode F7 /0 */
10714   ins_encode(REX_mem_wide(op),
10715              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
10716   ins_pipe(ialu_cr_reg_imm);
10717 %}
10718 
// Null test of a pointer loaded from memory for the configuration where
// compressed oops are enabled and both the narrow-oop base and the
// narrow-klass base are NULL (see predicate). Emits "cmpq r12, mem"; the
// format string documents that R12 (heap base) is zero in this configuration.
10719 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
10720 %{
10721   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
10722   match(Set cr (CmpP (LoadP mem) zero));
10723 
10724   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
10725   ins_encode %{
10726     __ cmpq(r12, $mem$$Address);
10727   %}
10728   ins_pipe(ialu_cr_reg_mem);
10729 %}
10730 
// Compressed-pointer compare of two registers: matches (CmpN op1 op2) and
// emits a 32-bit cmpl, writing the unsigned flags register.
10731 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
10732 %{
10733   match(Set cr (CmpN op1 op2));
10734 
10735   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10736   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
10737   ins_pipe(ialu_cr_reg_reg);
10738 %}
10739 
// Compressed-pointer compare of a register against a narrow value loaded from
// memory: matches (CmpN src (LoadN mem)) and emits a 32-bit cmpl with the
// memory operand as the second argument.
10740 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
10741 %{
10742   match(Set cr (CmpN src (LoadN mem)));
10743 
10744   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
10745   ins_encode %{
10746     __ cmpl($src$$Register, $mem$$Address);
10747   %}
10748   ins_pipe(ialu_cr_reg_mem);
10749 %}
10750 
// Compressed-pointer compare of a register against a narrow-oop immediate
// (immN): matches (CmpN op1 op2). The emission is delegated to the
// cmp_narrow_oop helper, which receives the constant cast to jobject.
10751 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
10752   match(Set cr (CmpN op1 op2));
10753 
10754   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10755   ins_encode %{
10756     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
10757   %}
10758   ins_pipe(ialu_cr_reg_imm);
10759 %}
10760 
// Compressed-pointer compare of a narrow-oop immediate against a narrow value
// loaded from memory: matches (CmpN src (LoadN mem)).
// The emitted compare passes the memory operand first and the constant second,
// which is the reverse of the operand order in the match rule.
// NOTE(review): presumably harmless because these compares are consumed as
// eq/ne only -- confirm before relying on ordered conditions.
10761 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
10762 %{
10763   match(Set cr (CmpN src (LoadN mem)));
10764 
10765   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
10766   ins_encode %{
10767     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
10768   %}
10769   ins_pipe(ialu_cr_reg_mem);
10770 %}
10771 
// Compressed-klass compare of a register against a narrow-klass immediate
// (immNKlass): matches (CmpN op1 op2). The emission is delegated to the
// cmp_narrow_klass helper, which receives the constant cast to Klass*.
10772 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
10773   match(Set cr (CmpN op1 op2));
10774 
10775   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
10776   ins_encode %{
10777     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
10778   %}
10779   ins_pipe(ialu_cr_reg_imm);
10780 %}
10781 
// Compressed-klass compare of a narrow-klass immediate against a value loaded
// with LoadNKlass: matches (CmpN src (LoadNKlass mem)).
// As in compN_mem_imm, the emitted compare takes the memory operand first and
// the constant second, the reverse of the match rule's operand order.
10782 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
10783 %{
10784   match(Set cr (CmpN src (LoadNKlass mem)));
10785 
10786   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
10787   ins_encode %{
10788     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
10789   %}
10790   ins_pipe(ialu_cr_reg_mem);
10791 %}
10792 
// Null test of a compressed-pointer register: matches (CmpN src zero) where
// zero is the immN0 operand, and emits "testl src, src".
10793 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
10794   match(Set cr (CmpN src zero));
10795 
10796   format %{ "testl   $src, $src\t# compressed ptr" %}
10797   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
10798   ins_pipe(ialu_cr_reg_imm);
10799 %}
10800 
// Null test of a compressed pointer loaded from memory: matches
// (CmpN (LoadN mem) zero). Selected only when the narrow-oop base is non-NULL
// (see predicate).
// The compare is made against the immediate 0 so that ZF is set exactly when
// the loaded narrow oop is zero. Comparing against 0xFFFFFFFF, as this rule
// previously did, sets ZF only for an all-ones value and therefore does not
// implement the matched compare-with-zero.
10801 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
10802 %{
10803   predicate(Universe::narrow_oop_base() != NULL);
10804   match(Set cr (CmpN (LoadN mem) zero));
10805 
10806   ins_cost(500); // XXX
10807   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
10808   ins_encode %{
10809     __ cmpl($mem$$Address, 0);  // ZF set iff the loaded narrow oop is zero
10810   %}
10811   ins_pipe(ialu_cr_reg_mem);
10812 %}
10813 
// Null test of a compressed pointer loaded from memory for the configuration
// where both the narrow-oop base and the narrow-klass base are NULL (see
// predicate). Emits "cmpl r12, mem"; the format string documents that R12
// (heap base) is zero in this configuration.
10814 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
10815 %{
10816   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
10817   match(Set cr (CmpN (LoadN mem) zero));
10818 
10819   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
10820   ins_encode %{
10821     __ cmpl(r12, $mem$$Address);
10822   %}
10823   ins_pipe(ialu_cr_reg_mem);
10824 %}
10825 
10826 // Yanked all unsigned pointer compare operations.
10827 // Pointer compares are done with CmpP which is already unsigned.
10828 
// Long compare of two registers: matches (CmpL op1 op2) and writes the signed
// flags register. Printed as "cmpq"; encoded with primary opcode 0x3B (/r).
10829 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10830 %{
10831   match(Set cr (CmpL op1 op2));
10832 
10833   format %{ "cmpq    $op1, $op2" %}
10834   opcode(0x3B);  /* Opcode 3B /r */
10835   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10836   ins_pipe(ialu_cr_reg_reg);
10837 %}
10838 
// Long compare of a register against an immL32 constant: matches
// (CmpL op1 op2). Printed as "cmpq"; declared with opcode 0x81 and secondary
// opcode 0x07 (the /7 form), followed by the constant emitted via Con8or32.
10839 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10840 %{
10841   match(Set cr (CmpL op1 op2));
10842 
10843   format %{ "cmpq    $op1, $op2" %}
10844   opcode(0x81, 0x07); /* Opcode 81 /7 */
10845   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
10846   ins_pipe(ialu_cr_reg_imm);
10847 %}
10848 
// Long compare of a register against a long loaded from memory: matches
// (CmpL op1 (LoadL op2)). Printed as "cmpq"; encoded with opcode 0x3B (/r).
10849 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10850 %{
10851   match(Set cr (CmpL op1 (LoadL op2)));
10852 
10853   format %{ "cmpq    $op1, $op2" %}
10854   opcode(0x3B); /* Opcode 3B /r */
10855   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10856   ins_pipe(ialu_cr_reg_mem);
10857 %}
10858 
// Zero test of a long register: matches (CmpL src zero) where zero is the
// immL0 operand. Printed as "testq src, src" (opcode 0x85), with the same
// register passed as both operands of the encoding.
10859 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10860 %{
10861   match(Set cr (CmpL src zero));
10862 
10863   format %{ "testq   $src, $src" %}
10864   opcode(0x85);
10865   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10866   ins_pipe(ialu_cr_reg_imm);
10867 %}
10868 
// Bit test of a long register against an immL32 constant: matches
// (CmpL (AndL src con) zero), folding the AND and the compare-with-zero into
// one instruction printed as "testq". Declared with opcode 0xF7 and
// secondary opcode 0x00; the constant is emitted via Con32.
10869 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
10870 %{
10871   match(Set cr (CmpL (AndL src con) zero));
10872 
10873   format %{ "testq   $src, $con\t# long" %}
10874   opcode(0xF7, 0x00);
10875   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
10876   ins_pipe(ialu_cr_reg_imm);
10877 %}
10878 
// Bit test of a long register against a long loaded from memory: matches
// (CmpL (AndL src (LoadL mem)) zero), folding the load, the AND and the
// compare-with-zero into one instruction printed as "testq" (opcode 0x85).
10879 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
10880 %{
10881   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
10882 
10883   format %{ "testq   $src, $mem" %}
10884   opcode(0x85);
10885   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
10886   ins_pipe(ialu_cr_reg_mem);
10887 %}
10888 
10889 // Manifest a CmpL result in an integer register.  Very painful.
10890 // This is the test to avoid.
// Matches (CmpL3 src1 src2) and places the result in an integer register,
// killing the flags register. Per the format string the sequence is: compare,
// preload dst with -1, branch to "done" if less, otherwise setne + movzbl so
// that dst becomes 0 or 1. The actual bytes come from the cmpl3_flag encoding
// class, which is defined outside this section.
10891 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
10892 %{
10893   match(Set dst (CmpL3 src1 src2));
10894   effect(KILL flags);
10895 
10896   ins_cost(275); // XXX
10897   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
10898             "movl    $dst, -1\n\t"
10899             "jl,s    done\n\t"
10900             "setne   $dst\n\t"
10901             "movzbl  $dst, $dst\n\t"
10902     "done:" %}
10903   ins_encode(cmpl3_flag(src1, src2, dst));
10904   ins_pipe(pipe_slow);
10905 %}
10906 
10907 //----------Max and Min--------------------------------------------------------
10908 // Min Instructions
10909 
// Conditional move used as the second half of minI_rReg's expansion.
// It has no match rule of its own, only an effect list: dst is both read and
// written, src and the flags are read. Encoded as opcode bytes 0x0F 0x4F and
// printed as "cmovlgt".
10910 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
10911 %{
10912   effect(USE_DEF dst, USE src, USE cr);
10913 
10914   format %{ "cmovlgt $dst, $src\t# min" %}
10915   opcode(0x0F, 0x4F);
10916   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10917   ins_pipe(pipe_cmov_reg);
10918 %}
10919 
10920 
// Integer minimum: matches (MinI dst src) with the result written back to dst.
// Emits nothing itself; it expands into compI_rReg (defined elsewhere in this
// file) followed by cmovI_reg_g, connected through a temporary flags operand.
10921 instruct minI_rReg(rRegI dst, rRegI src)
10922 %{
10923   match(Set dst (MinI dst src));
10924 
10925   ins_cost(200);
10926   expand %{
10927     rFlagsReg cr;
10928     compI_rReg(cr, dst, src);
10929     cmovI_reg_g(dst, src, cr);
10930   %}
10931 %}
10932 
// Conditional move used as the second half of maxI_rReg's expansion.
// It has no match rule of its own, only an effect list: dst is both read and
// written, src and the flags are read. Encoded as opcode bytes 0x0F 0x4C and
// printed as "cmovllt".
10933 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
10934 %{
10935   effect(USE_DEF dst, USE src, USE cr);
10936 
10937   format %{ "cmovllt $dst, $src\t# max" %}
10938   opcode(0x0F, 0x4C);
10939   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10940   ins_pipe(pipe_cmov_reg);
10941 %}
10942 
10943 
// Integer maximum: matches (MaxI dst src) with the result written back to dst.
// Emits nothing itself; it expands into compI_rReg (defined elsewhere in this
// file) followed by cmovI_reg_l, connected through a temporary flags operand.
10944 instruct maxI_rReg(rRegI dst, rRegI src)
10945 %{
10946   match(Set dst (MaxI dst src));
10947 
10948   ins_cost(200);
10949   expand %{
10950     rFlagsReg cr;
10951     compI_rReg(cr, dst, src);
10952     cmovI_reg_l(dst, src, cr);
10953   %}
10954 %}
10955 
10956 // ============================================================================
10957 // Branch Instructions
10958 
10959 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch: matches Goto and jumps to the label operand.
// The declared size is 5 and jmp() is called with false as its second
// argument, which the inline comment describes as forcing the long form.
10960 instruct jmpDir(label labl)
10961 %{
10962   match(Goto);
10963   effect(USE labl);
10964 
10965   ins_cost(300);
10966   format %{ "jmp     $labl" %}
10967   size(5);
10968   ins_encode %{
10969     Label* L = $labl$$label;
10970     __ jmp(*L, false); // Always long jump
10971   %}
10972   ins_pipe(pipe_jmp);
10973 %}
10974 
10975 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch on signed flags: matches (If cop cr). The condition code
// comes from the cmpOp operand's cmpcode; the declared size is 6 and jcc() is
// called with false as its last argument (long form, per the inline comment).
10976 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
10977 %{
10978   match(If cop cr);
10979   effect(USE labl);
10980 
10981   ins_cost(300);
10982   format %{ "j$cop     $labl" %}
10983   size(6);
10984   ins_encode %{
10985     Label* L = $labl$$label;
10986     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
10987   %}
10988   ins_pipe(pipe_jcc);
10989 %}
10990 
10991 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-branch on signed flags: matches (CountedLoopEnd cop cr).
// Same encoding as jmpCon (long-form jcc, declared size 6); only the matched
// ideal node and the "# loop end" annotation in the format differ.
10992 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
10993 %{
10994   match(CountedLoopEnd cop cr);
10995   effect(USE labl);
10996 
10997   ins_cost(300);
10998   format %{ "j$cop     $labl\t# loop end" %}
10999   size(6);
11000   ins_encode %{
11001     Label* L = $labl$$label;
11002     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11003   %}
11004   ins_pipe(pipe_jcc);
11005 %}
11006 
11007 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-branch on unsigned flags: matches (CountedLoopEnd cop cmp) with a
// cmpOpU condition and an rFlagsRegU input. Long-form jcc, declared size 6.
11008 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11009   match(CountedLoopEnd cop cmp);
11010   effect(USE labl);
11011 
11012   ins_cost(300);
11013   format %{ "j$cop,u   $labl\t# loop end" %}
11014   size(6);
11015   ins_encode %{
11016     Label* L = $labl$$label;
11017     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11018   %}
11019   ins_pipe(pipe_jcc);
11020 %}
11021 
// Loop back-branch for the cmpOpUCF / rFlagsRegUCF operand pair: matches
// (CountedLoopEnd cop cmp). Same long-form jcc encoding and declared size as
// jmpLoopEndU, but with ins_cost 200 instead of 300.
11022 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11023   match(CountedLoopEnd cop cmp);
11024   effect(USE labl);
11025 
11026   ins_cost(200);
11027   format %{ "j$cop,u   $labl\t# loop end" %}
11028   size(6);
11029   ins_encode %{
11030     Label* L = $labl$$label;
11031     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11032   %}
11033   ins_pipe(pipe_jcc);
11034 %}
11035 
11036 // Jump Direct Conditional - using unsigned comparison
// Conditional branch on unsigned flags: matches (If cop cmp) with a cmpOpU
// condition and an rFlagsRegU input. Long-form jcc, declared size 6.
11037 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11038   match(If cop cmp);
11039   effect(USE labl);
11040 
11041   ins_cost(300);
11042   format %{ "j$cop,u  $labl" %}
11043   size(6);
11044   ins_encode %{
11045     Label* L = $labl$$label;
11046     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11047   %}
11048   ins_pipe(pipe_jcc);
11049 %}
11050 
// Conditional branch for the cmpOpUCF / rFlagsRegUCF operand pair: matches
// (If cop cmp). Same long-form jcc encoding and declared size as jmpConU, but
// with ins_cost 200 instead of 300.
11051 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11052   match(If cop cmp);
11053   effect(USE labl);
11054 
11055   ins_cost(200);
11056   format %{ "j$cop,u  $labl" %}
11057   size(6);
11058   ins_encode %{
11059     Label* L = $labl$$label;
11060     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11061   %}
11062   ins_pipe(pipe_jcc);
11063 %}
11064 
// Two-instruction conditional branch for the cmpOpUCF2 condition operand:
// matches (If cop cmp). Only notEqual and equal are handled; any other
// condition code reaches ShouldNotReachHere().
//   notEqual: branch to the target on parity, then branch on notEqual.
//   equal:    a short branch on parity skips over the branch-on-equal via a
//             local "done" label.
// No size() is declared for this instruction.
11065 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11066   match(If cop cmp);
11067   effect(USE labl);
11068 
11069   ins_cost(200);
11070   format %{ $$template
11071     if ($cop$$cmpcode == Assembler::notEqual) {
11072       $$emit$$"jp,u   $labl\n\t"
11073       $$emit$$"j$cop,u   $labl"
11074     } else {
11075       $$emit$$"jp,u   done\n\t"
11076       $$emit$$"j$cop,u   $labl\n\t"
11077       $$emit$$"done:"
11078     }
11079   %}
11080   ins_encode %{
11081     Label* l = $labl$$label;
11082     if ($cop$$cmpcode == Assembler::notEqual) {
11083       __ jcc(Assembler::parity, *l, false);
11084       __ jcc(Assembler::notEqual, *l, false);
11085     } else if ($cop$$cmpcode == Assembler::equal) {
11086       Label done;
11087       __ jccb(Assembler::parity, done);
11088       __ jcc(Assembler::equal, *l, false);
11089       __ bind(done);
11090     } else {
11091        ShouldNotReachHere();
11092     }
11093   %}
11094   ins_pipe(pipe_jcc);
11095 %}
11096 
11097 // ============================================================================
11098 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11099 // superklass array for an instance of the superklass.  Set a hidden
11100 // internal cache on a hit (cache is checked with exposed code in
11101 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11102 // encoding ALSO sets flags.
11103 
// Slow-path secondary-supers scan producing its result in RDI: matches
// (Set result (PartialSubtypeCheck sub super)). Operands are pinned to fixed
// registers (result=rdi, sub=rsi, super=rax, scratch=rcx); rcx and the flags
// are killed. The format string lists the emitted sequence; opcode(0x1) is the
// switch that makes the shared enc_PartialSubtypeCheck encoding zero RDI on a
// hit. The "Hit: rdi zero" annotation now carries the '#' marker used by every
// other annotated line of the listing.
11104 instruct partialSubtypeCheck(rdi_RegP result,
11105                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11106                              rFlagsReg cr)
11107 %{
11108   match(Set result (PartialSubtypeCheck sub super));
11109   effect(KILL rcx, KILL cr);
11110 
11111   ins_cost(1100);  // slightly larger than the next version
11112   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11113             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11114             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11115             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11116             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11117             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11118             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11119     "miss:\t" %}
11120 
11121   opcode(0x1); // Force a XOR of RDI
11122   ins_encode(enc_PartialSubtypeCheck());
11123   ins_pipe(pipe_slow);
11124 %}
11125 
// Flags-only form of the secondary-supers scan: matches
// (Set cr (CmpP (PartialSubtypeCheck sub super) zero)), so the comparison of
// the check's result against NULL is folded into the scan itself. Uses the
// same fixed registers and the same enc_PartialSubtypeCheck encoding as
// partialSubtypeCheck, but RDI is only a killed temporary here and
// opcode(0x0) selects the variant that does not zero it. Its ins_cost (1000)
// is lower than partialSubtypeCheck's (1100).
11126 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11127                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11128                                      immP0 zero,
11129                                      rdi_RegP result)
11130 %{
11131   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11132   effect(KILL rcx, KILL result);
11133 
11134   ins_cost(1000);
11135   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11136             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11137             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11138             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11139             "jne,s   miss\t\t# Missed: flags nz\n\t"
11140             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11141     "miss:\t" %}
11142 
11143   opcode(0x0); // No need to XOR RDI
11144   ins_encode(enc_PartialSubtypeCheck());
11145   ins_pipe(pipe_slow);
11146 %}
11147 
11148 // ============================================================================
11149 // Branch Instructions -- short offset versions
11150 //
11151 // These instructions are used to replace jumps of a long offset (the default
11152 // match) with jumps of a shorter offset.  These instructions are all tagged
11153 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11154 // match rules in general matching.  Instead, the ADLC generates a conversion
11155 // method in the MachNode which can be used to do in-place replacement of the
11156 // long variant with the shorter variant.  The compiler will determine if a
11157 // branch can be taken by the is_short_branch_offset() predicate in the machine
11158 // specific code section of the file.
11159 
11160 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same Goto match rule, but emitted with
// jmpb and a declared size of 2. Tagged ins_short_branch(1), so (per the
// section header) it is not used in general matching and only substitutes for
// the long variant in place.
11161 instruct jmpDir_short(label labl) %{
11162   match(Goto);
11163   effect(USE labl);
11164 
11165   ins_cost(300);
11166   format %{ "jmp,s   $labl" %}
11167   size(2);
11168   ins_encode %{
11169     Label* L = $labl$$label;
11170     __ jmpb(*L);
11171   %}
11172   ins_pipe(pipe_jmp);
11173   ins_short_branch(1);
11174 %}
11175 
11176 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same (If cop cr) match rule, emitted
// with jccb and a declared size of 2. Tagged ins_short_branch(1).
11177 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
11178   match(If cop cr);
11179   effect(USE labl);
11180 
11181   ins_cost(300);
11182   format %{ "j$cop,s   $labl" %}
11183   size(2);
11184   ins_encode %{
11185     Label* L = $labl$$label;
11186     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11187   %}
11188   ins_pipe(pipe_jcc);
11189   ins_short_branch(1);
11190 %}
11191 
11192 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same (CountedLoopEnd cop cr) match
// rule, emitted with jccb and a declared size of 2. Tagged
// ins_short_branch(1).
11193 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
11194   match(CountedLoopEnd cop cr);
11195   effect(USE labl);
11196 
11197   ins_cost(300);
11198   format %{ "j$cop,s   $labl\t# loop end" %}
11199   size(2);
11200   ins_encode %{
11201     Label* L = $labl$$label;
11202     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11203   %}
11204   ins_pipe(pipe_jcc);
11205   ins_short_branch(1);
11206 %}
11207 
11208 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU (unsigned flags): same
// (CountedLoopEnd cop cmp) match rule, emitted with jccb and a declared size
// of 2. Tagged ins_short_branch(1).
11209 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11210   match(CountedLoopEnd cop cmp);
11211   effect(USE labl);
11212 
11213   ins_cost(300);
11214   format %{ "j$cop,us  $labl\t# loop end" %}
11215   size(2);
11216   ins_encode %{
11217     Label* L = $labl$$label;
11218     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11219   %}
11220   ins_pipe(pipe_jcc);
11221   ins_short_branch(1);
11222 %}
11223 
// Short-offset replacement for jmpLoopEndUCF: same (CountedLoopEnd cop cmp)
// match rule on the cmpOpUCF / rFlagsRegUCF pair, emitted with jccb and a
// declared size of 2. Tagged ins_short_branch(1). Note its ins_cost is 300,
// whereas the long variant jmpLoopEndUCF declares 200.
11224 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11225   match(CountedLoopEnd cop cmp);
11226   effect(USE labl);
11227 
11228   ins_cost(300);
11229   format %{ "j$cop,us  $labl\t# loop end" %}
11230   size(2);
11231   ins_encode %{
11232     Label* L = $labl$$label;
11233     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11234   %}
11235   ins_pipe(pipe_jcc);
11236   ins_short_branch(1);
11237 %}
11238 
11239 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU (unsigned flags): same (If cop cmp)
// match rule, emitted with jccb and a declared size of 2. Tagged
// ins_short_branch(1).
11240 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11241   match(If cop cmp);
11242   effect(USE labl);
11243 
11244   ins_cost(300);
11245   format %{ "j$cop,us  $labl" %}
11246   size(2);
11247   ins_encode %{
11248     Label* L = $labl$$label;
11249     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11250   %}
11251   ins_pipe(pipe_jcc);
11252   ins_short_branch(1);
11253 %}
11254 
// Short-offset replacement for jmpConUCF: same (If cop cmp) match rule on the
// cmpOpUCF / rFlagsRegUCF pair, emitted with jccb and a declared size of 2.
// Tagged ins_short_branch(1). Note its ins_cost is 300, whereas the long
// variant jmpConUCF declares 200.
11255 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11256   match(If cop cmp);
11257   effect(USE labl);
11258 
11259   ins_cost(300);
11260   format %{ "j$cop,us  $labl" %}
11261   size(2);
11262   ins_encode %{
11263     Label* L = $labl$$label;
11264     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11265   %}
11266   ins_pipe(pipe_jcc);
11267   ins_short_branch(1);
11268 %}
11269 
// Short-offset replacement for jmpConUCF2: same (If cop cmp) match rule and
// the same two-branch structure, but every branch is a jccb and the declared
// size is 4. Only notEqual and equal are handled; any other condition code
// reaches ShouldNotReachHere(). Tagged ins_short_branch(1).
11270 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11271   match(If cop cmp);
11272   effect(USE labl);
11273 
11274   ins_cost(300);
11275   format %{ $$template
11276     if ($cop$$cmpcode == Assembler::notEqual) {
11277       $$emit$$"jp,u,s   $labl\n\t"
11278       $$emit$$"j$cop,u,s   $labl"
11279     } else {
11280       $$emit$$"jp,u,s   done\n\t"
11281       $$emit$$"j$cop,u,s  $labl\n\t"
11282       $$emit$$"done:"
11283     }
11284   %}
11285   size(4);
11286   ins_encode %{
11287     Label* l = $labl$$label;
11288     if ($cop$$cmpcode == Assembler::notEqual) {
11289       __ jccb(Assembler::parity, *l);
11290       __ jccb(Assembler::notEqual, *l);
11291     } else if ($cop$$cmpcode == Assembler::equal) {
11292       Label done;
11293       __ jccb(Assembler::parity, done);
11294       __ jccb(Assembler::equal, *l);
11295       __ bind(done);
11296     } else {
11297        ShouldNotReachHere();
11298     }
11299   %}
11300   ins_pipe(pipe_jcc);
11301   ins_short_branch(1);
11302 %}
11303 
11304 // ============================================================================
11305 // inlined locking and unlocking
11306 
// Inlined monitor-enter fast path: matches (Set cr (FastLock object box)), so
// its outcome is delivered through the flags register. box is pinned to rbx
// and is consumed and clobbered (USE_KILL); tmp (pinned to rax) and scr are
// temporaries. The code itself comes from the Fast_Lock encoding class, which
// is defined outside this section.
11307 instruct cmpFastLock(rFlagsReg cr,
11308                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11309 %{
11310   match(Set cr (FastLock object box));
11311   effect(TEMP tmp, TEMP scr, USE_KILL box);
11312 
11313   ins_cost(300);
11314   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
11315   ins_encode(Fast_Lock(object, box, tmp, scr));
11316   ins_pipe(pipe_slow);
11317 %}
11318 
// Inlined monitor-exit fast path: matches (Set cr (FastUnlock object box)),
// so its outcome is delivered through the flags register. box is pinned to
// rax and is consumed and clobbered (USE_KILL); tmp is a temporary. The code
// itself comes from the Fast_Unlock encoding class, which is defined outside
// this section.
11319 instruct cmpFastUnlock(rFlagsReg cr,
11320                        rRegP object, rax_RegP box, rRegP tmp)
11321 %{
11322   match(Set cr (FastUnlock object box));
11323   effect(TEMP tmp, USE_KILL box);
11324 
11325   ins_cost(300);
11326   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
11327   ins_encode(Fast_Unlock(object, box, tmp));
11328   ins_pipe(pipe_slow);
11329 %}
11330 
11331 
11332 // ============================================================================
11333 // Safepoint Instructions
// Safepoint poll used when the polling page is not "far" (see predicate):
// matches SafePoint and kills the flags. Emits a testl of rax against the
// polling-page address, wrapped in an AddressLiteral carrying the
// relocInfo::poll_type relocation.
11334 instruct safePoint_poll(rFlagsReg cr)
11335 %{
11336   predicate(!Assembler::is_polling_page_far());
11337   match(SafePoint);
11338   effect(KILL cr);
11339 
11340   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11341             "# Safepoint: poll for GC" %}
11342   ins_cost(125);
11343   ins_encode %{
11344     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
11345     __ testl(rax, addr);
11346   %}
11347   ins_pipe(ialu_reg_mem);
11348 %}
11349 
// Safepoint poll used when the polling page is "far" (see predicate): matches
// (SafePoint poll), where the polling-page address arrives in a pointer
// register. Records a relocInfo::poll_type relocation and then emits a testl
// of rax against the memory at [poll + 0]. Kills the flags.
11350 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11351 %{
11352   predicate(Assembler::is_polling_page_far());
11353   match(SafePoint poll);
11354   effect(KILL cr, USE poll);
11355 
11356   format %{ "testl  rax, [$poll]\t"
11357             "# Safepoint: poll for GC" %}
11358   ins_cost(125);
11359   ins_encode %{
11360     __ relocate(relocInfo::poll_type);
11361     __ testl(rax, Address($poll$$Register, 0));
11362   %}
11363   ins_pipe(ialu_reg_mem);
11364 %}
11365 
11366 // ============================================================================
11367 // Procedure Call/Return Instructions
11368 // Call Java Static Instruction
11369 // Note: If this code changes, the corresponding ret_addr_offset() and
11370 //       compute_padding() functions will have to be adjusted.
// Static Java call for call sites that are NOT method-handle invokes (see
// predicate): matches CallStaticJava. The encoding chains clear_avx,
// Java_Static_Call and call_epilog; primary opcode 0xE8; ins_alignment is 4.
11371 instruct CallStaticJavaDirect(method meth) %{
11372   match(CallStaticJava);
11373   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
11374   effect(USE meth);
11375 
11376   ins_cost(300);
11377   format %{ "call,static " %}
11378   opcode(0xE8); /* E8 cd */
11379   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
11380   ins_pipe(pipe_slow);
11381   ins_alignment(4);
11382 %}
11383 
11384 // Call Java Static Instruction (method handle version)
11385 // Note: If this code changes, the corresponding ret_addr_offset() and
11386 //       compute_padding() functions will have to be adjusted.
// Static Java call for method-handle invoke call sites (see predicate):
// matches CallStaticJava. Differs from CallStaticJavaDirect by the extra
// rbp-pinned operand and by wrapping Java_Static_Call between the preserve_SP
// and restore_SP encodings.
11387 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
11388   match(CallStaticJava);
11389   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
11390   effect(USE meth);
11391   // RBP is saved by all callees (for interpreter stack correction).
11392   // We use it here for a similar purpose, in {preserve,restore}_SP.
11393 
11394   ins_cost(300);
11395   format %{ "call,static/MethodHandle " %}
11396   opcode(0xE8); /* E8 cd */
11397   ins_encode(clear_avx, preserve_SP,
11398              Java_Static_Call(meth),
11399              restore_SP,
11400              call_epilog);
11401   ins_pipe(pipe_slow);
11402   ins_alignment(4);
11403 %}
11404 
11405 // Call Java Dynamic Instruction
11406 // Note: If this code changes, the corresponding ret_addr_offset() and
11407 //       compute_padding() functions will have to be adjusted.
// Dynamic (virtual/interface) Java call: matches CallDynamicJava. Per the
// format string, rax is first loaded with Universe::non_oop_word() and the
// call follows. The encoding chains clear_avx, Java_Dynamic_Call and
// call_epilog; ins_alignment is 4.
11408 instruct CallDynamicJavaDirect(method meth)
11409 %{
11410   match(CallDynamicJava);
11411   effect(USE meth);
11412 
11413   ins_cost(300);
11414   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11415             "call,dynamic " %}
11416   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
11417   ins_pipe(pipe_slow);
11418   ins_alignment(4);
11419 %}
11420 
11421 // Call Runtime Instruction
// Call into the VM runtime: matches CallRuntime. The encoding chains
// clear_avx and Java_To_Runtime; unlike the Java call instructions above it
// has no call_epilog and declares no ins_alignment.
11422 instruct CallRuntimeDirect(method meth)
11423 %{
11424   match(CallRuntime);
11425   effect(USE meth);
11426 
11427   ins_cost(300);
11428   format %{ "call,runtime " %}
11429   ins_encode(clear_avx, Java_To_Runtime(meth));
11430   ins_pipe(pipe_slow);
11431 %}
11432 
11433 // Call runtime without safepoint
// Leaf runtime call: matches CallLeaf. Uses the same clear_avx +
// Java_To_Runtime encoding as CallRuntimeDirect; only the matched node and
// the format text differ.
11434 instruct CallLeafDirect(method meth)
11435 %{
11436   match(CallLeaf);
11437   effect(USE meth);
11438 
11439   ins_cost(300);
11440   format %{ "call_leaf,runtime " %}
11441   ins_encode(clear_avx, Java_To_Runtime(meth));
11442   ins_pipe(pipe_slow);
11443 %}
11444 
11445 // Call runtime without safepoint
// Leaf runtime call for the no-floating-point variant: matches CallLeafNoFP.
// Unlike CallLeafDirect, the encoding is Java_To_Runtime alone, with no
// clear_avx step.
11446 instruct CallLeafNoFPDirect(method meth)
11447 %{
11448   match(CallLeafNoFP);
11449   effect(USE meth);
11450 
11451   ins_cost(300);
11452   format %{ "call_leaf_nofp,runtime " %}
11453   ins_encode(Java_To_Runtime(meth));
11454   ins_pipe(pipe_slow);
11455 %}
11456 
11457 // Return Instruction
11458 // Remove the return address & jump to it.
11459 // Notice: We always emit a nop after a ret to make sure there is room
11460 // for safepoint patching
// Matches Return and is encoded as the single primary opcode 0xC3 ("ret").
// NOTE(review): the comment above says a nop is always emitted after the ret
// for safepoint patching, but this rule's encoding lists only OpcP -- confirm
// whether padding is added elsewhere or whether that remark is stale.
11461 instruct Ret()
11462 %{
11463   match(Return);
11464 
11465   format %{ "ret" %}
11466   opcode(0xC3);
11467   ins_encode(OpcP);
11468   ins_pipe(pipe_jmp);
11469 %}
11470 
11471 // Tail Call; Jump from runtime stub to Java code.
11472 // Also known as an 'interprocedural jump'.
11473 // Target of jump will eventually return to caller.
11474 // TailJump below removes the return address.
// Matches (TailCall jump_target method_oop) and emits an indirect jump through
// the jump_target register (opcode FF /4). method_oop is pinned to rbx and is
// not referenced by the encoding; jump_target is restricted to the
// no_rbp_RegP register class.
11475 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11476 %{
11477   match(TailCall jump_target method_oop);
11478 
11479   ins_cost(300);
11480   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11481   opcode(0xFF, 0x4); /* Opcode FF /4 */
11482   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11483   ins_pipe(pipe_jmp);
11484 %}
11485 
11486 // Tail Jump; remove the return address; jump to target.
11487 // TailCall above leaves the return address around.
// Matches (TailJump jump_target ex_oop). Emits the single byte 0x5a
// ("popq rdx", discarding the return address into rdx) and then an indirect
// jump through jump_target (opcode FF /4). ex_oop is pinned to rax and is not
// referenced by the encoding.
11488 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11489 %{
11490   match(TailJump jump_target ex_oop);
11491 
11492   ins_cost(300);
11493   format %{ "popq    rdx\t# pop return address\n\t"
11494             "jmp     $jump_target" %}
11495   opcode(0xFF, 0x4); /* Opcode FF /4 */
11496   ins_encode(Opcode(0x5a), // popq rdx
11497              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11498   ins_pipe(pipe_jmp);
11499 %}
11500 
11501 // Create exception oop: created by stack-crawling runtime code.
11502 // Created exception is now available to this handler, and is setup
11503 // just prior to jumping to this handler.  No code emitted.
// Matches (Set ex_oop (CreateEx)) with the result pinned to rax. It is a pure
// placeholder: size(0), an empty ins_encode and the "empty" pipeline class.
11504 instruct CreateException(rax_RegP ex_oop)
11505 %{
11506   match(Set ex_oop (CreateEx));
11507 
11508   size(0);
11509   // use the following format syntax
11510   format %{ "# exception oop is in rax; no code emitted" %}
11511   ins_encode();
11512   ins_pipe(empty);
11513 %}
11514 
11515 // Rethrow exception:
11516 // The exception oop will come in the first argument position.
11517 // Then JUMP (not call) to the rethrow stub code.
// Matches Rethrow and takes no operands. The code comes from the enc_rethrow
// encoding class (defined outside this section); the format prints it as a
// jump to the rethrow stub.
11518 instruct RethrowException()
11519 %{
11520   match(Rethrow);
11521 
11522   // use the following format syntax
11523   format %{ "jmp     rethrow_stub" %}
11524   ins_encode(enc_rethrow);
11525   ins_pipe(pipe_jmp);
11526 %}
11527 
11528 
11529 // ============================================================================
11530 // This name is KNOWN by the ADLC and cannot be changed.
11531 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
11532 // for this guy.
// Matches (Set dst (ThreadLocal)) with dst pinned to r15. No bytes are
// emitted (size(0), empty encoding): the format documents that the
// thread-local pointer already lives in R15.
11533 instruct tlsLoadP(r15_RegP dst) %{
11534   match(Set dst (ThreadLocal));
11535   effect(DEF dst);
11536 
11537   size(0);
11538   format %{ "# TLS is in R15" %}
11539   ins_encode( /*empty encoding*/ );
11540   ins_pipe(ialu_reg_reg);
11541 %}
11542 
11543 
11544 //----------PEEPHOLE RULES-----------------------------------------------------
11545 // These must follow all instruction definitions as they use the names
11546 // defined in the instructions definitions.
11547 //
11548 // peepmatch ( root_instr_name [preceding_instruction]* );
11549 //
11550 // peepconstraint %{
11551 // (instruction_number.operand_name relational_op instruction_number.operand_name
11552 //  [, ...] );
11553 // // instruction numbers are zero-based using left to right order in peepmatch
11554 //
11555 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11556 // // provide an instruction_number.operand_name for each operand that appears
11557 // // in the replacement instruction's match rule
11558 //
11559 // ---------VM FLAGS---------------------------------------------------------
11560 //
11561 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11562 //
11563 // Each peephole rule is given an identifying number starting with zero and
11564 // increasing by one in the order seen by the parser.  An individual peephole
11565 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11566 // on the command-line.
11567 //
11568 // ---------CURRENT LIMITATIONS----------------------------------------------
11569 //
11570 // Only match adjacent instructions in same basic block
11571 // Only equality constraints
11572 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11573 // Only one replacement instruction
11574 //
11575 // ---------EXAMPLE----------------------------------------------------------
11576 //
11577 // // pertinent parts of existing instructions in architecture description
11578 // instruct movI(rRegI dst, rRegI src)
11579 // %{
11580 //   match(Set dst (CopyI src));
11581 // %}
11582 //
11583 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11584 // %{
11585 //   match(Set dst (AddI dst src));
11586 //   effect(KILL cr);
11587 // %}
11588 //
11589 // // Change (inc mov) to lea
11590 // peephole %{
11591 //   // increment preceeded by register-register move
11592 //   // increment preceded by register-register move
11593 //   // require that the destination register of the increment
11594 //   // match the destination register of the move
11595 //   peepconstraint ( 0.dst == 1.dst );
11596 //   // construct a replacement instruction that sets
11597 //   // the destination to ( move's source register + one )
11598 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11599 // %}
11600 //
11601 
11602 // Implementation no longer uses movX instructions since
11603 // machine-independent system no longer uses CopyX nodes.
11604 //
11605 // peephole
11606 // %{
11607 //   peepmatch (incI_rReg movI);
11608 //   peepconstraint (0.dst == 1.dst);
11609 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11610 // %}
11611 
11612 // peephole
11613 // %{
11614 //   peepmatch (decI_rReg movI);
11615 //   peepconstraint (0.dst == 1.dst);
11616 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11617 // %}
11618 
11619 // peephole
11620 // %{
11621 //   peepmatch (addI_rReg_imm movI);
11622 //   peepconstraint (0.dst == 1.dst);
11623 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11624 // %}
11625 
11626 // peephole
11627 // %{
11628 //   peepmatch (incL_rReg movL);
11629 //   peepconstraint (0.dst == 1.dst);
11630 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11631 // %}
11632 
11633 // peephole
11634 // %{
11635 //   peepmatch (decL_rReg movL);
11636 //   peepconstraint (0.dst == 1.dst);
11637 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11638 // %}
11639 
11640 // peephole
11641 // %{
11642 //   peepmatch (addL_rReg_imm movL);
11643 //   peepconstraint (0.dst == 1.dst);
11644 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11645 // %}
11646 
11647 // peephole
11648 // %{
11649 //   peepmatch (addP_rReg_imm movP);
11650 //   peepconstraint (0.dst == 1.dst);
11651 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11652 // %}
11653 
11654 // // Change load of spilled value to only a spill
11655 // instruct storeI(memory mem, rRegI src)
11656 // %{
11657 //   match(Set mem (StoreI mem src));
11658 // %}
11659 //
11660 // instruct loadI(rRegI dst, memory mem)
11661 // %{
11662 //   match(Set dst (LoadI mem));
11663 // %}
11664 //
11665 
// Peephole: a loadI (instruction 0, the root) immediately preceded by a
// storeI (instruction 1). When the store's source register is the load's
// destination and both name the same memory operand, the pair is replaced by
// a single storeI built from the store's own operands.
11666 peephole
11667 %{
11668   peepmatch (loadI storeI);
11669   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11670   peepreplace (storeI(1.mem 1.mem 1.src));
11671 %}
11672 
// Peephole: a loadL (instruction 0, the root) immediately preceded by a
// storeL (instruction 1). When the store's source register is the load's
// destination and both name the same memory operand, the pair is replaced by
// a single storeL built from the store's own operands.
11673 peephole
11674 %{
11675   peepmatch (loadL storeL);
11676   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11677   peepreplace (storeL(1.mem 1.mem 1.src));
11678 %}
11679 
11680 //----------SMARTSPILL RULES---------------------------------------------------
11681 // These must follow all instruction definitions as they use the names
11682 // defined in the instructions definitions.