1 //
   2 // Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding, VM register );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  // General register definitions.  Every register is declared as a pair:
  // NAME is bound to reg->as_VMReg() and NAME_H to reg->as_VMReg()->next().
  // Both entries of a pair carry the same save types and the same encoding.
  // Arguments, in order: register save type, C convention save type, ideal
  // register type, encoding, VM register.
  // NOTE(review): RSI and RDI are SOC in the first (register save type)
  // column in both preprocessor branches below; only their C convention save
  // type differs (SOE under _WIN64, SOC otherwise).
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  // RSP is the only pair declared No-Save under both conventions.
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  // With _WIN64 defined, RSI and RDI are Save-On-Entry in the C convention.
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  // Otherwise RSI and RDI are Save-On-Call in the C convention.
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 // R8-R11: Save-On-Call under both conventions.
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 // R12-R15: Save-On-Call for the register save type, Save-On-Entry in the
 // C convention.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers: none are defined in this register block.
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
 // Allocation chunk holding all sixteen general register pairs, listed in
 // selection-priority order (first entry is highest priority): R10/R11 lead
 // the list and RSP comes last.  Every register is immediately followed by
 // its _H companion.
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Class for all pointer registers (including RSP)
 // Lists all sixteen general register pairs, each with its _H companion.
 170 reg_class any_reg(RAX, RAX_H,
 171                   RDX, RDX_H,
 172                   RBP, RBP_H,
 173                   RDI, RDI_H,
 174                   RSI, RSI_H,
 175                   RCX, RCX_H,
 176                   RBX, RBX_H,
 177                   RSP, RSP_H,
 178                   R8,  R8_H,
 179                   R9,  R9_H,
 180                   R10, R10_H,
 181                   R11, R11_H,
 182                   R12, R12_H,
 183                   R13, R13_H,
 184                   R14, R14_H,
 185                   R15, R15_H);
 186 
 187 // Class for all pointer registers except RSP
 // Note: R12 and R15 are also absent from this list.
 188 reg_class ptr_reg(RAX, RAX_H,
 189                   RDX, RDX_H,
 190                   RBP, RBP_H,
 191                   RDI, RDI_H,
 192                   RSI, RSI_H,
 193                   RCX, RCX_H,
 194                   RBX, RBX_H,
 195                   R8,  R8_H,
 196                   R9,  R9_H,
 197                   R10, R10_H,
 198                   R11, R11_H,
 199                   R13, R13_H,
 200                   R14, R14_H);
 201 
 202 // Class for all pointer registers except RAX and RSP
 // Note: R12 and R15 are also absent from this list.
 203 reg_class ptr_no_rax_reg(RDX, RDX_H,
 204                          RBP, RBP_H,
 205                          RDI, RDI_H,
 206                          RSI, RSI_H,
 207                          RCX, RCX_H,
 208                          RBX, RBX_H,
 209                          R8,  R8_H,
 210                          R9,  R9_H,
 211                          R10, R10_H,
 212                          R11, R11_H,
 213                          R13, R13_H,
 214                          R14, R14_H);
 215 
 // Class for pointer registers without RBP.  RSP, R12 and R15 are not
 // listed either.
 216 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 217                          RAX, RAX_H,
 218                          RDI, RDI_H,
 219                          RSI, RSI_H,
 220                          RCX, RCX_H,
 221                          RBX, RBX_H,
 222                          R8,  R8_H,
 223                          R9,  R9_H,
 224                          R10, R10_H,
 225                          R11, R11_H,
 226                          R13, R13_H,
 227                          R14, R14_H);
 228 
 229 // Class for all pointer registers except RAX, RBX and RSP
 // Note: R12 and R15 are also absent from this list.
 230 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 231                              RBP, RBP_H,
 232                              RDI, RDI_H,
 233                              RSI, RSI_H,
 234                              RCX, RCX_H,
 235                              R8,  R8_H,
 236                              R9,  R9_H,
 237                              R10, R10_H,
 238                              R11, R11_H,
 239                              R13, R13_H,
 240                              R14, R14_H);
 241 
 // Single-register pointer classes.  Each one names exactly one register
 // together with its _H companion.
 242 // Singleton class for RAX pointer register
 243 reg_class ptr_rax_reg(RAX, RAX_H);
 244 
 245 // Singleton class for RBX pointer register
 246 reg_class ptr_rbx_reg(RBX, RBX_H);
 247 
 248 // Singleton class for RSI pointer register
 249 reg_class ptr_rsi_reg(RSI, RSI_H);
 250 
 251 // Singleton class for RDI pointer register
 252 reg_class ptr_rdi_reg(RDI, RDI_H);
 253 
 254 // Singleton class for RBP pointer register
 255 reg_class ptr_rbp_reg(RBP, RBP_H);
 256 
 257 // Singleton class for stack pointer
 258 reg_class ptr_rsp_reg(RSP, RSP_H);
 259 
 260 // Singleton class for TLS pointer
 // (the class contains R15)
 261 reg_class ptr_r15_reg(R15, R15_H);
 262 
 263 // Class for all long registers (except RSP)
 // Note: R12 and R15 are also absent from this list.  Each register is
 // listed together with its _H companion.
 264 reg_class long_reg(RAX, RAX_H,
 265                    RDX, RDX_H,
 266                    RBP, RBP_H,
 267                    RDI, RDI_H,
 268                    RSI, RSI_H,
 269                    RCX, RCX_H,
 270                    RBX, RBX_H,
 271                    R8,  R8_H,
 272                    R9,  R9_H,
 273                    R10, R10_H,
 274                    R11, R11_H,
 275                    R13, R13_H,
 276                    R14, R14_H);
 277 
 278 // Class for all long registers except RAX, RDX (and RSP)
 // Note: R12 and R15 are also absent from this list.
 279 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 280                               RDI, RDI_H,
 281                               RSI, RSI_H,
 282                               RCX, RCX_H,
 283                               RBX, RBX_H,
 284                               R8,  R8_H,
 285                               R9,  R9_H,
 286                               R10, R10_H,
 287                               R11, R11_H,
 288                               R13, R13_H,
 289                               R14, R14_H);
 290 
 291 // Class for all long registers except RCX (and RSP)
 // Note: R12 and R15 are also absent from this list.
 292 reg_class long_no_rcx_reg(RBP, RBP_H,
 293                           RDI, RDI_H,
 294                           RSI, RSI_H,
 295                           RAX, RAX_H,
 296                           RDX, RDX_H,
 297                           RBX, RBX_H,
 298                           R8,  R8_H,
 299                           R9,  R9_H,
 300                           R10, R10_H,
 301                           R11, R11_H,
 302                           R13, R13_H,
 303                           R14, R14_H);
 304 
 305 // Class for all long registers except RAX (and RSP)
 // Note: R12 and R15 are also absent from this list.
 306 reg_class long_no_rax_reg(RBP, RBP_H,
 307                           RDX, RDX_H,
 308                           RDI, RDI_H,
 309                           RSI, RSI_H,
 310                           RCX, RCX_H,
 311                           RBX, RBX_H,
 312                           R8,  R8_H,
 313                           R9,  R9_H,
 314                           R10, R10_H,
 315                           R11, R11_H,
 316                           R13, R13_H,
 317                           R14, R14_H);
 318 
 // Single-register long classes: one register plus its _H companion each.
 319 // Singleton class for RAX long register
 320 reg_class long_rax_reg(RAX, RAX_H);
 321 
 322 // Singleton class for RCX long register
 323 reg_class long_rcx_reg(RCX, RCX_H);
 324 
 325 // Singleton class for RDX long register
 326 reg_class long_rdx_reg(RDX, RDX_H);
 327 
 328 // Class for all int registers (except RSP)
 // Note: R12 and R15 are also absent from this list.  Unlike the pointer
 // and long classes, int classes list only the base name, not the _H entry.
 329 reg_class int_reg(RAX,
 330                   RDX,
 331                   RBP,
 332                   RDI,
 333                   RSI,
 334                   RCX,
 335                   RBX,
 336                   R8,
 337                   R9,
 338                   R10,
 339                   R11,
 340                   R13,
 341                   R14);
 342 
 343 // Class for all int registers except RCX (and RSP)
 // Note: R12 and R15 are also absent from this list.
 344 reg_class int_no_rcx_reg(RAX,
 345                          RDX,
 346                          RBP,
 347                          RDI,
 348                          RSI,
 349                          RBX,
 350                          R8,
 351                          R9,
 352                          R10,
 353                          R11,
 354                          R13,
 355                          R14);
 356 
 357 // Class for all int registers except RAX, RDX (and RSP)
 // Note: R12 and R15 are also absent from this list.
 358 reg_class int_no_rax_rdx_reg(RBP,
 359                              RDI,
 360                              RSI,
 361                              RCX,
 362                              RBX,
 363                              R8,
 364                              R9,
 365                              R10,
 366                              R11,
 367                              R13,
 368                              R14);
 369 
 370 // Singleton class for RAX int register
 371 reg_class int_rax_reg(RAX);
 372 
 373 // Singleton class for RBX int register
 374 reg_class int_rbx_reg(RBX);
 375 
 376 // Singleton class for RCX int register
 377 reg_class int_rcx_reg(RCX);
 378 
 379 // Singleton class for RDX int register
 380 reg_class int_rdx_reg(RDX);
 381 
 382 // Singleton class for RDI int register
 383 reg_class int_rdi_reg(RDI);
 384 
 385 // Singleton class for instruction pointer
 386 // reg_class ip_reg(RIP);
 387 
 388 %}
 389 
 390 //----------SOURCE BLOCK-------------------------------------------------------
 391 // This is a block of C++ code which provides values, functions, and
 392 // definitions necessary in the rest of the architecture description
 393 source %{
 // Short names for two Assembler operand-format constants.  RELOC_DISP32 is
 // the value handed to emit_d32_reloc() as its format argument further down.
 394 #define   RELOC_IMM64    Assembler::imm_operand
 395 #define   RELOC_DISP32   Assembler::disp32_operand
 396 
 // "__ foo(...)" expands to "_masm.foo(...)", so any function using it needs
 // a MacroAssembler object (or reference) named _masm in scope.
 397 #define __ _masm.
 398 
 // Byte count of the SP-preserving instruction: one REX.W prefix byte, one
 // opcode byte and one reg/reg ModRM byte.
 static int preserve_SP_size() {
   const int rex_w_prefix = 1;
   const int opcode_byte  = 1;
   const int modrm_byte   = 1;
   return rex_w_prefix + opcode_byte + modrm_byte;
 }
 402 static int clear_avx_size() {
 403   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 404 }
 405 
 406 // !!!!! Special hack to get all types of calls to specify the byte offset
 407 //       from the start of the call to the point where the return address
 408 //       will point.
 409 int MachCallStaticJavaNode::ret_addr_offset()
 410 {
 411   int offset = 5; // 5 bytes from start of call to where return address points
 412   offset += clear_avx_size();
 413   if (_method_handle_invoke)
 414     offset += preserve_SP_size();
 415   return offset;
 416 }
 417 
 418 int MachCallDynamicJavaNode::ret_addr_offset()
 419 {
 420   int offset = 15; // 15 bytes from start of call to where return address points
 421   offset += clear_avx_size();
 422   return offset;
 423 }
 424 
 425 int MachCallRuntimeNode::ret_addr_offset() {
 426   int offset = 13; // movq r10,#addr; callq (r10)
 427   offset += clear_avx_size();
 428   return offset;
 429 }
 430 
 431 // Indicate if the safepoint node needs the polling page as an input,
 432 // it does if the polling page is more than disp32 away.
 // Returns whatever Assembler::is_polling_page_far() reports; no other
 // state is consulted.
 433 bool SafePointNode::needs_polling_address_input()
 434 {
 435   return Assembler::is_polling_page_far();
 436 }
 437 
 438 //
 439 // Compute padding required for nodes which need alignment
 440 //
 441 
 442 // The address of the call instruction needs to be 4-byte aligned to
 443 // ensure that it does not span a cache line so that it can be patched.
 444 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 445 {
 446   current_offset += clear_avx_size(); // skip vzeroupper
 447   current_offset += 1; // skip call opcode byte
 448   return round_to(current_offset, alignment_required()) - current_offset;
 449 }
 450 
 451 // The address of the call instruction needs to be 4-byte aligned to
 452 // ensure that it does not span a cache line so that it can be patched.
 453 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 454 {
 455   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 456   current_offset += clear_avx_size(); // skip vzeroupper
 457   current_offset += 1; // skip call opcode byte
 458   return round_to(current_offset, alignment_required()) - current_offset;
 459 }
 460 
 461 // The address of the call instruction needs to be 4-byte aligned to
 462 // ensure that it does not span a cache line so that it can be patched.
 463 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 464 {
 465   current_offset += clear_avx_size(); // skip vzeroupper
 466   current_offset += 11; // skip movq instruction + call opcode byte
 467   return round_to(current_offset, alignment_required()) - current_offset;
 468 }
 469 
 470 // EMIT_RM()
 471 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 472   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 473   cbuf.insts()->emit_int8(c);
 474 }
 475 
 476 // EMIT_CC()
 477 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 478   unsigned char c = (unsigned char) (f1 | f2);
 479   cbuf.insts()->emit_int8(c);
 480 }
 481 
 482 // EMIT_OPCODE()
 483 void emit_opcode(CodeBuffer &cbuf, int code) {
 484   cbuf.insts()->emit_int8((unsigned char) code);
 485 }
 486 
 487 // EMIT_OPCODE() w/ relocation information
 // Records a relocation of type 'reloc' (with the given format) at
 // insts_mark() + offset, then emits the opcode byte via the plain
 // emit_opcode() overload.  The relocation is registered before the byte is
 // written.
 488 void emit_opcode(CodeBuffer &cbuf,
 489                  int code, relocInfo::relocType reloc, int offset, int format)
 490 {
 491   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 492   emit_opcode(cbuf, code);
 493 }
 494 
 495 // EMIT_D8()
 496 void emit_d8(CodeBuffer &cbuf, int d8) {
 497   cbuf.insts()->emit_int8((unsigned char) d8);
 498 }
 499 
 500 // EMIT_D16()
 // Forwards 'd16' to emit_int16() on the buffer's instruction section.
 // No explicit cast is applied here; any narrowing happens at the call.
 501 void emit_d16(CodeBuffer &cbuf, int d16) {
 502   cbuf.insts()->emit_int16(d16);
 503 }
 504 
 505 // EMIT_D32()
 // Forwards 'd32' to emit_int32() on the buffer's instruction section.
 506 void emit_d32(CodeBuffer &cbuf, int d32) {
 507   cbuf.insts()->emit_int32(d32);
 508 }
 509 
 510 // EMIT_D64()
 // Forwards 'd64' to emit_int64() on the buffer's instruction section.
 511 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 512   cbuf.insts()->emit_int64(d64);
 513 }
 514 
 515 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 // Records a relocation of the given type/format at insts_mark() and then
 // emits the 32-bit value.  external_word_type is rejected by the assert;
 // its message points at the two-argument (cbuf, address) overload instead.
 516 void emit_d32_reloc(CodeBuffer& cbuf,
 517                     int d32,
 518                     relocInfo::relocType reloc,
 519                     int format)
 520 {
 521   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 522   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 523   cbuf.insts()->emit_int32(d32);
 524 }
 525 
 526 // emit 32 bit value and construct relocation entry from RelocationHolder
 // Records the relocation described by 'rspec' at insts_mark() and then
 // emits the 32-bit value.  Under ASSERT only, an oop_type relocation whose
 // value is neither 0 nor Universe::non_oop_word() is additionally checked
 // to lie in the reserved heap and to be an embeddable oop.
 527 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 528 #ifdef ASSERT
 529   if (rspec.reloc()->type() == relocInfo::oop_type &&
 530       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 531     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 532     assert(cast_to_oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 533   }
 534 #endif
 535   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 536   cbuf.insts()->emit_int32(d32);
 537 }
 538 
 539 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 540   address next_ip = cbuf.insts_end() + 4;
 541   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 542                  external_word_Relocation::spec(addr),
 543                  RELOC_DISP32);
 544 }
 545 
 546 
 547 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 // Records a relocation of the given type/format at insts_mark(), then
 // emits the 64-bit value.
 548 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 549   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 550   cbuf.insts()->emit_int64(d64);
 551 }
 552 
 553 // emit 64 bit value and construct relocation entry from RelocationHolder
 // Records the relocation described by 'rspec' at insts_mark(), then emits
 // the 64-bit value.  Under ASSERT only, an oop_type relocation whose value
 // is neither 0 nor Universe::non_oop_word() is additionally checked to lie
 // in the reserved heap and to be an embeddable oop.
 554 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 555 #ifdef ASSERT
 556   if (rspec.reloc()->type() == relocInfo::oop_type &&
 557       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 558     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 559     assert(cast_to_oop(d64)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d64)->is_scavengable()),
 560            "cannot embed scavengable oops in code");
 561   }
 562 #endif
 563   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 564   cbuf.insts()->emit_int64(d64);
 565 }
 566 
 567 // Access stack slot for load or store
 568 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 569 {
 570   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 571   if (-0x80 <= disp && disp < 0x80) {
 572     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 573     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 574     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 575   } else {
 576     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 577     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 578     emit_d32(cbuf, disp);     // Displacement // R/M byte
 579   }
 580 }
 581 
 582    // rRegI ereg, memory mem) %{    // emit_reg_mem
 583 void encode_RegMem(CodeBuffer &cbuf,
 584                    int reg,
 585                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 586 {
 587   assert(disp_reloc == relocInfo::none, "cannot have disp");
 588   int regenc = reg & 7;
 589   int baseenc = base & 7;
 590   int indexenc = index & 7;
 591 
 592   // There is no index & no scale, use form without SIB byte
 593   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 594     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 595     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 596       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 597     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 598       // If 8-bit displacement, mode 0x1
 599       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 600       emit_d8(cbuf, disp);
 601     } else {
 602       // If 32-bit displacement
 603       if (base == -1) { // Special flag for absolute address
 604         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 605         if (disp_reloc != relocInfo::none) {
 606           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 607         } else {
 608           emit_d32(cbuf, disp);
 609         }
 610       } else {
 611         // Normal base + offset
 612         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 613         if (disp_reloc != relocInfo::none) {
 614           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 615         } else {
 616           emit_d32(cbuf, disp);
 617         }
 618       }
 619     }
 620   } else {
 621     // Else, encode with the SIB byte
 622     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 623     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 624       // If no displacement
 625       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 626       emit_rm(cbuf, scale, indexenc, baseenc);
 627     } else {
 628       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 629         // If 8-bit displacement, mode 0x1
 630         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 631         emit_rm(cbuf, scale, indexenc, baseenc);
 632         emit_d8(cbuf, disp);
 633       } else {
 634         // If 32-bit displacement
 635         if (base == 0x04 ) {
 636           emit_rm(cbuf, 0x2, regenc, 0x4);
 637           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 638         } else {
 639           emit_rm(cbuf, 0x2, regenc, 0x4);
 640           emit_rm(cbuf, scale, indexenc, baseenc); // *
 641         }
 642         if (disp_reloc != relocInfo::none) {
 643           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 644         } else {
 645           emit_d32(cbuf, disp);
 646         }
 647       }
 648     }
 649   }
 650 }
 651 
 652 // This could be in MacroAssembler but it's fairly C2 specific
 // Emits a flag fixup sequence.  When the parity flag is clear the whole
 // sequence is skipped by a short jump; otherwise the flags are pushed, the
 // saved word on top of the stack is ANDed with 0xffffff2b, and the flags
 // are popped back.
 653 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 654   Label exit;
 655   __ jccb(Assembler::noParity, exit);
 656   __ pushf();
 657   //
 658   // comiss/ucomiss instructions set ZF,PF,CF flags and
 659   // zero OF,AF,SF for NaN values.
 660   // Fixup flags by zeroing ZF,PF so that compare of NaN
 661   // values returns 'less than' result (CF is set).
 662   // Leave the rest of flags unchanged.
 663   //
 664   //    7 6 5 4 3 2 1 0
 665   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 666   //    0 0 1 0 1 0 1 1   (0x2B)
 667   //
 668   __ andq(Address(rsp, 0), 0xffffff2b);
 669   __ popf();
 670   __ bind(exit);
 671 }
 672 
 // Emits code that turns the current condition flags into a value in dst.
 // dst is preset to -1 and kept when the parity or below condition holds.
 // Otherwise dst is written by setb(notEqual) and then zero-extended from
 // its low byte (presumably giving 0 or 1 -- confirm against
 // MacroAssembler::setb).
 673 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 674   Label done;
 675   __ movl(dst, -1);
 676   __ jcc(Assembler::parity, done);
 677   __ jcc(Assembler::below, done);
 678   __ setb(Assembler::notEqual, dst);
 679   __ movzbl(dst, dst);
 680   __ bind(done);
 681 }
 682 
 683 
 684 //=============================================================================
 // MachConstantBaseNode support for this platform: the node has an empty
 // output register mask, emits nothing, has size 0 and never needs
 // post-allocation expansion.
 685 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 686 
 // The constant table base offset is always 0 here.
 687 int Compile::ConstantTable::calculate_table_base_offset() const {
 688   return 0;  // absolute addressing, no offset
 689 }
 690 
 // Post-allocation expansion is never requested, so postalloc_expand() must
 // not be reached.
 691 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 692 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 693   ShouldNotReachHere();
 694 }
 695 
 // Nothing is emitted for this node ...
 696 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 697   // Empty encoding
 698 }
 699 
 // ... and its reported size is therefore 0.
 700 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 701   return 0;
 702 }
 703 
 704 #ifndef PRODUCT
 // Non-PRODUCT builds only: prints a fixed placeholder line.
 705 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 706   st->print("# MachConstantBaseNode (empty encoding)");
 707 }
 708 #endif
 709 
 710 
 711 //=============================================================================
 712 #ifndef PRODUCT
 713 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 714   Compile* C = ra_->C;
 715 
 716   int framesize = C->frame_slots() << LogBytesPerInt;
 717   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 718   // Remove wordSize for return addr which is already pushed.
 719   framesize -= wordSize;
 720 
 721   if (C->need_stack_bang(framesize)) {
 722     framesize -= wordSize;
 723     st->print("# stack bang");
 724     st->print("\n\t");
 725     st->print("pushq   rbp\t# Save rbp");
 726     if (framesize) {
 727       st->print("\n\t");
 728       st->print("subq    rsp, #%d\t# Create frame",framesize);
 729     }
 730   } else {
 731     st->print("subq    rsp, #%d\t# Create frame",framesize);
 732     st->print("\n\t");
 733     framesize -= wordSize;
 734     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 735   }
 736 
 737   if (VerifyStackAtCalls) {
 738     st->print("\n\t");
 739     framesize -= wordSize;
 740     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 741 #ifdef ASSERT
 742     st->print("\n\t");
 743     st->print("# stack alignment check");
 744 #endif
 745   }
 746   st->cr();
 747 }
 748 #endif
 749 
 750 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 751   Compile* C = ra_->C;
 752   MacroAssembler _masm(&cbuf);
 753 
 754   int framesize = C->frame_slots() << LogBytesPerInt;
 755 
 756   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 757 
 758   C->set_frame_complete(cbuf.insts_size());
 759 
 760   if (C->has_mach_constant_base_node()) {
 761     // NOTE: We set the table base offset here because users might be
 762     // emitted before MachConstantBaseNode.
 763     Compile::ConstantTable& constant_table = C->constant_table();
 764     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 765   }
 766 }
 767 
 // Defers to the generic MachNode::size() computation instead of returning
 // a precomputed value.
 768 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 769 {
 770   return MachNode::size(ra_); // too many variables; just compute it
 771                               // the hard way
 772 }
 773 
 // Returns the relocation count reported for the prolog.
 // NOTE(review): the value is 0 although the inline comment calls it
 // "large enough" -- confirm that the prolog needs no relocation entries.
 774 int MachPrologNode::reloc() const
 775 {
 776   return 0; // a large enough number
 777 }
 778 
 779 //=============================================================================
 780 #ifndef PRODUCT
 781 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 782 {
 783   Compile* C = ra_->C;
 784   if (C->max_vector_size() > 16) {
 785     st->print("vzeroupper");
 786     st->cr(); st->print("\t");
 787   }
 788 
 789   int framesize = C->frame_slots() << LogBytesPerInt;
 790   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 791   // Remove word for return adr already pushed
 792   // and RBP
 793   framesize -= 2*wordSize;
 794 
 795   if (framesize) {
 796     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 797     st->print("\t");
 798   }
 799 
 800   st->print_cr("popq   rbp");
 801   if (do_polling() && C->is_method_compilation()) {
 802     st->print("\t");
 803     if (Assembler::is_polling_page_far()) {
 804       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 805                    "testl  rax, [rscratch1]\t"
 806                    "# Safepoint: poll for GC");
 807     } else {
 808       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 809                    "# Safepoint: poll for GC");
 810     }
 811   }
 812 }
 813 #endif
 814 
 815 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 816 {
 817   Compile* C = ra_->C;
 818   if (C->max_vector_size() > 16) {
 819     // Clear upper bits of YMM registers when current compiled code uses
 820     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 821     MacroAssembler _masm(&cbuf);
 822     __ vzeroupper();
 823   }
 824 
 825   int framesize = C->frame_slots() << LogBytesPerInt;
 826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 827   // Remove word for return adr already pushed
 828   // and RBP
 829   framesize -= 2*wordSize;
 830 
 831   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 832 
 833   if (framesize) {
 834     emit_opcode(cbuf, Assembler::REX_W);
 835     if (framesize < 0x80) {
 836       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 837       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 838       emit_d8(cbuf, framesize);
 839     } else {
 840       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 841       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 842       emit_d32(cbuf, framesize);
 843     }
 844   }
 845 
 846   // popq rbp
 847   emit_opcode(cbuf, 0x58 | RBP_enc);
 848 
 849   if (do_polling() && C->is_method_compilation()) {
 850     MacroAssembler _masm(&cbuf);
 851     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 852     if (Assembler::is_polling_page_far()) {
 853       __ lea(rscratch1, polling_page);
 854       __ relocate(relocInfo::poll_return_type);
 855       __ testl(rax, Address(rscratch1, 0));
 856     } else {
 857       __ testl(rax, polling_page);
 858     }
 859   }
 860 }
 861 
 862 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 863 {
 864   return MachNode::size(ra_); // too many variables; just compute it
 865                               // the hard way
 866 }
 867 
 868 int MachEpilogNode::reloc() const
 869 {
 870   return 2; // a large enough number
 871 }
 872 
 873 const Pipeline* MachEpilogNode::pipeline() const
 874 {
 875   return MachNode::pipeline_class();
 876 }
 877 
 878 int MachEpilogNode::safepoint_offset() const
 879 {
 880   return 0;
 881 }
 882 
 883 //=============================================================================
 884 
 885 enum RC {
 886   rc_bad,
 887   rc_int,
 888   rc_float,
 889   rc_stack
 890 };
 891 
 892 static enum RC rc_class(OptoReg::Name reg)
 893 {
 894   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 895 
 896   if (OptoReg::is_stack(reg)) return rc_stack;
 897 
 898   VMReg r = OptoReg::as_VMReg(reg);
 899 
 900   if (r->is_Register()) return rc_int;
 901 
 902   assert(r->is_XMMRegister(), "must be");
 903   return rc_float;
 904 }
 905 
 906 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 907 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 908                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 909 
 910 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 911                             int stack_offset, int reg, uint ireg, outputStream* st);
 912 
 913 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 914                                       int dst_offset, uint ireg, outputStream* st) {
 915   if (cbuf) {
 916     MacroAssembler _masm(cbuf);
 917     switch (ireg) {
 918     case Op_VecS:
 919       __ movq(Address(rsp, -8), rax);
 920       __ movl(rax, Address(rsp, src_offset));
 921       __ movl(Address(rsp, dst_offset), rax);
 922       __ movq(rax, Address(rsp, -8));
 923       break;
 924     case Op_VecD:
 925       __ pushq(Address(rsp, src_offset));
 926       __ popq (Address(rsp, dst_offset));
 927       break;
 928     case Op_VecX:
 929       __ pushq(Address(rsp, src_offset));
 930       __ popq (Address(rsp, dst_offset));
 931       __ pushq(Address(rsp, src_offset+8));
 932       __ popq (Address(rsp, dst_offset+8));
 933       break;
 934     case Op_VecY:
 935       __ vmovdqu(Address(rsp, -32), xmm0);
 936       __ vmovdqu(xmm0, Address(rsp, src_offset));
 937       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 938       __ vmovdqu(xmm0, Address(rsp, -32));
 939       break;
 940     default:
 941       ShouldNotReachHere();
 942     }
 943 #ifndef PRODUCT
 944   } else {
 945     switch (ireg) {
 946     case Op_VecS:
 947       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 948                 "movl    rax, [rsp + #%d]\n\t"
 949                 "movl    [rsp + #%d], rax\n\t"
 950                 "movq    rax, [rsp - #8]",
 951                 src_offset, dst_offset);
 952       break;
 953     case Op_VecD:
 954       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 955                 "popq    [rsp + #%d]",
 956                 src_offset, dst_offset);
 957       break;
 958      case Op_VecX:
 959       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 960                 "popq    [rsp + #%d]\n\t"
 961                 "pushq   [rsp + #%d]\n\t"
 962                 "popq    [rsp + #%d]",
 963                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 964       break;
 965     case Op_VecY:
 966       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 967                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 968                 "vmovdqu [rsp + #%d], xmm0\n\t"
 969                 "vmovdqu xmm0, [rsp - #32]",
 970                 src_offset, dst_offset);
 971       break;
 972     default:
 973       ShouldNotReachHere();
 974     }
 975 #endif
 976   }
 977 }
 978 
 979 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 980                                        PhaseRegAlloc* ra_,
 981                                        bool do_size,
 982                                        outputStream* st) const {
 983   assert(cbuf != NULL || st  != NULL, "sanity");
 984   // Get registers to move
 985   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 986   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 987   OptoReg::Name dst_second = ra_->get_reg_second(this);
 988   OptoReg::Name dst_first = ra_->get_reg_first(this);
 989 
 990   enum RC src_second_rc = rc_class(src_second);
 991   enum RC src_first_rc = rc_class(src_first);
 992   enum RC dst_second_rc = rc_class(dst_second);
 993   enum RC dst_first_rc = rc_class(dst_first);
 994 
 995   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 996          "must move at least 1 register" );
 997 
 998   if (src_first == dst_first && src_second == dst_second) {
 999     // Self copy, no move
1000     return 0;
1001   }
1002   if (bottom_type()->isa_vect() != NULL) {
1003     uint ireg = ideal_reg();
1004     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1005     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
1006     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1007       // mem -> mem
1008       int src_offset = ra_->reg2offset(src_first);
1009       int dst_offset = ra_->reg2offset(dst_first);
1010       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1011     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1012       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1013     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1014       int stack_offset = ra_->reg2offset(dst_first);
1015       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1016     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1017       int stack_offset = ra_->reg2offset(src_first);
1018       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1019     } else {
1020       ShouldNotReachHere();
1021     }
1022     return 0;
1023   }
1024   if (src_first_rc == rc_stack) {
1025     // mem ->
1026     if (dst_first_rc == rc_stack) {
1027       // mem -> mem
1028       assert(src_second != dst_first, "overlap");
1029       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1030           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1031         // 64-bit
1032         int src_offset = ra_->reg2offset(src_first);
1033         int dst_offset = ra_->reg2offset(dst_first);
1034         if (cbuf) {
1035           MacroAssembler _masm(cbuf);
1036           __ pushq(Address(rsp, src_offset));
1037           __ popq (Address(rsp, dst_offset));
1038 #ifndef PRODUCT
1039         } else {
1040           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1041                     "popq    [rsp + #%d]",
1042                      src_offset, dst_offset);
1043 #endif
1044         }
1045       } else {
1046         // 32-bit
1047         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1048         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1049         // No pushl/popl, so:
1050         int src_offset = ra_->reg2offset(src_first);
1051         int dst_offset = ra_->reg2offset(dst_first);
1052         if (cbuf) {
1053           MacroAssembler _masm(cbuf);
1054           __ movq(Address(rsp, -8), rax);
1055           __ movl(rax, Address(rsp, src_offset));
1056           __ movl(Address(rsp, dst_offset), rax);
1057           __ movq(rax, Address(rsp, -8));
1058 #ifndef PRODUCT
1059         } else {
1060           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1061                     "movl    rax, [rsp + #%d]\n\t"
1062                     "movl    [rsp + #%d], rax\n\t"
1063                     "movq    rax, [rsp - #8]",
1064                      src_offset, dst_offset);
1065 #endif
1066         }
1067       }
1068       return 0;
1069     } else if (dst_first_rc == rc_int) {
1070       // mem -> gpr
1071       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1072           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1073         // 64-bit
1074         int offset = ra_->reg2offset(src_first);
1075         if (cbuf) {
1076           MacroAssembler _masm(cbuf);
1077           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1078 #ifndef PRODUCT
1079         } else {
1080           st->print("movq    %s, [rsp + #%d]\t# spill",
1081                      Matcher::regName[dst_first],
1082                      offset);
1083 #endif
1084         }
1085       } else {
1086         // 32-bit
1087         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1088         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1089         int offset = ra_->reg2offset(src_first);
1090         if (cbuf) {
1091           MacroAssembler _masm(cbuf);
1092           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1093 #ifndef PRODUCT
1094         } else {
1095           st->print("movl    %s, [rsp + #%d]\t# spill",
1096                      Matcher::regName[dst_first],
1097                      offset);
1098 #endif
1099         }
1100       }
1101       return 0;
1102     } else if (dst_first_rc == rc_float) {
1103       // mem-> xmm
1104       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1105           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1106         // 64-bit
1107         int offset = ra_->reg2offset(src_first);
1108         if (cbuf) {
1109           MacroAssembler _masm(cbuf);
1110           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1111 #ifndef PRODUCT
1112         } else {
1113           st->print("%s  %s, [rsp + #%d]\t# spill",
1114                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1115                      Matcher::regName[dst_first],
1116                      offset);
1117 #endif
1118         }
1119       } else {
1120         // 32-bit
1121         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1122         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1123         int offset = ra_->reg2offset(src_first);
1124         if (cbuf) {
1125           MacroAssembler _masm(cbuf);
1126           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1127 #ifndef PRODUCT
1128         } else {
1129           st->print("movss   %s, [rsp + #%d]\t# spill",
1130                      Matcher::regName[dst_first],
1131                      offset);
1132 #endif
1133         }
1134       }
1135       return 0;
1136     }
1137   } else if (src_first_rc == rc_int) {
1138     // gpr ->
1139     if (dst_first_rc == rc_stack) {
1140       // gpr -> mem
1141       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1142           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1143         // 64-bit
1144         int offset = ra_->reg2offset(dst_first);
1145         if (cbuf) {
1146           MacroAssembler _masm(cbuf);
1147           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1148 #ifndef PRODUCT
1149         } else {
1150           st->print("movq    [rsp + #%d], %s\t# spill",
1151                      offset,
1152                      Matcher::regName[src_first]);
1153 #endif
1154         }
1155       } else {
1156         // 32-bit
1157         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1158         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1159         int offset = ra_->reg2offset(dst_first);
1160         if (cbuf) {
1161           MacroAssembler _masm(cbuf);
1162           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1163 #ifndef PRODUCT
1164         } else {
1165           st->print("movl    [rsp + #%d], %s\t# spill",
1166                      offset,
1167                      Matcher::regName[src_first]);
1168 #endif
1169         }
1170       }
1171       return 0;
1172     } else if (dst_first_rc == rc_int) {
1173       // gpr -> gpr
1174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1176         // 64-bit
1177         if (cbuf) {
1178           MacroAssembler _masm(cbuf);
1179           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1180                   as_Register(Matcher::_regEncode[src_first]));
1181 #ifndef PRODUCT
1182         } else {
1183           st->print("movq    %s, %s\t# spill",
1184                      Matcher::regName[dst_first],
1185                      Matcher::regName[src_first]);
1186 #endif
1187         }
1188         return 0;
1189       } else {
1190         // 32-bit
1191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1193         if (cbuf) {
1194           MacroAssembler _masm(cbuf);
1195           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1196                   as_Register(Matcher::_regEncode[src_first]));
1197 #ifndef PRODUCT
1198         } else {
1199           st->print("movl    %s, %s\t# spill",
1200                      Matcher::regName[dst_first],
1201                      Matcher::regName[src_first]);
1202 #endif
1203         }
1204         return 0;
1205       }
1206     } else if (dst_first_rc == rc_float) {
1207       // gpr -> xmm
1208       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1209           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1210         // 64-bit
1211         if (cbuf) {
1212           MacroAssembler _masm(cbuf);
1213           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1214 #ifndef PRODUCT
1215         } else {
1216           st->print("movdq   %s, %s\t# spill",
1217                      Matcher::regName[dst_first],
1218                      Matcher::regName[src_first]);
1219 #endif
1220         }
1221       } else {
1222         // 32-bit
1223         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1224         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1225         if (cbuf) {
1226           MacroAssembler _masm(cbuf);
1227           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1228 #ifndef PRODUCT
1229         } else {
1230           st->print("movdl   %s, %s\t# spill",
1231                      Matcher::regName[dst_first],
1232                      Matcher::regName[src_first]);
1233 #endif
1234         }
1235       }
1236       return 0;
1237     }
1238   } else if (src_first_rc == rc_float) {
1239     // xmm ->
1240     if (dst_first_rc == rc_stack) {
1241       // xmm -> mem
1242       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1243           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1244         // 64-bit
1245         int offset = ra_->reg2offset(dst_first);
1246         if (cbuf) {
1247           MacroAssembler _masm(cbuf);
1248           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1249 #ifndef PRODUCT
1250         } else {
1251           st->print("movsd   [rsp + #%d], %s\t# spill",
1252                      offset,
1253                      Matcher::regName[src_first]);
1254 #endif
1255         }
1256       } else {
1257         // 32-bit
1258         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1259         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1260         int offset = ra_->reg2offset(dst_first);
1261         if (cbuf) {
1262           MacroAssembler _masm(cbuf);
1263           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1264 #ifndef PRODUCT
1265         } else {
1266           st->print("movss   [rsp + #%d], %s\t# spill",
1267                      offset,
1268                      Matcher::regName[src_first]);
1269 #endif
1270         }
1271       }
1272       return 0;
1273     } else if (dst_first_rc == rc_int) {
1274       // xmm -> gpr
1275       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1276           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1277         // 64-bit
1278         if (cbuf) {
1279           MacroAssembler _masm(cbuf);
1280           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1281 #ifndef PRODUCT
1282         } else {
1283           st->print("movdq   %s, %s\t# spill",
1284                      Matcher::regName[dst_first],
1285                      Matcher::regName[src_first]);
1286 #endif
1287         }
1288       } else {
1289         // 32-bit
1290         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1291         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1292         if (cbuf) {
1293           MacroAssembler _masm(cbuf);
1294           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1295 #ifndef PRODUCT
1296         } else {
1297           st->print("movdl   %s, %s\t# spill",
1298                      Matcher::regName[dst_first],
1299                      Matcher::regName[src_first]);
1300 #endif
1301         }
1302       }
1303       return 0;
1304     } else if (dst_first_rc == rc_float) {
1305       // xmm -> xmm
1306       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1307           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1308         // 64-bit
1309         if (cbuf) {
1310           MacroAssembler _masm(cbuf);
1311           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1312 #ifndef PRODUCT
1313         } else {
1314           st->print("%s  %s, %s\t# spill",
1315                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1316                      Matcher::regName[dst_first],
1317                      Matcher::regName[src_first]);
1318 #endif
1319         }
1320       } else {
1321         // 32-bit
1322         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1323         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1324         if (cbuf) {
1325           MacroAssembler _masm(cbuf);
1326           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1327 #ifndef PRODUCT
1328         } else {
1329           st->print("%s  %s, %s\t# spill",
1330                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1331                      Matcher::regName[dst_first],
1332                      Matcher::regName[src_first]);
1333 #endif
1334         }
1335       }
1336       return 0;
1337     }
1338   }
1339 
1340   assert(0," foo ");
1341   Unimplemented();
1342   return 0;
1343 }
1344 
1345 #ifndef PRODUCT
1346 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1347   implementation(NULL, ra_, false, st);
1348 }
1349 #endif
1350 
1351 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1352   implementation(&cbuf, ra_, false, NULL);
1353 }
1354 
1355 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1356   return MachNode::size(ra_);
1357 }
1358 
1359 //=============================================================================
1360 #ifndef PRODUCT
1361 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1362 {
1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1364   int reg = ra_->get_reg_first(this);
1365   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1366             Matcher::regName[reg], offset);
1367 }
1368 #endif
1369 
1370 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1371 {
1372   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1373   int reg = ra_->get_encode(this);
1374   if (offset >= 0x80) {
1375     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1376     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1377     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1378     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1379     emit_d32(cbuf, offset);
1380   } else {
1381     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1382     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1383     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1384     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1385     emit_d8(cbuf, offset);
1386   }
1387 }
1388 
1389 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1390 {
1391   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1392   return (offset < 0x80) ? 5 : 8; // REX
1393 }
1394 
1395 //=============================================================================
1396 #ifndef PRODUCT
1397 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1398 {
1399   if (UseCompressedClassPointers) {
1400     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1401     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1402     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1403   } else {
1404     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1405                  "# Inline cache check");
1406   }
1407   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1408   st->print_cr("\tnop\t# nops to align entry point");
1409 }
1410 #endif
1411 
1412 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1413 {
1414   MacroAssembler masm(&cbuf);
1415   uint insts_size = cbuf.insts_size();
1416   if (UseCompressedClassPointers) {
1417     masm.load_klass(rscratch1, j_rarg0);
1418     masm.cmpptr(rax, rscratch1);
1419   } else {
1420     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1421   }
1422 
1423   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1424 
1425   /* WARNING these NOPs are critical so that verified entry point is properly
1426      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1427   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1428   if (OptoBreakpoint) {
1429     // Leave space for int3
1430     nops_cnt -= 1;
1431   }
1432   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1433   if (nops_cnt > 0)
1434     masm.nop(nops_cnt);
1435 }
1436 
1437 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1438 {
1439   return MachNode::size(ra_); // too many variables; just compute it
1440                               // the hard way
1441 }
1442 
1443 
1444 //=============================================================================
1445 uint size_exception_handler()
1446 {
1447   // NativeCall instruction size is the same as NativeJump.
1448   // Note that this value is also credited (in output.cpp) to
1449   // the size of the code section.
1450   return NativeJump::instruction_size;
1451 }
1452 
1453 // Emit exception handler code.
1454 int emit_exception_handler(CodeBuffer& cbuf)
1455 {
1456 
1457   // Note that the code buffer's insts_mark is always relative to insts.
1458   // That's why we must use the macroassembler to generate a handler.
1459   MacroAssembler _masm(&cbuf);
1460   address base =
1461   __ start_a_stub(size_exception_handler());
1462   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1463   int offset = __ offset();
1464   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1465   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1466   __ end_a_stub();
1467   return offset;
1468 }
1469 
1470 uint size_deopt_handler()
1471 {
1472   // three 5 byte instructions
1473   return 15;
1474 }
1475 
1476 // Emit deopt handler code.
1477 int emit_deopt_handler(CodeBuffer& cbuf)
1478 {
1479 
1480   // Note that the code buffer's insts_mark is always relative to insts.
1481   // That's why we must use the macroassembler to generate a handler.
1482   MacroAssembler _masm(&cbuf);
1483   address base =
1484   __ start_a_stub(size_deopt_handler());
1485   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1486   int offset = __ offset();
1487   address the_pc = (address) __ pc();
1488   Label next;
1489   // push a "the_pc" on the stack without destroying any registers
1490   // as they all may be live.
1491 
1492   // push address of "next"
1493   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1494   __ bind(next);
1495   // adjust it so it matches "the_pc"
1496   __ subptr(Address(rsp, 0), __ offset() - offset);
1497   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1498   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1499   __ end_a_stub();
1500   return offset;
1501 }
1502 
1503 int Matcher::regnum_to_fpu_offset(int regnum)
1504 {
1505   return regnum - 32; // The FP registers are in the second chunk
1506 }
1507 
1508 // This is UltraSparc specific, true just means we have fast l2f conversion
1509 const bool Matcher::convL2FSupported(void) {
1510   return true;
1511 }
1512 
1513 // Is this branch offset short enough that a short branch can be used?
1514 //
1515 // NOTE: If the platform does not provide any short branch variants, then
1516 //       this method should return false for offset 0.
1517 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1518   // The passed offset is relative to address of the branch.
1519   // On 86 a branch displacement is calculated relative to address
1520   // of a next instruction.
1521   offset -= br_size;
1522 
1523   // the short version of jmpConUCF2 contains multiple branches,
1524   // making the reach slightly less
1525   if (rule == jmpConUCF2_rule)
1526     return (-126 <= offset && offset <= 125);
1527   return (-128 <= offset && offset <= 127);
1528 }
1529 
1530 const bool Matcher::isSimpleConstant64(jlong value) {
1531   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1532   //return value == (int) value;  // Cf. storeImmL and immL32.
1533 
1534   // Probably always true, even if a temp register is required.
1535   return true;
1536 }
1537 
1538 // The ecx parameter to rep stosq for the ClearArray node is in words.
1539 const bool Matcher::init_array_count_is_in_bytes = false;
1540 
1541 // Threshold size for cleararray.
1542 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1543 
1544 // No additional cost for CMOVL.
1545 const int Matcher::long_cmove_cost() { return 0; }
1546 
1547 // No CMOVF/CMOVD with SSE2
1548 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1549 
1550 // Does the CPU require late expand (see block.cpp for description of late expand)?
1551 const bool Matcher::require_postalloc_expand = false;
1552 
1553 // Should the Matcher clone shifts on addressing modes, expecting them
1554 // to be subsumed into complex addressing expressions or compute them
1555 // into registers?  True for Intel but false for most RISCs
1556 const bool Matcher::clone_shift_expressions = true;
1557 
1558 // Do we need to mask the count passed to shift instructions or does
1559 // the cpu only look at the lower 5/6 bits anyway?
1560 const bool Matcher::need_masked_shift_count = false;
1561 
1562 bool Matcher::narrow_oop_use_complex_address() {
1563   assert(UseCompressedOops, "only for compressed oops code");
1564   return (LogMinObjAlignmentInBytes <= 3);
1565 }
1566 
1567 bool Matcher::narrow_klass_use_complex_address() {
1568   assert(UseCompressedClassPointers, "only for compressed klass code");
1569   return (LogKlassAlignmentInBytes <= 3);
1570 }
1571 
1572 // Is it better to copy float constants, or load them directly from
1573 // memory?  Intel can load a float constant from a direct address,
1574 // requiring no extra registers.  Most RISCs will have to materialize
1575 // an address into a register first, so they would do better to copy
1576 // the constant from stack.
1577 const bool Matcher::rematerialize_float_constants = true; // XXX
1578 
1579 // If CPU can load and store mis-aligned doubles directly then no
1580 // fixup is needed.  Else we split the double into 2 integer pieces
1581 // and move it piece-by-piece.  Only happens when passing doubles into
1582 // C code as the Java calling convention forces doubles to be aligned.
1583 const bool Matcher::misaligned_doubles_ok = true;
1584 
1585 // No-op on amd64
1586 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1587 
1588 // Advertise here if the CPU requires explicit rounding operations to
1589 // implement the UseStrictFP mode.
1590 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1591 
1592 // Are floats conerted to double when stored to stack during deoptimization?
1593 // On x64 it is stored without convertion so we can use normal access.
1594 bool Matcher::float_in_double() { return false; }
1595 
1596 // Do ints take an entire long register or just half?
1597 const bool Matcher::int_in_long = true;
1598 
1599 // Reports whether 'reg' is ever used to pass an argument.  Consulted on
1600 // startup to build the trampoline stubs in generateOptoStub: registers
1601 // not listed here will be killed by the VM call in the trampoline, so
1602 // arguments in them will not be available to the callee.
1603 bool Matcher::can_be_java_arg(int reg)
1604 {
1605   // General-purpose registers, each paired with its _H counterpart.
1606   if (reg ==  RDI_num || reg == RDI_H_num) return true;
1607   if (reg ==  RSI_num || reg == RSI_H_num) return true;
1608   if (reg ==  RDX_num || reg == RDX_H_num) return true;
1609   if (reg ==  RCX_num || reg == RCX_H_num) return true;
1610   if (reg ==   R8_num || reg ==  R8_H_num) return true;
1611   if (reg ==   R9_num || reg ==  R9_H_num) return true;
1612   if (reg ==  R12_num || reg == R12_H_num) return true;
1613   if (reg == XMM0_num || reg == XMM0b_num) return true;  // XMM registers, each paired with its 'b' counterpart
1614   if (reg == XMM1_num || reg == XMM1b_num) return true;
1615   if (reg == XMM2_num || reg == XMM2b_num) return true;
1616   if (reg == XMM3_num || reg == XMM3b_num) return true;
1617   if (reg == XMM4_num || reg == XMM4b_num) return true;
1618   if (reg == XMM5_num || reg == XMM5b_num) return true;
1619   if (reg == XMM6_num || reg == XMM6b_num) return true;
1620   if (reg == XMM7_num || reg == XMM7b_num) return true;
1621   return false;  // every other register
1622 }
1623 
1624 // A register is a spillable argument exactly when can_be_java_arg() accepts it.
1625 bool Matcher::is_spillable_arg(int reg) {
1626   return can_be_java_arg(reg);
1627 }
1628 
1629 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1630   // In 64-bit mode, code which uses a multiply when the
1631   // divisor is constant is faster than the hardware
1632   // DIV instruction (it uses MulHiL), so always answer false.
1633   return false;
1634 }
1635 
1636 // Register for DIVI projection of divmodI: returns the INT_RAX_REG register mask.
1637 RegMask Matcher::divI_proj_mask() {
1638   return INT_RAX_REG_mask();
1639 }
1640 
1641 // Register for MODI projection of divmodI: returns the INT_RDX_REG register mask.
1642 RegMask Matcher::modI_proj_mask() {
1643   return INT_RDX_REG_mask();
1644 }
1645 
1646 // Register for DIVL projection of divmodL: returns the LONG_RAX_REG register mask.
1647 RegMask Matcher::divL_proj_mask() {
1648   return LONG_RAX_REG_mask();
1649 }
1650 
1651 // Register for MODL projection of divmodL: returns the LONG_RDX_REG register mask.
1652 RegMask Matcher::modL_proj_mask() {
1653   return LONG_RDX_REG_mask();
1654 }
1655 
1656 const RegMask Matcher::method_handle_invoke_SP_save_mask() { // register mask used to save SP around method-handle invokes (going by the name)
1657   return PTR_RBP_REG_mask(); // the RBP pointer-register mask
1658 }
1659 
1660 %}
1661 
1662 //----------ENCODING BLOCK-----------------------------------------------------
1663 // This block specifies the encoding classes used by the compiler to
1664 // output byte streams.  Encoding classes are parameterized macros
1665 // used by Machine Instruction Nodes in order to generate the bit
1666 // encoding of the instruction.  Operands specify their base encoding
1667 // interface with the interface keyword.  Four interfaces are
1668 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
1669 // COND_INTER.  REG_INTER causes an operand to generate a function
1670 // which returns its register number when queried.  CONST_INTER causes
1671 // an operand to generate a function which returns the value of the
1672 // constant when queried.  MEMORY_INTER causes an operand to generate
1673 // four functions which return the Base Register, the Index Register,
1674 // the Scale Value, and the Offset Value of the operand when queried.
1675 // COND_INTER causes an operand to generate six functions which return
1676 // the encoding code (i.e., the encoding bits for the instruction)
1677 // associated with each basic boolean condition for a conditional
1678 // instruction.
1679 //
1680 // Instructions specify two basic values for encoding.  Again, a
1681 // function is available to check if the constant displacement is an
1682 // oop. They use the ins_encode keyword to specify their encoding
1683 // classes (which must be a sequence of enc_class names, and their
1684 // parameters, specified in the encoding block), and they use the
1685 // opcode keyword to specify, in order, their primary, secondary, and
1686 // tertiary opcode.  Only the opcode sections which a particular
1687 // instruction needs for encoding need to be specified.
1688 encode %{
1689   // Build emit functions for each basic byte or larger field in the
1690   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1691   // from C++ code in the enc_class source block.  Emit functions will
1692   // live in the main source block for now.  In future, we can
1693   // generalize this by adding a syntax that specifies the sizes of
1694   // fields in an order, so that the adlc can build the emit functions
1695   // automagically
1696 
1697   // Emit primary opcode: the first value given by the instruction's opcode keyword
1698   enc_class OpcP
1699   %{
1700     emit_opcode(cbuf, $primary);
1701   %}
1702 
1703   // Emit secondary opcode: the second value given by the instruction's opcode keyword
1704   enc_class OpcS
1705   %{
1706     emit_opcode(cbuf, $secondary);
1707   %}
1708 
1709   // Emit tertiary opcode: the third value given by the instruction's opcode keyword
1710   enc_class OpcT
1711   %{
1712     emit_opcode(cbuf, $tertiary);
1713   %}
1714 
1715   // Emit opcode directly: the value comes from the constant operand d8, not from the opcode keyword
1716   enc_class Opcode(immI d8)
1717   %{
1718     emit_opcode(cbuf, $d8$$constant);
1719   %}
1720 
1721   // Emit size prefix
1722   enc_class SizePrefix
1723   %{
1724     emit_opcode(cbuf, 0x66); // 0x66 is the x86 operand-size override prefix byte
1725   %}
1726 
1727   enc_class reg(rRegI reg) %{
1728     const int rm_bits = $reg$$reg & 7; // low three bits of the register encoding
1729     emit_rm(cbuf, 0x3, 0, rm_bits);    // R/M byte: first field 0x3, middle field 0
1730   %}
1731 
1732   enc_class reg_reg(rRegI dst, rRegI src) %{
1733     const int dst_bits = $dst$$reg & 7, src_bits = $src$$reg & 7; // low three bits of each encoding
1734     emit_rm(cbuf, 0x3, dst_bits, src_bits); // R/M byte: first field 0x3, then dst, then src
1735   %}
1736 
1737   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src) %{
1738     const int dst_bits = $dst$$reg & 7, src_bits = $src$$reg & 7; // low three bits of each encoding
1739     emit_opcode(cbuf, $opcode$$constant);   // opcode byte comes from the constant operand
1740     emit_rm(cbuf, 0x3, dst_bits, src_bits); // R/M byte: first field 0x3, then dst, then src
1741   %}
1742 
1743   enc_class cdql_enc(no_rax_rdx_RegI div)
1744   %{
1745     // Full implementation of Java idiv and irem; checks for
1746     // special case as described in JVM spec., p.243 & p.271.
1747     //
1748     //         normal case                           special case
1749     //
1750     // input : rax: dividend                         min_int
1751     //         reg: divisor                          -1
1752     //
1753     // output: rax: quotient  (= rax idiv reg)       min_int
1754     //         rdx: remainder (= rax irem reg)       0
1755     //
1756     //  Code sequence:
1757     //
1758     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1759     //    5:   75 07/08                jne    e <normal>
1760     //    7:   33 d2                   xor    %edx,%edx
1761     //  [div >= 8 -> offset + 1]
1762     //  [REX_B]
1763     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1764     //    c:   74 03/04                je     11 <done>
1765     // 000000000000000e <normal>:
1766     //    e:   99                      cltd
1767     //  [div >= 8 -> offset + 1]
1768     //  [REX_B]
1769     //    f:   f7 f9                   idiv   $div
1770     // 0000000000000011 <done>:
1771 
1772     // cmp    $0x80000000,%eax
1773     emit_opcode(cbuf, 0x3d);
1774     emit_d8(cbuf, 0x00);
1775     emit_d8(cbuf, 0x00);
1776     emit_d8(cbuf, 0x00);
1777     emit_d8(cbuf, 0x80);
1778 
1779     // jne    e <normal>
1780     emit_opcode(cbuf, 0x75);
1781     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08); // skips xor (2) + cmp (3, or 4 with REX_B) + je (2)
1782 
1783     // xor    %edx,%edx
1784     emit_opcode(cbuf, 0x33);
1785     emit_d8(cbuf, 0xD2);
1786 
1787     // cmp    $0xffffffffffffffff,$div  (REX_B first when div >= 8)
1788     if ($div$$reg >= 8) {
1789       emit_opcode(cbuf, Assembler::REX_B);
1790     }
1791     emit_opcode(cbuf, 0x83);
1792     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1793     emit_d8(cbuf, 0xFF);
1794 
1795     // je     11 <done>
1796     emit_opcode(cbuf, 0x74);
1797     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04); // skips cltd (1) + the idiv that follows (2, or 3 with REX_B)
1798 
1799     // <normal>
1800     // cltd
1801     emit_opcode(cbuf, 0x99);
1802 
1803     // idivl (note: must be emitted by the user of this rule)
1804     // <done>
1805   %}
1806 
1807   enc_class cdqq_enc(no_rax_rdx_RegL div)
1808   %{
1809     // Full implementation of Java ldiv and lrem; checks for
1810     // special case as described in JVM spec., p.243 & p.271.
1811     //
1812     //         normal case                           special case
1813     //
1814     // input : rax: dividend                         min_long
1815     //         reg: divisor                          -1
1816     //
1817     // output: rax: quotient  (= rax idiv reg)       min_long
1818     //         rdx: remainder (= rax irem reg)       0
1819     //
1820     //  Code sequence:
1821     //
1822     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1823     //    7:   00 00 80
1824     //    a:   48 39 d0                cmp    %rdx,%rax
1825     //    d:   75 08                   jne    17 <normal>
1826     //    f:   33 d2                   xor    %edx,%edx
1827     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1828     //   15:   74 05                   je     1c <done>
1829     // 0000000000000017 <normal>:
1830     //   17:   48 99                   cqto
1831     //   19:   48 f7 f9                idiv   $div
1832     // 000000000000001c <done>:
1833 
1834     // mov    $0x8000000000000000,%rdx
1835     emit_opcode(cbuf, Assembler::REX_W);
1836     emit_opcode(cbuf, 0xBA);
1837     emit_d8(cbuf, 0x00);
1838     emit_d8(cbuf, 0x00);
1839     emit_d8(cbuf, 0x00);
1840     emit_d8(cbuf, 0x00);
1841     emit_d8(cbuf, 0x00);
1842     emit_d8(cbuf, 0x00);
1843     emit_d8(cbuf, 0x00);
1844     emit_d8(cbuf, 0x80);
1845 
1846     // cmp    %rdx,%rax
1847     emit_opcode(cbuf, Assembler::REX_W);
1848     emit_opcode(cbuf, 0x39);
1849     emit_d8(cbuf, 0xD0);
1850 
1851     // jne    17 <normal>
1852     emit_opcode(cbuf, 0x75);
1853     emit_d8(cbuf, 0x08); // skips xor (2) + cmp (4) + je (2)
1854 
1855     // xor    %edx,%edx
1856     emit_opcode(cbuf, 0x33);
1857     emit_d8(cbuf, 0xD2);
1858 
1859     // cmp    $0xffffffffffffffff,$div  (a REX prefix is always present; REX_WB when div >= 8)
1860     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1861     emit_opcode(cbuf, 0x83);
1862     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1863     emit_d8(cbuf, 0xFF);
1864 
1865     // je     1c <done>
1866     emit_opcode(cbuf, 0x74);
1867     emit_d8(cbuf, 0x05); // skips cqto (2) + the idiv that follows (3)
1868 
1869     // <normal>
1870     // cqto
1871     emit_opcode(cbuf, Assembler::REX_W);
1872     emit_opcode(cbuf, 0x99);
1873 
1874     // idivq (note: must be emitted by the user of this rule)
1875     // <done>
1876   %}
1877 
1878   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension.
1879   // When the immediate fits in a signed byte the primary opcode is emitted
1880   // with bit 0x02 set (the sign-extend bit); otherwise it is emitted as is,
1881   // for use with a 32-bit immediate.
1882   enc_class OpcSE(immI imm) %{
1883     const bool fits_in_byte = (-0x80 <= $imm$$constant && $imm$$constant < 0x80);
1884     if (fits_in_byte) {
1885       emit_opcode(cbuf, $primary | 0x02);
1886     } else {
1887       emit_opcode(cbuf, $primary);
1888     }
1889   %}
1890 
1891   enc_class OpcSErm(rRegI dst, immI imm) %{
1892     // OpcSEr/m: optional REX_B prefix, sign-extend-aware primary opcode,
1893     // then an R/M byte carrying the secondary opcode and the destination.
1894     const int  dst_encoding = $dst$$reg;
1895     const bool high_reg     = (dst_encoding >= 8);
1896     const bool fits_in_byte = (-0x80 <= $imm$$constant && $imm$$constant < 0x80);
1897 
1898     if (high_reg) {
1899       emit_opcode(cbuf, Assembler::REX_B);
1900     }
1901     // An 8-bit immediate selects the sign-extend form (opcode bit 0x02 set);
1902     // anything wider uses the plain primary opcode with a 32-bit immediate.
1903     if (fits_in_byte) {
1904       emit_opcode(cbuf, $primary | 0x02);
1905     } else {
1906       emit_opcode(cbuf, $primary);
1907     }
1908     emit_rm(cbuf, 0x3, $secondary, high_reg ? dst_encoding - 8 : dst_encoding);
1909   %}
1910 
1911   enc_class OpcSErm_wide(rRegL dst, immI imm) %{
1912     // OpcSEr/m for a 64-bit destination: a REX_W prefix is always emitted
1913     // (REX_WB when the destination encoding is 8 or above).
1914     const int  dst_encoding = $dst$$reg;
1915     const bool high_reg     = (dst_encoding >= 8);
1916     const bool fits_in_byte = (-0x80 <= $imm$$constant && $imm$$constant < 0x80);
1917 
1918     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
1919 
1920     // An 8-bit immediate selects the sign-extend form (opcode bit 0x02 set);
1921     // anything wider uses the plain primary opcode with a 32-bit immediate.
1922     if (fits_in_byte) {
1923       emit_opcode(cbuf, $primary | 0x02);
1924     } else {
1925       emit_opcode(cbuf, $primary);
1926     }
1927 
1928     // Emit r/m byte with secondary opcode, after primary opcode; only the
1929     // low three bits of the destination encoding go into it.
1930     emit_rm(cbuf, 0x3, $secondary, high_reg ? dst_encoding - 8 : dst_encoding);
1931   %}
1932 
1933   enc_class Con8or32(immI imm) %{
1934     // Emit the immediate itself: a single byte when it fits in a signed
1935     // 8-bit value, otherwise a full 32-bit immediate.
1936     const bool fits_in_byte = (-0x80 <= $imm$$constant && $imm$$constant < 0x80);
1937     if (!fits_in_byte) {
1938       $$$emit32$imm$$constant;
1939     } else {
1940       $$$emit8$imm$$constant;
1941     }
1942   %}
1943 
1944   enc_class opc2_reg(rRegI dst)
1945   %{
1946     // BSWAP: emit_cc is handed the secondary opcode together with the register encoding
1947     emit_cc(cbuf, $secondary, $dst$$reg);
1948   %}
1949 
1950   enc_class opc3_reg(rRegI dst)
1951   %{
1952     // BSWAP: emit_cc is handed the tertiary opcode together with the register encoding
1953     emit_cc(cbuf, $tertiary, $dst$$reg);
1954   %}
1955 
1956   enc_class reg_opc(rRegI div)
1957   %{
1958     // INC, DEC, IDIV, IMOD, JMP indirect, ...: R/M byte with the secondary opcode and the register's low three bits
1959     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
1960   %}
1961 
1962   enc_class enc_cmov(cmpOp cop)
1963   %{
1964     // CMOV: the primary opcode byte first, then emit_cc with the secondary opcode and the condition code
1965     $$$emit8$primary;
1966     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1967   %}
1968 
1969   // Slow-path partial subtype check on fixed registers: RSI = sub class,
1970   // RAX = super class, RCX = killed, RDI = result.
1971   enc_class enc_PartialSubtypeCheck() %{
1972     Register Rrdi = as_Register(RDI_enc); // result register
1973     Register Rrax = as_Register(RAX_enc); // super class
1974     Register Rrcx = as_Register(RCX_enc); // killed
1975     Register Rrsi = as_Register(RSI_enc); // sub class
1976     Label miss;
1977     const bool set_cond_codes = true;     // now passed to the helper below instead of a bare literal
1978 
1979     MacroAssembler _masm(&cbuf);
1980     // 'miss' is the failure label; NULL is passed in the slot before it (presumably the success label).
1981     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi, NULL, &miss, set_cond_codes);
1982     if ($primary) {
1983       __ xorptr(Rrdi, Rrdi);              // only when the primary opcode is non-zero: zero the result register
1984     }
1985     __ bind(miss);
1986   %}
1987 
1988   enc_class clear_avx %{
1989     debug_only(int off0 = cbuf.insts_size());  // debug builds: code size before
1990     const bool uses_wide_vectors = (ra_->C->max_vector_size() > 16);
1991     if (uses_wide_vectors) {
1992       // Wide vectors in use: clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty during call.
1993       MacroAssembler _masm(&cbuf);
1994       __ vzeroupper();
1995     }
1996     debug_only(int off1 = cbuf.insts_size());  // debug builds: code size after
1997     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
1998   %}
1999 
2000   enc_class Java_To_Runtime(method meth) %{
2001     // No relocation needed: the target is loaded as a 64-bit immediate and called through r10
2002     MacroAssembler _masm(&cbuf);
2003     __ mov64(r10, (int64_t) $meth$$method);
2004     __ call(r10);
2005   %}
2006 
2007   enc_class Java_To_Interpreter(method meth) %{
2008     // CALL Java_To_Interpreter
2009     // Mark the instruction's starting address for relocation info.
2010     cbuf.set_insts_mark();
2011     $$$emit8$primary;
2012 
2013     // CALL directly to the runtime: the 32-bit displacement is measured
2014     // from the end of the 4-byte field that is emitted next.
2015     const intptr_t field_addr = (intptr_t) cbuf.insts_end();
2016     const int      rel32      = (int) ($meth$$method - field_addr - 4);
2017     emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
2018   %}
2019 
2020   enc_class Java_Static_Call(method meth)
2021   %{
2022     // JAVA STATIC CALL
2023     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2024     // determine who we intended to call.
2025     cbuf.set_insts_mark();
2026     $$$emit8$primary;
2027 
2028     // 32-bit displacement, measured from the end of the 4-byte field
2029     // that is emitted next.
2030     const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4);
2031 
2032     if (_method) {
2033       // A method is attached: choose the relocation by call flavor ...
2034       if (_optimized_virtual) {
2035         emit_d32_reloc(cbuf, rel32, opt_virtual_call_Relocation::spec(),
2036                        RELOC_DISP32);
2037       } else {
2038         emit_d32_reloc(cbuf, rel32, static_call_Relocation::spec(),
2039                        RELOC_DISP32);
2040       }
2041       // ... then emit stub for static call.
2042       CompiledStaticCall::emit_to_interp_stub(cbuf);
2043     } else {
2044       // No method attached: relocate as a runtime call, with no stub.
2045       emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(),
2046                      RELOC_DISP32);
2047     }
2048   %}
2049 
2050   enc_class Java_Dynamic_Call(method meth) %{
2051     MacroAssembler _masm(&cbuf);
2052     __ ic_call((address)$meth$$method); // the whole call sequence is delegated to the assembler's ic_call
2053   %}
2054 
2055   enc_class Java_Compiled_Call(method meth) %{
2056     // JAVA COMPILED CALL: callq *disp(%rax), where disp is the byte offset
2057     // obtained from Method::from_compiled_offset().
2058     int disp = in_bytes(Method:: from_compiled_offset());
2059 
2060     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2061     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2062     cbuf.set_insts_mark();
2063     $$$emit8$primary;
2064     // The one-byte displacement form only holds a signed 8-bit value, so
2065     // both bounds are checked; anything else uses the 32-bit form.
2066     if (-0x80 <= disp && disp < 0x80) {
2067       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte (8-bit displacement follows)
2068       emit_d8(cbuf, disp); // Displacement
2069     } else {
2070       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte (32-bit displacement follows)
2071       emit_d32(cbuf, disp); // Displacement
2072     }
2073   %}
2074 
2075   enc_class reg_opc_imm(rRegI dst, immI8 shift) %{
2076     // SAL, SAR, SHR: optional REX_B, primary opcode, R/M byte holding the
2077     // secondary opcode and the destination register, then the shift count.
2078     const int  dst_encoding = $dst$$reg;
2079     const bool high_reg     = (dst_encoding >= 8);
2080     if (high_reg) {
2081       emit_opcode(cbuf, Assembler::REX_B);
2082     }
2083     $$$emit8$primary;
2084     emit_rm(cbuf, 0x3, $secondary, high_reg ? dst_encoding - 8 : dst_encoding);
2085     $$$emit8$shift$$constant;
2086   %}
2087 
2088   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift) %{
2089     // SAL, SAR, SHR on a 64-bit register: REX_W (REX_WB for encodings 8
2090     // and above), primary opcode, R/M byte holding the secondary opcode
2091     // and the destination register, then the shift count.
2092     const int  dst_encoding = $dst$$reg;
2093     const bool high_reg     = (dst_encoding >= 8);
2094     const int  low_bits     = high_reg ? dst_encoding - 8 : dst_encoding;
2095 
2096     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2097 
2098     $$$emit8$primary;
2099     emit_rm(cbuf, 0x3, $secondary, low_bits);
2100     $$$emit8$shift$$constant;
2101   %}
2102 
2103   enc_class load_immI(rRegI dst, immI src) %{
2104     // Load a 32-bit immediate: opcode 0xB8 OR-ed with the low three bits of
2105     // the register encoding, preceded by REX_B for encodings 8 and above.
2106     const int dst_encoding = $dst$$reg;
2107     if (dst_encoding >= 8) {
2108       emit_opcode(cbuf, Assembler::REX_B);
2109     }
2110     emit_opcode(cbuf, 0xB8 | (dst_encoding >= 8 ? dst_encoding - 8 : dst_encoding));
2111     $$$emit32$src$$constant;
2112   %}
2113 
2114   enc_class load_immL(rRegL dst, immL src) %{
2115     // Load a 64-bit immediate: REX_W (REX_WB for encodings 8 and above),
2116     // opcode 0xB8 OR-ed with the low register bits, then the constant.
2117     const int  dst_encoding = $dst$$reg;
2118     const bool high_reg     = (dst_encoding >= 8);
2119     const int  low_bits     = high_reg ? dst_encoding - 8 : dst_encoding;
2120 
2121     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2122     emit_opcode(cbuf, 0xB8 | low_bits);
2123 
2124     emit_d64(cbuf, $src$$constant);
2125   %}
2126 
2127   enc_class load_immUL32(rRegL dst, immUL32 src) %{
2128     // same as load_immI (no REX_W, 32-bit immediate), but this time we
2129     // care about zeroes in the high word
2130     const int  dst_encoding = $dst$$reg;
2131     const bool high_reg     = (dst_encoding >= 8);
2132     if (high_reg) {
2133       emit_opcode(cbuf, Assembler::REX_B);
2134     }
2135     emit_opcode(cbuf, 0xB8 | (high_reg ? dst_encoding - 8 : dst_encoding));
2136     $$$emit32$src$$constant;
2137   %}
2138 
2139   enc_class load_immL32(rRegL dst, immL32 src) %{
2140     // Load a 32-bit immediate into a 64-bit register: REX_W or REX_WB,
2141     // opcode 0xC7, an R/M byte for the destination, then the constant.
2142     const int  dst_encoding = $dst$$reg;
2143     const bool high_reg     = (dst_encoding >= 8);
2144     const int  low_bits     = high_reg ? dst_encoding - 8 : dst_encoding;
2145 
2146     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2147     emit_opcode(cbuf, 0xC7);
2148     emit_rm(cbuf, 0x03, 0x00, low_bits);
2149 
2150     $$$emit32$src$$constant;
2151   %}
2152 
2153   enc_class load_immP31(rRegP dst, immP32 src) %{
2154     // same as load_immI (no REX_W, 32-bit immediate), but this time we
2155     // care about zeroes in the high word
2156     const int  dst_encoding = $dst$$reg;
2157     const bool high_reg     = (dst_encoding >= 8);
2158     if (high_reg) {
2159       emit_opcode(cbuf, Assembler::REX_B);
2160     }
2161     emit_opcode(cbuf, 0xB8 | (high_reg ? dst_encoding - 8 : dst_encoding));
2162     $$$emit32$src$$constant;
2163   %}
2164 
2165   enc_class load_immP(rRegP dst, immP src) %{
2166     // Load a 64-bit pointer constant; the immediate is emitted with
2167     // relocation info whenever the operand reports a relocation type.
2168     const int  dst_encoding = $dst$$reg;
2169     const bool high_reg     = (dst_encoding >= 8);
2170 
2171     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2172     emit_opcode(cbuf, 0xB8 | (high_reg ? dst_encoding - 8 : dst_encoding));
2173 
2174     // This next line should be generated from ADLC
2175     const bool plain_constant = ($src->constant_reloc() == relocInfo::none);
2176     if (plain_constant) {
2177       emit_d64(cbuf, $src$$constant);
2178     } else {
2179       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2180     }
2181   %}
2182 
2183   enc_class Con32(immI src)
2184   %{
2185     // Output immediate: the constant operand, through the ADLC 32-bit emit form
2186     $$$emit32$src$$constant;
2187   %}
2188 
2189   enc_class Con32F_as_bits(immF src) %{
2190     // Output Float immediate bits: the float constant goes through
2191     // jint_cast and the resulting 32-bit integer is emitted.
2192     const jfloat float_value = $src$$constant;
2193     const jint   bit_pattern = jint_cast(float_value);
2194     emit_d32(cbuf, bit_pattern);
2195   %}
2196 
2197   enc_class Con16(immI src)
2198   %{
2199     // Output immediate: the constant operand, through the ADLC 16-bit emit form
2200     $$$emit16$src$$constant;
2201   %}
2202 
2203   // How is this different from Con32??? XXX  (visible difference: this one calls emit_d32 directly instead of using the ADLC emit form)
2204   enc_class Con_d32(immI src)
2205   %{
2206     emit_d32(cbuf,$src$$constant);
2207   %}
2208 
2209   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2210     // Output immediate memory reference: an R/M byte (0x00, t1's encoding, 0x05) followed by a zero 32-bit value
2211     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2212     emit_d32(cbuf, 0x00);
2213   %}
2214 
2215   enc_class lock_prefix()
2216   %{
2217     if (os::is_MP()) { // the prefix byte is emitted only when os::is_MP() is true
2218       emit_opcode(cbuf, 0xF0); // lock
2219     }
2220   %}
2221 
2222   enc_class REX_mem(memory mem) %{
2223     // REX prefix for a memory operand: B is selected by a base encoding of
2224     // 8 or above, X by an index encoding of 8 or above.  Nothing is
2225     // emitted when both encodings are below 8.
2226     const bool ext_base  = ($mem$$base  >= 8);
2227     const bool ext_index = ($mem$$index >= 8);
2228     if (ext_base && ext_index) {
2229       emit_opcode(cbuf, Assembler::REX_XB);
2230     } else if (ext_base) {
2231       emit_opcode(cbuf, Assembler::REX_B);
2232     } else if (ext_index) {
2233       emit_opcode(cbuf, Assembler::REX_X);
2234     }
2235   %}
2236 
2237   enc_class REX_mem_wide(memory mem) %{
2238     // Wide variant for a memory operand: a REX_W-family prefix is always
2239     // emitted; B and X are added for base / index encodings of 8 and above.
2240     const bool ext_base  = ($mem$$base  >= 8);
2241     const bool ext_index = ($mem$$index >= 8);
2242 
2243     if (ext_base && ext_index) {
2244       emit_opcode(cbuf, Assembler::REX_WXB);
2245     } else if (ext_base) {
2246       emit_opcode(cbuf, Assembler::REX_WB);
2247     } else if (ext_index) {
2248       emit_opcode(cbuf, Assembler::REX_WX);
2249     } else {
2250       emit_opcode(cbuf, Assembler::REX_W);
2251     }
2252   %}
2253 
2254   // for byte regs: encodings 0-3 get no prefix, 4-7 get a bare REX,
2255   // and 8 and above get REX_B.
2256   enc_class REX_breg(rRegI reg) %{
2257     const int enc = $reg$$reg;
2258     if (enc >= 8)      { emit_opcode(cbuf, Assembler::REX_B); }
2259     else if (enc >= 4) { emit_opcode(cbuf, Assembler::REX);   }
2260   %}
2261 
2262   // for byte regs: R is selected by dst, B by src (encodings of 8 and above);
2263   // with neither set, a src encoding of 4-7 still gets a bare REX.
2264   enc_class REX_reg_breg(rRegI dst, rRegI src) %{
2265     const bool ext_dst = ($dst$$reg >= 8);
2266     const bool ext_src = ($src$$reg >= 8);
2267     if (ext_dst && ext_src) {
2268       emit_opcode(cbuf, Assembler::REX_RB);
2269     } else if (ext_dst) {
2270       emit_opcode(cbuf, Assembler::REX_R);
2271     } else if (ext_src) {
2272       emit_opcode(cbuf, Assembler::REX_B);
2273     } else if ($src$$reg >= 4) {
2274       emit_opcode(cbuf, Assembler::REX);
2275     }
2276   %}
2277 
2278   // for byte regs: R, X and B are selected by register, index and base
2279   // encodings of 8 and above; with none of them set, a register encoding
2280   // of 4-7 still gets a bare REX prefix.
2281   enc_class REX_breg_mem(rRegI reg, memory mem) %{
2282     const bool ext_reg   = ($reg$$reg   >= 8);
2283     const bool ext_index = ($mem$$index >= 8);
2284     const bool ext_base  = ($mem$$base  >= 8);
2285     // Pack the three flags as R:X:B so every combination gets its own case.
2286     const int selector = (ext_reg ? 4 : 0) | (ext_index ? 2 : 0) | (ext_base ? 1 : 0);
2287 
2288     switch (selector) {
2289       case 0:
2290         // Nothing extended: only encodings 4-7 get the bare prefix.
2291         if ($reg$$reg >= 4) {
2292           emit_opcode(cbuf, Assembler::REX);
2293         }
2294         break;
2295       case 1:
2296         emit_opcode(cbuf, Assembler::REX_B);   break;
2297       case 2:
2298         emit_opcode(cbuf, Assembler::REX_X);   break;
2299       case 3:
2300         emit_opcode(cbuf, Assembler::REX_XB);  break;
2301       case 4:
2302         emit_opcode(cbuf, Assembler::REX_R);   break;
2303       case 5:
2304         emit_opcode(cbuf, Assembler::REX_RB);  break;
2305       case 6:
2306         emit_opcode(cbuf, Assembler::REX_RX);  break;
2307       case 7:
2308         emit_opcode(cbuf, Assembler::REX_RXB); break;
2309     }
2310   %}
2311 
2312   enc_class REX_reg(rRegI reg) %{
2313     // A REX_B prefix is emitted only for register encodings of 8 and
2314     // above; lower encodings get no prefix here.
2315     const bool ext_reg = ($reg$$reg >= 8);
2316     if (ext_reg) { emit_opcode(cbuf, Assembler::REX_B); }
2317   %}
2318 
2319   enc_class REX_reg_wide(rRegI reg) %{
2320     // Wide variant: a prefix is always emitted -- REX_W for encodings
2321     // below 8, REX_WB for encodings of 8 and above.
2322     const bool ext_reg = ($reg$$reg >= 8);
2323 
2324     // Exactly one prefix byte on every path.
2325     emit_opcode(cbuf, ext_reg ? Assembler::REX_WB : Assembler::REX_W);
2326   %}
2327 
2328   enc_class REX_reg_reg(rRegI dst, rRegI src) %{
2329     // REX prefix for a register-register form: R is selected by dst, B by
2330     // src (encodings of 8 and above).  With neither set, nothing is emitted.
2331     const bool ext_dst = ($dst$$reg >= 8);
2332     const bool ext_src = ($src$$reg >= 8);
2333 
2334     if (ext_dst && ext_src) {
2335       emit_opcode(cbuf, Assembler::REX_RB);
2336     } else if (ext_dst) {
2337       emit_opcode(cbuf, Assembler::REX_R);
2338     } else if (ext_src) {
2339       emit_opcode(cbuf, Assembler::REX_B);
2340     }
2341   %}
2342 
2343   enc_class REX_reg_reg_wide(rRegI dst, rRegI src) %{
2344     // Wide register-register form: a REX_W-family prefix is always emitted;
2345     // R is added for dst and B for src when their encodings are 8 or above.
2346     const bool ext_dst = ($dst$$reg >= 8);
2347     const bool ext_src = ($src$$reg >= 8);
2348 
2349     if (ext_dst && ext_src) {
2350       emit_opcode(cbuf, Assembler::REX_WRB);
2351     } else if (ext_dst) {
2352       emit_opcode(cbuf, Assembler::REX_WR);
2353     } else if (ext_src) {
2354       emit_opcode(cbuf, Assembler::REX_WB);
2355     } else {
2356       emit_opcode(cbuf, Assembler::REX_W);
2357     }
2358   %}
2359 
2360   enc_class REX_reg_mem(rRegI reg, memory mem) %{
2361     // REX prefix for a register + memory-operand form.  Each part whose
2362     // encoding is 8 or above selects one letter of the prefix name: R for
2363     // the register, X for the index, B for the base.  With none selected,
2364     // no prefix is emitted.
2365     const bool ext_reg   = ($reg$$reg   >= 8);
2366     const bool ext_index = ($mem$$index >= 8);
2367     const bool ext_base  = ($mem$$base  >= 8);
2368     // Pack the three flags as R:X:B so every combination gets its own case.
2369     const int selector = (ext_reg ? 4 : 0) | (ext_index ? 2 : 0) | (ext_base ? 1 : 0);
2370 
2371     switch (selector) {
2372       case 1:
2373         emit_opcode(cbuf, Assembler::REX_B);   break;
2374       case 2:
2375         emit_opcode(cbuf, Assembler::REX_X);   break;
2376       case 3:
2377         emit_opcode(cbuf, Assembler::REX_XB);  break;
2378       case 4:
2379         emit_opcode(cbuf, Assembler::REX_R);   break;
2380       case 5:
2381         emit_opcode(cbuf, Assembler::REX_RB);  break;
2382       case 6:
2383         emit_opcode(cbuf, Assembler::REX_RX);  break;
2384       case 7:
2385         emit_opcode(cbuf, Assembler::REX_RXB); break;
2386       default:  // selector 0: nothing selected, no prefix
2387         break;
2388     }
2389   %}
2390 
2391   enc_class REX_reg_mem_wide(rRegL reg, memory mem) %{
2392     // Wide variant of the register + memory-operand REX prefix: a
2393     // REX_W-family prefix is emitted in every case.  R, X and B are added
2394     // for register, index and base encodings of 8 and above; with none of
2395     // them, the plain REX_W is used.
2396     const bool ext_reg   = ($reg$$reg   >= 8);
2397     const bool ext_index = ($mem$$index >= 8);
2398     const bool ext_base  = ($mem$$base  >= 8);
2399     // Pack the three flags as R:X:B so every combination gets its own case.
2400     const int selector = (ext_reg ? 4 : 0) | (ext_index ? 2 : 0) | (ext_base ? 1 : 0);
2401 
2402     switch (selector) {
2403       case 1:
2404         emit_opcode(cbuf, Assembler::REX_WB);   break;
2405       case 2:
2406         emit_opcode(cbuf, Assembler::REX_WX);   break;
2407       case 3:
2408         emit_opcode(cbuf, Assembler::REX_WXB);  break;
2409       case 4:
2410         emit_opcode(cbuf, Assembler::REX_WR);   break;
2411       case 5:
2412         emit_opcode(cbuf, Assembler::REX_WRB);  break;
2413       case 6:
2414         emit_opcode(cbuf, Assembler::REX_WRX);  break;
2415       case 7:
2416         emit_opcode(cbuf, Assembler::REX_WRXB); break;
2417       default:
2418         // selector 0: nothing selected, plain REX_W
2419         emit_opcode(cbuf, Assembler::REX_W);
2420         break;
2421     }
2422   %}
2423 
2424   enc_class reg_mem(rRegI ereg, memory mem) %{
2425     // Gather the register encoding and the memory operand's components,
2426     // then hand them to encode_RegMem.
2427     // High registers handle in encode_RegMem
2428     const int reg_enc      = $ereg$$reg;
2429     const int base_enc     = $mem$$base;
2430     const int index_enc    = $mem$$index;
2431     const int scale_val    = $mem$$scale;
2432     const int displacement = $mem$$disp;
2433     const relocInfo::relocType reloc_kind = $mem->disp_reloc();
2434     encode_RegMem(cbuf, reg_enc, base_enc, index_enc, scale_val, displacement, reloc_kind);
2435   %}
2436 
2437   enc_class RM_opc_mem(immI rm_opcode, memory mem) %{
2438     // Encode an opcode-extension + memory-operand pair: the constant
2439     // rm_opcode is passed to encode_RegMem as its first value after cbuf,
2440     // followed by the memory operand's components.
2441     // High registers handle in encode_RegMem
2442     const int opcode_ext   = $rm_opcode$$constant;
2443     const int base_enc     = $mem$$base;
2444     const int index_enc    = $mem$$index;
2445     const int scale_val    = $mem$$scale;
2446     const int displacement = $mem$$disp;
2447     // disp-as-oop when working with static globals
2448     const relocInfo::relocType reloc_kind = $mem->disp_reloc();
2449 
2450     encode_RegMem(cbuf, opcode_ext, base_enc, index_enc, scale_val,
2451                   displacement, reloc_kind);
2452   %}
2453 
2454   enc_class reg_lea(rRegI dst, rRegI src0, immI src1) %{
2455     // Address made of base register src0 plus constant displacement src1,
2456     // using the "no index" (0x04) and "no scale" (0x00) values.
2457     const int dst_enc      = $dst$$reg;
2458     const int base_enc     = $src0$$reg;      // 0xFFFFFFFF indicates no base
2459     const int no_index     = 0x04;            // 0x04 indicates no index
2460     const int no_scale     = 0x00;            // 0x00 indicates no scale
2461     const int displacement = $src1$$constant; // 0x00 indicates no displacement
2462     encode_RegMem(cbuf, dst_enc, base_enc, no_index, no_scale, displacement,
2463                   relocInfo::none);
2464   %}
2465 
2466   enc_class neg_reg(rRegI dst) %{
2467     // NEG $dst: optional REX_B, opcode 0xF7, R/M byte with middle field 0x03
2468     const int  dst_encoding = $dst$$reg;
2469     const bool high_reg     = (dst_encoding >= 8);
2470     if (high_reg) {
2471       emit_opcode(cbuf, Assembler::REX_B);
2472     }
2473 
2474     emit_opcode(cbuf, 0xF7);
2475     emit_rm(cbuf, 0x3, 0x03, high_reg ? dst_encoding - 8 : dst_encoding);
2476   %}
2477 
2478   enc_class neg_reg_wide(rRegI dst) %{
2479     // Wide NEG: REX_W or REX_WB, opcode 0xF7, R/M byte with middle field 0x03
2480     const int  dst_encoding = $dst$$reg;
2481     const bool high_reg     = (dst_encoding >= 8);
2482     const int  low_bits     = high_reg ? dst_encoding - 8 : dst_encoding;
2483 
2484     // The prefix is unconditional; the B variant is used for encodings 8+.
2485     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2486 
2487     // NEG $dst
2488     emit_opcode(cbuf, 0xF7);
2489     emit_rm(cbuf, 0x3, 0x03, low_bits);
2490   %}
2491 
2492   enc_class setLT_reg(rRegI dst) %{
2493     // SETLT $dst: bytes 0x0F 0x9C plus an R/M byte.  Encodings of 8 and
2494     // above get REX_B, encodings 4-7 a bare REX, encodings 0-3 no prefix.
2495     const int dst_encoding = $dst$$reg;
2496     if (dst_encoding >= 8) {
2497       emit_opcode(cbuf, Assembler::REX_B);
2498     } else if (dst_encoding >= 4) {
2499       emit_opcode(cbuf, Assembler::REX);
2500     }
2501 
2502     emit_opcode(cbuf, 0x0F);
2503     emit_opcode(cbuf, 0x9C);
2504     emit_rm(cbuf, 0x3, 0x0, dst_encoding >= 8 ? dst_encoding - 8 : dst_encoding);
2505   %}
2506 
2507   enc_class setNZ_reg(rRegI dst) %{
2508     // SETNZ $dst: bytes 0x0F 0x95 plus an R/M byte.  Encodings of 8 and
2509     // above get REX_B, encodings 4-7 a bare REX, encodings 0-3 no prefix.
2510     const int dst_encoding = $dst$$reg;
2511     if (dst_encoding >= 8) {
2512       emit_opcode(cbuf, Assembler::REX_B);
2513     } else if (dst_encoding >= 4) {
2514       emit_opcode(cbuf, Assembler::REX);
2515     }
2516 
2517     emit_opcode(cbuf, 0x0F);
2518     emit_opcode(cbuf, 0x95);
2519     emit_rm(cbuf, 0x3, 0x0, dst_encoding >= 8 ? dst_encoding - 8 : dst_encoding);
2520   %}
2521 
2522 
2523   // Compare the longs and set -1, 0, or 1 into dst
2524   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2525   %{
2526     int src1enc = $src1$$reg;
2527     int src2enc = $src2$$reg;
2528     int dstenc = $dst$$reg;
2529 
2530     // cmpq $src1, $src2  (REX_W family: R added for src1 >= 8, B added for src2 >= 8)
2531     if (src1enc < 8) {
2532       if (src2enc < 8) {
2533         emit_opcode(cbuf, Assembler::REX_W);
2534       } else {
2535         emit_opcode(cbuf, Assembler::REX_WB);
2536       }
2537     } else {
2538       if (src2enc < 8) {
2539         emit_opcode(cbuf, Assembler::REX_WR);
2540       } else {
2541         emit_opcode(cbuf, Assembler::REX_WRB);
2542       }
2543     }
2544     emit_opcode(cbuf, 0x3B);
2545     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2546 
2547     // movl $dst, -1
2548     if (dstenc >= 8) {
2549       emit_opcode(cbuf, Assembler::REX_B);
2550     }
2551     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2552     emit_d32(cbuf, -1);
2553 
2554     // jl,s done
2555     emit_opcode(cbuf, 0x7C);
2556     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne + movzbl: 3 bytes each, or 4 each with their prefix (dstenc >= 4)
2557 
2558     // setne $dst
2559     if (dstenc >= 4) {
2560       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2561     }
2562     emit_opcode(cbuf, 0x0F);
2563     emit_opcode(cbuf, 0x95);
2564     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2565 
2566     // movzbl $dst, $dst  (dst fills both register fields, hence REX_RB for dstenc >= 8)
2567     if (dstenc >= 4) {
2568       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2569     }
2570     emit_opcode(cbuf, 0x0F);
2571     emit_opcode(cbuf, 0xB6);
2572     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2573   %}
2574 
2575   enc_class Push_ResultXD(regD dst) %{
2576     MacroAssembler _masm(&cbuf);
2577     __ fstp_d(Address(rsp, 0));                      // x87: store the double at [rsp] and pop
2578     __ movdbl($dst$$XMMRegister, Address(rsp, 0));   // load that double from [rsp] into the XMM destination
2579     __ addptr(rsp, 8);                               // rsp += 8
2580   %}
2581 
2582   enc_class Push_SrcXD(regD src) %{
2583     MacroAssembler _masm(&cbuf);
2584     __ subptr(rsp, 8);                               // rsp -= 8: room for one double
2585     __ movdbl(Address(rsp, 0), $src$$XMMRegister);   // store the XMM source at [rsp]
2586     __ fld_d(Address(rsp, 0));                       // x87: load the double from [rsp]
2587   %}
2588 
2589 
2590   // Fast_Lock: emits the inline fast path for locking obj; EmitSync selects the variant.
2591   // obj: object to lock
2592   // box: box address (header location) -- killed
2593   // tmp: rax -- killed
2594   // scr: rbx -- killed
2595   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2596   // from i486.ad.  See that file for comments.
2597   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2598   // use the shorter encoding.  (Movl clears the high-order 32-bits).
2599   // NOTE(review): the outcome is presumably reported in ZF, as Fast_Unlock's
2600   // comments describe (ZF=1 success, ZF=0 failure) -- confirm at the use site.
2601   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2602   %{
2603     Register objReg = as_Register((int)$obj$$reg);
2604     Register boxReg = as_Register((int)$box$$reg);
2605     Register tmpReg = as_Register($tmp$$reg);
2606     Register scrReg = as_Register($scr$$reg);
2607     MacroAssembler masm(&cbuf);
2608 
2609     // Verify uniqueness of register assignments -- necessary but not sufficient
2610     assert (objReg != boxReg && objReg != tmpReg &&
2611             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2612     // When counters are attached, count every entry into this sequence.
2613     if (_counters != NULL) {
2614       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2615     }
2616     if (EmitSync & 1) { // variant 1: no lock attempt; store unused_mark() in the box and compare rsp with NULL_WORD
2617         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2618         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2619         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
2620     } else
2621     if (EmitSync & 2) { // variant 2: stack-lock CAS only, no inflated-monitor handling
2622         Label DONE_LABEL;
2623         if (UseBiasedLocking) {
2624            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2625           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2626         }
2627         // QQQ was movl...
2628         masm.movptr(tmpReg, 0x1);
2629         masm.orptr(tmpReg, Address(objReg, 0));
2630         masm.movptr(Address(boxReg, 0), tmpReg); // box now holds (word at [obj] | 1)
2631         if (os::is_MP()) {
2632           masm.lock();
2633         }
2634         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2635         masm.jcc(Assembler::equal, DONE_LABEL);
2636         // CAS failed: tmpReg (rax) now holds the word that was found at [obj].
2637         // Recursive locking: the box receives 0 iff (tmpReg - rsp) is in [0, page size) with its low 3 bits clear
2638         masm.subptr(tmpReg, rsp);
2639         masm.andptr(tmpReg, 7 - os::vm_page_size()); // mask keeps the low 3 bits and every bit at or above the page size (assuming a power-of-two page size)
2640         masm.movptr(Address(boxReg, 0), tmpReg);
2641 
2642         masm.bind(DONE_LABEL);
2643         masm.nop(); // avoid branch to branch
2644     } else { // default variant: full fast path, including inflated monitors
2645         Label DONE_LABEL, IsInflated, Egress;
2646 
2647         masm.movptr(tmpReg, Address(objReg, 0)) ;
2648         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2649         masm.jcc   (Assembler::notZero, IsInflated) ;
2650 
2651         // it's stack-locked, biased or neutral
2652         // TODO: optimize markword triage order to reduce the number of
2653         // conditional branches in the most common cases.
2654         // Beware -- there's a subtle invariant that fetch of the markword
2655         // at [FETCH], below, will never observe a biased encoding (*101b).
2656         // If this invariant is not held we'll suffer exclusion (safety) failure.
2657 
2658         if (UseBiasedLocking && !UseOptoBiasInlining) {
2659           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2660           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2661         }
2662 
2663         // was q will it destroy high?
2664         masm.orl   (tmpReg, 1) ;
2665         masm.movptr(Address(boxReg, 0), tmpReg) ; // box now holds (markword | 1)
2666         if (os::is_MP()) { masm.lock(); }
2667         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2668         if (_counters != NULL) {
2669            masm.cond_inc32(Assembler::equal,
2670                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2671         }
2672         masm.jcc   (Assembler::equal, DONE_LABEL);
2673         // CAS failed: tmpReg (rax) now holds the word that was found at [obj].
2674         // Recursive locking: same (tmpReg - rsp) & (7 - page size) test as in the EmitSync & 2 variant
2675         masm.subptr(tmpReg, rsp);
2676         masm.andptr(tmpReg, 7 - os::vm_page_size());
2677         masm.movptr(Address(boxReg, 0), tmpReg);
2678         if (_counters != NULL) {
2679            masm.cond_inc32(Assembler::equal,
2680                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2681         }
2682         masm.jmp   (DONE_LABEL) ;
2683 
2684         masm.bind  (IsInflated) ;
2685         // It's inflated
2686 
2687         // TODO: someday avoid the ST-before-CAS penalty by
2688         // relocating (deferring) the following ST.
2689         // We should also think about trying a CAS without having
2690         // fetched _owner.  If the CAS is successful we may
2691         // avoid an RTO->RTS upgrade on the $line.
2692         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2693         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2694 
2695         masm.mov    (boxReg, tmpReg) ; // keep the tagged markword in boxReg; tmpReg is overwritten next
2696         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // -2: tmpReg still has the 0x02 bit tested above (presumably monitor address | 2 -- confirm)
2697         masm.testptr(tmpReg, tmpReg) ;
2698         masm.jcc    (Assembler::notZero, DONE_LABEL) ; // owner field is non-null: leave with ZF=0
2699 
2700         // It's inflated and appears unlocked
2701         if (os::is_MP()) { masm.lock(); }
2702         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2703         // Intentional fall-through into DONE_LABEL ...
2704 
2705         masm.bind  (DONE_LABEL) ;
2706         masm.nop   () ;                 // avoid jmp to jmp
2707     }
2708   %}
2709   // Fast_Unlock: emits the inline fast path for unlocking obj; EmitSync selects the variant.
2710   // obj: object to unlock
2711   // box: box address (displaced header location), killed
2712   // RBX: killed tmp; cannot be obj nor box
2713   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2714   %{
2715     // box is declared rax_RegP, so boxReg also serves as the implicit comparand of the cmpxchg instructions below.
2716     Register objReg = as_Register($obj$$reg);
2717     Register boxReg = as_Register($box$$reg);
2718     Register tmpReg = as_Register($tmp$$reg);
2719     MacroAssembler masm(&cbuf);
2720 
2721     if (EmitSync & 4) { // variant 4: no unlock attempt, only compares rsp with 0
2722        masm.cmpptr(rsp, 0) ;
2723     } else
2724     if (EmitSync & 8) { // variant 8: stack-lock release only, no inflated-monitor handling
2725        Label DONE_LABEL;
2726        if (UseBiasedLocking) {
2727          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2728        }
2729 
2730        // Check whether the displaced header is 0
2731        //(=> recursive unlock)
2732        masm.movptr(tmpReg, Address(boxReg, 0));
2733        masm.testptr(tmpReg, tmpReg);
2734        masm.jcc(Assembler::zero, DONE_LABEL);
2735 
2736        // If not recursive lock, reset the header to displaced header
2737        if (os::is_MP()) {
2738          masm.lock();
2739        }
2740        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2741        masm.bind(DONE_LABEL);
2742        masm.nop(); // avoid branch to branch
2743     } else { // default variant: full fast path, including inflated monitors
2744        Label DONE_LABEL, Stacked, CheckSucc ;
2745 
2746        if (UseBiasedLocking && !UseOptoBiasInlining) {
2747          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2748        }
2749 
2750        masm.movptr(tmpReg, Address(objReg, 0)) ;
2751        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
2752        masm.jcc   (Assembler::zero, DONE_LABEL) ; // displaced header is 0 => recursive unlock, done with ZF=1
2753        masm.testl (tmpReg, 0x02) ;
2754        masm.jcc   (Assembler::zero, Stacked) ; // bit 0x02 clear => not inflated, take the stack-lock path
2755 
2756        // It's inflated
2757        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2758        masm.xorptr(boxReg, r15_thread) ;
2759        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2760        masm.jcc   (Assembler::notZero, DONE_LABEL) ; // owner != r15_thread or recursions != 0 => exit with ZF=0
2761        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2762        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2763        masm.jcc   (Assembler::notZero, CheckSucc) ; // cxq or EntryList is non-null => go inspect the succ field
2764        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; // both null: release by clearing the owner field
2765        masm.jmp   (DONE_LABEL) ;
2766 
2767        if ((EmitSync & 65536) == 0) {
2768          Label LSuccess, LGoSlowPath ;
2769          masm.bind  (CheckSucc) ;
2770          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2771          masm.jcc   (Assembler::zero, LGoSlowPath) ;
2772 
2773          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
2774          // the explicit ST;MEMBAR combination, but masm doesn't currently support
2775          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
2776          // are all faster when the write buffer is populated.
2777          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2778          if (os::is_MP()) {
2779             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
2780          }
2781          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2782          masm.jcc   (Assembler::notZero, LSuccess) ;
2783 
2784          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
2785          if (os::is_MP()) { masm.lock(); }
2786          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
2787          masm.jcc   (Assembler::notEqual, LSuccess) ; // owner field was no longer NULL => the release above stands
2788          // Intentional fall-through into slow-path
2789 
2790          masm.bind  (LGoSlowPath) ;
2791          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
2792          masm.jmp   (DONE_LABEL) ;
2793 
2794          masm.bind  (LSuccess) ;
2795          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
2796          masm.jmp   (DONE_LABEL) ;
2797        }
2798 
2799        masm.bind  (Stacked) ;
2800        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
2801        if (os::is_MP()) { masm.lock(); }
2802        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2803 
2804        if (EmitSync & 65536) { // the succ-check code above was not emitted; bind its label here so the jcc to it resolves
2805           masm.bind (CheckSucc) ;
2806        }
2807        masm.bind(DONE_LABEL);
2808        if (EmitSync & 32768) {
2809           masm.nop();                      // avoid branch to branch
2810        }
2811     }
2812   %}
2813 
2814   // Emit a jump to the rethrow stub: opcode 0xE9 followed by a relocated 32-bit displacement.
2815   enc_class enc_rethrow()
2816   %{
2817     cbuf.set_insts_mark();
2818     emit_opcode(cbuf, 0xE9); // jmp entry
2819     emit_d32_reloc(cbuf,
2820                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4), // -4: measured from the end of the 4-byte displacement field
2821                    runtime_call_Relocation::spec(),
2822                    RELOC_DISP32);
2823   %}
2824 
2825 %}
2826 
2827 
2828 
2829 //----------FRAME--------------------------------------------------------------
2830 // Definition of frame structure and management information.
2831 //
2832 //  S T A C K   L A Y O U T    Allocators stack-slot number
2833 //                             |   (to get allocators register number
2834 //  G  Owned by    |        |  v    add OptoReg::stack0())
2835 //  r   CALLER     |        |
2836 //  o     |        +--------+      pad to even-align allocators stack-slot
2837 //  w     V        |  pad0  |        numbers; owned by CALLER
2838 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2839 //  h     ^        |   in   |  5
2840 //        |        |  args  |  4   Holes in incoming args owned by SELF
2841 //  |     |        |        |  3
2842 //  |     |        +--------+
2843 //  V     |        | old out|      Empty on Intel, window on Sparc
2844 //        |    old |preserve|      Must be even aligned.
2845 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2846 //        |        |   in   |  3   area for Intel ret address
2847 //     Owned by    |preserve|      Empty on Sparc.
2848 //       SELF      +--------+
2849 //        |        |  pad2  |  2   pad to align old SP
2850 //        |        +--------+  1
2851 //        |        | locks  |  0
2852 //        |        +--------+----> OptoReg::stack0(), even aligned
2853 //        |        |  pad1  | 11   pad to align new SP
2854 //        |        +--------+
2855 //        |        |        | 10
2856 //        |        | spills |  9   spills
2857 //        V        |        |  8   (pad0 slot for callee)
2858 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2859 //        ^        |  out   |  7
2860 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2861 //     Owned by    +--------+
2862 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2863 //        |    new |preserve|      Must be even-aligned.
2864 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2865 //        |        |        |
2866 //
2867 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2868 //         known from SELF's arguments and the Java calling convention.
2869 //         Region 6-7 is determined per call site.
2870 // Note 2: If the calling convention leaves holes in the incoming argument
2871 //         area, those holes are owned by SELF.  Holes in the outgoing area
2872 //         are owned by the CALLEE.  Holes should not be necessary in the
2873 //         incoming area, as the Java calling convention is completely under
2874 //         the control of the AD file.  Doubles can be sorted and packed to
2875 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2876 //         varargs C calling conventions.
2877 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2878 //         even aligned with pad0 as needed.
2879 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2880 //         region 6-11 is even aligned; it may be padded out more so that
2881 //         the region from SP to FP meets the minimum stack alignment.
2882 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2883 //         alignment.  Region 11, pad1, may be dynamically extended so that
2884 //         SP meets the minimum alignment.
2885 // Frame description: stack direction, dedicated registers, slot counts, calling conventions and return-value locations.
2886 frame
2887 %{
2888   // What direction does stack grow in (assumed to be same for C & Java)
2889   stack_direction(TOWARDS_LOW);
2890 
2891   // These registers define part of the calling convention
2892   // between compiled code and the interpreter.
2893   inline_cache_reg(RAX);                // Inline Cache Register
2894   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2895                                         // calling interpreter
2896 
2897   // Optional: name the operand used by cisc-spilling to access
2898   // [stack_pointer + offset]
2899   cisc_spilling_operand_name(indOffset32);
2900 
2901   // Number of stack slots consumed by locking an object
2902   sync_stack_slots(2);
2903 
2904   // Compiled code's Frame Pointer
2905   frame_pointer(RSP);
2906 
2907   // Interpreter stores its frame pointer in a register which is
2908   // stored to the stack by I2CAdaptors.
2909   // I2CAdaptors convert from interpreted java to compiled java.
2910   interpreter_frame_pointer(RBP);
2911 
2912   // Stack alignment requirement
2913   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2914 
2915   // Number of stack slots between incoming argument block and the start of
2916   // a new frame.  The PROLOG must add this many slots to the stack.  The
2917   // EPILOG must remove this many slots.  amd64 needs two slots for
2918   // return address.  NOTE(review): the value below is 4 + 2 * VerifyStackAtCalls; presumably the extra pair holds the saved RBP -- confirm.
2919   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
2920 
2921   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2922   // for calls to C.  Supports the var-args backing area for register parms.
2923   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
2924 
2925   // The after-PROLOG location of the return address.  Location of
2926   // return address specifies a type (REG or STACK) and a number
2927   // representing the register number (i.e. - use a register name) or
2928   // stack slot.
2929   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2930   // Otherwise, it is above the locks and verification slot and alignment word
2931   return_addr(STACK - 2 +
2932               round_to((Compile::current()->in_preserve_stack_slots() +
2933                         Compile::current()->fixed_slots()),
2934                        stack_alignment_in_slots()));
2935 
2936   // Body of function which returns an integer array locating
2937   // arguments either in registers or in stack slots.  Passed an array
2938   // of ideal registers called "sig" and a "length" count.  Stack-slot
2939   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2940   // arguments for a CALLEE.  Incoming stack arguments are
2941   // automatically biased by the preserve_stack_slots field above.
2942 
2943   calling_convention
2944   %{
2945     // No difference between ingoing/outgoing just pass false
2946     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2947   %}
2948 
2949   c_calling_convention
2950   %{
2951     // This is obviously always outgoing
2952     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
2953   %}
2954 
2955   // Location of compiled Java return values.  Same as C for now.
2956   return_value
2957   %{
2958     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2959            "only return normal values");
2960     // lo[] and hi[] are indexed by ideal_reg and supply the two halves of the returned OptoRegPair.
2961     static const int lo[Op_RegL + 1] = {
2962       0,
2963       0,
2964       RAX_num,  // Op_RegN
2965       RAX_num,  // Op_RegI
2966       RAX_num,  // Op_RegP
2967       XMM0_num, // Op_RegF
2968       XMM0_num, // Op_RegD
2969       RAX_num   // Op_RegL
2970     };
2971     static const int hi[Op_RegL + 1] = {
2972       0,
2973       0,
2974       OptoReg::Bad, // Op_RegN
2975       OptoReg::Bad, // Op_RegI
2976       RAX_H_num,    // Op_RegP
2977       OptoReg::Bad, // Op_RegF
2978       XMM0b_num,    // Op_RegD
2979       RAX_H_num     // Op_RegL
2980     };
2981     // Excluded flags and vector registers.
2982     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
2983     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); // NOTE(review): if the row labels are accurate, the Op_RegN row lies below the asserted lower bound Op_RegI -- confirm
2984   %}
2985 %}
2986 
2987 //----------ATTRIBUTES---------------------------------------------------------
2988 //----------Operand Attributes-------------------------------------------------
2989 op_attrib op_cost(0);        // Required cost attribute; operands below override it with their own op_cost(...)
2990 
2991 //----------Instruction Attributes---------------------------------------------
2992 ins_attrib ins_cost(100);       // Required cost attribute
2993 ins_attrib ins_size(8);         // Required size attribute (in bits)
2994 ins_attrib ins_short_branch(0); // Required flag: is this instruction
2995                                 // a non-matching short branch variant
2996                                 // of some long branch?
2997 ins_attrib ins_alignment(1);    // Required alignment attribute (must
2998                                 // be a power of 2) specifies the
2999                                 // alignment that some part of the
3000                                 // instruction (not necessarily the
3001                                 // start) requires.  If > 1, a
3002                                 // compute_padding() function must be
3003                                 // provided for the instruction
3004 
3005 //----------OPERANDS-----------------------------------------------------------
3006 // Operand definitions must precede instruction definitions for correct parsing
3007 // in the ADLC because operands constitute user defined types which are used in
3008 // instruction definitions.
3009 
3010 //----------Simple Operands----------------------------------------------------
3011 // Immediate Operands
3012 // Integer Immediate: matches any ConI (no predicate)
3013 operand immI()
3014 %{
3015   match(ConI);
3016 
3017   op_cost(10);
3018   format %{ %}
3019   interface(CONST_INTER);
3020 %}
3021 
3022 // Constant for test vs zero: ConI whose value is 0
3023 operand immI0()
3024 %{
3025   predicate(n->get_int() == 0);
3026   match(ConI);
3027 
3028   op_cost(0);
3029   format %{ %}
3030   interface(CONST_INTER);
3031 %}
3032 
3033 // Constant for increment: ConI whose value is 1
3034 operand immI1()
3035 %{
3036   predicate(n->get_int() == 1);
3037   match(ConI);
3038 
3039   op_cost(0);
3040   format %{ %}
3041   interface(CONST_INTER);
3042 %}
3043 
3044 // Constant for decrement: ConI whose value is -1
3045 operand immI_M1()
3046 %{
3047   predicate(n->get_int() == -1);
3048   match(ConI);
3049 
3050   op_cost(0);
3051   format %{ %}
3052   interface(CONST_INTER);
3053 %}
3054 
3055 // Valid scale values for addressing modes: ConI in the range 0..3 (no op_cost given here)
3056 operand immI2()
3057 %{
3058   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3059   match(ConI);
3060 
3061   format %{ %}
3062   interface(CONST_INTER);
3063 %}
3064 
3065 operand immI8()
3066 %{
3067   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));  // ConI that fits in a signed 8-bit field
3068   match(ConI);
3069 
3070   op_cost(5);
3071   format %{ %}
3072   interface(CONST_INTER);
3073 %}
3074 
3075 operand immI16()
3076 %{
3077   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));  // ConI that fits in a signed 16-bit field
3078   match(ConI);
3079 
3080   op_cost(10);
3081   format %{ %}
3082   interface(CONST_INTER);
3083 %}
3084 
3085 // Int Immediate non-negative: ConI whose value is >= 0, i.e. the sign bit is clear
3086 operand immU31()
3087 %{
3088   predicate(n->get_int() >= 0);
3089   match(ConI);
3090 
3091   op_cost(0);
3092   format %{ %}
3093   interface(CONST_INTER);
3094 %}
3095 
3096 // Constant for long shifts: ConI whose value is 32
3097 operand immI_32()
3098 %{
3099   predicate( n->get_int() == 32 );
3100   match(ConI);
3101 
3102   op_cost(0);
3103   format %{ %}
3104   interface(CONST_INTER);
3105 %}
3106 
3107 // Constant for long shifts: ConI whose value is 64
3108 operand immI_64()
3109 %{
3110   predicate( n->get_int() == 64 );
3111   match(ConI);
3112 
3113   op_cost(0);
3114   format %{ %}
3115   interface(CONST_INTER);
3116 %}
3117 
3118 // Pointer Immediate: matches any ConP (no predicate)
3119 operand immP()
3120 %{
3121   match(ConP);
3122 
3123   op_cost(10);
3124   format %{ %}
3125   interface(CONST_INTER);
3126 %}
3127 
3128 // NULL Pointer Immediate: ConP whose pointer value is 0
3129 operand immP0()
3130 %{
3131   predicate(n->get_ptr() == 0);
3132   match(ConP);
3133 
3134   op_cost(5);
3135   format %{ %}
3136   interface(CONST_INTER);
3137 %}
3138 
3139 // Narrow Pointer Immediate: matches any ConN (no predicate)
3140 operand immN() %{
3141   match(ConN);
3142 
3143   op_cost(10);
3144   format %{ %}
3145   interface(CONST_INTER);
3146 %}
3147 
3148 operand immNKlass() %{
3149   match(ConNKlass);  // matches any ConNKlass constant (no predicate)
3150 
3151   op_cost(10);
3152   format %{ %}
3153   interface(CONST_INTER);
3154 %}
3155 
3156 // NULL Narrow Pointer Immediate: ConN whose narrow constant is 0
3157 operand immN0() %{
3158   predicate(n->get_narrowcon() == 0);
3159   match(ConN);
3160 
3161   op_cost(5);
3162   format %{ %}
3163   interface(CONST_INTER);
3164 %}
3165 
3166 operand immP31()
3167 %{
3168   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3169             && (n->get_ptr() >> 31) == 0);  // ConP without relocation whose value has no bits set at or above bit 31
3170   match(ConP);
3171 
3172   op_cost(5);
3173   format %{ %}
3174   interface(CONST_INTER);
3175 %}
3176 
3177 
3178 // Long Immediate: matches any ConL (no predicate)
3179 operand immL()
3180 %{
3181   match(ConL);
3182 
3183   op_cost(20);
3184   format %{ %}
3185   interface(CONST_INTER);
3186 %}
3187 
3188 // Long Immediate 8-bit: ConL in the signed byte range [-0x80, 0x80)
3189 operand immL8()
3190 %{
3191   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3192   match(ConL);
3193 
3194   op_cost(5);
3195   format %{ %}
3196   interface(CONST_INTER);
3197 %}
3198 
3199 // Long Immediate 32-bit unsigned: ConL that is unchanged by a round trip through unsigned int
3200 operand immUL32()
3201 %{
3202   predicate(n->get_long() == (unsigned int) (n->get_long()));
3203   match(ConL);
3204 
3205   op_cost(10);
3206   format %{ %}
3207   interface(CONST_INTER);
3208 %}
3209 
3210 // Long Immediate 32-bit signed: ConL that is unchanged by a round trip through int
3211 operand immL32()
3212 %{
3213   predicate(n->get_long() == (int) (n->get_long()));
3214   match(ConL);
3215 
3216   op_cost(15);
3217   format %{ %}
3218   interface(CONST_INTER);
3219 %}
3220 
3221 // Long Immediate zero: ConL whose value is 0
3222 operand immL0()
3223 %{
3224   predicate(n->get_long() == 0L);
3225   match(ConL);
3226 
3227   op_cost(10);
3228   format %{ %}
3229   interface(CONST_INTER);
3230 %}
3231 
3232 // Constant for increment: ConL whose value is 1
3233 operand immL1()
3234 %{
3235   predicate(n->get_long() == 1);
3236   match(ConL);
3237 
3238   format %{ %}
3239   interface(CONST_INTER);
3240 %}
3241 
3242 // Constant for decrement: ConL whose value is -1
3243 operand immL_M1()
3244 %{
3245   predicate(n->get_long() == -1);
3246   match(ConL);
3247 
3248   format %{ %}
3249   interface(CONST_INTER);
3250 %}
3251 
3252 // Long Immediate: ConL whose value is exactly 10
3253 operand immL10()
3254 %{
3255   predicate(n->get_long() == 10);
3256   match(ConL);
3257 
3258   format %{ %}
3259   interface(CONST_INTER);
3260 %}
3261 
3262 // Long immediate from 0 to 127 (ConL in [0, 0x80)).
3263 // Used for a shorter form of long mul by 10.
3264 operand immL_127()
3265 %{
3266   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3267   match(ConL);
3268 
3269   op_cost(10);
3270   format %{ %}
3271   interface(CONST_INTER);
3272 %}
3273 
3274 // Long Immediate: low 32-bit mask, i.e. ConL whose value is 0xFFFFFFFF
3275 operand immL_32bits()
3276 %{
3277   predicate(n->get_long() == 0xFFFFFFFFL);
3278   match(ConL);
3279   op_cost(20);
3280 
3281   format %{ %}
3282   interface(CONST_INTER);
3283 %}
3284 
3285 // Float Immediate zero: ConF whose jint_cast image is 0 (presumably the raw bits, so only +0.0f -- confirm)
3286 operand immF0()
3287 %{
3288   predicate(jint_cast(n->getf()) == 0);
3289   match(ConF);
3290 
3291   op_cost(5);
3292   format %{ %}
3293   interface(CONST_INTER);
3294 %}
3295 
3296 // Float Immediate: matches any ConF (no predicate)
3297 operand immF()
3298 %{
3299   match(ConF);
3300 
3301   op_cost(15);
3302   format %{ %}
3303   interface(CONST_INTER);
3304 %}
3305 
3306 // Double Immediate zero: ConD whose jlong_cast image is 0 (presumably the raw bits, so only +0.0 -- confirm)
3307 operand immD0()
3308 %{
3309   predicate(jlong_cast(n->getd()) == 0);
3310   match(ConD);
3311 
3312   op_cost(5);
3313   format %{ %}
3314   interface(CONST_INTER);
3315 %}
3316 
3317 // Double Immediate: matches any ConD (no predicate)
3318 operand immD()
3319 %{
3320   match(ConD);
3321 
3322   op_cost(15);
3323   format %{ %}
3324   interface(CONST_INTER);
3325 %}
3326 
3327 // Immediates for special shifts (sign extend)
3328 
3329 // Constant 16 for the special (sign extend) shifts: ConI whose value is 16
3330 operand immI_16()
3331 %{
3332   predicate(n->get_int() == 16);
3333   match(ConI);
3334 
3335   format %{ %}
3336   interface(CONST_INTER);
3337 %}
3338 
3339 operand immI_24()
3340 %{
3341   predicate(n->get_int() == 24);  // ConI whose value is 24 (listed with the special shift immediates)
3342   match(ConI);
3343 
3344   format %{ %}
3345   interface(CONST_INTER);
3346 %}
3347 
3348 // Constant for byte-wide masking: ConI whose value is 255
3349 operand immI_255()
3350 %{
3351   predicate(n->get_int() == 255);
3352   match(ConI);
3353 
3354   format %{ %}
3355   interface(CONST_INTER);
3356 %}
3357 
3358 // Constant for short-wide masking: ConI whose value is 65535
3359 operand immI_65535()
3360 %{
3361   predicate(n->get_int() == 65535);
3362   match(ConI);
3363 
3364   format %{ %}
3365   interface(CONST_INTER);
3366 %}
3367 
3368 // Constant for byte-wide masking of a long: ConL whose value is 255
3369 operand immL_255()
3370 %{
3371   predicate(n->get_long() == 255);
3372   match(ConL);
3373 
3374   format %{ %}
3375   interface(CONST_INTER);
3376 %}
3377 
3378 // Constant for short-wide masking of a long: ConL whose value is 65535
3379 operand immL_65535()
3380 %{
3381   predicate(n->get_long() == 65535);
3382   match(ConL);
3383 
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 // Register Operands
3389 // Integer Register: RegI allocated from class int_reg; also matches the fixed-register int operands listed
3390 operand rRegI()
3391 %{
3392   constraint(ALLOC_IN_RC(int_reg));
3393   match(RegI);
3394 
3395   match(rax_RegI);
3396   match(rbx_RegI);
3397   match(rcx_RegI);
3398   match(rdx_RegI);
3399   match(rdi_RegI);
3400 
3401   format %{ %}
3402   interface(REG_INTER);
3403 %}
3404 
3405 // Special Register: RegI restricted to class int_rax_reg (formats as "RAX")
3406 operand rax_RegI()
3407 %{
3408   constraint(ALLOC_IN_RC(int_rax_reg));
3409   match(RegI);
3410   match(rRegI);
3411 
3412   format %{ "RAX" %}
3413   interface(REG_INTER);
3414 %}
3415 
3416 // Special Register: RegI restricted to class int_rbx_reg (formats as "RBX")
3417 operand rbx_RegI()
3418 %{
3419   constraint(ALLOC_IN_RC(int_rbx_reg));
3420   match(RegI);
3421   match(rRegI);
3422 
3423   format %{ "RBX" %}
3424   interface(REG_INTER);
3425 %}
3426 
3427 operand rcx_RegI()
3428 %{
3429   constraint(ALLOC_IN_RC(int_rcx_reg));  // RegI restricted to class int_rcx_reg (formats as "RCX")
3430   match(RegI);
3431   match(rRegI);
3432 
3433   format %{ "RCX" %}
3434   interface(REG_INTER);
3435 %}
3436 
3437 operand rdx_RegI()
3438 %{
3439   constraint(ALLOC_IN_RC(int_rdx_reg));  // RegI restricted to class int_rdx_reg (formats as "RDX")
3440   match(RegI);
3441   match(rRegI);
3442 
3443   format %{ "RDX" %}
3444   interface(REG_INTER);
3445 %}
3446 
3447 operand rdi_RegI()
3448 %{
3449   constraint(ALLOC_IN_RC(int_rdi_reg));  // RegI restricted to class int_rdi_reg (formats as "RDI")
3450   match(RegI);
3451   match(rRegI);
3452 
3453   format %{ "RDI" %}
3454   interface(REG_INTER);
3455 %}
3456 
3457 operand no_rcx_RegI()
3458 %{
3459   constraint(ALLOC_IN_RC(int_no_rcx_reg));  // RegI from class int_no_rcx_reg; rcx_RegI is not among the matches below
3460   match(RegI);
3461   match(rax_RegI);
3462   match(rbx_RegI);
3463   match(rdx_RegI);
3464   match(rdi_RegI);
3465 
3466   format %{ %}
3467   interface(REG_INTER);
3468 %}
3469 
3470 operand no_rax_rdx_RegI()
3471 %{
3472   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // RegI from class int_no_rax_rdx_reg; rax_RegI and rdx_RegI are not matched below
3473   match(RegI);
3474   match(rbx_RegI);
3475   match(rcx_RegI);
3476   match(rdi_RegI);
3477 
3478   format %{ %}
3479   interface(REG_INTER);
3480 %}
3481 
3482 // Pointer Register: RegP allocated from class any_reg; matches rRegP and every fixed pointer operand listed, r15_RegP included
3483 operand any_RegP()
3484 %{
3485   constraint(ALLOC_IN_RC(any_reg));
3486   match(RegP);
3487   match(rax_RegP);
3488   match(rbx_RegP);
3489   match(rdi_RegP);
3490   match(rsi_RegP);
3491   match(rbp_RegP);
3492   match(r15_RegP);
3493   match(rRegP);
3494 
3495   format %{ %}
3496   interface(REG_INTER);
3497 %}
3498 
3499 operand rRegP()
3500 %{
3501   constraint(ALLOC_IN_RC(ptr_reg));  // RegP allocated from class ptr_reg
3502   match(RegP);
3503   match(rax_RegP);
3504   match(rbx_RegP);
3505   match(rdi_RegP);
3506   match(rsi_RegP);
3507   match(rbp_RegP);
3508   match(r15_RegP);  // See Q&A below about r15_RegP.
3509 
3510   format %{ %}
3511   interface(REG_INTER);
3512 %}
3513 
3514 operand rRegN() %{
3515   constraint(ALLOC_IN_RC(int_reg));  // RegN shares register class int_reg with rRegI
3516   match(RegN);
3517 
3518   format %{ %}
3519   interface(REG_INTER);
3520 %}
3521 
3522 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3523 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3524 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3525 // The output of an instruction is controlled by the allocator, which respects
3526 // register class masks, not match rules.  Unless an instruction mentions
3527 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3528 // by the allocator as an input.
3529 
3530 operand no_rax_RegP()
3531 %{
3532   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // RegP from class ptr_no_rax_reg; rax_RegP is not matched below
3533   match(RegP);
3534   match(rbx_RegP);
3535   match(rsi_RegP);
3536   match(rdi_RegP);
3537 
3538   format %{ %}
3539   interface(REG_INTER);
3540 %}
3541 
3542 operand no_rbp_RegP()
3543 %{
3544   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // RegP from class ptr_no_rbp_reg; rbp_RegP is not matched below
3545   match(RegP);
3546   match(rbx_RegP);
3547   match(rsi_RegP);
3548   match(rdi_RegP);
3549 
3550   format %{ %}
3551   interface(REG_INTER);
3552 %}
3553 
3554 operand no_rax_rbx_RegP()
3555 %{
3556   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // RegP from class ptr_no_rax_rbx_reg; neither rax_RegP nor rbx_RegP is matched below
3557   match(RegP);
3558   match(rsi_RegP);
3559   match(rdi_RegP);
3560 
3561   format %{ %}
3562   interface(REG_INTER);
3563 %}
3564 
3565 // Special Registers
3566 // Return a pointer value: RegP restricted to class ptr_rax_reg
3567 operand rax_RegP()
3568 %{
3569   constraint(ALLOC_IN_RC(ptr_rax_reg));
3570   match(RegP);
3571   match(rRegP);
3572 
3573   format %{ %}
3574   interface(REG_INTER);
3575 %}
3576 
3577 // Special Registers
3578 // Return a compressed pointer value: RegN restricted to class int_rax_reg
3579 operand rax_RegN()
3580 %{
3581   constraint(ALLOC_IN_RC(int_rax_reg));
3582   match(RegN);
3583   match(rRegN);
3584 
3585   format %{ %}
3586   interface(REG_INTER);
3587 %}
3588 
3589 // Used in AtomicAdd: RegP restricted to class ptr_rbx_reg
3590 operand rbx_RegP()
3591 %{
3592   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3593   match(RegP);
3594   match(rRegP);
3595 
3596   format %{ %}
3597   interface(REG_INTER);
3598 %}
3599 
3600 operand rsi_RegP()
3601 %{
3602   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // RegP restricted to class ptr_rsi_reg
3603   match(RegP);
3604   match(rRegP);
3605 
3606   format %{ %}
3607   interface(REG_INTER);
3608 %}
3609 
3610 // Used in rep stosq: RegP restricted to class ptr_rdi_reg
3611 operand rdi_RegP()
3612 %{
3613   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3614   match(RegP);
3615   match(rRegP);
3616 
3617   format %{ %}
3618   interface(REG_INTER);
3619 %}
3620 
3621 operand rbp_RegP()
3622 %{
3623   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // RegP restricted to class ptr_rbp_reg
3624   match(RegP);
3625   match(rRegP);
3626 
3627   format %{ %}
3628   interface(REG_INTER);
3629 %}
3630 
// Pointer operand allocated from register class ptr_r15_reg; matches the
// ideal RegP as well as the general rRegP operand.
3631 operand r15_RegP()
3632 %{
3633   constraint(ALLOC_IN_RC(ptr_r15_reg));
3634   match(RegP);
3635   match(rRegP);
3636 
3637   format %{ %}
3638   interface(REG_INTER);
3639 %}
3640 
// General long register operand allocated from register class long_reg.
// Besides the ideal RegL it also matches the rax_RegL and rdx_RegL operands.
3641 operand rRegL()
3642 %{
3643   constraint(ALLOC_IN_RC(long_reg));
3644   match(RegL);
3645   match(rax_RegL);
3646   match(rdx_RegL);
3647 
3648   format %{ %}
3649   interface(REG_INTER);
3650 %}
3651 
3652 // Special Registers
// Long operand allocated from register class long_no_rax_rdx_reg; matches
// the ideal RegL as well as the general rRegL operand.
3653 operand no_rax_rdx_RegL()
3654 %{
3655   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3656   match(RegL);
3657   match(rRegL);
3658 
3659   format %{ %}
3660   interface(REG_INTER);
3661 %}
3662 
// Long operand that matches the ideal RegL, the general rRegL operand and the
// rdx_RegL operand.
// NOTE(review): the allocation class is long_no_rax_rdx_reg (the same class
// no_rax_rdx_RegL uses) even though this operand also lists match(rdx_RegL);
// confirm that excluding RDX from allocation is intended here.
3663 operand no_rax_RegL()
3664 %{
3665   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3666   match(RegL);
3667   match(rRegL);
3668   match(rdx_RegL);
3669 
3670   format %{ %}
3671   interface(REG_INTER);
3672 %}
3673 
// Long operand allocated from register class long_no_rcx_reg; matches the
// ideal RegL as well as the general rRegL operand.
3674 operand no_rcx_RegL()
3675 %{
3676   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3677   match(RegL);
3678   match(rRegL);
3679 
3680   format %{ %}
3681   interface(REG_INTER);
3682 %}
3683 
// Long operand allocated from register class long_rax_reg; matches the ideal
// RegL as well as the general rRegL operand.  Unlike the sibling long
// operands, its format string is the literal "RAX".
3684 operand rax_RegL()
3685 %{
3686   constraint(ALLOC_IN_RC(long_rax_reg));
3687   match(RegL);
3688   match(rRegL);
3689 
3690   format %{ "RAX" %}
3691   interface(REG_INTER);
3692 %}
3693 
// Long operand allocated from register class long_rcx_reg; matches the ideal
// RegL as well as the general rRegL operand.
3694 operand rcx_RegL()
3695 %{
3696   constraint(ALLOC_IN_RC(long_rcx_reg));
3697   match(RegL);
3698   match(rRegL);
3699 
3700   format %{ %}
3701   interface(REG_INTER);
3702 %}
3703 
// Long operand allocated from register class long_rdx_reg; matches the ideal
// RegL as well as the general rRegL operand.
3704 operand rdx_RegL()
3705 %{
3706   constraint(ALLOC_IN_RC(long_rdx_reg));
3707   match(RegL);
3708   match(rRegL);
3709 
3710   format %{ %}
3711   interface(REG_INTER);
3712 %}
3713 
3714 // Flags register, used as output of compare instructions
// Allocated from register class int_flags, matches the ideal RegFlags and is
// printed as "RFLAGS".
3715 operand rFlagsReg()
3716 %{
3717   constraint(ALLOC_IN_RC(int_flags));
3718   match(RegFlags);
3719 
3720   format %{ "RFLAGS" %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and match rule (RegFlags) as rFlagsReg; it
// is a distinct operand type printed as "RFLAGS_U".
// NOTE(review): the "U" suffix suggests unsigned compares use it as well --
// confirm against the instructions that reference it.
3725 operand rFlagsRegU()
3726 %{
3727   constraint(ALLOC_IN_RC(int_flags));
3728   match(RegFlags);
3729 
3730   format %{ "RFLAGS_U" %}
3731   interface(REG_INTER);
3732 %}
3733 
// Flags operand printed as "RFLAGS_U_CF"; same register class (int_flags)
// and match rule (RegFlags) as rFlagsReg.  Its predicate is the constant
// false -- presumably so that it is only used where an instruction names it
// explicitly; TODO confirm against its users.
3734 operand rFlagsRegUCF() %{
3735   constraint(ALLOC_IN_RC(int_flags));
3736   match(RegFlags);
3737   predicate(false);
3738 
3739   format %{ "RFLAGS_U_CF" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 // Float register operands
// Allocated from register class float_reg; matches the ideal RegF.
3744 operand regF()
3745 %{
3746   constraint(ALLOC_IN_RC(float_reg));
3747   match(RegF);
3748 
3749   format %{ %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 // Double register operands
// Allocated from register class double_reg; matches the ideal RegD.
3754 operand regD()
3755 %{
3756   constraint(ALLOC_IN_RC(double_reg));
3757   match(RegD);
3758 
3759   format %{ %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 //----------Memory Operands----------------------------------------------------
3764 // Direct Memory Operand
3765 // operand direct(immP addr)
3766 // %{
3767 //   match(addr);
3768 
3769 //   format %{ "[$addr]" %}
3770 //   interface(MEMORY_INTER) %{
3771 //     base(0xFFFFFFFF);
3772 //     index(0x4);
3773 //     scale(0x0);
3774 //     disp($addr);
3775 //   %}
3776 // %}
3777 
3778 // Indirect Memory Operand
// Address is just the pointer register: [$reg].  The index field is 0x4
// (the value the stack-slot operands in this file annotate as "No Index"),
// with zero scale and zero displacement.
3779 operand indirect(any_RegP reg)
3780 %{
3781   constraint(ALLOC_IN_RC(ptr_reg));
3782   match(reg);
3783 
3784   format %{ "[$reg]" %}
3785   interface(MEMORY_INTER) %{
3786     base($reg);
3787     index(0x4);
3788     scale(0x0);
3789     disp(0x0);
3790   %}
3791 %}
3792 
3793 // Indirect Memory Plus Short Offset Operand
// Matches (AddP reg off) where off is an immL8 constant: [$reg + $off].
// No index register (index 0x4) and no scale.
3794 operand indOffset8(any_RegP reg, immL8 off)
3795 %{
3796   constraint(ALLOC_IN_RC(ptr_reg));
3797   match(AddP reg off);
3798 
3799   format %{ "[$reg + $off (8-bit)]" %}
3800   interface(MEMORY_INTER) %{
3801     base($reg);
3802     index(0x4);
3803     scale(0x0);
3804     disp($off);
3805   %}
3806 %}
3807 
3808 // Indirect Memory Plus Long Offset Operand
// Matches (AddP reg off) where off is an immL32 constant: [$reg + $off].
// No index register (index 0x4) and no scale.
3809 operand indOffset32(any_RegP reg, immL32 off)
3810 %{
3811   constraint(ALLOC_IN_RC(ptr_reg));
3812   match(AddP reg off);
3813 
3814   format %{ "[$reg + $off (32-bit)]" %}
3815   interface(MEMORY_INTER) %{
3816     base($reg);
3817     index(0x4);
3818     scale(0x0);
3819     disp($off);
3820   %}
3821 %}
3822 
3823 // Indirect Memory Plus Index Register Plus Offset Operand
// Matches (AddP (AddP reg lreg) off): base $reg, unscaled index $lreg and an
// immL32 displacement.  Operand cost is 10.
3824 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3825 %{
3826   constraint(ALLOC_IN_RC(ptr_reg));
3827   match(AddP (AddP reg lreg) off);
3828 
3829   op_cost(10);
3830   format %{"[$reg + $off + $lreg]" %}
3831   interface(MEMORY_INTER) %{
3832     base($reg);
3833     index($lreg);
3834     scale(0x0);
3835     disp($off);
3836   %}
3837 %}
3838 
3839 // Indirect Memory Plus Index Register Operand
// Matches (AddP reg lreg): base $reg plus unscaled index $lreg, with zero
// displacement.  Operand cost is 10.
3840 operand indIndex(any_RegP reg, rRegL lreg)
3841 %{
3842   constraint(ALLOC_IN_RC(ptr_reg));
3843   match(AddP reg lreg);
3844 
3845   op_cost(10);
3846   format %{"[$reg + $lreg]" %}
3847   interface(MEMORY_INTER) %{
3848     base($reg);
3849     index($lreg);
3850     scale(0x0);
3851     disp(0x0);
3852   %}
3853 %}
3854 
3855 // Indirect Memory Times Scale Plus Index Register
// Matches (AddP reg (LShiftL lreg scale)): base $reg plus index $lreg shifted
// left by the immI2 constant $scale, with zero displacement.  Operand cost
// is 10.
3856 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3857 %{
3858   constraint(ALLOC_IN_RC(ptr_reg));
3859   match(AddP reg (LShiftL lreg scale));
3860 
3861   op_cost(10);
3862   format %{"[$reg + $lreg << $scale]" %}
3863   interface(MEMORY_INTER) %{
3864     base($reg);
3865     index($lreg);
3866     scale($scale);
3867     disp(0x0);
3868   %}
3869 %}
3870 
3871 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Matches (AddP (AddP reg (LShiftL lreg scale)) off): base $reg, index $lreg
// shifted left by $scale, plus an immL32 displacement.  Operand cost is 10.
3872 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3873 %{
3874   constraint(ALLOC_IN_RC(ptr_reg));
3875   match(AddP (AddP reg (LShiftL lreg scale)) off);
3876 
3877   op_cost(10);
3878   format %{"[$reg + $off + $lreg << $scale]" %}
3879   interface(MEMORY_INTER) %{
3880     base($reg);
3881     index($lreg);
3882     scale($scale);
3883     disp($off);
3884   %}
3885 %}
3886 
3887 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Like indIndexScaleOffset, but the index is an int register (rRegI) that
// reaches the address through a ConvI2L.  The predicate walks
// n->in(2)->in(3)->in(1) and requires the lower bound (_lo) of that node's
// long type to be >= 0; given the match rule shape this is presumably the
// ConvI2L node, i.e. the index is known to be non-negative.  Operand cost
// is 10.
3888 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3889 %{
3890   constraint(ALLOC_IN_RC(ptr_reg));
3891   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3892   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3893 
3894   op_cost(10);
3895   format %{"[$reg + $off + $idx << $scale]" %}
3896   interface(MEMORY_INTER) %{
3897     base($reg);
3898     index($idx);
3899     scale($scale);
3900     disp($off);
3901   %}
3902 %}
3903 
3904 // Indirect Narrow Oop Plus Offset Operand
3905 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3906 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
// Matches (AddP (DecodeN reg) off) when compressed oops are in use and the
// narrow-oop shift equals Address::times_8.  The narrow register becomes the
// index with scale 0x3 (the "<< 3" in the format string), the base is fixed
// to encoding 0xc (R12), and $off is the displacement.  Operand cost is 10.
3907 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3908   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3909   constraint(ALLOC_IN_RC(ptr_reg));
3910   match(AddP (DecodeN reg) off);
3911 
3912   op_cost(10);
3913   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3914   interface(MEMORY_INTER) %{
3915     base(0xc); // R12
3916     index($reg);
3917     scale(0x3);
3918     disp($off);
3919   %}
3920 %}
3921 
3922 // Indirect Memory Operand
// Narrow-oop variant of indirect: matches (DecodeN reg) and uses the narrow
// register directly as the base.  Only enabled when
// Universe::narrow_oop_shift() == 0.
3923 operand indirectNarrow(rRegN reg)
3924 %{
3925   predicate(Universe::narrow_oop_shift() == 0);
3926   constraint(ALLOC_IN_RC(ptr_reg));
3927   match(DecodeN reg);
3928 
3929   format %{ "[$reg]" %}
3930   interface(MEMORY_INTER) %{
3931     base($reg);
3932     index(0x4);
3933     scale(0x0);
3934     disp(0x0);
3935   %}
3936 %}
3937 
3938 // Indirect Memory Plus Short Offset Operand
// Narrow-oop variant of indOffset8: matches (AddP (DecodeN reg) off) with an
// immL8 displacement.  Only enabled when Universe::narrow_oop_shift() == 0.
3939 operand indOffset8Narrow(rRegN reg, immL8 off)
3940 %{
3941   predicate(Universe::narrow_oop_shift() == 0);
3942   constraint(ALLOC_IN_RC(ptr_reg));
3943   match(AddP (DecodeN reg) off);
3944 
3945   format %{ "[$reg + $off (8-bit)]" %}
3946   interface(MEMORY_INTER) %{
3947     base($reg);
3948     index(0x4);
3949     scale(0x0);
3950     disp($off);
3951   %}
3952 %}
3953 
3954 // Indirect Memory Plus Long Offset Operand
// Narrow-oop variant of indOffset32: matches (AddP (DecodeN reg) off) with an
// immL32 displacement.  Only enabled when Universe::narrow_oop_shift() == 0.
3955 operand indOffset32Narrow(rRegN reg, immL32 off)
3956 %{
3957   predicate(Universe::narrow_oop_shift() == 0);
3958   constraint(ALLOC_IN_RC(ptr_reg));
3959   match(AddP (DecodeN reg) off);
3960 
3961   format %{ "[$reg + $off (32-bit)]" %}
3962   interface(MEMORY_INTER) %{
3963     base($reg);
3964     index(0x4);
3965     scale(0x0);
3966     disp($off);
3967   %}
3968 %}
3969 
3970 // Indirect Memory Plus Index Register Plus Offset Operand
// Narrow-oop variant of indIndexOffset: matches
// (AddP (AddP (DecodeN reg) lreg) off).  Only enabled when
// Universe::narrow_oop_shift() == 0.  Operand cost is 10.
3971 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3972 %{
3973   predicate(Universe::narrow_oop_shift() == 0);
3974   constraint(ALLOC_IN_RC(ptr_reg));
3975   match(AddP (AddP (DecodeN reg) lreg) off);
3976 
3977   op_cost(10);
3978   format %{"[$reg + $off + $lreg]" %}
3979   interface(MEMORY_INTER) %{
3980     base($reg);
3981     index($lreg);
3982     scale(0x0);
3983     disp($off);
3984   %}
3985 %}
3986 
3987 // Indirect Memory Plus Index Register Operand
// Narrow-oop variant of indIndex: matches (AddP (DecodeN reg) lreg) with
// zero displacement.  Only enabled when Universe::narrow_oop_shift() == 0.
// Operand cost is 10.
3988 operand indIndexNarrow(rRegN reg, rRegL lreg)
3989 %{
3990   predicate(Universe::narrow_oop_shift() == 0);
3991   constraint(ALLOC_IN_RC(ptr_reg));
3992   match(AddP (DecodeN reg) lreg);
3993 
3994   op_cost(10);
3995   format %{"[$reg + $lreg]" %}
3996   interface(MEMORY_INTER) %{
3997     base($reg);
3998     index($lreg);
3999     scale(0x0);
4000     disp(0x0);
4001   %}
4002 %}
4003 
4004 // Indirect Memory Times Scale Plus Index Register
// Narrow-oop variant of indIndexScale: matches
// (AddP (DecodeN reg) (LShiftL lreg scale)) with zero displacement.  Only
// enabled when Universe::narrow_oop_shift() == 0.  Operand cost is 10.
4005 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4006 %{
4007   predicate(Universe::narrow_oop_shift() == 0);
4008   constraint(ALLOC_IN_RC(ptr_reg));
4009   match(AddP (DecodeN reg) (LShiftL lreg scale));
4010 
4011   op_cost(10);
4012   format %{"[$reg + $lreg << $scale]" %}
4013   interface(MEMORY_INTER) %{
4014     base($reg);
4015     index($lreg);
4016     scale($scale);
4017     disp(0x0);
4018   %}
4019 %}
4020 
4021 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Narrow-oop variant of indIndexScaleOffset: matches
// (AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off).  Only enabled when
// Universe::narrow_oop_shift() == 0.  Operand cost is 10.
4022 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4023 %{
4024   predicate(Universe::narrow_oop_shift() == 0);
4025   constraint(ALLOC_IN_RC(ptr_reg));
4026   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4027 
4028   op_cost(10);
4029   format %{"[$reg + $off + $lreg << $scale]" %}
4030   interface(MEMORY_INTER) %{
4031     base($reg);
4032     index($lreg);
4033     scale($scale);
4034     disp($off);
4035   %}
4036 %}
4037 
4038 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Narrow-oop variant of indPosIndexScaleOffset.  The predicate requires both
// Universe::narrow_oop_shift() == 0 and a non-negative lower bound (_lo) on
// the long type of the node at n->in(2)->in(3)->in(1) (presumably the
// ConvI2L of $idx, given the match rule shape).  Operand cost is 10.
4039 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4040 %{
4041   constraint(ALLOC_IN_RC(ptr_reg));
4042   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4043   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4044 
4045   op_cost(10);
4046   format %{"[$reg + $off + $idx << $scale]" %}
4047   interface(MEMORY_INTER) %{
4048     base($reg);
4049     index($idx);
4050     scale($scale);
4051     disp($off);
4052   %}
4053 %}
4054 
4055 //----------Special Memory Operands--------------------------------------------
4056 // Stack Slot Operand - This operand is used for loading and storing temporary
4057 //                      values on the stack where a match requires a value to
4058 //                      flow through memory.
// Stack slot holding a pointer (sRegP), allocated from register class
// stack_slots.  Addressed as RSP plus the slot offset taken from $reg.
4059 operand stackSlotP(sRegP reg)
4060 %{
4061   constraint(ALLOC_IN_RC(stack_slots));
4062   // No match rule because this operand is only generated in matching
4063 
4064   format %{ "[$reg]" %}
4065   interface(MEMORY_INTER) %{
4066     base(0x4);   // RSP
4067     index(0x4);  // No Index
4068     scale(0x0);  // No Scale
4069     disp($reg);  // Stack Offset
4070   %}
4071 %}
4072 
// Stack slot holding an int (sRegI), allocated from register class
// stack_slots.  Addressed as RSP plus the slot offset taken from $reg.
4073 operand stackSlotI(sRegI reg)
4074 %{
4075   constraint(ALLOC_IN_RC(stack_slots));
4076   // No match rule because this operand is only generated in matching
4077 
4078   format %{ "[$reg]" %}
4079   interface(MEMORY_INTER) %{
4080     base(0x4);   // RSP
4081     index(0x4);  // No Index
4082     scale(0x0);  // No Scale
4083     disp($reg);  // Stack Offset
4084   %}
4085 %}
4086 
// Stack slot holding a float (sRegF), allocated from register class
// stack_slots.  Addressed as RSP plus the slot offset taken from $reg.
4087 operand stackSlotF(sRegF reg)
4088 %{
4089   constraint(ALLOC_IN_RC(stack_slots));
4090   // No match rule because this operand is only generated in matching
4091 
4092   format %{ "[$reg]" %}
4093   interface(MEMORY_INTER) %{
4094     base(0x4);   // RSP
4095     index(0x4);  // No Index
4096     scale(0x0);  // No Scale
4097     disp($reg);  // Stack Offset
4098   %}
4099 %}
4100 
// Stack slot holding a double (sRegD), allocated from register class
// stack_slots.  Addressed as RSP plus the slot offset taken from $reg.
4101 operand stackSlotD(sRegD reg)
4102 %{
4103   constraint(ALLOC_IN_RC(stack_slots));
4104   // No match rule because this operand is only generated in matching
4105 
4106   format %{ "[$reg]" %}
4107   interface(MEMORY_INTER) %{
4108     base(0x4);   // RSP
4109     index(0x4);  // No Index
4110     scale(0x0);  // No Scale
4111     disp($reg);  // Stack Offset
4112   %}
4113 %}
// Stack slot holding a long (sRegL), allocated from register class
// stack_slots.  Addressed as RSP plus the slot offset taken from $reg.
4114 operand stackSlotL(sRegL reg)
4115 %{
4116   constraint(ALLOC_IN_RC(stack_slots));
4117   // No match rule because this operand is only generated in matching
4118 
4119   format %{ "[$reg]" %}
4120   interface(MEMORY_INTER) %{
4121     base(0x4);   // RSP
4122     index(0x4);  // No Index
4123     scale(0x0);  // No Scale
4124     disp($reg);  // Stack Offset
4125   %}
4126 %}
4127 
4128 //----------Conditional Branch Operands----------------------------------------
4129 // Comparison Op  - This is the operation of the comparison, and is limited to
4130 //                  the following set of codes:
4131 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4132 //
4133 // Other attributes of the comparison, such as unsignedness, are specified
4134 // by the comparison instruction that sets a condition code flags register.
4135 // That result is represented by a flags operand whose subtype is appropriate
4136 // to the unsignedness (etc.) of the comparison.
4137 //
4138 // Later, the instruction which matches both the Comparison Op (a Bool) and
4139 // the flags (produced by the Cmp) specifies the coding of the comparison op
4140 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4141 
4142 // Comparison Code
// Matches a Bool node.  The COND_INTER interface pairs each condition with
// its encoding value and mnemonic suffix; this operand uses the signed
// forms l / ge / le / g.
4143 operand cmpOp()
4144 %{
4145   match(Bool);
4146 
4147   format %{ "" %}
4148   interface(COND_INTER) %{
4149     equal(0x4, "e");
4150     not_equal(0x5, "ne");
4151     less(0xC, "l");
4152     greater_equal(0xD, "ge");
4153     less_equal(0xE, "le");
4154     greater(0xF, "g");
4155     overflow(0x0, "o");
4156     no_overflow(0x1, "no");
4157   %}
4158 %}
4159 
4160 // Comparison Code, unsigned compare.  Used by FP also, with
4161 // C2 (unordered) turned into GT or LT already.  The other bits
4162 // C0 and C3 are turned into Carry & Zero flags.
// Matches a Bool node.  The ordering conditions use the below/above
// forms b / nb / be / nbe instead of the signed forms in cmpOp.
4163 operand cmpOpU()
4164 %{
4165   match(Bool);
4166 
4167   format %{ "" %}
4168   interface(COND_INTER) %{
4169     equal(0x4, "e");
4170     not_equal(0x5, "ne");
4171     less(0x2, "b");
4172     greater_equal(0x3, "nb");
4173     less_equal(0x6, "be");
4174     greater(0x7, "nbe");
4175     overflow(0x0, "o");
4176     no_overflow(0x1, "no");
4177   %}
4178 %}
4179 
4180 
4181 // Floating comparisons that don't require any fixup for the unordered case
// Matches a Bool node only when its test is lt, ge, le or gt (see the
// predicate).  The condition encodings are identical to cmpOpU.
4182 operand cmpOpUCF() %{
4183   match(Bool);
4184   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4185             n->as_Bool()->_test._test == BoolTest::ge ||
4186             n->as_Bool()->_test._test == BoolTest::le ||
4187             n->as_Bool()->_test._test == BoolTest::gt);
4188   format %{ "" %}
4189   interface(COND_INTER) %{
4190     equal(0x4, "e");
4191     not_equal(0x5, "ne");
4192     less(0x2, "b");
4193     greater_equal(0x3, "nb");
4194     less_equal(0x6, "be");
4195     greater(0x7, "nbe");
4196     overflow(0x0, "o");
4197     no_overflow(0x1, "no");
4198   %}
4199 %}
4200 
4201 
4202 // Floating comparisons that can be fixed up with extra conditional jumps
// Matches a Bool node only when its test is ne or eq (see the predicate).
// The condition encodings are identical to cmpOpU.
4203 operand cmpOpUCF2() %{
4204   match(Bool);
4205   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4206             n->as_Bool()->_test._test == BoolTest::eq);
4207   format %{ "" %}
4208   interface(COND_INTER) %{
4209     equal(0x4, "e");
4210     not_equal(0x5, "ne");
4211     less(0x2, "b");
4212     greater_equal(0x3, "nb");
4213     less_equal(0x6, "be");
4214     greater(0x7, "nbe");
4215     overflow(0x0, "o");
4216     no_overflow(0x1, "no");
4217   %}
4218 %}
4219 
4220 
4221 //----------OPERAND CLASSES----------------------------------------------------
4222 // Operand Classes are groups of operands that are used to simplify
4223 // instruction definitions by not requiring the AD writer to specify separate
4224 // instructions for every form of operand when the instruction accepts
4225 // multiple operand types with the same basic encoding and format.  The classic
4226 // case of this is memory operands.
4227 
// The "memory" operand class: every addressing-mode operand an instruction
// accepts when it declares a "memory" parameter.  It lists the plain pointer
// forms, the compressed-oop form, and the *Narrow forms.
4228 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4229                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4230                indCompressedOopOffset,
4231                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4232                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4233                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
4234 
4235 //----------PIPELINE-----------------------------------------------------------
4236 // Rules which define the behavior of the target architectures pipeline.
4237 pipeline %{
4238 
4239 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes: instruction sizing, bundle width, fetch unit
// geometry and the node used as a nop.
4240 attributes %{
4241   variable_size_instructions;        // Instructions are variable size
4242   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4243   instruction_unit_size = 1;         // Instruction size unit is 1 byte
4244   instruction_fetch_unit_size = 16;  // The processor fetches one line
4245   instruction_fetch_units = 1;       // of 16 bytes
4246 
4247   // List of nop instructions
4248   nops( MachNop );
4249 %}
4250 
4251 //----------RESOURCES----------------------------------------------------------
4252 // Resources are the functional units available to the machine
4253 
4254 // Generic P2/P3 pipeline
4255 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4256 // 3 instructions decoded per cycle.
4257 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4258 // 3 ALU op, only ALU0 handles mul instructions.
// DECODE, MEM and ALU are group names that stand for any one of their
// member units.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below -- confirm which is meant.
4259 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4260            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4261            BR, FPU,
4262            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4263 
4264 //----------PIPELINE DESCRIPTION-----------------------------------------------
4265 // Pipeline Description specifies the stages in the machine's pipeline
4266 
4267 // Generic P2/P3 pipeline
// Six stages, S0 through S5; the pipe_class definitions below refer to
// these names when stating where operands and resources are used.
4268 pipe_desc(S0, S1, S2, S3, S4, S5);
4269 
4270 //----------PIPELINE CLASSES---------------------------------------------------
4271 // Pipeline Classes describe the stages in which input and output are
4272 // referenced by the hardware pipeline.
4273 
4274 // Naming convention: ialu or fpu
4275 // Then: _reg
4276 // Then: _reg if there is a 2nd register
4277 // Then: _long if it's a pair of instructions implementing a long
4278 // Then: _fat if it requires the big decoder
4279 //   Or: _mem if it requires the big decoder and a memory unit.
4280 
4281 // Integer ALU reg operation
// Single instruction; dst is both read (S3) and written (S4).
4282 pipe_class ialu_reg(rRegI dst)
4283 %{
4284     single_instruction;
4285     dst    : S4(write);
4286     dst    : S3(read);
4287     DECODE : S0;        // any decoder
4288     ALU    : S3;        // any alu
4289 %}
4290 
4291 // Long ALU reg operation
// Counted as two instructions; dst is both read (S3) and written (S4).
4292 pipe_class ialu_reg_long(rRegL dst)
4293 %{
4294     instruction_count(2);
4295     dst    : S4(write);
4296     dst    : S3(read);
4297     DECODE : S0(2);     // any 2 decoders
4298     ALU    : S3(2);     // both alus
4299 %}
4300 
4301 // Integer ALU reg operation using big decoder
// Same operand timing as ialu_reg, but decoding is restricted to D0.
4302 pipe_class ialu_reg_fat(rRegI dst)
4303 %{
4304     single_instruction;
4305     dst    : S4(write);
4306     dst    : S3(read);
4307     D0     : S0;        // big decoder only
4308     ALU    : S3;        // any alu
4309 %}
4310 
4311 // Long ALU reg operation using big decoder
// Same operand timing as ialu_reg_long, but decoding is restricted to D0.
4312 pipe_class ialu_reg_long_fat(rRegL dst)
4313 %{
4314     instruction_count(2);
4315     dst    : S4(write);
4316     dst    : S3(read);
4317     D0     : S0(2);     // big decoder only; twice
4318     ALU    : S3(2);     // any 2 alus
4319 %}
4320 
4321 // Integer ALU reg-reg operation
// Single instruction; src is read in S3 and dst is written in S4.
4322 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4323 %{
4324     single_instruction;
4325     dst    : S4(write);
4326     src    : S3(read);
4327     DECODE : S0;        // any decoder
4328     ALU    : S3;        // any alu
4329 %}
4330 
4331 // Long ALU reg-reg operation
// Counted as two instructions; src is read in S3 and dst is written in S4.
4332 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4333 %{
4334     instruction_count(2);
4335     dst    : S4(write);
4336     src    : S3(read);
4337     DECODE : S0(2);     // any 2 decoders
4338     ALU    : S3(2);     // both alus
4339 %}
4340 
4341 // Integer ALU reg-reg operation using big decoder
// Single instruction decoded on D0; src is read in S3, dst written in S4.
// NOTE(review): despite the reg_reg name, src is declared as a memory
// operand and no MEM resource is reserved -- confirm this is intended.
4342 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4343 %{
4344     single_instruction;
4345     dst    : S4(write);
4346     src    : S3(read);
4347     D0     : S0;        // big decoder only
4348     ALU    : S3;        // any alu
4349 %}
4350 
4351 // Long ALU reg-reg operation using big decoder
// Same operand timing as ialu_reg_reg_long, but decoding is restricted to D0.
4352 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4353 %{
4354     instruction_count(2);
4355     dst    : S4(write);
4356     src    : S3(read);
4357     D0     : S0(2);     // big decoder only; twice
4358     ALU    : S3(2);     // both alus
4359 %}
4360 
4361 // Integer ALU reg-mem operation
// Single instruction; the memory operand is read in S3, the ALU is used one
// stage later (S4) and dst is written in S5.
4362 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4363 %{
4364     single_instruction;
4365     dst    : S5(write);
4366     mem    : S3(read);
4367     D0     : S0;        // big decoder only
4368     ALU    : S4;        // any alu
4369     MEM    : S3;        // any mem
4370 %}
4371 
4372 // Integer mem operation (prefetch)
// Single instruction that only reads the memory operand (S3); no ALU and no
// register result.
4373 pipe_class ialu_mem(memory mem)
4374 %{
4375     single_instruction;
4376     mem    : S3(read);
4377     D0     : S0;        // big decoder only
4378     MEM    : S3;        // any mem
4379 %}
4380 
4381 // Integer Store to Memory
// Single instruction; the memory operand is referenced in S3 and the source
// register is read in S5.
4382 pipe_class ialu_mem_reg(memory mem, rRegI src)
4383 %{
4384     single_instruction;
4385     mem    : S3(read);
4386     src    : S5(read);
4387     D0     : S0;        // big decoder only
4388     ALU    : S4;        // any alu
4389     MEM    : S3;
4390 %}
4391 
4392 // // Long Store to Memory
4393 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4394 // %{
4395 //     instruction_count(2);
4396 //     mem    : S3(read);
4397 //     src    : S5(read);
4398 //     D0     : S0(2);          // big decoder only; twice
4399 //     ALU    : S4(2);     // any 2 alus
4400 //     MEM    : S3(2);  // Both mems
4401 // %}
4402 
4403 // Integer Store to Memory
// Like ialu_mem_reg but without a register source operand (the class name
// suggests the stored value is an immediate).
4404 pipe_class ialu_mem_imm(memory mem)
4405 %{
4406     single_instruction;
4407     mem    : S3(read);
4408     D0     : S0;        // big decoder only
4409     ALU    : S4;        // any alu
4410     MEM    : S3;
4411 %}
4412 
4413 // Integer ALU0 reg-reg operation
// Single instruction that must decode on D0 and execute on ALU0; src is
// read in S3 and dst written in S4.
4414 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4415 %{
4416     single_instruction;
4417     dst    : S4(write);
4418     src    : S3(read);
4419     D0     : S0;        // Big decoder only
4420     ALU0   : S3;        // only alu0
4421 %}
4422 
4423 // Integer ALU0 reg-mem operation
// Same timing as ialu_reg_mem, but execution is restricted to ALU0.
4424 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4425 %{
4426     single_instruction;
4427     dst    : S5(write);
4428     mem    : S3(read);
4429     D0     : S0;        // big decoder only
4430     ALU0   : S4;        // ALU0 only
4431     MEM    : S3;        // any mem
4432 %}
4433 
4434 // Integer ALU reg-reg operation
// Flag-setting form: both sources are read in S3 and the flags operand cr
// is written in S4.
4435 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4436 %{
4437     single_instruction;
4438     cr     : S4(write);
4439     src1   : S3(read);
4440     src2   : S3(read);
4441     DECODE : S0;        // any decoder
4442     ALU    : S3;        // any alu
4443 %}
4444 
4445 // Integer ALU reg-imm operation
// Flag-setting form with a single register source: src1 is read in S3 and
// the flags operand cr is written in S4.
4446 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4447 %{
4448     single_instruction;
4449     cr     : S4(write);
4450     src1   : S3(read);
4451     DECODE : S0;        // any decoder
4452     ALU    : S3;        // any alu
4453 %}
4454 
4455 // Integer ALU reg-mem operation
// Flag-setting form with a memory source: src1 and src2 are read in S3, the
// ALU is used in S4 and the flags operand cr is written in S4.
4456 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4457 %{
4458     single_instruction;
4459     cr     : S4(write);
4460     src1   : S3(read);
4461     src2   : S3(read);
4462     D0     : S0;        // big decoder only
4463     ALU    : S4;        // any alu
4464     MEM    : S3;
4465 %}
4466 
4467 // Conditional move reg-reg
// NOTE(review): the header above looks copied from pipe_cmov_reg; the class
// name suggests a compare-less-than idiom -- confirm against its users.
// Counted as four instructions; p and q are read in S3, y in S4.  No operand
// is marked as written and only decoder resources are reserved.
4468 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4469 %{
4470     instruction_count(4);
4471     y      : S4(read);
4472     q      : S3(read);
4473     p      : S3(read);
4474     DECODE : S0(4);     // any decoder
4475 %}
4476 
4477 // Conditional move reg-reg
// Single instruction; src and the flags operand cr are read in S3 and dst
// is written in S4.  Only a decoder is reserved.
4478 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4479 %{
4480     single_instruction;
4481     dst    : S4(write);
4482     src    : S3(read);
4483     cr     : S3(read);
4484     DECODE : S0;        // any decoder
4485 %}
4486 
4487 // Conditional move reg-mem
// Single instruction; the memory source and the flags operand cr are read
// in S3 and dst is written in S4.  Note the parameter order differs from
// pipe_cmov_reg (cr comes first here).
4488 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4489 %{
4490     single_instruction;
4491     dst    : S4(write);
4492     src    : S3(read);
4493     cr     : S3(read);
4494     DECODE : S0;        // any decoder
4495     MEM    : S3;
4496 %}
4497 
4498 // Conditional move reg-reg long
// Declared as a single instruction, yet two decoder slots are reserved at
// S0.  src and cr are read in S3 and dst is written in S4.
4499 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4500 %{
4501     single_instruction;
4502     dst    : S4(write);
4503     src    : S3(read);
4504     cr     : S3(read);
4505     DECODE : S0(2);     // any 2 decoders
4506 %}
4507 
4508 // XXX
4509 // // Conditional move double reg-reg
4510 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4511 // %{
4512 //     single_instruction;
4513 //     dst    : S4(write);
4514 //     src    : S3(read);
4515 //     cr     : S3(read);
4516 //     DECODE : S0;     // any decoder
4517 // %}
4518 
4519 // Float reg operation
// Counted as two instructions; dst is only marked as read (S3) and the FPU
// is used in S3.
4520 pipe_class fpu_reg(regD dst)
4521 %{
4522     instruction_count(2);
4523     dst    : S3(read);
4524     DECODE : S0(2);     // any 2 decoders
4525     FPU    : S3;
4526 %}
4527 
4528 // Float reg-reg operation
// Counted as two instructions; src is read in S3 and dst is written in S4.
4529 pipe_class fpu_reg_reg(regD dst, regD src)
4530 %{
4531     instruction_count(2);
4532     dst    : S4(write);
4533     src    : S3(read);
4534     DECODE : S0(2);     // any 2 decoders
4535     FPU    : S3;
4536 %}
4537 
4538 // Float reg-reg-reg operation
// Counted as three instructions; both sources are read in S3 and dst is
// written in S4.
4539 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4540 %{
4541     instruction_count(3);
4542     dst    : S4(write);
4543     src1   : S3(read);
4544     src2   : S3(read);
4545     DECODE : S0(3);     // any 3 decoders
4546     FPU    : S3(2);
4547 %}
4548 
4549 // Float reg-reg-reg-reg operation
// Counted as four instructions; the three sources are read in S3 and dst is
// written in S4.
4550 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4551 %{
4552     instruction_count(4);
4553     dst    : S4(write);
4554     src1   : S3(read);
4555     src2   : S3(read);
4556     src3   : S3(read);
4557     DECODE : S0(4);     // any 4 decoders
4558     FPU    : S3(2);
4559 %}
4560 
4561 // Float reg-mem-reg-reg operation
// Counted as four instructions; src1 is a memory operand.  All sources are
// read in S3 and dst is written in S4.  D0 is used in S0 and three further
// decoder slots in S1.
4562 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4563 %{
4564     instruction_count(4);
4565     dst    : S4(write);
4566     src1   : S3(read);
4567     src2   : S3(read);
4568     src3   : S3(read);
4569     DECODE : S1(3);     // any 3 decoders
4570     D0     : S0;        // Big decoder only
4571     FPU    : S3(2);
4572     MEM    : S3;
4573 %}
4574 
4575 // Float reg-mem operation
// Counted as two instructions; the memory operand is read in S3, the FPU is
// used in S4 and dst is written in S5.
4576 pipe_class fpu_reg_mem(regD dst, memory mem)
4577 %{
4578     instruction_count(2);
4579     dst    : S5(write);
4580     mem    : S3(read);
4581     D0     : S0;        // big decoder only
4582     DECODE : S1;        // any decoder for FPU POP
4583     FPU    : S4;
4584     MEM    : S3;        // any mem
4585 %}
4586 
4587 // Float reg-reg-mem operation
// Counted as three instructions; src1 and the memory operand are read in
// S3, the FPU is used in S4 and dst is written in S5.
4588 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4589 %{
4590     instruction_count(3);
4591     dst    : S5(write);
4592     src1   : S3(read);
4593     mem    : S3(read);
4594     D0     : S0;        // big decoder only
4595     DECODE : S1(2);     // any decoder for FPU POP
4596     FPU    : S4;
4597     MEM    : S3;        // any mem
4598 %}
4599 
4600 // Float mem-reg operation
// Counted as two instructions; the memory operand is referenced in S3 and
// the source register is read in S5.  Here the generic decoder comes first
// (S0) and D0 second (S1), the reverse of fpu_reg_mem.
4601 pipe_class fpu_mem_reg(memory mem, regD src)
4602 %{
4603     instruction_count(2);
4604     src    : S5(read);
4605     mem    : S3(read);
4606     DECODE : S0;        // any decoder for FPU PUSH
4607     D0     : S1;        // big decoder only
4608     FPU    : S4;
4609     MEM    : S3;        // any mem
4610 %}
4611 
// Float mem-reg-reg operation
// Counted as three instructions; both register sources and the memory
// operand are referenced in S3.
4612 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4613 %{
4614     instruction_count(3);
4615     src1   : S3(read);
4616     src2   : S3(read);
4617     mem    : S3(read);
4618     DECODE : S0(2);     // any decoder for FPU PUSH
4619     D0     : S1;        // big decoder only
4620     FPU    : S4;
4621     MEM    : S3;        // any mem
4622 %}
4623 
// Float mem-reg-mem operation
// Counted as three instructions; src1 and the memory source src2 are read
// in S3, the destination memory operand is referenced in S4.  Two memory
// unit slots are reserved in S3.
4624 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4625 %{
4626     instruction_count(3);
4627     src1   : S3(read);
4628     src2   : S3(read);
4629     mem    : S4(read);
4630     DECODE : S0;        // any decoder for FPU PUSH
4631     D0     : S0(2);     // big decoder only
4632     FPU    : S4;
4633     MEM    : S3(2);     // any mem
4634 %}
4635 
// Float mem-mem operation
// Counted as two instructions; src1 is read in S3 and dst is referenced in
// S4.  No FPU resource is reserved, only D0 and two memory unit slots.
4636 pipe_class fpu_mem_mem(memory dst, memory src1)
4637 %{
4638     instruction_count(2);
4639     src1   : S3(read);
4640     dst    : S4(read);
4641     D0     : S0(2);     // big decoder only
4642     MEM    : S3(2);     // any mem
4643 %}
4644 
// Float mem-mem-mem operation
// Counted as three instructions; both memory sources are read in S3 and dst
// is referenced in S4.  Three D0 slots and three memory unit slots are
// reserved.
4645 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4646 %{
4647     instruction_count(3);
4648     src1   : S3(read);
4649     src2   : S3(read);
4650     dst    : S4(read);
4651     D0     : S0(3);     // big decoder only
4652     FPU    : S4;
4653     MEM    : S3(3);     // any mem
4654 %}
4655 
// Float mem-reg operation with a constant (per the class name)
// Counted as three instructions; src1 and the memory operand are both
// referenced in S4.  Two memory unit slots are reserved in S3.
4656 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4657 %{
4658     instruction_count(3);
4659     src1   : S4(read);
4660     mem    : S4(read);
4661     DECODE : S0;        // any decoder for FPU PUSH
4662     D0     : S0(2);     // big decoder only
4663     FPU    : S4;
4664     MEM    : S3(2);     // any mem
4665 %}
4666 
4667 // Float load constant
// Counted as two instructions; a memory unit is used in S3 for the load,
// the FPU in S4, and dst is written in S5.
4668 pipe_class fpu_reg_con(regD dst)
4669 %{
4670     instruction_count(2);
4671     dst    : S5(write);
4672     D0     : S0;        // big decoder only for the load
4673     DECODE : S1;        // any decoder for FPU POP
4674     FPU    : S4;
4675     MEM    : S3;        // any mem
4676 %}
4677 
4678 // Float load constant
// Variant with an additional register source: counted as three
// instructions; src is read in S3 and dst is written in S5.
4679 pipe_class fpu_reg_reg_con(regD dst, regD src)
4680 %{
4681     instruction_count(3);
4682     dst    : S5(write);
4683     src    : S3(read);
4684     D0     : S0;        // big decoder only for the load
4685     DECODE : S1(2);     // any decoder for FPU POP
4686     FPU    : S4;
4687     MEM    : S3;        // any mem
4688 %}
4689 
4690 // Unconditional branch
// Single instruction that reserves only the branch unit (S3).
4691 pipe_class pipe_jmp(label labl)
4692 %{
4693     single_instruction;
4694     BR   : S3;
4695 %}
4696 
4697 // Conditional branch
// Single instruction; the flags operand cr is read in S1 and the branch
// unit is used in S3.
4698 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4699 %{
4700     single_instruction;
4701     cr    : S1(read);
4702     BR    : S3;
4703 %}
4704 
4705 // Allocation idiom
// Counted as one instruction with forced serialization and a fixed latency
// of 6.  heap_ptr is read in S3 and dst is written in S5.  Decoder, memory,
// ALU and branch resources are all reserved.
4706 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4707 %{
4708     instruction_count(1); force_serialization;
4709     fixed_latency(6);
4710     heap_ptr : S3(read);
4711     DECODE   : S0(3);
4712     D0       : S2;
4713     MEM      : S3;
4714     ALU      : S3(2);
4715     dst      : S5(write);
4716     BR       : S5;
4717 %}
4718 
4719 // Generic big/slow expanded idiom: catch-all class, takes no operands
4720 pipe_class pipe_slow()
4721 %{
4722     instruction_count(10); multiple_bundles; force_serialization;
4723     fixed_latency(100); // deliberately large latency compared with the other classes in this section
4724     D0  : S0(2);
4725     MEM : S3(2);
4726 %}
4727 
4728 // The real do-nothing guy: no operands, no resources, zero instructions
4729 pipe_class empty()
4730 %{
4731     instruction_count(0);
4732 %}
4733 
4734 // Define the class for the Nop node: MachNop uses the 'empty' pipe_class declared just above
4735 define
4736 %{
4737    MachNop = empty;
4738 %}
4739 
4740 %}
4741 
4742 //----------INSTRUCTIONS-------------------------------------------------------
4743 //
4744 // match      -- States which machine-independent subtree may be replaced
4745 //               by this instruction.
4746 // ins_cost   -- The estimated cost of this instruction is used by instruction
4747 //               selection to identify a minimum cost tree of machine
4748 //               instructions that matches a tree of machine-independent
4749 //               instructions.
4750 // format     -- A string providing the disassembly for this instruction.
4751 //               The value of an instruction's operand may be inserted
4752 //               by referring to it with a '$' prefix.
4753 // opcode     -- Three instruction opcodes may be provided.  These are referred
4754 //               to within an encode class as $primary, $secondary, and $tertiary
4755 //               respectively.  The primary opcode is commonly used to
4756 //               indicate the type of machine instruction, while secondary
4757 //               and tertiary are often used for prefix options or addressing
4758 //               modes.
4759 // ins_encode -- A list of encode classes with parameters. The encode class
4760 //               name must have been defined in an 'enc_class' specification
4761 //               in the encode section of the architecture description.
4762 
4763 
4764 //----------Load/Store/Move Instructions---------------------------------------
4765 //----------Load Instructions--------------------------------------------------
4766 
4767 // Load Byte (8 bit signed): LoadB from $mem into an int register using movsbl
4768 instruct loadB(rRegI dst, memory mem)
4769 %{
4770   match(Set dst (LoadB mem));
4771 
4772   ins_cost(125);
4773   format %{ "movsbl  $dst, $mem\t# byte" %}
4774 
4775   ins_encode %{
4776     __ movsbl($dst$$Register, $mem$$Address);
4777   %}
4778 
4779   ins_pipe(ialu_reg_mem);
4780 %}
4781 
4782 // Load Byte (8 bit signed) into Long Register: folds the int->long conversion into the load
4783 instruct loadB2L(rRegL dst, memory mem)
4784 %{
4785   match(Set dst (ConvI2L (LoadB mem))); // load + ConvI2L become one movsbq
4786 
4787   ins_cost(125);
4788   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4789 
4790   ins_encode %{
4791     __ movsbq($dst$$Register, $mem$$Address);
4792   %}
4793 
4794   ins_pipe(ialu_reg_mem);
4795 %}
4796 
4797 // Load Unsigned Byte (8 bit UNsigned): LoadUB from $mem into an int register using movzbl
4798 instruct loadUB(rRegI dst, memory mem)
4799 %{
4800   match(Set dst (LoadUB mem));
4801 
4802   ins_cost(125);
4803   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4804 
4805   ins_encode %{
4806     __ movzbl($dst$$Register, $mem$$Address);
4807   %}
4808 
4809   ins_pipe(ialu_reg_mem);
4810 %}
4811 
4812 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds the int->long conversion into the load
4813 instruct loadUB2L(rRegL dst, memory mem)
4814 %{
4815   match(Set dst (ConvI2L (LoadUB mem))); // load + ConvI2L become one movzbq
4816 
4817   ins_cost(125);
4818   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4819 
4820   ins_encode %{
4821     __ movzbq($dst$$Register, $mem$$Address);
4822   %}
4823 
4824   ins_pipe(ialu_reg_mem);
4825 %}
4826 
4827 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register (two instructions: movzbq, then andl)
4828 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
4829   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
4830   effect(KILL cr); // the andl emitted below modifies the condition flags
4831 
4832   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
4833             "andl    $dst, $mask" %}
4834   ins_encode %{
4835     Register Rdst = $dst$$Register;
4836     __ movzbq(Rdst, $mem$$Address);
4837     __ andl(Rdst, $mask$$constant);
4838   %}
4839   ins_pipe(ialu_reg_mem); // no explicit ins_cost is given for this rule
4840 %}
4841 
4842 // Load Short (16 bit signed): LoadS from $mem into an int register using movswl
4843 instruct loadS(rRegI dst, memory mem)
4844 %{
4845   match(Set dst (LoadS mem));
4846 
4847   ins_cost(125);
4848   format %{ "movswl $dst, $mem\t# short" %}
4849 
4850   ins_encode %{
4851     __ movswl($dst$$Register, $mem$$Address);
4852   %}
4853 
4854   ins_pipe(ialu_reg_mem);
4855 %}
4856 
4857 // Load Short (16 bit signed) to Byte (8 bit signed): the narrowing is folded into the load
4858 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4859   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); // (x << 24) >> 24 keeps only the sign-extended low byte
4860 
4861   ins_cost(125);
4862   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4863   ins_encode %{
4864     __ movsbl($dst$$Register, $mem$$Address);
4865   %}
4866   ins_pipe(ialu_reg_mem); // a byte load from the same address suffices: x86 is little-endian, so the low byte comes first
4867 %}
4868 
4869 // Load Short (16 bit signed) into Long Register: folds the int->long conversion into the load
4870 instruct loadS2L(rRegL dst, memory mem)
4871 %{
4872   match(Set dst (ConvI2L (LoadS mem))); // load + ConvI2L become one movswq
4873 
4874   ins_cost(125);
4875   format %{ "movswq $dst, $mem\t# short -> long" %}
4876 
4877   ins_encode %{
4878     __ movswq($dst$$Register, $mem$$Address);
4879   %}
4880 
4881   ins_pipe(ialu_reg_mem);
4882 %}
4883 
4884 // Load Unsigned Short/Char (16 bit UNsigned): LoadUS from $mem into an int register using movzwl
4885 instruct loadUS(rRegI dst, memory mem)
4886 %{
4887   match(Set dst (LoadUS mem));
4888 
4889   ins_cost(125);
4890   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
4891 
4892   ins_encode %{
4893     __ movzwl($dst$$Register, $mem$$Address);
4894   %}
4895 
4896   ins_pipe(ialu_reg_mem);
4897 %}
4898 
4899 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the narrowing is folded into the load
4900 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4901   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); // (x << 24) >> 24 keeps only the sign-extended low byte
4902 
4903   ins_cost(125);
4904   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
4905   ins_encode %{
4906     __ movsbl($dst$$Register, $mem$$Address);
4907   %}
4908   ins_pipe(ialu_reg_mem); // same encoding as loadS2B: the upper byte of the 16-bit value is shifted out either way
4909 %}
4910 
4911 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds the int->long conversion into the load
4912 instruct loadUS2L(rRegL dst, memory mem)
4913 %{
4914   match(Set dst (ConvI2L (LoadUS mem))); // load + ConvI2L become one movzwq
4915 
4916   ins_cost(125);
4917   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
4918 
4919   ins_encode %{
4920     __ movzwq($dst$$Register, $mem$$Address);
4921   %}
4922 
4923   ins_pipe(ialu_reg_mem);
4924 %}
4925 
4926 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: load, mask and widen in one movzbq
4927 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
4928   match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); // masking with 0xFF leaves only the low byte, so a zero-extending byte load is equivalent
4929 
4930   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
4931   ins_encode %{
4932     __ movzbq($dst$$Register, $mem$$Address);
4933   %}
4934   ins_pipe(ialu_reg_mem); // no flags operand: nothing but the load is emitted
4935 %}
4936 
4937 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register (two instructions: movzwq, then andl)
4938 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
4939   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
4940   effect(KILL cr); // the andl emitted below modifies the condition flags
4941 
4942   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
4943             "andl    $dst, $mask" %}
4944   ins_encode %{
4945     Register Rdst = $dst$$Register;
4946     __ movzwq(Rdst, $mem$$Address);
4947     __ andl(Rdst, $mask$$constant);
4948   %}
4949   ins_pipe(ialu_reg_mem); // no explicit ins_cost is given for this rule
4950 %}
4951 
4952 // Load Integer: LoadI from $mem into an int register using movl
4953 instruct loadI(rRegI dst, memory mem)
4954 %{
4955   match(Set dst (LoadI mem));
4956 
4957   ins_cost(125);
4958   format %{ "movl    $dst, $mem\t# int" %}
4959 
4960   ins_encode %{
4961     __ movl($dst$$Register, $mem$$Address);
4962   %}
4963 
4964   ins_pipe(ialu_reg_mem);
4965 %}
4966 
4967 // Load Integer (32 bit signed) to Byte (8 bit signed): the narrowing is folded into the load
4968 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4969   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); // (x << 24) >> 24 keeps only the sign-extended low byte
4970 
4971   ins_cost(125);
4972   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
4973   ins_encode %{
4974     __ movsbl($dst$$Register, $mem$$Address);
4975   %}
4976   ins_pipe(ialu_reg_mem); // only one byte is read from memory instead of four
4977 %}
4978 
4979 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): the mask is folded into the load
4980 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
4981   match(Set dst (AndI (LoadI mem) mask)); // x & 0xFF is the zero-extended low byte
4982 
4983   ins_cost(125);
4984   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
4985   ins_encode %{
4986     __ movzbl($dst$$Register, $mem$$Address);
4987   %}
4988   ins_pipe(ialu_reg_mem); // no flags operand: no separate and instruction is emitted
4989 %}
4990 
4991 // Load Integer (32 bit signed) to Short (16 bit signed): the narrowing is folded into the load
4992 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
4993   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); // (x << 16) >> 16 keeps only the sign-extended low 16 bits
4994 
4995   ins_cost(125);
4996   format %{ "movswl  $dst, $mem\t# int -> short" %}
4997   ins_encode %{
4998     __ movswl($dst$$Register, $mem$$Address);
4999   %}
5000   ins_pipe(ialu_reg_mem); // only two bytes are read from memory instead of four
5001 %}
5002 
5003 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): the mask is folded into the load
5004 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5005   match(Set dst (AndI (LoadI mem) mask)); // x & 0xFFFF is the zero-extended low 16 bits
5006 
5007   ins_cost(125);
5008   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5009   ins_encode %{
5010     __ movzwl($dst$$Register, $mem$$Address);
5011   %}
5012   ins_pipe(ialu_reg_mem); // no flags operand: no separate and instruction is emitted
5013 %}
5014 
5015 // Load Integer into Long Register: folds the int->long conversion into the load
5016 instruct loadI2L(rRegL dst, memory mem)
5017 %{
5018   match(Set dst (ConvI2L (LoadI mem))); // load + ConvI2L become one movslq
5019 
5020   ins_cost(125);
5021   format %{ "movslq  $dst, $mem\t# int -> long" %}
5022 
5023   ins_encode %{
5024     __ movslq($dst$$Register, $mem$$Address);
5025   %}
5026 
5027   ins_pipe(ialu_reg_mem);
5028 %}
5029 
5030 // Load Integer with mask 0xFF into Long Register: load, mask and widen in one movzbq
5031 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5032   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // the masked value is the zero-extended low byte
5033 
5034   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5035   ins_encode %{
5036     __ movzbq($dst$$Register, $mem$$Address);
5037   %}
5038   ins_pipe(ialu_reg_mem); // no explicit ins_cost is given for this rule
5039 %}
5040 
5041 // Load Integer with mask 0xFFFF into Long Register: load, mask and widen in one movzwq
5042 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5043   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // the masked value is the zero-extended low 16 bits
5044 
5045   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5046   ins_encode %{
5047     __ movzwq($dst$$Register, $mem$$Address);
5048   %}
5049   ins_pipe(ialu_reg_mem); // no explicit ins_cost is given for this rule
5050 %}
5051 
5052 // Load Integer with a 31-bit mask into Long Register (two instructions: movl, then andl)
5053 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5054   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // NOTE(review): presumably restricted to immU31 so the masked int is never negative and 32-bit ops give the right long - confirm against the operand definition
5055   effect(KILL cr); // the andl emitted below modifies the condition flags
5056 
5057   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5058             "andl    $dst, $mask" %}
5059   ins_encode %{
5060     Register Rdst = $dst$$Register;
5061     __ movl(Rdst, $mem$$Address);
5062     __ andl(Rdst, $mask$$constant);
5063   %}
5064   ins_pipe(ialu_reg_mem);
5065 %}
5066 
5067 // Load Unsigned Integer into Long Register: widen-then-mask is replaced by a plain 32-bit load
5068 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
5069 %{
5070   match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); // immL_32bits is defined elsewhere; presumably the constant 0xFFFFFFFF
5071 
5072   ins_cost(125);
5073   format %{ "movl    $dst, $mem\t# uint -> long" %}
5074 
5075   ins_encode %{
5076     __ movl($dst$$Register, $mem$$Address);
5077   %}
5078 
5079   ins_pipe(ialu_reg_mem); // a 32-bit register write zero-extends into the upper half on x86-64, so no and is needed
5080 %}
5081 
5082 // Load Long: LoadL from $mem into a long register using movq
5083 instruct loadL(rRegL dst, memory mem)
5084 %{
5085   match(Set dst (LoadL mem));
5086 
5087   ins_cost(125);
5088   format %{ "movq    $dst, $mem\t# long" %}
5089 
5090   ins_encode %{
5091     __ movq($dst$$Register, $mem$$Address);
5092   %}
5093 
5094   ins_pipe(ialu_reg_mem); // XXX
5095 %}
5096 
5097 // Load Range: LoadRange from $mem into an int register with a 32-bit move
5098 instruct loadRange(rRegI dst, memory mem)
5099 %{
5100   match(Set dst (LoadRange mem));
5101 
5102   ins_cost(125); // XXX
5103   format %{ "movl    $dst, $mem\t# range" %}
5104   opcode(0x8B); // the load form of MOV, matching the movl in the format string
5105   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem)); // assembled from enc_classes defined elsewhere rather than through the macro assembler
5106   ins_pipe(ialu_reg_mem);
5107 %}
5108 
5109 // Load Pointer: LoadP from $mem into a pointer register with a 64-bit move
5110 instruct loadP(rRegP dst, memory mem)
5111 %{
5112   match(Set dst (LoadP mem));
5113 
5114   ins_cost(125); // XXX
5115   format %{ "movq    $dst, $mem\t# ptr" %}
5116   opcode(0x8B); // same opcode as loadRange; the _wide REX enc_class below is what differs
5117   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5118   ins_pipe(ialu_reg_mem); // XXX
5119 %}
5120 
5121 // Load Compressed Pointer: LoadN from $mem into a narrow-oop register with a 32-bit movl
5122 instruct loadN(rRegN dst, memory mem)
5123 %{
5124    match(Set dst (LoadN mem));
5125 
5126    ins_cost(125); // XXX
5127    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5128    ins_encode %{
5129      __ movl($dst$$Register, $mem$$Address);
5130    %}
5131    ins_pipe(ialu_reg_mem); // XXX
5132 %}
5133 
5134 
5135 // Load Klass Pointer: LoadKlass from $mem into a pointer register with a 64-bit move
5136 instruct loadKlass(rRegP dst, memory mem)
5137 %{
5138   match(Set dst (LoadKlass mem));
5139 
5140   ins_cost(125); // XXX
5141   format %{ "movq    $dst, $mem\t# class" %}
5142   opcode(0x8B); // encoded exactly like loadP; only the matched ideal node differs
5143   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5144   ins_pipe(ialu_reg_mem); // XXX
5145 %}
5146 
5147 // Load narrow Klass Pointer: LoadNKlass from $mem into a narrow register with a 32-bit movl
5148 instruct loadNKlass(rRegN dst, memory mem)
5149 %{
5150   match(Set dst (LoadNKlass mem));
5151 
5152   ins_cost(125); // XXX
5153   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5154   ins_encode %{
5155     __ movl($dst$$Register, $mem$$Address);
5156   %}
5157   ins_pipe(ialu_reg_mem); // XXX
5158 %}
5159 
5160 // Load Float: LoadF from $mem into an XMM register via the macro assembler's movflt
5161 instruct loadF(regF dst, memory mem)
5162 %{
5163   match(Set dst (LoadF mem));
5164 
5165   ins_cost(145); // XXX
5166   format %{ "movss   $dst, $mem\t# float" %}
5167   ins_encode %{
5168     __ movflt($dst$$XMMRegister, $mem$$Address);
5169   %}
5170   ins_pipe(pipe_slow); // XXX
5171 %}
5172 
5173 // Load Double, variant used when UseXmmLoadAndClearUpper is off (loadD covers the other case)
5174 instruct loadD_partial(regD dst, memory mem)
5175 %{
5176   predicate(!UseXmmLoadAndClearUpper);
5177   match(Set dst (LoadD mem));
5178 
5179   ins_cost(145); // XXX
5180   format %{ "movlpd  $dst, $mem\t# double" %}
5181   ins_encode %{
5182     __ movdbl($dst$$XMMRegister, $mem$$Address);
5183   %}
5184   ins_pipe(pipe_slow); // XXX - same movdbl call as loadD; presumably movdbl itself picks movlpd or movsd from the flag (confirm in the macro assembler)
5185 %}
5186 
5187 instruct loadD(regD dst, memory mem)
5188 %{
5189   predicate(UseXmmLoadAndClearUpper); // complement of loadD_partial's predicate, so exactly one of the two LoadD rules applies
5190   match(Set dst (LoadD mem));
5191 
5192   ins_cost(145); // XXX
5193   format %{ "movsd   $dst, $mem\t# double" %}
5194   ins_encode %{
5195     __ movdbl($dst$$XMMRegister, $mem$$Address);
5196   %}
5197   ins_pipe(pipe_slow); // XXX
5198 %}
5199 
5200 // Load Effective Address: computes the address described by an indOffset8 operand into dst (no memory access)
5201 instruct leaP8(rRegP dst, indOffset8 mem)
5202 %{
5203   match(Set dst mem); // the addressing-mode operand itself is the value being assigned
5204 
5205   ins_cost(110); // XXX
5206   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5207   opcode(0x8D); // LEA
5208   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5209   ins_pipe(ialu_reg_reg_fat);
5210 %}
5211 
5212 instruct leaP32(rRegP dst, indOffset32 mem)
5213 %{
5214   match(Set dst mem); // same rule as leaP8, for the indOffset32 operand
5215 
5216   ins_cost(110);
5217   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5218   opcode(0x8D); // LEA
5219   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5220   ins_pipe(ialu_reg_reg_fat);
5221 %}
5222 
5223 // instruct leaPIdx(rRegP dst, indIndex mem)
5224 // %{
5225 //   match(Set dst mem);
5226 
5227 //   ins_cost(110);
5228 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5229 //   opcode(0x8D);
5230 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5231 //   ins_pipe(ialu_reg_reg_fat);
5232 // %}
5233 
5234 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5235 %{
5236   match(Set dst mem); // leaq of an indIndexOffset operand
5237 
5238   ins_cost(110);
5239   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5240   opcode(0x8D); // LEA
5241   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5242   ins_pipe(ialu_reg_reg_fat);
5243 %}
5244 
5245 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5246 %{
5247   match(Set dst mem); // leaq of an indIndexScale operand
5248 
5249   ins_cost(110);
5250   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5251   opcode(0x8D); // LEA
5252   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5253   ins_pipe(ialu_reg_reg_fat);
5254 %}
5255 
5256 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5257 %{
5258   match(Set dst mem); // leaq of an indIndexScaleOffset operand
5259 
5260   ins_cost(110);
5261   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5262   opcode(0x8D); // LEA
5263   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5264   ins_pipe(ialu_reg_reg_fat);
5265 %}
5266 
5267 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5268 %{
5269   match(Set dst mem); // leaq of an indPosIndexScaleOffset operand
5270 
5271   ins_cost(110);
5272   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5273   opcode(0x8D); // LEA
5274   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5275   ins_pipe(ialu_reg_reg_fat);
5276 %}
5277 
5278 // Load Effective Address which uses Narrow (32-bits) oop; only for compressed oops with a non-zero shift
5279 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5280 %{
5281   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); // the *Narrow lea rules in this file require shift == 0 instead
5282   match(Set dst mem);
5283 
5284   ins_cost(110);
5285   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5286   opcode(0x8D); // LEA
5287   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5288   ins_pipe(ialu_reg_reg_fat);
5289 %}
5290 
5291 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5292 %{
5293   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5294   match(Set dst mem); // leaq of an indOffset8Narrow operand
5295 
5296   ins_cost(110); // XXX
5297   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5298   opcode(0x8D); // LEA
5299   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5300   ins_pipe(ialu_reg_reg_fat);
5301 %}
5302 
5303 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5304 %{
5305   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5306   match(Set dst mem); // leaq of an indOffset32Narrow operand
5307 
5308   ins_cost(110);
5309   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5310   opcode(0x8D); // LEA
5311   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5312   ins_pipe(ialu_reg_reg_fat);
5313 %}
5314 
5315 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5316 %{
5317   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5318   match(Set dst mem); // leaq of an indIndexOffsetNarrow operand
5319 
5320   ins_cost(110);
5321   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5322   opcode(0x8D); // LEA
5323   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5324   ins_pipe(ialu_reg_reg_fat);
5325 %}
5326 
5327 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5328 %{
5329   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5330   match(Set dst mem); // leaq of an indIndexScaleNarrow operand
5331 
5332   ins_cost(110);
5333   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5334   opcode(0x8D); // LEA
5335   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5336   ins_pipe(ialu_reg_reg_fat);
5337 %}
5338 
5339 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5340 %{
5341   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5342   match(Set dst mem); // leaq of an indIndexScaleOffsetNarrow operand
5343 
5344   ins_cost(110);
5345   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5346   opcode(0x8D); // LEA
5347   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5348   ins_pipe(ialu_reg_reg_fat);
5349 %}
5350 
5351 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5352 %{
5353   predicate(Universe::narrow_oop_shift() == 0); // only valid when narrow oops are unshifted
5354   match(Set dst mem); // leaq of an indPosIndexScaleOffsetNarrow operand
5355 
5356   ins_cost(110);
5357   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5358   opcode(0x8D); // LEA
5359   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5360   ins_pipe(ialu_reg_reg_fat);
5361 %}
5362 
5363 instruct loadConI(rRegI dst, immI src)
5364 %{
5365   match(Set dst src); // general int constant; loadConI0 handles zero with an explicit lower cost
5366 
5367   format %{ "movl    $dst, $src\t# int" %}
5368   ins_encode(load_immI(dst, src)); // load_immI is an enc_class defined elsewhere in the file
5369   ins_pipe(ialu_reg_fat); // XXX
5370 %}
5371 
5372 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5373 %{
5374   match(Set dst src); // int constant zero, produced by xor-ing the register with itself
5375   effect(KILL cr); // xorl modifies the condition flags
5376 
5377   ins_cost(50);
5378   format %{ "xorl    $dst, $dst\t# int" %}
5379   opcode(0x33); /* + rd */
5380   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // dst is passed as both operands
5381   ins_pipe(ialu_reg);
5382 %}
5383 
5384 instruct loadConL(rRegL dst, immL src)
5385 %{
5386   match(Set dst src); // general long constant
5387 
5388   ins_cost(150); // highest cost of the long-constant rules here (loadConL0 50, loadConUL32 60, loadConL32 70)
5389   format %{ "movq    $dst, $src\t# long" %}
5390   ins_encode(load_immL(dst, src)); // load_immL is an enc_class defined elsewhere in the file
5391   ins_pipe(ialu_reg);
5392 %}
5393 
5394 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5395 %{
5396   match(Set dst src); // long constant zero, produced by xor-ing the register with itself
5397   effect(KILL cr); // xorl modifies the condition flags
5398 
5399   ins_cost(50);
5400   format %{ "xorl    $dst, $dst\t# long" %}
5401   opcode(0x33); /* + rd */
5402   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // same encoding as loadConI0: a 32-bit xor also clears the upper half on x86-64
5403   ins_pipe(ialu_reg); // XXX
5404 %}
5405 
5406 instruct loadConUL32(rRegL dst, immUL32 src)
5407 %{
5408   match(Set dst src); // long constant that fits an unsigned 32-bit immediate (see format comment)
5409 
5410   ins_cost(60);
5411   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5412   ins_encode(load_immUL32(dst, src)); // load_immUL32 is an enc_class defined elsewhere in the file
5413   ins_pipe(ialu_reg);
5414 %}
5415 
5416 instruct loadConL32(rRegL dst, immL32 src)
5417 %{
5418   match(Set dst src); // long constant that fits a 32-bit immediate (see format comment)
5419 
5420   ins_cost(70);
5421   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5422   ins_encode(load_immL32(dst, src)); // load_immL32 is an enc_class defined elsewhere in the file
5423   ins_pipe(ialu_reg);
5424 %}
5425 
5426 instruct loadConP(rRegP dst, immP con) %{
5427   match(Set dst con); // general pointer constant; loadConP0 and loadConP31 carry explicit lower costs
5428 
5429   format %{ "movq    $dst, $con\t# ptr" %}
5430   ins_encode(load_immP(dst, con)); // load_immP is an enc_class defined elsewhere in the file
5431   ins_pipe(ialu_reg_fat); // XXX
5432 %}
5433 
5434 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5435 %{
5436   match(Set dst src); // NULL pointer constant, produced by xor-ing the register with itself
5437   effect(KILL cr); // xorl modifies the condition flags
5438 
5439   ins_cost(50);
5440   format %{ "xorl    $dst, $dst\t# ptr" %}
5441   opcode(0x33); /* + rd */
5442   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // same encoding as loadConI0 and loadConL0
5443   ins_pipe(ialu_reg);
5444 %}
5445 
5446 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5447 %{
5448   match(Set dst src); // pointer constant that fits a positive 32-bit immediate (see format comment)
5449   effect(KILL cr); // NOTE(review): the format shows a plain movl; load_immP31 is defined elsewhere and may emit a flag-writing form - confirm
5450 
5451   ins_cost(60);
5452   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5453   ins_encode(load_immP31(dst, src));
5454   ins_pipe(ialu_reg);
5455 %}
5456 
5457 instruct loadConF(regF dst, immF con) %{
5458   match(Set dst con); // general float constant, loaded from the constant table
5459   ins_cost(125);
5460   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5461   ins_encode %{
5462     __ movflt($dst$$XMMRegister, $constantaddress($con));
5463   %}
5464   ins_pipe(pipe_slow); // loadConF0 (cost 100) covers 0.0 without touching memory
5465 %}
5466 
5467 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5468   match(Set dst src); // compressed NULL constant, produced by xor-ing the register with itself
5469   effect(KILL cr); // xorq modifies the condition flags
5470   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5471   ins_encode %{
5472     __ xorq($dst$$Register, $dst$$Register);
5473   %}
5474   ins_pipe(ialu_reg); // the format string now shows both xorq operands as dst, matching what is emitted
5475 %}
5476 
5477 instruct loadConN(rRegN dst, immN src) %{
5478   match(Set dst src); // non-NULL compressed oop constant; NULL is expected to be matched by loadConN0
5479 
5480   ins_cost(125);
5481   format %{ "movl    $dst, $src\t# compressed ptr" %}
5482   ins_encode %{
5483     address con = (address)$src$$constant;
5484     if (con == NULL) {
5485       ShouldNotReachHere();
5486     } else {
5487       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5488     }
5489   %}
5490   ins_pipe(ialu_reg_fat); // XXX
5491 %}
5492 
5493 instruct loadConNKlass(rRegN dst, immNKlass src) %{
5494   match(Set dst src); // compressed klass constant; a NULL constant is treated as a fatal error below
5495 
5496   ins_cost(125);
5497   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5498   ins_encode %{
5499     address con = (address)$src$$constant;
5500     if (con == NULL) {
5501       ShouldNotReachHere();
5502     } else {
5503       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5504     }
5505   %}
5506   ins_pipe(ialu_reg_fat); // XXX
5507 %}
5508 
5509 instruct loadConF0(regF dst, immF0 src)
5510 %{
5511   match(Set dst src); // float constant 0.0, produced by xor-ing the XMM register with itself
5512   ins_cost(100); // cheaper than loadConF (125), which loads from the constant table
5513 
5514   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5515   ins_encode %{
5516     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5517   %}
5518   ins_pipe(pipe_slow); // no flags operand is declared, unlike the integer xor rules
5519 %}
5520 
5521 // Load a double constant from the constant table. Use the same format since predicate() can not be used here.
5522 instruct loadConD(regD dst, immD con) %{
5523   match(Set dst con);
5524   ins_cost(125);
5525   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5526   ins_encode %{
5527     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5528   %}
5529   ins_pipe(pipe_slow); // loadConD0 (cost 100) covers 0.0 without touching memory
5530 %}
5531 
5532 instruct loadConD0(regD dst, immD0 src)
5533 %{
5534   match(Set dst src); // double constant 0.0, produced by xor-ing the XMM register with itself
5535   ins_cost(100); // cheaper than loadConD (125), which loads from the constant table
5536 
5537   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5538   ins_encode %{
5539     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5540   %}
5541   ins_pipe(pipe_slow); // no flags operand is declared, unlike the integer xor rules
5542 %}
5543 
5544 instruct loadSSI(rRegI dst, stackSlotI src)
5545 %{
5546   match(Set dst src); // copy an int stack slot into a register
5547 
5548   ins_cost(125);
5549   format %{ "movl    $dst, $src\t# int stk" %}
5550   opcode(0x8B); // the load form of MOV, as in loadRange
5551   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); // the stack slot is passed where the memory rules pass mem
5552   ins_pipe(ialu_reg_mem);
5553 %}
5554 
5555 instruct loadSSL(rRegL dst, stackSlotL src)
5556 %{
5557   match(Set dst src); // copy a long stack slot into a register
5558 
5559   ins_cost(125);
5560   format %{ "movq    $dst, $src\t# long stk" %}
5561   opcode(0x8B); // same opcode as loadSSI; the _wide REX enc_class below is what differs
5562   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5563   ins_pipe(ialu_reg_mem);
5564 %}
5565 
5566 instruct loadSSP(rRegP dst, stackSlotP src)
5567 %{
5568   match(Set dst src); // copy a pointer stack slot into a register
5569 
5570   ins_cost(125);
5571   format %{ "movq    $dst, $src\t# ptr stk" %}
5572   opcode(0x8B); // encoded exactly like loadSSL
5573   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5574   ins_pipe(ialu_reg_mem);
5575 %}
5576 
5577 instruct loadSSF(regF dst, stackSlotF src)
5578 %{
5579   match(Set dst src); // copy a float stack slot into an XMM register
5580 
5581   ins_cost(125);
5582   format %{ "movss   $dst, $src\t# float stk" %}
5583   ins_encode %{
5584     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5585   %}
5586   ins_pipe(pipe_slow); // XXX - the slot is addressed as rsp plus the operand displacement
5587 %}
5588 
5589 // Copy a double stack slot into an XMM register. Use the same format since predicate() can not be used here.
5590 instruct loadSSD(regD dst, stackSlotD src)
5591 %{
5592   match(Set dst src);
5593 
5594   ins_cost(125);
5595   format %{ "movsd   $dst, $src\t# double stk" %}
5596   ins_encode  %{
5597     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5598   %}
5599   ins_pipe(pipe_slow); // XXX - the slot is addressed as rsp plus the operand displacement
5600 %}
5601 
5602 // Prefetch instructions.
5603 // Must be safe to execute with invalid address (cannot fault).
5604 
5605 instruct prefetchr( memory mem ) %{
5606   predicate(ReadPrefetchInstr==3); // one of four PrefetchRead rules, selected by the value of ReadPrefetchInstr
5607   match(PrefetchRead mem); // no Set: the rule produces no result value
5608   ins_cost(125);
5609 
5610   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5611   ins_encode %{
5612     __ prefetchr($mem$$Address);
5613   %}
5614   ins_pipe(ialu_mem);
5615 %}
5616 
5617 instruct prefetchrNTA( memory mem ) %{
5618   predicate(ReadPrefetchInstr==0); // PrefetchRead variant used when ReadPrefetchInstr is 0
5619   match(PrefetchRead mem);
5620   ins_cost(125);
5621 
5622   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5623   ins_encode %{
5624     __ prefetchnta($mem$$Address);
5625   %}
5626   ins_pipe(ialu_mem);
5627 %}
5628 
5629 instruct prefetchrT0( memory mem ) %{
5630   predicate(ReadPrefetchInstr==1); // PrefetchRead variant used when ReadPrefetchInstr is 1
5631   match(PrefetchRead mem);
5632   ins_cost(125);
5633 
5634   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5635   ins_encode %{
5636     __ prefetcht0($mem$$Address);
5637   %}
5638   ins_pipe(ialu_mem);
5639 %}
5640 
5641 instruct prefetchrT2( memory mem ) %{
5642   predicate(ReadPrefetchInstr==2); // PrefetchRead variant used when ReadPrefetchInstr is 2
5643   match(PrefetchRead mem);
5644   ins_cost(125);
5645 
5646   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5647   ins_encode %{
5648     __ prefetcht2($mem$$Address);
5649   %}
5650   ins_pipe(ialu_mem);
5651 %}
5652 
5653 instruct prefetchwNTA( memory mem ) %{
5654   match(PrefetchWrite mem); // no predicate: the only PrefetchWrite rule in this part of the file
5655   ins_cost(125);
5656 
5657   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5658   ins_encode %{
5659     __ prefetchnta($mem$$Address);
5660   %}
5661   ins_pipe(ialu_mem);
5662 %}
5663 
5664 // Prefetch instructions for allocation.
5665 
5666 instruct prefetchAlloc( memory mem ) %{
5667   predicate(AllocatePrefetchInstr==3); // one of four PrefetchAllocation rules, selected by the value of AllocatePrefetchInstr
5668   match(PrefetchAllocation mem); // no Set: the rule produces no result value
5669   ins_cost(125);
5670 
5671   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5672   ins_encode %{
5673     __ prefetchw($mem$$Address);
5674   %}
5675   ins_pipe(ialu_mem);
5676 %}
5677 
5678 instruct prefetchAllocNTA( memory mem ) %{
5679   predicate(AllocatePrefetchInstr==0); // PrefetchAllocation variant used when AllocatePrefetchInstr is 0
5680   match(PrefetchAllocation mem);
5681   ins_cost(125);
5682 
5683   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5684   ins_encode %{
5685     __ prefetchnta($mem$$Address);
5686   %}
5687   ins_pipe(ialu_mem);
5688 %}
5689 
5690 instruct prefetchAllocT0( memory mem ) %{
5691   predicate(AllocatePrefetchInstr==1); // PrefetchAllocation variant used when AllocatePrefetchInstr is 1
5692   match(PrefetchAllocation mem);
5693   ins_cost(125);
5694 
5695   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5696   ins_encode %{
5697     __ prefetcht0($mem$$Address);
5698   %}
5699   ins_pipe(ialu_mem);
5700 %}
5701 
5702 instruct prefetchAllocT2( memory mem ) %{
5703   predicate(AllocatePrefetchInstr==2); // PrefetchAllocation variant used when AllocatePrefetchInstr is 2
5704   match(PrefetchAllocation mem);
5705   ins_cost(125);
5706 
5707   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5708   ins_encode %{
5709     __ prefetcht2($mem$$Address);
5710   %}
5711   ins_pipe(ialu_mem);
5712 %}
5713 
5714 //----------Store Instructions-------------------------------------------------
5715 
5716 // Store Byte: StoreB of an int register to $mem with movb
5717 instruct storeB(memory mem, rRegI src)
5718 %{
5719   match(Set mem (StoreB mem src));
5720 
5721   ins_cost(125); // XXX
5722   format %{ "movb    $mem, $src\t# byte" %}
5723   opcode(0x88); // the byte-sized store form of MOV
5724   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); // uses REX_breg_mem where the wider stores use REX_reg_mem
5725   ins_pipe(ialu_mem_reg);
5726 %}
5727 
5728 // Store Char/Short: StoreC of an int register to $mem with movw
5729 instruct storeC(memory mem, rRegI src)
5730 %{
5731   match(Set mem (StoreC mem src));
5732 
5733   ins_cost(125); // XXX
5734   format %{ "movw    $mem, $src\t# char/short" %}
5735   opcode(0x89); // same opcode as storeI; the extra SizePrefix below is what differs
5736   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); // SizePrefix is defined elsewhere; presumably the 16-bit operand-size prefix
5737   ins_pipe(ialu_mem_reg);
5738 %}
5739 
5740 // Store Integer: StoreI of an int register to $mem with movl
5741 instruct storeI(memory mem, rRegI src)
5742 %{
5743   match(Set mem (StoreI mem src));
5744 
5745   ins_cost(125); // XXX
5746   format %{ "movl    $mem, $src\t# int" %}
5747   opcode(0x89); // the store form of MOV
5748   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5749   ins_pipe(ialu_mem_reg);
5750 %}
5751 
5752 // Store Long: StoreL of a long register to $mem with movq
5753 instruct storeL(memory mem, rRegL src)
5754 %{
5755   match(Set mem (StoreL mem src));
5756 
5757   ins_cost(125); // XXX
5758   format %{ "movq    $mem, $src\t# long" %}
5759   opcode(0x89); // same opcode as storeI; the _wide REX enc_class below is what differs
5760   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5761   ins_pipe(ialu_mem_reg); // XXX
5762 %}
5763 
5764 // Store Pointer: StoreP of a pointer register to $mem with movq
5765 instruct storeP(memory mem, any_RegP src)
5766 %{
5767   match(Set mem (StoreP mem src)); // src is any_RegP here, where the pointer loads above use rRegP
5768 
5769   ins_cost(125); // XXX
5770   format %{ "movq    $mem, $src\t# ptr" %}
5771   opcode(0x89); // encoded exactly like storeL
5772   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5773   ins_pipe(ialu_mem_reg);
5774 %}
5775 
5776 instruct storeImmP0(memory mem, immP0 zero)
5777 %{
5778   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); // conditions under which r12 is taken to hold zero (see format comment)
5779   match(Set mem (StoreP mem zero)); // store of a NULL pointer
5780 
5781   ins_cost(125); // XXX
5782   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5783   ins_encode %{
5784     __ movq($mem$$Address, r12);
5785   %}
5786   ins_pipe(ialu_mem_reg); // stores r12 instead of an immediate; cost 125 versus 150 for storeImmP
5787 %}
5788 
5789 // Store NULL Pointer, mark word, or other simple pointer constant (an immP31) as an immediate.
5790 instruct storeImmP(memory mem, immP31 src)
5791 %{
5792   match(Set mem (StoreP mem src));
5793 
5794   ins_cost(150); // XXX
5795   format %{ "movq    $mem, $src\t# ptr" %}
5796   opcode(0xC7); /* C7 /0 */
5797   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); // the constant goes out through Con32, i.e. as a 32-bit immediate
5798   ins_pipe(ialu_mem_imm);
5799 %}
5800 
5801 // Store Compressed Pointer
     // Matches an ideal StoreN of a narrow-oop register (rRegN) and emits a
     // 32-bit movl of that register to the memory operand's address.
5802 instruct storeN(memory mem, rRegN src)
5803 %{
5804   match(Set mem (StoreN mem src));
5805 
5806   ins_cost(125); // XXX
5807   format %{ "movl    $mem, $src\t# compressed ptr" %}
5808   ins_encode %{
5809     __ movl($mem$$Address, $src$$Register);
5810   %}
5811   ins_pipe(ialu_mem_reg);
5812 %}
5813 
5814 instruct storeNKlass(memory mem, rRegN src)
5815 %{
       // Store Compressed Klass Pointer: matches an ideal StoreNKlass of an
       // rRegN register and emits a 32-bit movl to memory, the same
       // encoding as the compressed-oop register store.
5816   match(Set mem (StoreNKlass mem src));
5817 
5818   ins_cost(125); // XXX
5819   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5820   ins_encode %{
5821     __ movl($mem$$Address, $src$$Register);
5822   %}
5823   ins_pipe(ialu_mem_reg);
5824 %}
5825 
5826 instruct storeImmN0(memory mem, immN0 zero)
5827 %{
       // Stores a zero compressed oop (immN0) by writing the low 32 bits of
       // r12 with movl. Relies on r12 being zero (format string:
       // R12_heapbase==0); the predicate requires NULL narrow oop and narrow
       // klass bases. Unlike the other r12-based zero stores it does not
       // test UseCompressedOops explicitly.
5828   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5829   match(Set mem (StoreN mem zero));
5830 
5831   ins_cost(125); // XXX
5832   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5833   ins_encode %{
5834     __ movl($mem$$Address, r12);
5835   %}
5836   ins_pipe(ialu_mem_reg);
5837 %}
5838 
5839 instruct storeImmN(memory mem, immN src)
5840 %{
       // Stores a compressed-oop constant (immN) to memory. A NULL constant
       // is written as a plain 32-bit zero immediate; any other constant is
       // handed to the macro assembler's set_narrow_oop as a jobject
       // (NOTE(review): presumably so the embedded oop gets relocation
       // info - confirm in the MacroAssembler).
5841   match(Set mem (StoreN mem src));
5842 
5843   ins_cost(150); // XXX
5844   format %{ "movl    $mem, $src\t# compressed ptr" %}
5845   ins_encode %{
5846     address con = (address)$src$$constant;
5847     if (con == NULL) {
5848       __ movl($mem$$Address, (int32_t)0);
5849     } else {
5850       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5851     }
5852   %}
5853   ins_pipe(ialu_mem_imm);
5854 %}
5855 
5856 instruct storeImmNKlass(memory mem, immNKlass src)
5857 %{
       // Stores a compressed klass constant (immNKlass) to memory by passing
       // the constant, cast to Klass*, to the macro assembler's
       // set_narrow_klass. There is no NULL special case here, unlike the
       // compressed-oop immediate store.
5858   match(Set mem (StoreNKlass mem src));
5859 
5860   ins_cost(150); // XXX
5861   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5862   ins_encode %{
5863     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5864   %}
5865   ins_pipe(ialu_mem_imm);
5866 %}
5867 
5868 // Store Integer Immediate
     // Zero case: stores int 0 (immI0) by writing the low 32 bits of r12 with
     // movl instead of an immediate. Relies on r12 being zero (format string:
     // R12_heapbase==0); the predicate limits the rule to compressed oops
     // with NULL narrow oop and narrow klass bases. Cost 125 beats the
     // general immediate store's 150.
5869 instruct storeImmI0(memory mem, immI0 zero)
5870 %{
5871   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5872   match(Set mem (StoreI mem zero));
5873 
5874   ins_cost(125); // XXX
5875   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
5876   ins_encode %{
5877     __ movl($mem$$Address, r12);
5878   %}
5879   ins_pipe(ialu_mem_reg);
5880 %}
5881 
5882 instruct storeImmI(memory mem, immI src)
5883 %{
       // Stores an arbitrary int constant (immI) to memory, formatted as a
       // movl of an immediate. Encoding: REX_mem, primary opcode 0xC7 with
       // /0 in the ModRM opcode field, then the constant via Con32. No
       // predicate, so it is always available to the matcher.
5884   match(Set mem (StoreI mem src));
5885 
5886   ins_cost(150);
5887   format %{ "movl    $mem, $src\t# int" %}
5888   opcode(0xC7); /* C7 /0 */
5889   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5890   ins_pipe(ialu_mem_imm);
5891 %}
5892 
5893 // Store Long Immediate
     // Zero case: stores long 0 (immL0) by writing r12 with movq. Relies on
     // r12 being zero (format string: R12_heapbase==0); the predicate limits
     // the rule to compressed oops with NULL narrow oop and narrow klass
     // bases.
5894 instruct storeImmL0(memory mem, immL0 zero)
5895 %{
5896   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5897   match(Set mem (StoreL mem zero));
5898 
5899   ins_cost(125); // XXX
5900   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
5901   ins_encode %{
5902     __ movq($mem$$Address, r12);
5903   %}
5904   ins_pipe(ialu_mem_reg);
5905 %}
5906 
5907 instruct storeImmL(memory mem, immL32 src)
5908 %{
       // Stores a long constant of operand type immL32 to memory, formatted
       // as a movq of an immediate. Encoding: REX_mem_wide, primary opcode
       // 0xC7 with /0 in the ModRM opcode field, then the constant via
       // Con32, i.e. only a 32-bit immediate field is emitted.
5909   match(Set mem (StoreL mem src));
5910 
5911   ins_cost(150);
5912   format %{ "movq    $mem, $src\t# long" %}
5913   opcode(0xC7); /* C7 /0 */
5914   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5915   ins_pipe(ialu_mem_imm);
5916 %}
5917 
5918 // Store Short/Char Immediate
     // Zero case: matches StoreC of int 0 (immI0) and writes the low 16 bits
     // of r12 with movw. Relies on r12 being zero (format string:
     // R12_heapbase==0); the predicate limits the rule to compressed oops
     // with NULL narrow oop and narrow klass bases.
5919 instruct storeImmC0(memory mem, immI0 zero)
5920 %{
5921   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5922   match(Set mem (StoreC mem zero));
5923 
5924   ins_cost(125); // XXX
5925   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
5926   ins_encode %{
5927     __ movw($mem$$Address, r12);
5928   %}
5929   ins_pipe(ialu_mem_reg);
5930 %}
5931 
5932 instruct storeImmI16(memory mem, immI16 src)
5933 %{
       // Stores a 16-bit constant (immI16) for a StoreC, formatted as a movw
       // of an immediate. Only enabled when the UseStoreImmI16 flag is set.
       // Encoding: SizePrefix, REX_mem, primary opcode 0xC7 with /0, then
       // the constant via Con16 (the opcode comment notes this is the 32-bit
       // immediate store with a prefix).
5934   predicate(UseStoreImmI16);
5935   match(Set mem (StoreC mem src));
5936 
5937   ins_cost(150);
5938   format %{ "movw    $mem, $src\t# short/char" %}
5939   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
5940   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
5941   ins_pipe(ialu_mem_imm);
5942 %}
5943 
5944 // Store Byte Immediate
     // Zero case: matches StoreB of int 0 (immI0) and writes the low byte of
     // r12 with movb. Relies on r12 being zero (format string:
     // R12_heapbase==0); the predicate limits the rule to compressed oops
     // with NULL narrow oop and narrow klass bases.
     // The format annotation now says "byte"; it previously read
     // "short/char", copied from the 16-bit variant.
5945 instruct storeImmB0(memory mem, immI0 zero)
5946 %{
5947   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5948   match(Set mem (StoreB mem zero));
5949 
5950   ins_cost(125); // XXX
5951   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
5952   ins_encode %{
5953     __ movb($mem$$Address, r12);
5954   %}
5955   ins_pipe(ialu_mem_reg);
5956 %}
5957 
5958 instruct storeImmB(memory mem, immI8 src)
5959 %{
       // Stores an 8-bit constant (immI8) for a StoreB, formatted as a movb
       // of an immediate. Encoding: REX_mem, primary opcode 0xC6 with /0 in
       // the ModRM opcode field, then the constant via Con8or32 (enc_class
       // defined outside this chunk).
5960   match(Set mem (StoreB mem src));
5961 
5962   ins_cost(150); // XXX
5963   format %{ "movb    $mem, $src\t# byte" %}
5964   opcode(0xC6); /* C6 /0 */
5965   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
5966   ins_pipe(ialu_mem_imm);
5967 %}
5968 
5969 // Store CMS card-mark Immediate
     // Register form: matches StoreCM of int 0 (immI0) and writes the low byte
     // of r12 with movb. Relies on r12 being zero (format string:
     // R12_heapbase==0); the predicate limits the rule to compressed oops
     // with NULL narrow oop and narrow klass bases. Cost 125 beats the
     // immediate form's 150.
5970 instruct storeImmCM0_reg(memory mem, immI0 zero)
5971 %{
5972   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5973   match(Set mem (StoreCM mem zero));
5974 
5975   ins_cost(125); // XXX
5976   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
5977   ins_encode %{
5978     __ movb($mem$$Address, r12);
5979   %}
5980   ins_pipe(ialu_mem_reg);
5981 %}
5982 
5983 instruct storeImmCM0(memory mem, immI0 src)
5984 %{
       // Immediate form of the card-mark store: matches StoreCM of int 0
       // (immI0) with no predicate, so it is available in every
       // configuration. Encoding: REX_mem, primary opcode 0xC6 with /0, then
       // the constant via Con8or32.
5985   match(Set mem (StoreCM mem src));
5986 
5987   ins_cost(150); // XXX
5988   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
5989   opcode(0xC6); /* C6 /0 */
5990   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
5991   ins_pipe(ialu_mem_imm);
5992 %}
5993 
5994 // Store Float
     // Matches an ideal StoreF of a float register (regF) and emits the macro
     // assembler's movflt from the XMM register to memory; formatted as
     // movss.
5995 instruct storeF(memory mem, regF src)
5996 %{
5997   match(Set mem (StoreF mem src));
5998 
5999   ins_cost(95); // XXX
6000   format %{ "movss   $mem, $src\t# float" %}
6001   ins_encode %{
6002     __ movflt($mem$$Address, $src$$XMMRegister);
6003   %}
6004   ins_pipe(pipe_slow); // XXX
6005 %}
6006 
6007 // Store immediate Float value (it is faster than store from XMM register)
     // Zero case: matches StoreF of the immF0 constant and writes the low 32
     // bits of r12 with movl. Relies on r12 being zero (format string:
     // R12_heapbase==0); the predicate limits the rule to compressed oops
     // with NULL narrow oop and narrow klass bases. Cost 25 is the lowest of
     // the three float store rules.
6008 instruct storeF0(memory mem, immF0 zero)
6009 %{
6010   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6011   match(Set mem (StoreF mem zero));
6012 
6013   ins_cost(25); // XXX
6014   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6015   ins_encode %{
6016     __ movl($mem$$Address, r12);
6017   %}
6018   ins_pipe(ialu_mem_reg);
6019 %}
6020 
6021 instruct storeF_imm(memory mem, immF src)
6022 %{
       // Stores a float constant (immF) with an integer immediate move,
       // formatted as movl. Encoding: REX_mem, primary opcode 0xC7 with /0,
       // then the constant through Con32F_as_bits (enc_class defined outside
       // this chunk; by its name it emits the float's bit pattern).
6023   match(Set mem (StoreF mem src));
6024 
6025   ins_cost(50);
6026   format %{ "movl    $mem, $src\t# float" %}
6027   opcode(0xC7); /* C7 /0 */
6028   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6029   ins_pipe(ialu_mem_imm);
6030 %}
6031 
6032 // Store Double
     // Matches an ideal StoreD of a double register (regD) and emits the
     // macro assembler's movdbl from the XMM register to memory; formatted
     // as movsd.
6033 instruct storeD(memory mem, regD src)
6034 %{
6035   match(Set mem (StoreD mem src));
6036 
6037   ins_cost(95); // XXX
6038   format %{ "movsd   $mem, $src\t# double" %}
6039   ins_encode %{
6040     __ movdbl($mem$$Address, $src$$XMMRegister);
6041   %}
6042   ins_pipe(pipe_slow); // XXX
6043 %}
6044 
6045 // Store immediate double 0.0 (it is faster than store from XMM register)
     // Immediate form: matches StoreD of the immD0 constant and emits a movq
     // of an immediate (REX_mem_wide, primary opcode 0xC7 with /0, constant
     // via Con32F_as_bits). It does not touch r12, so it is valid in any
     // configuration.
     // The predicate is the exact complement of the r12-based storeD0 rule.
     // It used to omit the narrow klass base test, so with compressed oops, a
     // NULL oop base and a non-NULL klass base neither zero-store rule
     // matched and the matcher fell back to the XMM register store.
6046 instruct storeD0_imm(memory mem, immD0 src)
6047 %{
6048   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL) || (Universe::narrow_klass_base() != NULL));
6049   match(Set mem (StoreD mem src));
6050 
6051   ins_cost(50);
6052   format %{ "movq    $mem, $src\t# double 0." %}
6053   opcode(0xC7); /* C7 /0 */
6054   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6055   ins_pipe(ialu_mem_imm);
6056 %}
6057 
6058 instruct storeD0(memory mem, immD0 zero)
6059 %{
       // Zero case of the double store: matches StoreD of the immD0
       // constant and writes r12 with movq. Relies on r12 being zero
       // (format string: R12_heapbase==0); the predicate limits the rule to
       // compressed oops with NULL narrow oop and narrow klass bases.
6060   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6061   match(Set mem (StoreD mem zero));
6062 
6063   ins_cost(25); // XXX
6064   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6065   ins_encode %{
6066     __ movq($mem$$Address, r12);
6067   %}
6068   ins_pipe(ialu_mem_reg);
6069 %}
6070 
6071 instruct storeSSI(stackSlotI dst, rRegI src)
6072 %{
       // Moves an int register into an int stack slot (plain Set dst src,
       // no ideal store node). Formatted as movl; encoded with REX_reg_mem,
       // primary opcode 0x89 and reg_mem, with the stack slot in the memory
       // operand position.
6073   match(Set dst src);
6074 
6075   ins_cost(100);
6076   format %{ "movl    $dst, $src\t# int stk" %}
6077   opcode(0x89);
6078   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6079   ins_pipe( ialu_mem_reg );
6080 %}
6081 
6082 instruct storeSSL(stackSlotL dst, rRegL src)
6083 %{
       // Moves a long register into a long stack slot (plain Set dst src).
       // Formatted as movq; encoded with REX_reg_mem_wide, primary opcode
       // 0x89 and reg_mem, with the stack slot in the memory operand
       // position.
6084   match(Set dst src);
6085 
6086   ins_cost(100);
6087   format %{ "movq    $dst, $src\t# long stk" %}
6088   opcode(0x89);
6089   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6090   ins_pipe(ialu_mem_reg);
6091 %}
6092 
6093 instruct storeSSP(stackSlotP dst, rRegP src)
6094 %{
       // Moves a pointer register into a pointer stack slot (plain
       // Set dst src). Formatted as movq; encoded with REX_reg_mem_wide,
       // primary opcode 0x89 and reg_mem.
6095   match(Set dst src);
6096 
6097   ins_cost(100);
6098   format %{ "movq    $dst, $src\t# ptr stk" %}
6099   opcode(0x89);
6100   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6101   ins_pipe(ialu_mem_reg);
6102 %}
6103 
6104 instruct storeSSF(stackSlotF dst, regF src)
6105 %{
       // Moves a float XMM register into a float stack slot (plain
       // Set dst src). The destination address is built by hand as rsp plus
       // the slot's displacement and written with movflt; formatted as
       // movss.
6106   match(Set dst src);
6107 
6108   ins_cost(95); // XXX
6109   format %{ "movss   $dst, $src\t# float stk" %}
6110   ins_encode %{
6111     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6112   %}
6113   ins_pipe(pipe_slow); // XXX
6114 %}
6115 
6116 instruct storeSSD(stackSlotD dst, regD src)
6117 %{
       // Moves a double XMM register into a double stack slot (plain
       // Set dst src). The destination address is built by hand as rsp plus
       // the slot's displacement and written with movdbl; formatted as
       // movsd.
6118   match(Set dst src);
6119 
6120   ins_cost(95); // XXX
6121   format %{ "movsd   $dst, $src\t# double stk" %}
6122   ins_encode %{
6123     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6124   %}
6125   ins_pipe(pipe_slow); // XXX
6126 %}
6127 
6128 //----------BSWAP Instructions-------------------------------------------------
6129 instruct bytes_reverse_int(rRegI dst) %{
       // Matches ReverseBytesI applied in place (dst is both input and
       // result) and formats it as bswapl. Encoding: REX_reg, primary
       // opcode 0x0F, then the secondary opcode 0xC8 combined with the
       // register by opc2_reg (enc_class defined outside this chunk).
6130   match(Set dst (ReverseBytesI dst));
6131 
6132   format %{ "bswapl  $dst" %}
6133   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6134   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6135   ins_pipe( ialu_reg );
6136 %}
6137 
6138 instruct bytes_reverse_long(rRegL dst) %{
       // Matches ReverseBytesL applied in place on a long register and
       // formats it as bswapq. Same opcode pair as the int form (0x0F,
       // 0xC8) but with the REX_reg_wide prefix enc_class.
6139   match(Set dst (ReverseBytesL dst));
6140 
6141   format %{ "bswapq  $dst" %}
6142   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6143   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6144   ins_pipe( ialu_reg);
6145 %}
6146 
6147 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // Matches ReverseBytesUS in place: the whole 32-bit register is
       // byte-swapped, then shifted right logically by 16 so the swapped
       // 16-bit value ends up in the low half with zeros above it. The
       // flags register is declared killed.
6148   match(Set dst (ReverseBytesUS dst));
6149   effect(KILL cr);
6150 
6151   format %{ "bswapl  $dst\n\t"
6152             "shrl    $dst,16\n\t" %}
6153   ins_encode %{
6154     __ bswapl($dst$$Register);
6155     __ shrl($dst$$Register, 16);
6156   %}
6157   ins_pipe( ialu_reg );
6158 %}
6159 
6160 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // Matches ReverseBytesS in place: the whole 32-bit register is
       // byte-swapped, then shifted right arithmetically by 16 so the
       // swapped 16-bit value ends up sign-extended in the low half. The
       // flags register is declared killed.
       // The format prints sarl, matching the instruction actually emitted
       // and the shrl spelling of the unsigned variant.
6161   match(Set dst (ReverseBytesS dst));
6162   effect(KILL cr);
6163 
6164   format %{ "bswapl  $dst\n\t"
6165             "sarl    $dst,16\n\t" %}
6166   ins_encode %{
6167     __ bswapl($dst$$Register);
6168     __ sarl($dst$$Register, 16);
6169   %}
6170   ins_pipe( ialu_reg );
6171 %}
6172 
6173 //---------- Zeros Count Instructions ------------------------------------------
6174 
6175 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Matches CountLeadingZerosI with a single lzcntl when the
       // UseCountLeadingZerosInstruction flag is set; the bsr-based rule
       // covers the opposite case. The flags register is declared killed.
6176   predicate(UseCountLeadingZerosInstruction);
6177   match(Set dst (CountLeadingZerosI src));
6178   effect(KILL cr);
6179 
6180   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6181   ins_encode %{
6182     __ lzcntl($dst$$Register, $src$$Register);
6183   %}
6184   ins_pipe(ialu_reg);
6185 %}
6186 
6187 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosI when
       // UseCountLeadingZerosInstruction is off. Computes
       // (BitsPerInt - 1) - bsr(src); when the notZero branch after bsrl is
       // not taken, dst is forced to -1 first, so the neg/add pair yields
       // BitsPerInt (the format string shows the constants as 31 and -1).
6188   predicate(!UseCountLeadingZerosInstruction);
6189   match(Set dst (CountLeadingZerosI src));
6190   effect(KILL cr);
6191 
6192   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6193             "jnz     skip\n\t"
6194             "movl    $dst, -1\n"
6195       "skip:\n\t"
6196             "negl    $dst\n\t"
6197             "addl    $dst, 31" %}
6198   ins_encode %{
6199     Register Rdst = $dst$$Register;
6200     Register Rsrc = $src$$Register;
6201     Label skip;
6202     __ bsrl(Rdst, Rsrc);
6203     __ jccb(Assembler::notZero, skip);
6204     __ movl(Rdst, -1);
6205     __ bind(skip);
6206     __ negl(Rdst);
6207     __ addl(Rdst, BitsPerInt - 1);
6208   %}
6209   ins_pipe(ialu_reg);
6210 %}
6211 
6212 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Matches CountLeadingZerosL (long input, int result) with a single
       // lzcntq when the UseCountLeadingZerosInstruction flag is set. The
       // flags register is declared killed.
6213   predicate(UseCountLeadingZerosInstruction);
6214   match(Set dst (CountLeadingZerosL src));
6215   effect(KILL cr);
6216 
6217   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6218   ins_encode %{
6219     __ lzcntq($dst$$Register, $src$$Register);
6220   %}
6221   ins_pipe(ialu_reg);
6222 %}
6223 
6224 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosL when
       // UseCountLeadingZerosInstruction is off. Computes
       // (BitsPerLong - 1) - bsr(src) using a 64-bit bsrq followed by
       // 32-bit neg/add on the int result; when the notZero branch is not
       // taken, dst is forced to -1 first so the result is BitsPerLong (the
       // format string shows the constants as 63 and -1).
6225   predicate(!UseCountLeadingZerosInstruction);
6226   match(Set dst (CountLeadingZerosL src));
6227   effect(KILL cr);
6228 
6229   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6230             "jnz     skip\n\t"
6231             "movl    $dst, -1\n"
6232       "skip:\n\t"
6233             "negl    $dst\n\t"
6234             "addl    $dst, 63" %}
6235   ins_encode %{
6236     Register Rdst = $dst$$Register;
6237     Register Rsrc = $src$$Register;
6238     Label skip;
6239     __ bsrq(Rdst, Rsrc);
6240     __ jccb(Assembler::notZero, skip);
6241     __ movl(Rdst, -1);
6242     __ bind(skip);
6243     __ negl(Rdst);
6244     __ addl(Rdst, BitsPerLong - 1);
6245   %}
6246   ins_pipe(ialu_reg);
6247 %}
6248 
6249 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Matches CountTrailingZerosI using bsfl. When the notZero branch
       // after bsfl is not taken, dst is overwritten with BitsPerInt (shown
       // as 32 in the format string); otherwise the bsfl result is kept.
       // There is no predicate, so this is the only rule for the node in
       // this chunk. The flags register is declared killed.
6250   match(Set dst (CountTrailingZerosI src));
6251   effect(KILL cr);
6252 
6253   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6254             "jnz     done\n\t"
6255             "movl    $dst, 32\n"
6256       "done:" %}
6257   ins_encode %{
6258     Register Rdst = $dst$$Register;
6259     Label done;
6260     __ bsfl(Rdst, $src$$Register);
6261     __ jccb(Assembler::notZero, done);
6262     __ movl(Rdst, BitsPerInt);
6263     __ bind(done);
6264   %}
6265   ins_pipe(ialu_reg);
6266 %}
6267 
6268 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Matches CountTrailingZerosL (long input, int result) using bsfq.
       // When the notZero branch after bsfq is not taken, dst is overwritten
       // with BitsPerLong (shown as 64 in the format string); otherwise the
       // bsfq result is kept. The flags register is declared killed.
6269   match(Set dst (CountTrailingZerosL src));
6270   effect(KILL cr);
6271 
6272   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6273             "jnz     done\n\t"
6274             "movl    $dst, 64\n"
6275       "done:" %}
6276   ins_encode %{
6277     Register Rdst = $dst$$Register;
6278     Label done;
6279     __ bsfq(Rdst, $src$$Register);
6280     __ jccb(Assembler::notZero, done);
6281     __ movl(Rdst, BitsPerLong);
6282     __ bind(done);
6283   %}
6284   ins_pipe(ialu_reg);
6285 %}
6286 
6287 
6288 //---------- Population Count Instructions -------------------------------------
6289 
6290 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Matches PopCountI on an int register with a single popcntl; only
       // enabled when the UsePopCountInstruction flag is set. The flags
       // register is declared killed.
6291   predicate(UsePopCountInstruction);
6292   match(Set dst (PopCountI src));
6293   effect(KILL cr);
6294 
6295   format %{ "popcnt  $dst, $src" %}
6296   ins_encode %{
6297     __ popcntl($dst$$Register, $src$$Register);
6298   %}
6299   ins_pipe(ialu_reg);
6300 %}
6301 
6302 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Matches PopCountI of a LoadI, folding the load into popcntl with a
       // memory source. Only enabled when UsePopCountInstruction is set. The
       // flags register is declared killed.
6303   predicate(UsePopCountInstruction);
6304   match(Set dst (PopCountI (LoadI mem)));
6305   effect(KILL cr);
6306 
6307   format %{ "popcnt  $dst, $mem" %}
6308   ins_encode %{
6309     __ popcntl($dst$$Register, $mem$$Address);
6310   %}
6311   ins_pipe(ialu_reg);
6312 %}
6313 
6314 // Note: Long.bitCount(long) returns an int.
     // Matches PopCountL on a long register with a single popcntq, writing
     // the count to an int register. Only enabled when
     // UsePopCountInstruction is set. The flags register is declared killed.
6315 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
6316   predicate(UsePopCountInstruction);
6317   match(Set dst (PopCountL src));
6318   effect(KILL cr);
6319 
6320   format %{ "popcnt  $dst, $src" %}
6321   ins_encode %{
6322     __ popcntq($dst$$Register, $src$$Register);
6323   %}
6324   ins_pipe(ialu_reg);
6325 %}
6326 
6327 // Note: Long.bitCount(long) returns an int.
     // Matches PopCountL of a LoadL, folding the load into popcntq with a
     // memory source and an int destination register. Only enabled when
     // UsePopCountInstruction is set. The flags register is declared killed.
6328 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6329   predicate(UsePopCountInstruction);
6330   match(Set dst (PopCountL (LoadL mem)));
6331   effect(KILL cr);
6332 
6333   format %{ "popcnt  $dst, $mem" %}
6334   ins_encode %{
6335     __ popcntq($dst$$Register, $mem$$Address);
6336   %}
6337   ins_pipe(ialu_reg);
6338 %}
6339 
6340 
6341 //----------MemBar Instructions-----------------------------------------------
6342 // Memory barrier flavors
6343 
6344 instruct membar_acquire()
6345 %{
       // Matches both MemBarAcquire and LoadFence. Declared with cost 0,
       // size 0 and an empty encoding, so no machine code is emitted for
       // these nodes.
6346   match(MemBarAcquire);
6347   match(LoadFence);
6348   ins_cost(0);
6349 
6350   size(0);
6351   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6352   ins_encode();
6353   ins_pipe(empty);
6354 %}
6355 
6356 instruct membar_acquire_lock()
6357 %{
       // Matches MemBarAcquireLock with cost 0, size 0 and an empty
       // encoding. Per the format string, the CMPXCHG already issued by the
       // preceding FastLock is what makes an explicit barrier unnecessary.
6358   match(MemBarAcquireLock);
6359   ins_cost(0);
6360 
6361   size(0);
6362   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6363   ins_encode();
6364   ins_pipe(empty);
6365 %}
6366 
6367 instruct membar_release()
6368 %{
       // Matches both MemBarRelease and StoreFence. Declared with cost 0,
       // size 0 and an empty encoding, so no machine code is emitted for
       // these nodes.
6369   match(MemBarRelease);
6370   match(StoreFence);
6371   ins_cost(0);
6372 
6373   size(0);
6374   format %{ "MEMBAR-release ! (empty encoding)" %}
6375   ins_encode();
6376   ins_pipe(empty);
6377 %}
6378 
6379 instruct membar_release_lock()
6380 %{
       // Matches MemBarReleaseLock with cost 0, size 0 and an empty
       // encoding. Per the format string, the FastUnlock that follows is
       // what makes an explicit barrier unnecessary.
6381   match(MemBarReleaseLock);
6382   ins_cost(0);
6383 
6384   size(0);
6385   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6386   ins_encode();
6387   ins_pipe(empty);
6388 %}
6389 
6390 instruct membar_volatile(rFlagsReg cr) %{
       // Matches MemBarVolatile and emits the macro assembler's
       // membar(StoreLoad). The format template prints the barrier as a
       // locked addl on [rsp + 0] when os::is_MP() is true and as an empty
       // encoding otherwise. The flags register is declared killed. The
       // cost of 400 lets the zero-cost unnecessary_membar_volatile rule win
       // whenever its predicate holds.
6391   match(MemBarVolatile);
6392   effect(KILL cr);
6393   ins_cost(400);
6394 
6395   format %{
6396     $$template
6397     if (os::is_MP()) {
6398       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6399     } else {
6400       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6401     }
6402   %}
6403   ins_encode %{
6404     __ membar(Assembler::StoreLoad);
6405   %}
6406   ins_pipe(pipe_slow);
6407 %}
6408 
6409 instruct unnecessary_membar_volatile()
6410 %{
       // Zero-cost, zero-size alternative for MemBarVolatile, selected only
       // when Matcher::post_store_load_barrier(n) returns true for the node
       // (that helper is defined outside this file chunk). Emits nothing.
6411   match(MemBarVolatile);
6412   predicate(Matcher::post_store_load_barrier(n));
6413   ins_cost(0);
6414 
6415   size(0);
6416   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6417   ins_encode();
6418   ins_pipe(empty);
6419 %}
6420 
6421 instruct membar_storestore() %{
       // Matches MemBarStoreStore with cost 0, size 0 and an empty
       // encoding, so no machine code is emitted for this node.
6422   match(MemBarStoreStore);
6423   ins_cost(0);
6424 
6425   size(0);
6426   format %{ "MEMBAR-storestore (empty encoding)" %}
6427   ins_encode( );
6428   ins_pipe(empty);
6429 %}
6430 
6431 //----------Move Instructions--------------------------------------------------
6432 
6433 instruct castX2P(rRegP dst, rRegL src)
6434 %{
       // Matches CastX2P (long reinterpreted as pointer). Emits a
       // register-to-register movptr only when the allocator assigned
       // different registers to dst and src; otherwise nothing is emitted.
6435   match(Set dst (CastX2P src));
6436 
6437   format %{ "movq    $dst, $src\t# long->ptr" %}
6438   ins_encode %{
6439     if ($dst$$reg != $src$$reg) {
6440       __ movptr($dst$$Register, $src$$Register);
6441     }
6442   %}
6443   ins_pipe(ialu_reg_reg); // XXX
6444 %}
6445 
6446 instruct castP2X(rRegL dst, rRegP src)
6447 %{
       // Matches CastP2X (pointer reinterpreted as long). Emits a
       // register-to-register movptr only when the allocator assigned
       // different registers to dst and src; otherwise nothing is emitted.
6448   match(Set dst (CastP2X src));
6449 
6450   format %{ "movq    $dst, $src\t# ptr -> long" %}
6451   ins_encode %{
6452     if ($dst$$reg != $src$$reg) {
6453       __ movptr($dst$$Register, $src$$Register);
6454     }
6455   %}
6456   ins_pipe(ialu_reg_reg); // XXX
6457 %}
6458 
6459 // Convert oop into int for vectors alignment masking
     // Matches the two-node pattern ConvL2I(CastP2X(src)) and collapses it
     // into a single 32-bit movl from the pointer register to the int
     // register. The move is emitted unconditionally, even if both operands
     // share a register.
6460 instruct convP2I(rRegI dst, rRegP src)
6461 %{
6462   match(Set dst (ConvL2I (CastP2X src)));
6463 
6464   format %{ "movl    $dst, $src\t# ptr -> int" %}
6465   ins_encode %{
6466     __ movl($dst$$Register, $src$$Register);
6467   %}
6468   ins_pipe(ialu_reg_reg); // XXX
6469 %}
6470 
6471 // Convert compressed oop into int for vectors alignment masking
6472 // in case of 32bit oops (heap < 4Gb).
     // Matches ConvL2I(CastP2X(DecodeN(src))) and collapses the three nodes
     // into one 32-bit movl of the narrow-oop register. Only enabled when
     // Universe::narrow_oop_shift() is 0, so the DecodeN is skipped
     // entirely.
6473 instruct convN2I(rRegI dst, rRegN src)
6474 %{
6475   predicate(Universe::narrow_oop_shift() == 0);
6476   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6477 
6478   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6479   ins_encode %{
6480     __ movl($dst$$Register, $src$$Register);
6481   %}
6482   ins_pipe(ialu_reg_reg); // XXX
6483 %}
6484 
6485 // Convert oop pointer into compressed form
     // Matches EncodeP when the node's pointer type is not NotNull. Copies
     // src into dst if they are different registers, then compresses dst in
     // place with the macro assembler's encode_heap_oop. The flags register
     // is declared killed.
6486 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
6487   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6488   match(Set dst (EncodeP src));
6489   effect(KILL cr);
6490   format %{ "encode_heap_oop $dst,$src" %}
6491   ins_encode %{
6492     Register s = $src$$Register;
6493     Register d = $dst$$Register;
6494     if (s != d) {
6495       __ movq(d, s);
6496     }
6497     __ encode_heap_oop(d);
6498   %}
6499   ins_pipe(ialu_reg_long);
6500 %}
6501 
6502 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Matches EncodeP when the node's pointer type is NotNull, the
       // complement of the encodeHeapOop predicate. Delegates to the
       // two-register encode_heap_oop_not_null, so no explicit copy is
       // needed here. The flags register is declared killed.
6503   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6504   match(Set dst (EncodeP src));
6505   effect(KILL cr);
6506   format %{ "encode_heap_oop_not_null $dst,$src" %}
6507   ins_encode %{
6508     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6509   %}
6510   ins_pipe(ialu_reg_long);
6511 %}
6512 
6513 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Matches DecodeN when the node's pointer type is neither NotNull nor
       // Constant. Copies src into dst if they are different registers,
       // then decompresses dst in place with the macro assembler's
       // decode_heap_oop. The flags register is declared killed.
6514   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6515             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6516   match(Set dst (DecodeN src));
6517   effect(KILL cr);
6518   format %{ "decode_heap_oop $dst,$src" %}
6519   ins_encode %{
6520     Register s = $src$$Register;
6521     Register d = $dst$$Register;
6522     if (s != d) {
6523       __ movq(d, s);
6524     }
6525     __ decode_heap_oop(d);
6526   %}
6527   ins_pipe(ialu_reg_long);
6528 %}
6529 
6530 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Matches DecodeN when the node's pointer type is NotNull or
       // Constant, the complement of the decodeHeapOop predicate. Uses the
       // two-register decode_heap_oop_not_null when dst and src differ and
       // the in-place one-register form when they are the same register.
       // The flags register is declared killed.
6531   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6532             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6533   match(Set dst (DecodeN src));
6534   effect(KILL cr);
6535   format %{ "decode_heap_oop_not_null $dst,$src" %}
6536   ins_encode %{
6537     Register s = $src$$Register;
6538     Register d = $dst$$Register;
6539     if (s != d) {
6540       __ decode_heap_oop_not_null(d, s);
6541     } else {
6542       __ decode_heap_oop_not_null(d);
6543     }
6544   %}
6545   ins_pipe(ialu_reg_long);
6546 %}
6547 
6548 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Matches EncodePKlass and delegates to the two-register
       // encode_klass_not_null in the macro assembler. There is no
       // predicate and no nullable counterpart in this chunk. The flags
       // register is declared killed.
6549   match(Set dst (EncodePKlass src));
6550   effect(KILL cr);
6551   format %{ "encode_klass_not_null $dst,$src" %}
6552   ins_encode %{
6553     __ encode_klass_not_null($dst$$Register, $src$$Register);
6554   %}
6555   ins_pipe(ialu_reg_long);
6556 %}
6557 
6558 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Matches DecodeNKlass. Uses the two-register decode_klass_not_null
       // when dst and src differ and the in-place one-register form when
       // they are the same register. The flags register is declared killed.
6559   match(Set dst (DecodeNKlass src));
6560   effect(KILL cr);
6561   format %{ "decode_klass_not_null $dst,$src" %}
6562   ins_encode %{
6563     Register s = $src$$Register;
6564     Register d = $dst$$Register;
6565     if (s != d) {
6566       __ decode_klass_not_null(d, s);
6567     } else {
6568       __ decode_klass_not_null(d);
6569     }
6570   %}
6571   ins_pipe(ialu_reg_long);
6572 %}
6573 
6574 
6575 //----------Conditional Move---------------------------------------------------
6576 // Jump
6577 // dummy instruction for generating temp registers
     // Pattern: Jump on (switch_val << shift). predicate(false) means the
     // matcher never selects this rule as written. The encoding loads the
     // address of the instruction's constant-table entry into the TEMP
     // register dest and jumps indirectly through
     // [dest + switch_val * scale], with the scale taken from the shift
     // constant.
6578 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6579   match(Jump (LShiftL switch_val shift));
6580   ins_cost(350);
6581   predicate(false);
6582   effect(TEMP dest);
6583 
6584   format %{ "leaq    $dest, [$constantaddress]\n\t"
6585             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6586   ins_encode %{
6587     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6588     // to do that and the compiler is using that register as one it can allocate.
6589     // So we build it all by hand.
6590     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6591     // ArrayAddress dispatch(table, index);
6592     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6593     __ lea($dest$$Register, $constantaddress);
6594     __ jmp(dispatch);
6595   %}
6596   ins_pipe(pipe_jmp);
6597 %}
6598 
6599 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Pattern: Jump on ((switch_val << shift) + offset). The encoding
       // loads the address of the instruction's constant-table entry into
       // the TEMP register dest and jumps indirectly through
       // [dest + switch_val * scale + offset]; the offset constant is
       // narrowed to int for the displacement.
6600   match(Jump (AddL (LShiftL switch_val shift) offset));
6601   ins_cost(350);
6602   effect(TEMP dest);
6603 
6604   format %{ "leaq    $dest, [$constantaddress]\n\t"
6605             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6606   ins_encode %{
6607     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6608     // to do that and the compiler is using that register as one it can allocate.
6609     // So we build it all by hand.
6610     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6611     // ArrayAddress dispatch(table, index);
6612     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6613     __ lea($dest$$Register, $constantaddress);
6614     __ jmp(dispatch);
6615   %}
6616   ins_pipe(pipe_jmp);
6617 %}
6618 
6619 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // General Jump pattern: the switch value is used as-is (scale
       // times_1, no offset). The encoding loads the address of the
       // instruction's constant-table entry into the TEMP register dest and
       // jumps indirectly through [dest + switch_val].
6620   match(Jump switch_val);
6621   ins_cost(350);
6622   effect(TEMP dest);
6623 
6624   format %{ "leaq    $dest, [$constantaddress]\n\t"
6625             "jmp     [$dest + $switch_val]\n\t" %}
6626   ins_encode %{
6627     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6628     // to do that and the compiler is using that register as one it can allocate.
6629     // So we build it all by hand.
6630     // Address index(noreg, switch_reg, Address::times_1);
6631     // ArrayAddress dispatch(table, index);
6632     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6633     __ lea($dest$$Register, $constantaddress);
6634     __ jmp(dispatch);
6635   %}
6636   ins_pipe(pipe_jmp);
6637 %}
6638 
6639 // Conditional move
     // Int register form for signed comparisons (cmpOp / rFlagsReg). dst is
     // both the second data input and the result, so the move into dst only
     // happens when the condition holds. Encoding: REX_reg_reg, then
     // enc_cmov (defined outside this chunk), which receives the condition
     // operand alongside the 0x0F, 0x40 opcode pair, then reg_reg.
6640 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6641 %{
6642   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6643 
6644   ins_cost(200); // XXX
6645   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6646   opcode(0x0F, 0x40);
6647   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6648   ins_pipe(pipe_cmov_reg);
6649 %}
6650 
6651 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Int register conditional move for unsigned comparisons (cmpOpU /
       // rFlagsRegU). Same encoding as the signed form; only the operand
       // classes and the format annotation differ.
6652   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6653 
6654   ins_cost(200); // XXX
6655   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6656   opcode(0x0F, 0x40);
6657   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6658   ins_pipe(pipe_cmov_reg);
6659 %}
6660 
6661 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Int register conditional move for the cmpOpUCF / rFlagsRegUCF
       // operand classes. Has no encoding of its own: it expands to
       // cmovI_regU with the same operands.
6662   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6663   ins_cost(200);
6664   expand %{
6665     cmovI_regU(cop, cr, dst, src);
6666   %}
6667 %}
6668 
6669 // Conditional move
     // Int memory form for signed comparisons: the second data input is a
     // LoadI folded into the instruction as a memory operand. Encoding:
     // REX_reg_mem, enc_cmov with the 0x0F, 0x40 opcode pair, reg_mem. Cost
     // 250, versus 200 for the register form.
6670 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
6671   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6672 
6673   ins_cost(250); // XXX
6674   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6675   opcode(0x0F, 0x40);
6676   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6677   ins_pipe(pipe_cmov_mem);
6678 %}
6679 
6680 // Conditional move
     // Int memory form for unsigned comparisons (cmpOpU / rFlagsRegU), with
     // a folded LoadI as the second data input. Same encoding as the signed
     // memory form.
6681 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6682 %{
6683   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6684 
6685   ins_cost(250); // XXX
6686   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6687   opcode(0x0F, 0x40);
6688   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6689   ins_pipe(pipe_cmov_mem);
6690 %}
6691 
6692 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Int memory conditional move for the cmpOpUCF / rFlagsRegUCF
       // operand classes. Has no encoding of its own: it expands to
       // cmovI_memU with the same operands.
6693   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6694   ins_cost(250);
6695   expand %{
6696     cmovI_memU(cop, cr, dst, src);
6697   %}
6698 %}
6699 
6700 // Conditional move
     // Compressed-pointer (rRegN) register form for signed comparisons.
     // Matches CMoveN and is formatted as a 32-bit cmovl; the encoding is
     // the same as the int register form (REX_reg_reg, enc_cmov with 0x0F,
     // 0x40, reg_reg).
6701 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6702 %{
6703   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6704 
6705   ins_cost(200); // XXX
6706   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6707   opcode(0x0F, 0x40);
6708   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6709   ins_pipe(pipe_cmov_reg);
6710 %}
6711 
6712 // Conditional move
     // Compressed-pointer (rRegN) register form for unsigned comparisons
     // (cmpOpU / rFlagsRegU). Same encoding as the signed form; only the
     // operand classes and the format annotation differ.
6713 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6714 %{
6715   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6716 
6717   ins_cost(200); // XXX
6718   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6719   opcode(0x0F, 0x40);
6720   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6721   ins_pipe(pipe_cmov_reg);
6722 %}
6723 
6724 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Compressed-pointer conditional move for the cmpOpUCF /
       // rFlagsRegUCF operand classes. Has no encoding of its own: it
       // expands to cmovN_regU with the same operands.
6725   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6726   ins_cost(200);
6727   expand %{
6728     cmovN_regU(cop, cr, dst, src);
6729   %}
6730 %}
6731 
6732 // Conditional move
     // Pointer register form for signed comparisons. Matches CMoveP and is
     // formatted as a 64-bit cmovq; the encoding uses the REX_reg_reg_wide
     // prefix enc_class, then enc_cmov with 0x0F, 0x40 and reg_reg.
6733 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6734 %{
6735   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6736 
6737   ins_cost(200); // XXX
6738   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6739   opcode(0x0F, 0x40);
6740   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6741   ins_pipe(pipe_cmov_reg);  // XXX
6742 %}
6743 
6744 // Conditional move
     // Pointer register form for unsigned comparisons (cmpOpU /
     // rFlagsRegU). Same wide encoding as the signed pointer form; only the
     // operand classes and the format annotation differ.
6745 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6746 %{
6747   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6748 
6749   ins_cost(200); // XXX
6750   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6751   opcode(0x0F, 0x40);
6752   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6753   ins_pipe(pipe_cmov_reg); // XXX
6754 %}
6755 
6756 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Pointer conditional move for the cmpOpUCF / rFlagsRegUCF operand
       // classes. Has no encoding of its own: it expands to cmovP_regU with
       // the same operands.
6757   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6758   ins_cost(200);
6759   expand %{
6760     cmovP_regU(cop, cr, dst, src);
6761   %}
6762 %}
6763 
6764 // DISABLED: Requires the ADLC to emit a bottom_type call that
6765 // correctly meets the two pointer arguments; one is an incoming
6766 // register but the other is a memory operand.  ALSO appears to
6767 // be buggy with implicit null checks.
6768 //
6769 //// Conditional move
6770 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6771 //%{
6772 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6773 //  ins_cost(250);
6774 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6775 //  opcode(0x0F,0x40);
6776 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6777 //  ins_pipe( pipe_cmov_mem );
6778 //%}
6779 //
6780 //// Conditional move
6781 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6782 //%{
6783 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6784 //  ins_cost(250);
6785 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6786 //  opcode(0x0F,0x40);
6787 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6788 //  ins_pipe( pipe_cmov_mem );
6789 //%}
6790 
6791 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6792 %{
       // Long register conditional move for signed comparisons. Matches
       // CMoveL and is formatted as cmovq; encoded with REX_reg_reg_wide,
       // enc_cmov with the 0x0F, 0x40 opcode pair, and reg_reg.
6793   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6794 
6795   ins_cost(200); // XXX
6796   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6797   opcode(0x0F, 0x40);
6798   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6799   ins_pipe(pipe_cmov_reg);  // XXX
6800 %}
6801 
// Conditional move of a long from memory under a signed condition.  The
// match rule folds the LoadL of src into the instruction, so the second
// value is read directly from the memory operand.
6802 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6803 %{
6804   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6805 
6806   ins_cost(200); // XXX
6807   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6808   opcode(0x0F, 0x40);
6809   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6810   ins_pipe(pipe_cmov_mem);  // XXX
6811 %}
6812 
// Register-to-register conditional move of a long under an unsigned
// condition (cmpOpU / rFlagsRegU).  Uses the same opcode bytes and encoding
// steps as cmovL_reg; only the condition and flags operand types differ.
6813 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6814 %{
6815   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6816 
6817   ins_cost(200); // XXX
6818   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6819   opcode(0x0F, 0x40);
6820   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6821   ins_pipe(pipe_cmov_reg); // XXX
6822 %}
6823 
// cmpOpUCF / rFlagsRegUCF flavour of the long register cmov.  It carries no
// encoding itself and expands into cmovL_regU with the same operands.
6824 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
6825   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6826   ins_cost(200);
6827   expand %{
6828     cmovL_regU(cop, cr, dst, src);
6829   %}
6830 %}
6831 
// Conditional move of a long from memory under an unsigned condition.  The
// LoadL of src is folded into the match rule, as in cmovL_mem.
6832 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6833 %{
6834   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6835 
6836   ins_cost(200); // XXX
6837   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6838   opcode(0x0F, 0x40);
6839   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6840   ins_pipe(pipe_cmov_mem); // XXX
6841 %}
6842 
// cmpOpUCF / rFlagsRegUCF flavour of the long memory cmov.  It expands into
// cmovL_memU with the same operands.
6843 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
6844   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6845   ins_cost(200);
6846   expand %{
6847     cmovL_memU(cop, cr, dst, src);
6848   %}
6849 %}
6850 
// Conditional move of a float between XMM registers under a signed
// condition.  It is implemented as a short branch around a plain move: the
// branch uses the inverted condition, so the move runs only when $cop holds.
6851 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6852 %{
6853   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6854 
6855   ins_cost(200); // XXX
6856   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6857             "movss     $dst, $src\n"
6858     "skip:" %}
6859   ins_encode %{
6860     Label Lskip;
6861     // Invert sense of branch from sense of CMOV
         // (the condition code is inverted by XOR-ing it with 1)
6862     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6863     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6864     __ bind(Lskip);
6865   %}
6866   ins_pipe(pipe_slow);
6867 %}
6868 
6869 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
6870 // %{
6871 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
6872 
6873 //   ins_cost(200); // XXX
6874 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6875 //             "movss     $dst, $src\n"
6876 //     "skip:" %}
6877 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
6878 //   ins_pipe(pipe_slow);
6879 // %}
6880 
// Conditional move of a float between XMM registers under an unsigned
// condition (cmpOpU / rFlagsRegU).  It emits the same branch-around-move
// sequence as cmovF_reg: a short jump on the inverted condition, then movflt.
6881 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
6882 %{
6883   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6884 
6885   ins_cost(200); // XXX
6886   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
6887             "movss     $dst, $src\n"
6888     "skip:" %}
6889   ins_encode %{
6890     Label Lskip;
6891     // Invert sense of branch from sense of CMOV
6892     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6893     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6894     __ bind(Lskip);
6895   %}
6896   ins_pipe(pipe_slow);
6897 %}
6898 
// cmpOpUCF / rFlagsRegUCF flavour of the float cmov.  It expands into
// cmovF_regU with the same operands.
6899 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
6900   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6901   ins_cost(200);
6902   expand %{
6903     cmovF_regU(cop, cr, dst, src);
6904   %}
6905 %}
6906 
// Conditional move of a double between XMM registers under a signed
// condition.  It is emitted as a short jump on the inverted condition over a
// movdbl, so the move happens only when $cop holds.
6907 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
6908 %{
6909   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6910 
6911   ins_cost(200); // XXX
6912   format %{ "jn$cop    skip\t# signed cmove double\n\t"
6913             "movsd     $dst, $src\n"
6914     "skip:" %}
6915   ins_encode %{
6916     Label Lskip;
6917     // Invert sense of branch from sense of CMOV
6918     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6919     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6920     __ bind(Lskip);
6921   %}
6922   ins_pipe(pipe_slow);
6923 %}
6924 
// Conditional move of a double between XMM registers under an unsigned
// condition (cmpOpU / rFlagsRegU).  It emits the same branch-around-move
// sequence as cmovD_reg.
6925 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
6926 %{
6927   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6928 
6929   ins_cost(200); // XXX
6930   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
6931             "movsd     $dst, $src\n"
6932     "skip:" %}
6933   ins_encode %{
6934     Label Lskip;
6935     // Invert sense of branch from sense of CMOV
6936     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6937     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6938     __ bind(Lskip);
6939   %}
6940   ins_pipe(pipe_slow);
6941 %}
6942 
// cmpOpUCF / rFlagsRegUCF flavour of the double cmov.  It expands into
// cmovD_regU with the same operands.
6943 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
6944   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6945   ins_cost(200);
6946   expand %{
6947     cmovD_regU(cop, cr, dst, src);
6948   %}
6949 %}
6950 
6951 //----------Arithmetic Instructions--------------------------------------------
6952 //----------Addition Instructions----------------------------------------------
6953 
// Int add, register += register.  Matches AddI of dst and src with the
// result in dst, and declares the flags register killed.
6954 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
6955 %{
6956   match(Set dst (AddI dst src));
6957   effect(KILL cr);
6958 
6959   format %{ "addl    $dst, $src\t# int" %}
6960   opcode(0x03);
6961   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
6962   ins_pipe(ialu_reg_reg);
6963 %}
6964 
// Int add, register += immediate.  Matches AddI of dst with an immI
// constant; flags are killed.
6965 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
6966 %{
6967   match(Set dst (AddI dst src));
6968   effect(KILL cr);
6969 
6970   format %{ "addl    $dst, $src\t# int" %}
6971   opcode(0x81, 0x00); /* /0 id */
       // OpcSErm and Con8or32 are defined elsewhere; presumably they pick the
       // 8-bit sign-extended or the 32-bit immediate form -- confirm.
6972   ins_encode(OpcSErm(dst, src), Con8or32(src));
6973   ins_pipe( ialu_reg );
6974 %}
6975 
// Int add, register += memory.  The LoadI of src is folded into the match
// rule, so the addend is read directly from the memory operand.  Flags are
// killed.
6976 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
6977 %{
6978   match(Set dst (AddI dst (LoadI src)));
6979   effect(KILL cr);
6980 
6981   ins_cost(125); // XXX
6982   format %{ "addl    $dst, $src\t# int" %}
6983   opcode(0x03);
6984   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6985   ins_pipe(ialu_reg_mem);
6986 %}
6987 
// Int add, memory += register.  Matches a StoreI to dst of (LoadI dst) + src,
// i.e. a read-modify-write on one memory operand.  Flags are killed.
6988 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
6989 %{
6990   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
6991   effect(KILL cr);
6992 
6993   ins_cost(150); // XXX
6994   format %{ "addl    $dst, $src\t# int" %}
6995   opcode(0x01); /* Opcode 01 /r */
       // The encoding helpers take (register, memory), hence (src, dst) here.
6996   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6997   ins_pipe(ialu_mem_reg);
6998 %}
6999 
// Int add, memory += immediate.  Matches a StoreI to dst of (LoadI dst) + src
// where src is an immI constant.  Flags are killed.
7000 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7001 %{
7002   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7003   effect(KILL cr);
7004 
7005   ins_cost(125); // XXX
7006   format %{ "addl    $dst, $src\t# int" %}
7007   opcode(0x81); /* Opcode 81 /0 id */
       // RM_opc_mem is passed 0x00, the /0 opcode extension from the comment above.
7008   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7009   ins_pipe(ialu_mem_imm);
7010 %}
7011 
// Int increment of a register.  It is only available when UseIncDec is set
// and matches AddI of dst with an immI1 operand (presumably the constant 1;
// the operand is defined elsewhere).  Flags are killed.
7012 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7013 %{
7014   predicate(UseIncDec);
7015   match(Set dst (AddI dst src));
7016   effect(KILL cr);
7017 
7018   format %{ "incl    $dst\t# int" %}
7019   opcode(0xFF, 0x00); // FF /0
7020   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7021   ins_pipe(ialu_reg);
7022 %}
7023 
// Int increment of a memory location.  It is only available when UseIncDec
// is set and matches a StoreI to dst of (LoadI dst) + immI1.  Flags are
// killed.
7024 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7025 %{
7026   predicate(UseIncDec);
7027   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7028   effect(KILL cr);
7029 
7030   ins_cost(125); // XXX
7031   format %{ "incl    $dst\t# int" %}
7032   opcode(0xFF); /* Opcode FF /0 */
7033   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7034   ins_pipe(ialu_mem_imm);
7035 %}
7036 
7037 // XXX why does that use AddI
// Int decrement of a register.  It is only available when UseIncDec is set.
// The rule matches AddI with an immI_M1 operand (presumably the constant -1;
// the operand is defined elsewhere), which is why it uses AddI rather than
// SubI.  Flags are killed.
7038 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7039 %{
7040   predicate(UseIncDec);
7041   match(Set dst (AddI dst src));
7042   effect(KILL cr);
7043 
7044   format %{ "decl    $dst\t# int" %}
7045   opcode(0xFF, 0x01); // FF /1
7046   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7047   ins_pipe(ialu_reg);
7048 %}
7049 
7050 // XXX why does that use AddI
// Int decrement of a memory location.  It is only available when UseIncDec
// is set and matches a StoreI to dst of (LoadI dst) + immI_M1 (presumably
// the constant -1).  Flags are killed.
7051 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7052 %{
7053   predicate(UseIncDec);
7054   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7055   effect(KILL cr);
7056 
7057   ins_cost(125); // XXX
7058   format %{ "decl    $dst\t# int" %}
7059   opcode(0xFF); /* Opcode FF /1 */
7060   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7061   ins_pipe(ialu_mem_imm);
7062 %}
7063 
// Int add of a register and an immediate into a separate destination
// register, implemented with lea.  Unlike the addl forms, it has no flags
// operand and declares no flags kill.
7064 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7065 %{
7066   match(Set dst (AddI src0 src1));
7067 
7068   ins_cost(110);
7069   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7070   opcode(0x8D); /* 0x8D /r */
       // A literal 0x67 byte is emitted first; the format string shows it as
       // the "addr32" prefix.
7071   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7072   ins_pipe(ialu_reg_reg);
7073 %}
7074 
// Long add, register += register.  Matches AddL of dst and src with the
// result in dst; flags are killed.  It uses the same primary opcode as
// addI_rReg but the _wide REX helper.
7075 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7076 %{
7077   match(Set dst (AddL dst src));
7078   effect(KILL cr);
7079 
7080   format %{ "addq    $dst, $src\t# long" %}
7081   opcode(0x03);
7082   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7083   ins_pipe(ialu_reg_reg);
7084 %}
7085 
// Long add, register += immediate.  The constant operand is an immL32;
// flags are killed.
7086 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7087 %{
7088   match(Set dst (AddL dst src));
7089   effect(KILL cr);
7090 
7091   format %{ "addq    $dst, $src\t# long" %}
7092   opcode(0x81, 0x00); /* /0 id */
7093   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7094   ins_pipe( ialu_reg );
7095 %}
7096 
// Long add, register += memory.  The LoadL of src is folded into the match
// rule; flags are killed.
7097 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7098 %{
7099   match(Set dst (AddL dst (LoadL src)));
7100   effect(KILL cr);
7101 
7102   ins_cost(125); // XXX
7103   format %{ "addq    $dst, $src\t# long" %}
7104   opcode(0x03);
7105   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7106   ins_pipe(ialu_reg_mem);
7107 %}
7108 
// Long add, memory += register.  Matches a StoreL to dst of (LoadL dst) + src,
// i.e. a read-modify-write on one memory operand.  Flags are killed.
7109 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7110 %{
7111   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7112   effect(KILL cr);
7113 
7114   ins_cost(150); // XXX
7115   format %{ "addq    $dst, $src\t# long" %}
7116   opcode(0x01); /* Opcode 01 /r */
       // The encoding helpers take (register, memory), hence (src, dst) here.
7117   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7118   ins_pipe(ialu_mem_reg);
7119 %}
7120 
// Long add, memory += immediate.  Matches a StoreL to dst of (LoadL dst) + src
// where src is an immL32 constant.  Flags are killed.
7121 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7122 %{
7123   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7124   effect(KILL cr);
7125 
7126   ins_cost(125); // XXX
7127   format %{ "addq    $dst, $src\t# long" %}
7128   opcode(0x81); /* Opcode 81 /0 id */
7129   ins_encode(REX_mem_wide(dst),
7130              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7131   ins_pipe(ialu_mem_imm);
7132 %}
7133 
// Long increment of a register.  It is only available when UseIncDec is set
// and matches AddL of dst with an immL1 operand (presumably the constant 1;
// the operand is defined elsewhere).  Flags are killed.
// dst is a long register (rRegL): the rule matches AddL and emits a wide
// incq, in line with decL_rReg and addL_rReg_imm.
7134 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7135 %{
7136   predicate(UseIncDec);
7137   match(Set dst (AddL dst src));
7138   effect(KILL cr);
7139 
7140   format %{ "incq    $dst\t# long" %}
7141   opcode(0xFF, 0x00); // FF /0
7142   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7143   ins_pipe(ialu_reg);
7144 %}
7145 
// Long increment of a memory location.  It is only available when UseIncDec
// is set and matches a StoreL to dst of (LoadL dst) + immL1.  Flags are
// killed.
7146 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7147 %{
7148   predicate(UseIncDec);
7149   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7150   effect(KILL cr);
7151 
7152   ins_cost(125); // XXX
7153   format %{ "incq    $dst\t# long" %}
7154   opcode(0xFF); /* Opcode FF /0 */
7155   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7156   ins_pipe(ialu_mem_imm);
7157 %}
7158 
7159 // XXX why does that use AddL
// Long decrement of a register.  It is only available when UseIncDec is set.
// The rule matches AddL with an immL_M1 operand (presumably the constant -1;
// the operand is defined elsewhere), which is why it uses AddL rather than
// SubL.  Flags are killed.
7160 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7161 %{
7162   predicate(UseIncDec);
7163   match(Set dst (AddL dst src));
7164   effect(KILL cr);
7165 
7166   format %{ "decq    $dst\t# long" %}
7167   opcode(0xFF, 0x01); // FF /1
7168   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7169   ins_pipe(ialu_reg);
7170 %}
7171 
7172 // XXX why does that use AddL
// Long decrement of a memory location.  It is only available when UseIncDec
// is set and matches a StoreL to dst of (LoadL dst) + immL_M1 (presumably
// the constant -1).  Flags are killed.
7173 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7174 %{
7175   predicate(UseIncDec);
7176   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7177   effect(KILL cr);
7178 
7179   ins_cost(125); // XXX
7180   format %{ "decq    $dst\t# long" %}
7181   opcode(0xFF); /* Opcode FF /1 */
7182   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7183   ins_pipe(ialu_mem_imm);
7184 %}
7185 
// Long add of a register and an immL32 constant into a separate destination
// register, implemented with leaq.  It has no flags operand and declares no
// flags kill.
7186 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7187 %{
7188   match(Set dst (AddL src0 src1));
7189 
7190   ins_cost(110);
7191   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7192   opcode(0x8D); /* 0x8D /r */
7193   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7194   ins_pipe(ialu_reg_reg);
7195 %}
7196 
// Pointer add, pointer register += long register.  Matches AddP of dst and
// src with the result in dst; flags are killed.  The encoding is the same as
// addL_rReg.
7197 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7198 %{
7199   match(Set dst (AddP dst src));
7200   effect(KILL cr);
7201 
7202   format %{ "addq    $dst, $src\t# ptr" %}
7203   opcode(0x03);
7204   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7205   ins_pipe(ialu_reg_reg);
7206 %}
7207 
// Pointer add, pointer register += immL32 constant.  Flags are killed.  The
// encoding is the same as addL_rReg_imm.
7208 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7209 %{
7210   match(Set dst (AddP dst src));
7211   effect(KILL cr);
7212 
7213   format %{ "addq    $dst, $src\t# ptr" %}
7214   opcode(0x81, 0x00); /* /0 id */
7215   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7216   ins_pipe( ialu_reg );
7217 %}
7218 
7219 // XXX addP mem ops ????
7220 
// Pointer add of a register and an immL32 constant into a separate
// destination register, implemented with leaq.  It has no flags operand and
// declares no flags kill.
7221 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7222 %{
7223   match(Set dst (AddP src0 src1));
7224 
7225   ins_cost(110);
7226   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7227   opcode(0x8D); /* 0x8D /r */
7228   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7229   ins_pipe(ialu_reg_reg);
7230 %}
7231 
// Matches CheckCastPP of dst into dst.  It emits no machine code: the size
// is declared 0 and the encoding is empty.
7232 instruct checkCastPP(rRegP dst)
7233 %{
7234   match(Set dst (CheckCastPP dst));
7235 
7236   size(0);
7237   format %{ "# checkcastPP of $dst" %}
7238   ins_encode(/* empty encoding */);
7239   ins_pipe(empty);
7240 %}
7241 
// Matches CastPP of dst into dst.  It emits no machine code: the size is
// declared 0 and the encoding is empty.
7242 instruct castPP(rRegP dst)
7243 %{
7244   match(Set dst (CastPP dst));
7245 
7246   size(0);
7247   format %{ "# castPP of $dst" %}
7248   ins_encode(/* empty encoding */);
7249   ins_pipe(empty);
7250 %}
7251 
// Matches CastII of dst into dst.  It emits no machine code (size 0, empty
// encoding) and, unlike the pointer casts above, has an explicit cost of 0.
7252 instruct castII(rRegI dst)
7253 %{
7254   match(Set dst (CastII dst));
7255 
7256   size(0);
7257   format %{ "# castII of $dst" %}
7258   ins_encode(/* empty encoding */);
7259   ins_cost(0);
7260   ins_pipe(empty);
7261 %}
7262 
7263 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked from mem into a pointer register.  The encoding is an
// ordinary wide load (opcode 0x8B, formatted as movq); nothing
// lock-specific is emitted here.
7264 instruct loadPLocked(rRegP dst, memory mem)
7265 %{
7266   match(Set dst (LoadPLocked mem));
7267 
7268   ins_cost(125); // XXX
7269   format %{ "movq    $dst, $mem\t# ptr locked" %}
7270   opcode(0x8B);
7271   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7272   ins_pipe(ialu_reg_mem); // XXX
7273 %}
7274 
7275 // Conditional-store of the updated heap-top.
7276 // Used during allocation of the shared heap.
7277 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7278 
// Operands: heap_top_ptr is the memory location to update, oldval is the
// expected value (in the rax pointer register class) and newval is the
// replacement.  The instruction's result is the flags register cr.
// Unlike storeIConditional / storeLConditional, no KILL of oldval is declared.
7279 instruct storePConditional(memory heap_top_ptr,
7280                            rax_RegP oldval, rRegP newval,
7281                            rFlagsReg cr)
7282 %{
7283   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7284 
7285   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7286             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7287   opcode(0x0F, 0xB1);
       // lock prefix, wide REX, the two opcode bytes, then the reg/mem operand bytes.
7288   ins_encode(lock_prefix,
7289              REX_reg_mem_wide(newval, heap_top_ptr),
7290              OpcP, OpcS,
7291              reg_mem(newval, heap_top_ptr));
7292   ins_pipe(pipe_cmpxchg);
7293 %}
7294 
7295 // Conditional-store of an int value.
7296 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// Operands: mem is the location to update, oldval is the expected value (in
// the rax int register class) and newval is the replacement.  The result is
// the flags register cr; oldval is declared killed.
7297 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7298 %{
7299   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7300   effect(KILL oldval);
7301 
7302   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7303   opcode(0x0F, 0xB1);
7304   ins_encode(lock_prefix,
7305              REX_reg_mem(newval, mem),
7306              OpcP, OpcS,
7307              reg_mem(newval, mem));
7308   ins_pipe(pipe_cmpxchg);
7309 %}
7310 
7311 // Conditional-store of a long value.
7312 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// Long counterpart of storeIConditional, using the wide REX helper.  The
// result is the flags register cr; oldval (rax long register class) is
// declared killed.
7313 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7314 %{
7315   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7316   effect(KILL oldval);
7317 
7318   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7319   opcode(0x0F, 0xB1);
7320   ins_encode(lock_prefix,
7321              REX_reg_mem_wide(newval, mem),
7322              OpcP, OpcS,
7323              reg_mem(newval, mem));
7324   ins_pipe(pipe_cmpxchg);
7325 %}
7326 
7327 
7328 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result in res.  It is only
// selected when VM_Version::supports_cx8() holds.  oldval lives in the rax
// pointer register class and is declared killed, as are the flags.
// Emitted sequence: locked wide cmpxchg, then sete on res, then a
// zero-extending byte move of res into itself.
7329 instruct compareAndSwapP(rRegI res,
7330                          memory mem_ptr,
7331                          rax_RegP oldval, rRegP newval,
7332                          rFlagsReg cr)
7333 %{
7334   predicate(VM_Version::supports_cx8());
7335   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7336   effect(KILL cr, KILL oldval);
7337 
7338   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7339             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7340             "sete    $res\n\t"
7341             "movzbl  $res, $res" %}
7342   opcode(0x0F, 0xB1);
7343   ins_encode(lock_prefix,
7344              REX_reg_mem_wide(newval, mem_ptr),
7345              OpcP, OpcS,
7346              reg_mem(newval, mem_ptr),
7347              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7348              REX_reg_breg(res, res), // movzbl
7349              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7350   ins_pipe( pipe_cmpxchg );
7351 %}
7352 
// Long compare-and-swap producing an int result in res.  It is only selected
// when VM_Version::supports_cx8() holds.  oldval lives in the rax long
// register class and is declared killed, as are the flags.
// Emitted sequence: locked wide cmpxchg, then sete on res, then a
// zero-extending byte move of res into itself.
7353 instruct compareAndSwapL(rRegI res,
7354                          memory mem_ptr,
7355                          rax_RegL oldval, rRegL newval,
7356                          rFlagsReg cr)
7357 %{
7358   predicate(VM_Version::supports_cx8());
7359   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7360   effect(KILL cr, KILL oldval);
7361 
7362   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7363             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7364             "sete    $res\n\t"
7365             "movzbl  $res, $res" %}
7366   opcode(0x0F, 0xB1);
7367   ins_encode(lock_prefix,
7368              REX_reg_mem_wide(newval, mem_ptr),
7369              OpcP, OpcS,
7370              reg_mem(newval, mem_ptr),
7371              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7372              REX_reg_breg(res, res), // movzbl
7373              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7374   ins_pipe( pipe_cmpxchg );
7375 %}
7376 
// Int compare-and-swap producing an int result in res.  It has no predicate,
// unlike the P and L forms.  oldval lives in the rax int register class and
// is declared killed, as are the flags.
// Emitted sequence: locked cmpxchg (non-wide REX helper), then sete on res,
// then a zero-extending byte move of res into itself.
7377 instruct compareAndSwapI(rRegI res,
7378                          memory mem_ptr,
7379                          rax_RegI oldval, rRegI newval,
7380                          rFlagsReg cr)
7381 %{
7382   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7383   effect(KILL cr, KILL oldval);
7384 
7385   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7386             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7387             "sete    $res\n\t"
7388             "movzbl  $res, $res" %}
7389   opcode(0x0F, 0xB1);
7390   ins_encode(lock_prefix,
7391              REX_reg_mem(newval, mem_ptr),
7392              OpcP, OpcS,
7393              reg_mem(newval, mem_ptr),
7394              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7395              REX_reg_breg(res, res), // movzbl
7396              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7397   ins_pipe( pipe_cmpxchg );
7398 %}
7399 
7400 
// Compare-and-swap on rRegN / rax_RegN operands (presumably narrow oops; the
// operand classes are defined elsewhere), producing an int result in res.
// The encoding is the same as compareAndSwapI: a non-wide locked cmpxchg,
// then sete and a zero-extending byte move on res.
7401 instruct compareAndSwapN(rRegI res,
7402                           memory mem_ptr,
7403                           rax_RegN oldval, rRegN newval,
7404                           rFlagsReg cr) %{
7405   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7406   effect(KILL cr, KILL oldval);
7407 
7408   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7409             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7410             "sete    $res\n\t"
7411             "movzbl  $res, $res" %}
7412   opcode(0x0F, 0xB1);
7413   ins_encode(lock_prefix,
7414              REX_reg_mem(newval, mem_ptr),
7415              OpcP, OpcS,
7416              reg_mem(newval, mem_ptr),
7417              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7418              REX_reg_breg(res, res), // movzbl
7419              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7420   ins_pipe( pipe_cmpxchg );
7421 %}
7422 
// GetAndAddI whose result is not used (per the result_not_used() predicate),
// with an immediate addend.  Because the old value is not needed, a plain
// addl to memory is emitted instead of xadd.  Flags are killed.
7423 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7424   predicate(n->as_LoadStore()->result_not_used());
7425   match(Set dummy (GetAndAddI mem add));
7426   effect(KILL cr);
7427   format %{ "ADDL  [$mem],$add" %}
7428   ins_encode %{
         // The lock prefix is emitted only when os::is_MP() is true.
7429     if (os::is_MP()) { __ lock(); }
7430     __ addl($mem$$Address, $add$$constant);
7431   %}
7432   ins_pipe( pipe_cmpxchg );
7433 %}
7434 
// GetAndAddI with the result used.  newval is both the addend input and the
// destination of the result.  Emits xaddl, with a lock prefix when
// os::is_MP() is true.  Flags are killed.
7435 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
7436   match(Set newval (GetAndAddI mem newval));
7437   effect(KILL cr);
7438   format %{ "XADDL  [$mem],$newval" %}
7439   ins_encode %{
7440     if (os::is_MP()) { __ lock(); }
7441     __ xaddl($mem$$Address, $newval$$Register);
7442   %}
7443   ins_pipe( pipe_cmpxchg );
7444 %}
7445 
// GetAndAddL whose result is not used (per the result_not_used() predicate),
// with an immL32 addend.  Emits a plain addq to memory, with a lock prefix
// when os::is_MP() is true.  Flags are killed.
7446 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
7447   predicate(n->as_LoadStore()->result_not_used());
7448   match(Set dummy (GetAndAddL mem add));
7449   effect(KILL cr);
7450   format %{ "ADDQ  [$mem],$add" %}
7451   ins_encode %{
7452     if (os::is_MP()) { __ lock(); }
7453     __ addq($mem$$Address, $add$$constant);
7454   %}
7455   ins_pipe( pipe_cmpxchg );
7456 %}
7457 
// GetAndAddL with the result used.  newval is both the addend input and the
// destination of the result.  Emits xaddq, with a lock prefix when
// os::is_MP() is true.  Flags are killed.
7458 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
7459   match(Set newval (GetAndAddL mem newval));
7460   effect(KILL cr);
7461   format %{ "XADDQ  [$mem],$newval" %}
7462   ins_encode %{
7463     if (os::is_MP()) { __ lock(); }
7464     __ xaddq($mem$$Address, $newval$$Register);
7465   %}
7466   ins_pipe( pipe_cmpxchg );
7467 %}
7468 
// GetAndSetI: exchanges an int register with memory.  newval is both the
// value to store and the destination of the result.  No flags operand is
// declared and no explicit lock prefix is emitted.
// NOTE(review): XCHG with a memory operand is documented as implicitly
// locked on x86 -- see the Intel SDM.
7469 instruct xchgI( memory mem, rRegI newval) %{
7470   match(Set newval (GetAndSetI mem newval));
7471   format %{ "XCHGL  $newval,[$mem]" %}
7472   ins_encode %{
7473     __ xchgl($newval$$Register, $mem$$Address);
7474   %}
7475   ins_pipe( pipe_cmpxchg );
7476 %}
7477 
// GetAndSetL: exchanges a long register with memory.  newval is both the
// value to store and the destination of the result.  No flags operand is
// declared and no explicit lock prefix is emitted.
// The format string says XCHGQ to match the xchgq the encoding emits.
7478 instruct xchgL( memory mem, rRegL newval) %{
7479   match(Set newval (GetAndSetL mem newval));
7480   format %{ "XCHGQ  $newval,[$mem]" %}
7481   ins_encode %{
7482     __ xchgq($newval$$Register, $mem$$Address);
7483   %}
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
// GetAndSetP: exchanges a pointer register with memory using xchgq.  newval
// is both the value to store and the destination of the result.  No flags
// operand is declared and no explicit lock prefix is emitted.
7487 instruct xchgP( memory mem, rRegP newval) %{
7488   match(Set newval (GetAndSetP mem newval));
7489   format %{ "XCHGQ  $newval,[$mem]" %}
7490   ins_encode %{
7491     __ xchgq($newval$$Register, $mem$$Address);
7492   %}
7493   ins_pipe( pipe_cmpxchg );
7494 %}
7495 
// GetAndSetN: exchanges an rRegN register (presumably a narrow oop; the
// operand class is defined elsewhere) with memory using xchgl.  newval is
// both the value to store and the destination of the result.
// The format string brackets the memory operand like the other xchg rules.
7496 instruct xchgN( memory mem, rRegN newval) %{
7497   match(Set newval (GetAndSetN mem newval));
7498   format %{ "XCHGL  $newval,[$mem]" %}
7499   ins_encode %{
7500     __ xchgl($newval$$Register, $mem$$Address);
7501   %}
7502   ins_pipe( pipe_cmpxchg );
7503 %}
7504 
7505 //----------Subtraction Instructions-------------------------------------------
7506 
7507 // Integer Subtraction Instructions
// Int subtract, register -= register.  Matches SubI of dst and src with the
// result in dst; flags are killed.
7508 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7509 %{
7510   match(Set dst (SubI dst src));
7511   effect(KILL cr);
7512 
7513   format %{ "subl    $dst, $src\t# int" %}
7514   opcode(0x2B);
7515   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7516   ins_pipe(ialu_reg_reg);
7517 %}
7518 
// Int subtract, register -= immediate.  Matches SubI of dst with an immI
// constant; flags are killed.
7519 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7520 %{
7521   match(Set dst (SubI dst src));
7522   effect(KILL cr);
7523 
7524   format %{ "subl    $dst, $src\t# int" %}
7525   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7526   ins_encode(OpcSErm(dst, src), Con8or32(src));
7527   ins_pipe(ialu_reg);
7528 %}
7529 
// Int subtract, register -= memory.  The LoadI of src is folded into the
// match rule; flags are killed.
7530 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7531 %{
7532   match(Set dst (SubI dst (LoadI src)));
7533   effect(KILL cr);
7534 
7535   ins_cost(125);
7536   format %{ "subl    $dst, $src\t# int" %}
7537   opcode(0x2B);
7538   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7539   ins_pipe(ialu_reg_mem);
7540 %}
7541 
// Int subtract, memory -= register.  Matches a StoreI to dst of
// (LoadI dst) - src, i.e. a read-modify-write on one memory operand.  Flags
// are killed.
7542 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7543 %{
7544   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7545   effect(KILL cr);
7546 
7547   ins_cost(150);
7548   format %{ "subl    $dst, $src\t# int" %}
7549   opcode(0x29); /* Opcode 29 /r */
       // The encoding helpers take (register, memory), hence (src, dst) here.
7550   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7551   ins_pipe(ialu_mem_reg);
7552 %}
7553 
// Int subtract, memory -= immediate.  Matches a StoreI to dst of
// (LoadI dst) - src where src is an immI constant.  Flags are killed.
7554 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7555 %{
7556   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7557   effect(KILL cr);
7558 
7559   ins_cost(125); // XXX
7560   format %{ "subl    $dst, $src\t# int" %}
7561   opcode(0x81); /* Opcode 81 /5 id */
       // RM_opc_mem is passed 0x05, the /5 opcode extension from the comment above.
7562   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7563   ins_pipe(ialu_mem_imm);
7564 %}
7565 
// Long subtract, register -= register.  Matches SubL of dst and src with
// the result in dst; flags are killed.
7566 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7567 %{
7568   match(Set dst (SubL dst src));
7569   effect(KILL cr);
7570 
7571   format %{ "subq    $dst, $src\t# long" %}
7572   opcode(0x2B);
7573   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7574   ins_pipe(ialu_reg_reg);
7575 %}
7576 
// Long subtract, register -= immediate.  Matches SubL of dst with an immL32
// constant; flags are killed.
// dst is a long register (rRegL): the rule matches SubL and emits a wide
// subq, in line with subL_rReg and addL_rReg_imm.
7577 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7578 %{
7579   match(Set dst (SubL dst src));
7580   effect(KILL cr);
7581 
7582   format %{ "subq    $dst, $src\t# long" %}
7583   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7584   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7585   ins_pipe(ialu_reg);
7586 %}
7587 
// Long subtract, register -= memory.  The LoadL of src is folded into the
// match rule; flags are killed.
7588 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7589 %{
7590   match(Set dst (SubL dst (LoadL src)));
7591   effect(KILL cr);
7592 
7593   ins_cost(125);
7594   format %{ "subq    $dst, $src\t# long" %}
7595   opcode(0x2B);
7596   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7597   ins_pipe(ialu_reg_mem);
7598 %}
7599 
// Long subtract, memory -= register.  Matches a StoreL to dst of
// (LoadL dst) - src, i.e. a read-modify-write on one memory operand.  Flags
// are killed.
7600 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7601 %{
7602   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7603   effect(KILL cr);
7604 
7605   ins_cost(150);
7606   format %{ "subq    $dst, $src\t# long" %}
7607   opcode(0x29); /* Opcode 29 /r */
       // The encoding helpers take (register, memory), hence (src, dst) here.
7608   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7609   ins_pipe(ialu_mem_reg);
7610 %}
7611 
// Long subtract, memory -= immediate.  Matches a StoreL to dst of
// (LoadL dst) - src where src is an immL32 constant.  Flags are killed.
7612 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7613 %{
7614   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7615   effect(KILL cr);
7616 
7617   ins_cost(125); // XXX
7618   format %{ "subq    $dst, $src\t# long" %}
7619   opcode(0x81); /* Opcode 81 /5 id */
7620   ins_encode(REX_mem_wide(dst),
7621              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7622   ins_pipe(ialu_mem_imm);
7623 %}
7624 
7625 // Subtract from a pointer
7626 // XXX hmpf???
// Matches AddP of dst with (0 - src), i.e. a pointer plus a negated int, and
// emits it as a single subtract.  Flags are killed.
// NOTE(review): src is an int register (rRegI) but the encoding uses the
// wide REX helper and the format prints subq, so all 64 bits of src take
// part -- confirm that the upper half of src is well-defined here.
7627 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7628 %{
7629   match(Set dst (AddP dst (SubI zero src)));
7630   effect(KILL cr);
7631 
7632   format %{ "subq    $dst, $src\t# ptr - int" %}
7633   opcode(0x2B);
7634   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7635   ins_pipe(ialu_reg_reg);
7636 %}
7637 
// Int negate of a register.  Matches SubI of an immI0 operand (presumably
// the constant 0) and dst, with the result in dst.  Flags are killed.
7638 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7639 %{
7640   match(Set dst (SubI zero dst));
7641   effect(KILL cr);
7642 
7643   format %{ "negl    $dst\t# int" %}
7644   opcode(0xF7, 0x03);  // Opcode F7 /3
7645   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7646   ins_pipe(ialu_reg);
7647 %}
7648 
// Int negate of a memory location.  Matches a StoreI to dst of
// (0 - LoadI dst).  Flags are killed.
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory; other memory read-modify-write rules use ialu_mem_imm or
// ialu_mem_reg -- confirm that this is intended.
7649 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7650 %{
7651   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7652   effect(KILL cr);
7653 
7654   format %{ "negl    $dst\t# int" %}
7655   opcode(0xF7, 0x03);  // Opcode F7 /3
       // "secondary" refers to the second opcode() argument (0x03), used as
       // the /3 opcode extension.
7656   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
7657   ins_pipe(ialu_reg);
7658 %}
7659 
// Long negate of a register.  Matches SubL of an immL0 operand (presumably
// the constant 0) and dst, with the result in dst.  Flags are killed.
7660 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7661 %{
7662   match(Set dst (SubL zero dst));
7663   effect(KILL cr);
7664 
7665   format %{ "negq    $dst\t# long" %}
7666   opcode(0xF7, 0x03);  // Opcode F7 /3
7667   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7668   ins_pipe(ialu_reg);
7669 %}
7670 
// Long negate of a memory location.  Matches a StoreL to dst of
// (0 - LoadL dst).  Flags are killed.
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory -- confirm that this is intended.
7671 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7672 %{
7673   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7674   effect(KILL cr);
7675 
7676   format %{ "negq    $dst\t# long" %}
7677   opcode(0xF7, 0x03);  // Opcode F7 /3
7678   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
7679   ins_pipe(ialu_reg);
7680 %}
7681 
7682 //----------Multiplication/Division Instructions-------------------------------
7683 // Integer Multiplication Instructions
7684 // Multiply Register
7685 
// Int multiply, register *= register.  Matches MulI of dst and src with the
// result in dst; flags are killed.  Two-byte opcode 0x0F 0xAF.
7686 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7687 %{
7688   match(Set dst (MulI dst src));
7689   effect(KILL cr);
7690 
7691   ins_cost(300);
7692   format %{ "imull   $dst, $src\t# int" %}
7693   opcode(0x0F, 0xAF);
7694   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7695   ins_pipe(ialu_reg_reg_alu0);
7696 %}
7697 
// Int multiply of a register by an immediate into a separate destination
// register (three-operand form: dst = src * imm).  Flags are killed.
7698 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7699 %{
7700   match(Set dst (MulI src imm));
7701   effect(KILL cr);
7702 
7703   ins_cost(300);
7704   format %{ "imull   $dst, $src, $imm\t# int" %}
7705   opcode(0x69); /* 69 /r id */
7706   ins_encode(REX_reg_reg(dst, src),
7707              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7708   ins_pipe(ialu_reg_reg_alu0);
7709 %}
7710 
// Int multiply, register *= memory.  The LoadI of src is folded into the
// match rule; flags are killed.
7711 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7712 %{
7713   match(Set dst (MulI dst (LoadI src)));
7714   effect(KILL cr);
7715 
7716   ins_cost(350);
7717   format %{ "imull   $dst, $src\t# int" %}
7718   opcode(0x0F, 0xAF);
7719   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7720   ins_pipe(ialu_reg_mem_alu0);
7721 %}
7722 
// Int multiply of a memory operand by an immediate into a register
// (dst = [src] * imm).  The LoadI is folded into the match rule; flags are
// killed.
7723 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7724 %{
7725   match(Set dst (MulI (LoadI src) imm));
7726   effect(KILL cr);
7727 
7728   ins_cost(300);
7729   format %{ "imull   $dst, $src, $imm\t# int" %}
7730   opcode(0x69); /* 69 /r id */
7731   ins_encode(REX_reg_mem(dst, src),
7732              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7733   ins_pipe(ialu_reg_mem_alu0);
7734 %}
7735 
// Long multiply, register *= register.  Matches MulL of dst and src with
// the result in dst; flags are killed.  It uses the same opcode bytes as
// mulI_rReg with the wide REX helper.
7736 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7737 %{
7738   match(Set dst (MulL dst src));
7739   effect(KILL cr);
7740 
7741   ins_cost(300);
7742   format %{ "imulq   $dst, $src\t# long" %}
7743   opcode(0x0F, 0xAF);
7744   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7745   ins_pipe(ialu_reg_reg_alu0);
7746 %}
7747 
// Long multiply of a register by an immL32 constant into a separate
// destination register (dst = src * imm).  Flags are killed.
7748 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7749 %{
7750   match(Set dst (MulL src imm));
7751   effect(KILL cr);
7752 
7753   ins_cost(300);
7754   format %{ "imulq   $dst, $src, $imm\t# long" %}
7755   opcode(0x69); /* 69 /r id */
7756   ins_encode(REX_reg_reg_wide(dst, src),
7757              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7758   ins_pipe(ialu_reg_reg_alu0);
7759 %}
7760 
// Long multiply, register *= memory.  The LoadL of src is folded into the
// match rule; flags are killed.
7761 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7762 %{
7763   match(Set dst (MulL dst (LoadL src)));
7764   effect(KILL cr);
7765 
7766   ins_cost(350);
7767   format %{ "imulq   $dst, $src\t# long" %}
7768   opcode(0x0F, 0xAF);
7769   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7770   ins_pipe(ialu_reg_mem_alu0);
7771 %}
7772 
// Long multiply, memory by immediate: matches dst = MulL(LoadL(src), imm).
// Three-operand form using opcode 0x69 with the *_wide REX encoding class.
// Flags are killed.
7773 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7774 %{
7775   match(Set dst (MulL (LoadL src) imm));
7776   effect(KILL cr);
7777 
7778   ins_cost(300);
7779   format %{ "imulq   $dst, $src, $imm\t# long" %}
7780   opcode(0x69); /* 69 /r id */
7781   ins_encode(REX_reg_mem_wide(dst, src),
7782              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7783   ins_pipe(ialu_reg_mem_alu0);
7784 %}
7785 
// High half of a long multiply: matches dst = MulHiL(src, rax).
// Operand classes pin the result to the rdx class and one input to the rax
// class; rax is both read and destroyed (USE_KILL) and flags are killed.
// Encoded as the one-operand form F7 /5 on src.
7786 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7787 %{
7788   match(Set dst (MulHiL src rax));
7789   effect(USE_KILL rax, KILL cr);
7790 
7791   ins_cost(300);
7792   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
7793   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7794   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
7795   ins_pipe(ialu_reg_reg_alu0);
7796 %}
7797 
// Signed int division: matches rax = DivI(rax, div).  rdx and the flags
// are declared killed; the divisor class excludes rax and rdx.
// The format text documents the emitted sequence: a guard for
// rax == 0x80000000 with div == -1 that bypasses idivl, otherwise
// cdql + idivl.  The guard bytes come from cdql_enc (defined elsewhere),
// followed here by the F7 /7 instruction itself.
7798 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7799                    rFlagsReg cr)
7800 %{
7801   match(Set rax (DivI rax div));
7802   effect(KILL rdx, KILL cr);
7803 
7804   ins_cost(30*100+10*100); // XXX
7805   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7806             "jne,s   normal\n\t"
7807             "xorl    rdx, rdx\n\t"
7808             "cmpl    $div, -1\n\t"
7809             "je,s    done\n"
7810     "normal: cdql\n\t"
7811             "idivl   $div\n"
7812     "done:"        %}
7813   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7814   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7815   ins_pipe(ialu_reg_reg_alu0);
7816 %}
7817 
// Signed long division: matches rax = DivL(rax, div).  rdx and the flags
// are declared killed; the divisor class excludes rax and rdx.
// Per the format text, the sequence guards the case
// rax == 0x8000000000000000 with div == -1 (idivq is skipped), otherwise
// runs cdqq + idivq.  The guard is produced by cdqq_enc (defined
// elsewhere); the F7 /7 instruction is encoded here with the wide REX class.
7818 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7819                    rFlagsReg cr)
7820 %{
7821   match(Set rax (DivL rax div));
7822   effect(KILL rdx, KILL cr);
7823 
7824   ins_cost(30*100+10*100); // XXX
7825   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7826             "cmpq    rax, rdx\n\t"
7827             "jne,s   normal\n\t"
7828             "xorl    rdx, rdx\n\t"
7829             "cmpq    $div, -1\n\t"
7830             "je,s    done\n"
7831     "normal: cdqq\n\t"
7832             "idivq   $div\n"
7833     "done:"        %}
7834   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7835   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7836   ins_pipe(ialu_reg_reg_alu0);
7837 %}
7838 
7839 // Integer DIVMOD with Register, both quotient and mod results
// Matches the combined DivModI node (no "Set": the node itself yields both
// results).  Unlike divI_rReg, rdx is not declared killed here -- only the
// flags are.  The emitted sequence is the same cdql_enc guard + F7 /7 as
// the plain int divide; this entry uses the pipe_slow pipeline class.
7840 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7841                              rFlagsReg cr)
7842 %{
7843   match(DivModI rax div);
7844   effect(KILL cr);
7845 
7846   ins_cost(30*100+10*100); // XXX
7847   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7848             "jne,s   normal\n\t"
7849             "xorl    rdx, rdx\n\t"
7850             "cmpl    $div, -1\n\t"
7851             "je,s    done\n"
7852     "normal: cdql\n\t"
7853             "idivl   $div\n"
7854     "done:"        %}
7855   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7856   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7857   ins_pipe(pipe_slow);
7858 %}
7859 
7860 // Long DIVMOD with Register, both quotient and mod results
// Matches the combined DivModL node (no "Set": the node itself yields both
// results).  Only the flags are declared killed.  The emitted sequence is
// the same cdqq_enc guard + wide F7 /7 as the plain long divide; this
// entry uses the pipe_slow pipeline class.
7861 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7862                              rFlagsReg cr)
7863 %{
7864   match(DivModL rax div);
7865   effect(KILL cr);
7866 
7867   ins_cost(30*100+10*100); // XXX
7868   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7869             "cmpq    rax, rdx\n\t"
7870             "jne,s   normal\n\t"
7871             "xorl    rdx, rdx\n\t"
7872             "cmpq    $div, -1\n\t"
7873             "je,s    done\n"
7874     "normal: cdqq\n\t"
7875             "idivq   $div\n"
7876     "done:"        %}
7877   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7878   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7879   ins_pipe(pipe_slow);
7880 %}
7881 
7882 //----------- DivL-By-Constant-Expansions--------------------------------------
7883 // DivI cases are handled by the compiler
7884 
7885 // Magic constant, reciprocal of 10
// Expansion helper (no match rule): defines dst with the 64-bit constant
// 0x6666666666666667.  It is instantiated by the divL_10 expand rule.
// The format text must show the same 16-digit constant that load_immL
// actually emits, so disassembly listings are not misleading.
7886 instruct loadConL_0x6666666666666667(rRegL dst)
7887 %{
7888   effect(DEF dst);
7889 
7890   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
7891   ins_encode(load_immL(dst, 0x6666666666666667));
7892   ins_pipe(ialu_reg);
7893 %}
7894 
// Expansion helper (no match rule), instantiated by the divL_10 expand
// rule.  Effects: defines dst (rdx class), reads src, reads and destroys
// rax, kills flags.  Encoded as the one-operand F7 /5 on src, like
// mulHiL_rReg.
7895 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7896 %{
7897   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
7898 
7899   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
7900   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7901   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
7902   ins_pipe(ialu_reg_reg_alu0);
7903 %}
7904 
// Expansion helper (no match rule), instantiated by the divL_10 expand
// rule: arithmetic right shift of dst in place by the fixed count 0x3F
// (63).  dst is read and written (USE_DEF); flags are killed.
7905 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
7906 %{
7907   effect(USE_DEF dst, KILL cr);
7908 
7909   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
7910   opcode(0xC1, 0x7); /* C1 /7 ib */
7911   ins_encode(reg_opc_imm_wide(dst, 0x3F));
7912   ins_pipe(ialu_reg);
7913 %}
7914 
// Expansion helper (no match rule), instantiated by the divL_10 expand
// rule: arithmetic right shift of dst in place by the fixed count 2.
// dst is read and written (USE_DEF); flags are killed.
7915 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
7916 %{
7917   effect(USE_DEF dst, KILL cr);
7918 
7919   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
7920   opcode(0xC1, 0x7); /* C1 /7 ib */
7921   ins_encode(reg_opc_imm_wide(dst, 0x2));
7922   ins_pipe(ialu_reg);
7923 %}
7924 
// Long division by the immL10 constant (operand class defined elsewhere;
// per the section header and helper comments this is the div-by-10 case).
// Matches dst = DivL(src, div) and expands into a multiply-high by the
// magic constant followed by shifts and a subtract, instead of idivq.
// NOTE(review): the expansion applies sarL_rReg_63 to src itself, i.e. the
// src operand is modified in place -- confirm this is intended by callers.
7925 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
7926 %{
7927   match(Set dst (DivL src div));
7928 
7929   ins_cost((5+8)*100);
7930   expand %{
7931     rax_RegL rax;                     // Killed temp
7932     rFlagsReg cr;                     // Killed
7933     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
7934     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
7935     sarL_rReg_63(src, cr);            // sarq  src, 63
7936     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
7937     subL_rReg(dst, src, cr);          // rdx -= src (subL_rReg, defined elsewhere)
7938   %}
7939 %}
7940 
7941 //-----------------------------------------------------------------------------
7942 
// Signed int remainder: matches rdx = ModI(rax, div).  rax and the flags
// are declared killed.  Uses the same cdql_enc guard + F7 /7 sequence as
// divI_rReg (see the format text); only the result register differs.
// Note the cost (300) is much lower than the divide entries; it is marked
// XXX in the original.
7943 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
7944                    rFlagsReg cr)
7945 %{
7946   match(Set rdx (ModI rax div));
7947   effect(KILL rax, KILL cr);
7948 
7949   ins_cost(300); // XXX
7950   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
7951             "jne,s   normal\n\t"
7952             "xorl    rdx, rdx\n\t"
7953             "cmpl    $div, -1\n\t"
7954             "je,s    done\n"
7955     "normal: cdql\n\t"
7956             "idivl   $div\n"
7957     "done:"        %}
7958   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7959   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7960   ins_pipe(ialu_reg_reg_alu0);
7961 %}
7962 
// Signed long remainder: matches rdx = ModL(rax, div).  rax and the flags
// are declared killed.  Uses the same cdqq_enc guard + wide F7 /7 sequence
// as divL_rReg (see the format text); only the result register differs.
7963 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
7964                    rFlagsReg cr)
7965 %{
7966   match(Set rdx (ModL rax div));
7967   effect(KILL rax, KILL cr);
7968 
7969   ins_cost(300); // XXX
7970   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
7971             "cmpq    rax, rdx\n\t"
7972             "jne,s   normal\n\t"
7973             "xorl    rdx, rdx\n\t"
7974             "cmpq    $div, -1\n\t"
7975             "je,s    done\n"
7976     "normal: cdqq\n\t"
7977             "idivq   $div\n"
7978     "done:"        %}
7979   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7980   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7981   ins_pipe(ialu_reg_reg_alu0);
7982 %}
7983 
7984 // Integer Shift Instructions
7985 // Shift Left by one
// Matches dst = LShiftI(dst, shift) where shift is an immI1 operand.
// Uses the D1 /4 form, which carries no immediate byte in the encoding.
// Flags are killed.
7986 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
7987 %{
7988   match(Set dst (LShiftI dst shift));
7989   effect(KILL cr);
7990 
7991   format %{ "sall    $dst, $shift" %}
7992   opcode(0xD1, 0x4); /* D1 /4 */
7993   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7994   ins_pipe(ialu_reg);
7995 %}
7996 
7997 // Shift Left by one
// Read-modify-write form: matches StoreI(dst, LShiftI(LoadI(dst), shift))
// so the shift is applied directly to the memory operand.  The /4 opcode
// extension is passed to RM_opc_mem as "secondary".  Flags are killed.
7998 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
7999 %{
8000   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8001   effect(KILL cr);
8002 
8003   format %{ "sall    $dst, $shift\t" %}
8004   opcode(0xD1, 0x4); /* D1 /4 */
8005   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8006   ins_pipe(ialu_mem_imm);
8007 %}
8008 
8009 // Shift Left by 8-bit immediate
// Matches dst = LShiftI(dst, shift) for an immI8 shift count.
// Uses the C1 /4 ib form; the whole instruction is produced by the
// reg_opc_imm encoding class (defined elsewhere).  Flags are killed.
8010 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8011 %{
8012   match(Set dst (LShiftI dst shift));
8013   effect(KILL cr);
8014 
8015   format %{ "sall    $dst, $shift" %}
8016   opcode(0xC1, 0x4); /* C1 /4 ib */
8017   ins_encode(reg_opc_imm(dst, shift));
8018   ins_pipe(ialu_reg);
8019 %}
8020 
8021 // Shift Left by 8-bit immediate
// Read-modify-write form: matches StoreI(dst, LShiftI(LoadI(dst), shift))
// for an immI8 shift count.  C1 /4 on the memory operand, followed by the
// immediate emitted through Con8or32.  Flags are killed.
8022 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8023 %{
8024   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8025   effect(KILL cr);
8026 
8027   format %{ "sall    $dst, $shift" %}
8028   opcode(0xC1, 0x4); /* C1 /4 ib */
8029   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8030   ins_pipe(ialu_mem_imm);
8031 %}
8032 
8033 // Shift Left by variable
// Matches dst = LShiftI(dst, shift) with a variable count.  The count
// operand is restricted to the rcx_RegI class; the D3 /4 encoding names
// only dst, so the count register is implicit.  Flags are killed.
8034 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8035 %{
8036   match(Set dst (LShiftI dst shift));
8037   effect(KILL cr);
8038 
8039   format %{ "sall    $dst, $shift" %}
8040   opcode(0xD3, 0x4); /* D3 /4 */
8041   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8042   ins_pipe(ialu_reg_reg);
8043 %}
8044 
8045 // Shift Left by variable
// Read-modify-write form with a variable count: matches
// StoreI(dst, LShiftI(LoadI(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /4 on the memory operand.  Flags are killed.
8046 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8047 %{
8048   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8049   effect(KILL cr);
8050 
8051   format %{ "sall    $dst, $shift" %}
8052   opcode(0xD3, 0x4); /* D3 /4 */
8053   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8054   ins_pipe(ialu_mem_reg);
8055 %}
8056 
8057 // Arithmetic shift right by one
// Matches dst = RShiftI(dst, shift) where shift is an immI1 operand.
// Uses the D1 /7 form, which carries no immediate byte.  Flags are killed.
8058 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8059 %{
8060   match(Set dst (RShiftI dst shift));
8061   effect(KILL cr);
8062 
8063   format %{ "sarl    $dst, $shift" %}
8064   opcode(0xD1, 0x7); /* D1 /7 */
8065   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8066   ins_pipe(ialu_reg);
8067 %}
8068 
8069 // Arithmetic shift right by one
// Read-modify-write form: matches StoreI(dst, RShiftI(LoadI(dst), shift))
// with an immI1 shift.  D1 /7 on the memory operand.  Flags are killed.
8070 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8071 %{
8072   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8073   effect(KILL cr);
8074 
8075   format %{ "sarl    $dst, $shift" %}
8076   opcode(0xD1, 0x7); /* D1 /7 */
8077   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8078   ins_pipe(ialu_mem_imm);
8079 %}
8080 
8081 // Arithmetic Shift Right by 8-bit immediate
// Matches dst = RShiftI(dst, shift) for an immI8 shift count.
// Uses the C1 /7 ib form via the reg_opc_imm encoding class.  Flags are
// killed.  This is a register-only instruction, so it uses the ialu_reg
// pipeline class like its siblings salI_rReg_imm and shrI_rReg_imm (it
// previously named the memory-operand class ialu_mem_imm).
8082 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8083 %{
8084   match(Set dst (RShiftI dst shift));
8085   effect(KILL cr);
8086 
8087   format %{ "sarl    $dst, $shift" %}
8088   opcode(0xC1, 0x7); /* C1 /7 ib */
8089   ins_encode(reg_opc_imm(dst, shift));
8090   ins_pipe(ialu_reg);
8091 %}
8092 
8093 // Arithmetic Shift Right by 8-bit immediate
// Read-modify-write form: matches StoreI(dst, RShiftI(LoadI(dst), shift))
// for an immI8 shift count.  C1 /7 on the memory operand, followed by the
// immediate emitted through Con8or32.  Flags are killed.
8094 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8095 %{
8096   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8097   effect(KILL cr);
8098 
8099   format %{ "sarl    $dst, $shift" %}
8100   opcode(0xC1, 0x7); /* C1 /7 ib */
8101   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8102   ins_pipe(ialu_mem_imm);
8103 %}
8104 
8105 // Arithmetic Shift Right by variable
// Matches dst = RShiftI(dst, shift) with a variable count restricted to
// the rcx_RegI class.  D3 /7; only dst appears in the encoding.  Flags are
// killed.
8106 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8107 %{
8108   match(Set dst (RShiftI dst shift));
8109   effect(KILL cr);
8110 
8111   format %{ "sarl    $dst, $shift" %}
8112   opcode(0xD3, 0x7); /* D3 /7 */
8113   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8114   ins_pipe(ialu_reg_reg);
8115 %}
8116 
8117 // Arithmetic Shift Right by variable
// Read-modify-write form with a variable count: matches
// StoreI(dst, RShiftI(LoadI(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /7 on the memory operand.  Flags are killed.
8118 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8119 %{
8120   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8121   effect(KILL cr);
8122 
8123   format %{ "sarl    $dst, $shift" %}
8124   opcode(0xD3, 0x7); /* D3 /7 */
8125   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8126   ins_pipe(ialu_mem_reg);
8127 %}
8128 
8129 // Logical shift right by one
// Matches dst = URShiftI(dst, shift) where shift is an immI1 operand.
// Uses the D1 /5 form, which carries no immediate byte.  Flags are killed.
8130 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8131 %{
8132   match(Set dst (URShiftI dst shift));
8133   effect(KILL cr);
8134 
8135   format %{ "shrl    $dst, $shift" %}
8136   opcode(0xD1, 0x5); /* D1 /5 */
8137   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8138   ins_pipe(ialu_reg);
8139 %}
8140 
8141 // Logical shift right by one
// Read-modify-write form: matches StoreI(dst, URShiftI(LoadI(dst), shift))
// with an immI1 shift.  D1 /5 on the memory operand.  Flags are killed.
8142 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8143 %{
8144   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8145   effect(KILL cr);
8146 
8147   format %{ "shrl    $dst, $shift" %}
8148   opcode(0xD1, 0x5); /* D1 /5 */
8149   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8150   ins_pipe(ialu_mem_imm);
8151 %}
8152 
8153 // Logical Shift Right by 8-bit immediate
// Matches dst = URShiftI(dst, shift) for an immI8 shift count.
// Uses the C1 /5 ib form via the reg_opc_imm encoding class.  Flags are
// killed.
8154 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8155 %{
8156   match(Set dst (URShiftI dst shift));
8157   effect(KILL cr);
8158 
8159   format %{ "shrl    $dst, $shift" %}
8160   opcode(0xC1, 0x5); /* C1 /5 ib */
8161   ins_encode(reg_opc_imm(dst, shift));
8162   ins_pipe(ialu_reg);
8163 %}
8164 
8165 // Logical Shift Right by 8-bit immediate
// Read-modify-write form: matches StoreI(dst, URShiftI(LoadI(dst), shift))
// for an immI8 shift count.  C1 /5 on the memory operand, followed by the
// immediate emitted through Con8or32.  Flags are killed.
8166 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8167 %{
8168   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8169   effect(KILL cr);
8170 
8171   format %{ "shrl    $dst, $shift" %}
8172   opcode(0xC1, 0x5); /* C1 /5 ib */
8173   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8174   ins_pipe(ialu_mem_imm);
8175 %}
8176 
8177 // Logical Shift Right by variable
// Matches dst = URShiftI(dst, shift) with a variable count restricted to
// the rcx_RegI class.  D3 /5; only dst appears in the encoding.  Flags are
// killed.
8178 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8179 %{
8180   match(Set dst (URShiftI dst shift));
8181   effect(KILL cr);
8182 
8183   format %{ "shrl    $dst, $shift" %}
8184   opcode(0xD3, 0x5); /* D3 /5 */
8185   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8186   ins_pipe(ialu_reg_reg);
8187 %}
8188 
8189 // Logical Shift Right by variable
// Read-modify-write form with a variable count: matches
// StoreI(dst, URShiftI(LoadI(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /5 on the memory operand.  Flags are killed.
8190 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8191 %{
8192   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8193   effect(KILL cr);
8194 
8195   format %{ "shrl    $dst, $shift" %}
8196   opcode(0xD3, 0x5); /* D3 /5 */
8197   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8198   ins_pipe(ialu_mem_reg);
8199 %}
8200 
8201 // Long Shift Instructions
8202 // Shift Left by one
// Long counterpart of salI_rReg_1: matches dst = LShiftL(dst, shift) with
// an immI1 shift.  D1 /4 encoded through the wide REX class.  Flags are
// killed.
8203 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8204 %{
8205   match(Set dst (LShiftL dst shift));
8206   effect(KILL cr);
8207 
8208   format %{ "salq    $dst, $shift" %}
8209   opcode(0xD1, 0x4); /* D1 /4 */
8210   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8211   ins_pipe(ialu_reg);
8212 %}
8213 
8214 // Shift Left by one
// Read-modify-write form: matches StoreL(dst, LShiftL(LoadL(dst), shift))
// with an immI1 shift.  D1 /4 on the memory operand with the wide REX
// class.  Flags are killed.
8215 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8216 %{
8217   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8218   effect(KILL cr);
8219 
8220   format %{ "salq    $dst, $shift" %}
8221   opcode(0xD1, 0x4); /* D1 /4 */
8222   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8223   ins_pipe(ialu_mem_imm);
8224 %}
8225 
8226 // Shift Left by 8-bit immediate
// Matches dst = LShiftL(dst, shift) for an immI8 shift count.
// C1 /4 ib, produced entirely by the reg_opc_imm_wide encoding class.
// Flags are killed.
8227 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8228 %{
8229   match(Set dst (LShiftL dst shift));
8230   effect(KILL cr);
8231 
8232   format %{ "salq    $dst, $shift" %}
8233   opcode(0xC1, 0x4); /* C1 /4 ib */
8234   ins_encode(reg_opc_imm_wide(dst, shift));
8235   ins_pipe(ialu_reg);
8236 %}
8237 
8238 // Shift Left by 8-bit immediate
// Read-modify-write form: matches StoreL(dst, LShiftL(LoadL(dst), shift))
// for an immI8 shift count.  C1 /4 on the memory operand with the wide REX
// class, followed by the immediate emitted through Con8or32.  Flags are
// killed.
8239 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8240 %{
8241   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8242   effect(KILL cr);
8243 
8244   format %{ "salq    $dst, $shift" %}
8245   opcode(0xC1, 0x4); /* C1 /4 ib */
8246   ins_encode(REX_mem_wide(dst), OpcP,
8247              RM_opc_mem(secondary, dst), Con8or32(shift));
8248   ins_pipe(ialu_mem_imm);
8249 %}
8250 
8251 // Shift Left by variable
// Matches dst = LShiftL(dst, shift) with a variable count.  The count is
// an int operand restricted to the rcx_RegI class even though dst is a
// long.  D3 /4 with the wide REX class.  Flags are killed.
8252 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8253 %{
8254   match(Set dst (LShiftL dst shift));
8255   effect(KILL cr);
8256 
8257   format %{ "salq    $dst, $shift" %}
8258   opcode(0xD3, 0x4); /* D3 /4 */
8259   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8260   ins_pipe(ialu_reg_reg);
8261 %}
8262 
8263 // Shift Left by variable
// Read-modify-write form with a variable count: matches
// StoreL(dst, LShiftL(LoadL(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /4 on the memory operand with the wide REX class.  Flags are
// killed.
8264 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8265 %{
8266   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8267   effect(KILL cr);
8268 
8269   format %{ "salq    $dst, $shift" %}
8270   opcode(0xD3, 0x4); /* D3 /4 */
8271   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8272   ins_pipe(ialu_mem_reg);
8273 %}
8274 
8275 // Arithmetic shift right by one
// Matches dst = RShiftL(dst, shift) with an immI1 shift.
// D1 /7 encoded through the wide REX class.  Flags are killed.
8276 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8277 %{
8278   match(Set dst (RShiftL dst shift));
8279   effect(KILL cr);
8280 
8281   format %{ "sarq    $dst, $shift" %}
8282   opcode(0xD1, 0x7); /* D1 /7 */
8283   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8284   ins_pipe(ialu_reg);
8285 %}
8286 
8287 // Arithmetic shift right by one
// Read-modify-write form: matches StoreL(dst, RShiftL(LoadL(dst), shift))
// with an immI1 shift.  D1 /7 on the memory operand with the wide REX
// class.  Flags are killed.
8288 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8289 %{
8290   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8291   effect(KILL cr);
8292 
8293   format %{ "sarq    $dst, $shift" %}
8294   opcode(0xD1, 0x7); /* D1 /7 */
8295   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8296   ins_pipe(ialu_mem_imm);
8297 %}
8298 
8299 // Arithmetic Shift Right by 8-bit immediate
// Matches dst = RShiftL(dst, shift) for an immI8 shift count.
// C1 /7 ib, produced entirely by the reg_opc_imm_wide encoding class.
// Flags are killed.  This is a register-only instruction, so it uses the
// ialu_reg pipeline class like its siblings salL_rReg_imm and
// shrL_rReg_imm (it previously named the memory-operand class
// ialu_mem_imm).
8300 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8301 %{
8302   match(Set dst (RShiftL dst shift));
8303   effect(KILL cr);
8304 
8305   format %{ "sarq    $dst, $shift" %}
8306   opcode(0xC1, 0x7); /* C1 /7 ib */
8307   ins_encode(reg_opc_imm_wide(dst, shift));
8308   ins_pipe(ialu_reg);
8309 %}
8310 
8311 // Arithmetic Shift Right by 8-bit immediate
// Read-modify-write form: matches StoreL(dst, RShiftL(LoadL(dst), shift))
// for an immI8 shift count.  C1 /7 on the memory operand with the wide REX
// class, followed by the immediate emitted through Con8or32.  Flags are
// killed.
8312 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8313 %{
8314   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8315   effect(KILL cr);
8316 
8317   format %{ "sarq    $dst, $shift" %}
8318   opcode(0xC1, 0x7); /* C1 /7 ib */
8319   ins_encode(REX_mem_wide(dst), OpcP,
8320              RM_opc_mem(secondary, dst), Con8or32(shift));
8321   ins_pipe(ialu_mem_imm);
8322 %}
8323 
8324 // Arithmetic Shift Right by variable
// Matches dst = RShiftL(dst, shift) with a variable count restricted to
// the rcx_RegI class.  D3 /7 with the wide REX class.  Flags are killed.
8325 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8326 %{
8327   match(Set dst (RShiftL dst shift));
8328   effect(KILL cr);
8329 
8330   format %{ "sarq    $dst, $shift" %}
8331   opcode(0xD3, 0x7); /* D3 /7 */
8332   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8333   ins_pipe(ialu_reg_reg);
8334 %}
8335 
8336 // Arithmetic Shift Right by variable
// Read-modify-write form with a variable count: matches
// StoreL(dst, RShiftL(LoadL(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /7 on the memory operand with the wide REX class.  Flags are
// killed.
8337 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8338 %{
8339   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8340   effect(KILL cr);
8341 
8342   format %{ "sarq    $dst, $shift" %}
8343   opcode(0xD3, 0x7); /* D3 /7 */
8344   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8345   ins_pipe(ialu_mem_reg);
8346 %}
8347 
8348 // Logical shift right by one
// Matches dst = URShiftL(dst, shift) with an immI1 shift.
// D1 /5 encoded through the wide REX class.  Flags are killed.
8349 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8350 %{
8351   match(Set dst (URShiftL dst shift));
8352   effect(KILL cr);
8353 
8354   format %{ "shrq    $dst, $shift" %}
8355   opcode(0xD1, 0x5); /* D1 /5 */
8356   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8357   ins_pipe(ialu_reg);
8358 %}
8359 
8360 // Logical shift right by one
// Read-modify-write form: matches StoreL(dst, URShiftL(LoadL(dst), shift))
// with an immI1 shift.  D1 /5 on the memory operand with the wide REX
// class.  Flags are killed.
8361 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8362 %{
8363   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8364   effect(KILL cr);
8365 
8366   format %{ "shrq    $dst, $shift" %}
8367   opcode(0xD1, 0x5); /* D1 /5 */
8368   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8369   ins_pipe(ialu_mem_imm);
8370 %}
8371 
8372 // Logical Shift Right by 8-bit immediate
// Matches dst = URShiftL(dst, shift) for an immI8 shift count.
// C1 /5 ib, produced entirely by the reg_opc_imm_wide encoding class.
// Flags are killed.
8373 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8374 %{
8375   match(Set dst (URShiftL dst shift));
8376   effect(KILL cr);
8377 
8378   format %{ "shrq    $dst, $shift" %}
8379   opcode(0xC1, 0x5); /* C1 /5 ib */
8380   ins_encode(reg_opc_imm_wide(dst, shift));
8381   ins_pipe(ialu_reg);
8382 %}
8383 
8384 
8385 // Logical Shift Right by 8-bit immediate
// Read-modify-write form: matches StoreL(dst, URShiftL(LoadL(dst), shift))
// for an immI8 shift count.  C1 /5 on the memory operand with the wide REX
// class, followed by the immediate emitted through Con8or32.  Flags are
// killed.
8386 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8387 %{
8388   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8389   effect(KILL cr);
8390 
8391   format %{ "shrq    $dst, $shift" %}
8392   opcode(0xC1, 0x5); /* C1 /5 ib */
8393   ins_encode(REX_mem_wide(dst), OpcP,
8394              RM_opc_mem(secondary, dst), Con8or32(shift));
8395   ins_pipe(ialu_mem_imm);
8396 %}
8397 
8398 // Logical Shift Right by variable
// Matches dst = URShiftL(dst, shift) with a variable count restricted to
// the rcx_RegI class.  D3 /5 with the wide REX class.  Flags are killed.
8399 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8400 %{
8401   match(Set dst (URShiftL dst shift));
8402   effect(KILL cr);
8403 
8404   format %{ "shrq    $dst, $shift" %}
8405   opcode(0xD3, 0x5); /* D3 /5 */
8406   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8407   ins_pipe(ialu_reg_reg);
8408 %}
8409 
8410 // Logical Shift Right by variable
// Read-modify-write form with a variable count: matches
// StoreL(dst, URShiftL(LoadL(dst), shift)) where shift is in the rcx_RegI
// class.  D3 /5 on the memory operand with the wide REX class.  Flags are
// killed.
8411 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8412 %{
8413   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8414   effect(KILL cr);
8415 
8416   format %{ "shrq    $dst, $shift" %}
8417   opcode(0xD3, 0x5); /* D3 /5 */
8418   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8419   ins_pipe(ialu_mem_reg);
8420 %}
8421 
8422 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8423 // This idiom is used by the compiler for the i2b bytecode.
// Matches dst = RShiftI(LShiftI(src, twentyfour), twentyfour), i.e. a left
// shift then an arithmetic right shift by the same immI_24 constant, and
// replaces the pair with a single movsbl (0F BE).  No flags effect is
// declared.  REX_reg_breg is the byte-register REX encoding class
// (defined elsewhere).
8424 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8425 %{
8426   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8427 
8428   format %{ "movsbl  $dst, $src\t# i2b" %}
8429   opcode(0x0F, 0xBE);
8430   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8431   ins_pipe(ialu_reg_reg);
8432 %}
8433 
8434 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8435 // This idiom is used by the compiler for the i2s bytecode.
// Matches dst = RShiftI(LShiftI(src, sixteen), sixteen), i.e. a left shift
// then an arithmetic right shift by the same immI_16 constant, and
// replaces the pair with a single movswl (0F BF).  No flags effect is
// declared.
8436 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8437 %{
8438   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8439 
8440   format %{ "movswl  $dst, $src\t# i2s" %}
8441   opcode(0x0F, 0xBF);
8442   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8443   ins_pipe(ialu_reg_reg);
8444 %}
8445 
8446 // ROL/ROR instructions
8447 
8448 // ROL expand
// Expansion helper (no match rule), instantiated by rolI_rReg_i1.
// Rotates dst left in place using the D1 /0 form (no immediate byte).
// dst is read and written; flags are killed.
8449 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
8450   effect(KILL cr, USE_DEF dst);
8451 
8452   format %{ "roll    $dst" %}
8453   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8454   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8455   ins_pipe(ialu_reg);
8456 %}
8457 
// Expansion helper (no match rule), instantiated by rolI_rReg_i8.
// Rotates dst left in place by an immI8 count using C1 /0 ib.
// dst is read and written, shift is read; flags are killed.
8458 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
8459   effect(USE_DEF dst, USE shift, KILL cr);
8460 
8461   format %{ "roll    $dst, $shift" %}
8462   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8463   ins_encode( reg_opc_imm(dst, shift) );
8464   ins_pipe(ialu_reg);
8465 %}
8466 
// Expansion helper (no match rule), instantiated by rolI_rReg_Var_C0 and
// rolI_rReg_Var_C32.  Rotates dst left by a variable count using D3 /0.
// dst is drawn from the no_rcx_RegI class and the count from rcx_RegI, so
// the two operands cannot share a register.  Flags are killed.
8467 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8468 %{
8469   effect(USE_DEF dst, USE shift, KILL cr);
8470 
8471   format %{ "roll    $dst, $shift" %}
8472   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8473   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8474   ins_pipe(ialu_reg_reg);
8475 %}
8476 // end of ROL expand
8477 
8478 // Rotate Left by one
// Recognizes the rotate idiom OrI(LShiftI(dst, lshift), URShiftI(dst, rshift))
// with an immI1 left count and an immI_M1 right count (operand classes
// defined elsewhere; names suggest the constants 1 and -1) and expands it
// to the single-instruction helper rolI_rReg_imm1.
8479 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8480 %{
8481   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8482 
8483   expand %{
8484     rolI_rReg_imm1(dst, cr);
8485   %}
8486 %}
8487 
8488 // Rotate Left by 8-bit immediate
// Recognizes OrI(LShiftI(dst, lshift), URShiftI(dst, rshift)) for two
// immI8 constants.  The predicate only admits the pattern when the two
// counts sum to 0 modulo 32 (low five bits of the sum are zero), which is
// what makes the shift pair a 32-bit rotate.  Expands to rolI_rReg_imm8
// using the left count.
8489 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8490 %{
8491   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8492   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8493 
8494   expand %{
8495     rolI_rReg_imm8(dst, lshift, cr);
8496   %}
8497 %}
8498 
8499 // Rotate Left by variable
// Recognizes the variable rotate idiom
// OrI(LShiftI(dst, shift), URShiftI(dst, SubI(zero, shift))) where the
// right count is written as (zero - shift) with an immI0 operand.
// Expands to rolI_rReg_CL.
8500 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8501 %{
8502   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8503 
8504   expand %{
8505     rolI_rReg_CL(dst, shift, cr);
8506   %}
8507 %}
8508 
8509 // Rotate Left by variable
// Same as rolI_rReg_Var_C0, but for the spelling where the right count is
// (c32 - shift) with an immI_32 operand (operand class defined elsewhere;
// the name suggests the constant 32).  Expands to rolI_rReg_CL.
8510 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8511 %{
8512   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8513 
8514   expand %{
8515     rolI_rReg_CL(dst, shift, cr);
8516   %}
8517 %}
8518 
8519 // ROR expand
// Expansion helper (no match rule), instantiated by rorI_rReg_i1.
// Rotates dst right in place using the D1 /1 form (no immediate byte).
// dst is read and written; flags are killed.
8520 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8521 %{
8522   effect(USE_DEF dst, KILL cr);
8523 
8524   format %{ "rorl    $dst" %}
8525   opcode(0xD1, 0x1); /* D1 /1 */
8526   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8527   ins_pipe(ialu_reg);
8528 %}
8529 
// Expansion helper (no match rule), instantiated by rorI_rReg_i8.
// Rotates dst right in place by an immI8 count using C1 /1 ib.
// dst is read and written, shift is read; flags are killed.
8530 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8531 %{
8532   effect(USE_DEF dst, USE shift, KILL cr);
8533 
8534   format %{ "rorl    $dst, $shift" %}
8535   opcode(0xC1, 0x1); /* C1 /1 ib */
8536   ins_encode(reg_opc_imm(dst, shift));
8537   ins_pipe(ialu_reg);
8538 %}
8539 
// Expansion helper (no match rule), instantiated by rorI_rReg_Var_C0 and
// rorI_rReg_Var_C32.  Rotates dst right by a variable count using D3 /1.
// dst comes from no_rcx_RegI and the count from rcx_RegI.  Flags are
// killed.
8540 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8541 %{
8542   effect(USE_DEF dst, USE shift, KILL cr);
8543 
8544   format %{ "rorl    $dst, $shift" %}
8545   opcode(0xD3, 0x1); /* D3 /1 */
8546   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8547   ins_pipe(ialu_reg_reg);
8548 %}
8549 // end of ROR expand
8550 
8551 // Rotate Right by one
// Recognizes OrI(URShiftI(dst, rshift), LShiftI(dst, lshift)) with an
// immI1 right count and an immI_M1 left count (operand classes defined
// elsewhere; names suggest 1 and -1) and expands it to rorI_rReg_imm1.
8552 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8553 %{
8554   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8555 
8556   expand %{
8557     rorI_rReg_imm1(dst, cr);
8558   %}
8559 %}
8560 
8561 // Rotate Right by 8-bit immediate
// Recognizes OrI(URShiftI(dst, rshift), LShiftI(dst, lshift)) for two
// immI8 constants.  The predicate only admits the pattern when the two
// counts sum to 0 modulo 32.  Expands to rorI_rReg_imm8 using the right
// count.
8562 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8563 %{
8564   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8565   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8566 
8567   expand %{
8568     rorI_rReg_imm8(dst, rshift, cr);
8569   %}
8570 %}
8571 
8572 // Rotate Right by variable
// Recognizes the variable rotate idiom
// OrI(URShiftI(dst, shift), LShiftI(dst, SubI(zero, shift))) where the
// left count is written as (zero - shift) with an immI0 operand.
// Expands to rorI_rReg_CL.
8573 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8574 %{
8575   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8576 
8577   expand %{
8578     rorI_rReg_CL(dst, shift, cr);
8579   %}
8580 %}
8581 
8582 // Rotate Right by variable
// Same as rorI_rReg_Var_C0, but for the spelling where the left count is
// (c32 - shift) with an immI_32 operand (name suggests the constant 32;
// operand class defined elsewhere).  Expands to rorI_rReg_CL.
8583 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8584 %{
8585   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8586 
8587   expand %{
8588     rorI_rReg_CL(dst, shift, cr);
8589   %}
8590 %}
8591 
8592 // for long rotate
8593 // ROL expand
// Expansion helper (no match rule), instantiated by rolL_rReg_i1.
// Long rotate left in place using D1 /0 with the wide REX class.
// dst is read and written; flags are killed.
8594 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
8595   effect(USE_DEF dst, KILL cr);
8596 
8597   format %{ "rolq    $dst" %}
8598   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8599   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8600   ins_pipe(ialu_reg);
8601 %}
8602 
// Expansion helper (no match rule), instantiated by rolL_rReg_i8.
// Long rotate left in place by an immI8 count using C1 /0 ib via
// reg_opc_imm_wide.  dst is read and written, shift is read; flags are
// killed.
8603 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
8604   effect(USE_DEF dst, USE shift, KILL cr);
8605 
8606   format %{ "rolq    $dst, $shift" %}
8607   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8608   ins_encode( reg_opc_imm_wide(dst, shift) );
8609   ins_pipe(ialu_reg);
8610 %}
8611 
// Expansion helper (no match rule), instantiated by rolL_rReg_Var_C0 and
// rolL_rReg_Var_C64.  Long rotate left by a variable count using D3 /0
// with the wide REX class.  dst comes from no_rcx_RegL and the count from
// rcx_RegI.  Flags are killed.
8612 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8613 %{
8614   effect(USE_DEF dst, USE shift, KILL cr);
8615 
8616   format %{ "rolq    $dst, $shift" %}
8617   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8618   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8619   ins_pipe(ialu_reg_reg);
8620 %}
8621 // end of ROL expand
8622 
8623 // Rotate Left by one
// Recognizes OrL(LShiftL(dst, lshift), URShiftL(dst, rshift)) with an
// immI1 left count and an immI_M1 right count (operand classes defined
// elsewhere; names suggest 1 and -1) and expands it to rolL_rReg_imm1.
8624 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8625 %{
8626   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8627 
8628   expand %{
8629     rolL_rReg_imm1(dst, cr);
8630   %}
8631 %}
8632 
8633 // Rotate Left by 8-bit immediate
// Recognizes OrL(LShiftL(dst, lshift), URShiftL(dst, rshift)) for two
// immI8 constants.  The predicate only admits the pattern when the two
// counts sum to 0 modulo 64 (low six bits of the sum are zero).  Expands
// to rolL_rReg_imm8 using the left count.
8634 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8635 %{
8636   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8637   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8638 
8639   expand %{
8640     rolL_rReg_imm8(dst, lshift, cr);
8641   %}
8642 %}
8643 
8644 // Rotate Left by variable
// Recognizes the variable long rotate idiom
// OrL(LShiftL(dst, shift), URShiftL(dst, SubI(zero, shift))) where the
// right count is written as (zero - shift) with an immI0 operand.
// Expands to rolL_rReg_CL.
8645 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8646 %{
8647   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8648 
8649   expand %{
8650     rolL_rReg_CL(dst, shift, cr);
8651   %}
8652 %}
8653 
8654 // Rotate Left by variable
// Same as rolL_rReg_Var_C0, but for the spelling where the right count is
// (c64 - shift) with an immI_64 operand (name suggests the constant 64;
// operand class defined elsewhere).  Expands to rolL_rReg_CL.
8655 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8656 %{
8657   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8658 
8659   expand %{
8660     rolL_rReg_CL(dst, shift, cr);
8661   %}
8662 %}
8663 
8664 // ROR expand
// Expansion helper (no match rule), instantiated by rorL_rReg_i1.
// Long rotate right in place using D1 /1 with the wide REX class.
// dst is read and written; flags are killed.
8665 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8666 %{
8667   effect(USE_DEF dst, KILL cr);
8668 
8669   format %{ "rorq    $dst" %}
8670   opcode(0xD1, 0x1); /* D1 /1 */
8671   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8672   ins_pipe(ialu_reg);
8673 %}
8674 
// Expansion helper (no match rule), instantiated by rorL_rReg_i8.
// Long rotate right in place by an immI8 count using C1 /1 ib via
// reg_opc_imm_wide.  dst is read and written, shift is read; flags are
// killed.
8675 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8676 %{
8677   effect(USE_DEF dst, USE shift, KILL cr);
8678 
8679   format %{ "rorq    $dst, $shift" %}
8680   opcode(0xC1, 0x1); /* C1 /1 ib */
8681   ins_encode(reg_opc_imm_wide(dst, shift));
8682   ins_pipe(ialu_reg);
8683 %}
8684 
// Expansion helper (no match rule), instantiated by rorL_rReg_Var_C0 and
// rorL_rReg_Var_C64.  Long rotate right by a variable count using D3 /1
// with the wide REX class.  dst comes from no_rcx_RegL and the count from
// rcx_RegI.  Flags are killed.
8685 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8686 %{
8687   effect(USE_DEF dst, USE shift, KILL cr);
8688 
8689   format %{ "rorq    $dst, $shift" %}
8690   opcode(0xD3, 0x1); /* D3 /1 */
8691   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8692   ins_pipe(ialu_reg_reg);
8693 %}
8694 // end of ROR expand
8695 
8696 // Rotate Right by one
// Recognizes OrL(URShiftL(dst, rshift), LShiftL(dst, lshift)) with an
// immI1 right count and an immI_M1 left count (operand classes defined
// elsewhere; names suggest 1 and -1) and expands it to rorL_rReg_imm1.
8697 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8698 %{
8699   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8700 
8701   expand %{
8702     rorL_rReg_imm1(dst, cr);
8703   %}
8704 %}
8705 
8706 // Rotate Right by 8-bit immediate
// Recognizes OrL(URShiftL(dst, rshift), LShiftL(dst, lshift)) for two
// immI8 constants.  The predicate only admits the pattern when the two
// counts sum to 0 modulo 64.  Expands to rorL_rReg_imm8 using the right
// count.
8707 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8708 %{
8709   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8710   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8711 
8712   expand %{
8713     rorL_rReg_imm8(dst, rshift, cr);
8714   %}
8715 %}
8716 
8717 // Rotate Right by variable
// Recognizes the variable long rotate idiom
// OrL(URShiftL(dst, shift), LShiftL(dst, SubI(zero, shift))) where the
// left count is written as (zero - shift) with an immI0 operand.
// Expands to rorL_rReg_CL.
8718 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8719 %{
8720   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8721 
8722   expand %{
8723     rorL_rReg_CL(dst, shift, cr);
8724   %}
8725 %}
8726 
8727 // Rotate Right by variable
// Same as rorL_rReg_Var_C0, but for the spelling where the left count is
// (c64 - shift) with an immI_64 operand (name suggests the constant 64;
// operand class defined elsewhere).  Expands to rorL_rReg_CL.
8728 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8729 %{
8730   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8731 
8732   expand %{
8733     rorL_rReg_CL(dst, shift, cr);
8734   %}
8735 %}
8736 
8737 // Logical Instructions
8738 
8739 // Integer Logical Instructions
8740 
8741 // And Instructions
8742 // And Register with Register
// Int bitwise AND, register with register: matches dst = AndI(dst, src).
// Single-byte opcode 0x23 with a reg/reg operand pair.  Flags are killed.
8743 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8744 %{
8745   match(Set dst (AndI dst src));
8746   effect(KILL cr);
8747 
8748   format %{ "andl    $dst, $src\t# int" %}
8749   opcode(0x23);
8750   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8751   ins_pipe(ialu_reg_reg);
8752 %}
8753 
8754 // And Register with Immediate 255
// Special case of AndI with an immI_255 mask: instead of an and
// instruction it emits movzbl dst, dst (0F B6), as the format text shows.
// No flags operand or KILL effect is declared for this form.
8755 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8756 %{
8757   match(Set dst (AndI dst src));
8758 
8759   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8760   opcode(0x0F, 0xB6);
8761   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8762   ins_pipe(ialu_reg);
8763 %}
8764 
8765 // And Register with Immediate 255 and promote to long
// Matches ConvI2L(src & mask) with mask an immI_255 operand, so the mask and
// the int-to-long conversion are covered by one "movzbl $dst, $src".
// No flags operand or KILL effect is declared.
8766 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8767 %{
8768   match(Set dst (ConvI2L (AndI src mask)));
8769 
8770   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8771   opcode(0x0F, 0xB6);
8772   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8773   ins_pipe(ialu_reg);
8774 %}
8775 
8776 // And Register with Immediate 65535
// Matches dst & src when src is an immI_65535 operand and encodes it as
// "movzwl $dst, $dst" (opcode bytes 0x0F 0xB7) instead of an AND.
// No flags operand or KILL effect is declared for this form.
8777 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8778 %{
8779   match(Set dst (AndI dst src));
8780 
8781   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8782   opcode(0x0F, 0xB7);
8783   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8784   ins_pipe(ialu_reg);
8785 %}
8786 
8787 // And Register with Immediate 65535 and promote to long
// Matches ConvI2L(src & mask) with mask an immI_65535 operand, so the mask
// and the int-to-long conversion are covered by one "movzwl $dst, $src".
// No flags operand or KILL effect is declared.
8788 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8789 %{
8790   match(Set dst (ConvI2L (AndI src mask)));
8791 
8792   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8793   opcode(0x0F, 0xB7);
8794   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8795   ins_pipe(ialu_reg);
8796 %}
8797 
8798 // And Register with Immediate
// General immediate form: dst = dst & src for any immI operand. Encoded with
// the OpcSErm and Con8or32 encoding classes (defined elsewhere in this file)
// using primary opcode 0x81 and secondary 0x04. cr is declared killed.
8799 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8800 %{
8801   match(Set dst (AndI dst src));
8802   effect(KILL cr);
8803 
8804   format %{ "andl    $dst, $src\t# int" %}
8805   opcode(0x81, 0x04); /* Opcode 81 /4 */
8806   ins_encode(OpcSErm(dst, src), Con8or32(src));
8807   ins_pipe(ialu_reg);
8808 %}
8809 
8810 // And Register with Memory
// dst = dst & LoadI(src): the int load is folded into the AND as a memory
// operand. Cost is set to 125; cr is declared killed.
8811 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8812 %{
8813   match(Set dst (AndI dst (LoadI src)));
8814   effect(KILL cr);
8815 
8816   ins_cost(125);
8817   format %{ "andl    $dst, $src\t# int" %}
8818   opcode(0x23);
8819   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8820   ins_pipe(ialu_reg_mem);
8821 %}
8822 
8823 // And Memory with Register
// Read-modify-write form: matches StoreI dst ((LoadI dst) & src), where the
// load and the store name the same memory operand. Note the encoding passes
// (src, dst): the register is src and the memory operand is dst.
8824 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8825 %{
8826   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8827   effect(KILL cr);
8828 
8829   ins_cost(150);
8830   format %{ "andl    $dst, $src\t# int" %}
8831   opcode(0x21); /* Opcode 21 /r */
8832   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8833   ins_pipe(ialu_mem_reg);
8834 %}
8835 
8836 // And Memory with Immediate
// Read-modify-write form with an immI operand: matches
// StoreI dst ((LoadI dst) & src). The secondary opcode (0x4) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
8837 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
8838 %{
8839   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8840   effect(KILL cr);
8841 
8842   ins_cost(125);
8843   format %{ "andl    $dst, $src\t# int" %}
8844   opcode(0x81, 0x4); /* Opcode 81 /4 id */
8845   ins_encode(REX_mem(dst), OpcSE(src),
8846              RM_opc_mem(secondary, dst), Con8or32(src));
8847   ins_pipe(ialu_mem_imm);
8848 %}
8849 
8850 // Or Instructions
8851 // Or Register with Register
// dst = dst | src on int registers; printed as "orl" (primary opcode 0x0B).
// The flags register cr is declared killed.
8852 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8853 %{
8854   match(Set dst (OrI dst src));
8855   effect(KILL cr);
8856 
8857   format %{ "orl     $dst, $src\t# int" %}
8858   opcode(0x0B);
8859   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8860   ins_pipe(ialu_reg_reg);
8861 %}
8862 
8863 // Or Register with Immediate
// dst = dst | src for any immI operand. Encoded with OpcSErm and Con8or32
// using primary opcode 0x81 and secondary 0x01. cr is declared killed.
8864 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8865 %{
8866   match(Set dst (OrI dst src));
8867   effect(KILL cr);
8868 
8869   format %{ "orl     $dst, $src\t# int" %}
8870   opcode(0x81, 0x01); /* Opcode 81 /1 id */
8871   ins_encode(OpcSErm(dst, src), Con8or32(src));
8872   ins_pipe(ialu_reg);
8873 %}
8874 
8875 // Or Register with Memory
// dst = dst | LoadI(src): the int load is folded into the OR as a memory
// operand. Cost is set to 125; cr is declared killed.
8876 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8877 %{
8878   match(Set dst (OrI dst (LoadI src)));
8879   effect(KILL cr);
8880 
8881   ins_cost(125);
8882   format %{ "orl     $dst, $src\t# int" %}
8883   opcode(0x0B);
8884   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8885   ins_pipe(ialu_reg_mem);
8886 %}
8887 
8888 // Or Memory with Register
// Read-modify-write form: matches StoreI dst ((LoadI dst) | src), where the
// load and the store name the same memory operand. The encoding passes
// (src, dst): the register is src and the memory operand is dst.
8889 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8890 %{
8891   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8892   effect(KILL cr);
8893 
8894   ins_cost(150);
8895   format %{ "orl     $dst, $src\t# int" %}
8896   opcode(0x09); /* Opcode 09 /r */
8897   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8898   ins_pipe(ialu_mem_reg);
8899 %}
8900 
8901 // Or Memory with Immediate
// Read-modify-write form with an immI operand: matches
// StoreI dst ((LoadI dst) | src). The secondary opcode (0x1) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
8902 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
8903 %{
8904   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8905   effect(KILL cr);
8906 
8907   ins_cost(125);
8908   format %{ "orl     $dst, $src\t# int" %}
8909   opcode(0x81, 0x1); /* Opcode 81 /1 id */
8910   ins_encode(REX_mem(dst), OpcSE(src),
8911              RM_opc_mem(secondary, dst), Con8or32(src));
8912   ins_pipe(ialu_mem_imm);
8913 %}
8914 
8915 // Xor Instructions
8916 // Xor Register with Register
// dst = dst ^ src on int registers; printed as "xorl" (primary opcode 0x33).
// The flags register cr is declared killed.
8917 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8918 %{
8919   match(Set dst (XorI dst src));
8920   effect(KILL cr);
8921 
8922   format %{ "xorl    $dst, $src\t# int" %}
8923   opcode(0x33);
8924   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8925   ins_pipe(ialu_reg_reg);
8926 %}
8927 
8928 // Xor Register with Immediate -1
// Matches dst ^ imm when imm is an immI_M1 operand and emits a single notl on
// dst through the macro assembler. No flags operand or KILL effect is
// declared. The format mnemonic is "notl" so the printed text names the
// instruction actually emitted (the long variant prints "notq").
8929 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
8930   match(Set dst (XorI dst imm));
8931 
8932   format %{ "notl   $dst" %}
8933   ins_encode %{
8934      __ notl($dst$$Register);
8935   %}
8936   ins_pipe(ialu_reg);
8937 %}
8938 
8939 // Xor Register with Immediate
// dst = dst ^ src for any immI operand. Encoded with OpcSErm and Con8or32
// using primary opcode 0x81 and secondary 0x06. cr is declared killed.
8940 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8941 %{
8942   match(Set dst (XorI dst src));
8943   effect(KILL cr);
8944 
8945   format %{ "xorl    $dst, $src\t# int" %}
8946   opcode(0x81, 0x06); /* Opcode 81 /6 id */
8947   ins_encode(OpcSErm(dst, src), Con8or32(src));
8948   ins_pipe(ialu_reg);
8949 %}
8950 
8951 // Xor Register with Memory
// dst = dst ^ LoadI(src): the int load is folded into the XOR as a memory
// operand. Cost is set to 125; cr is declared killed.
8952 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8953 %{
8954   match(Set dst (XorI dst (LoadI src)));
8955   effect(KILL cr);
8956 
8957   ins_cost(125);
8958   format %{ "xorl    $dst, $src\t# int" %}
8959   opcode(0x33);
8960   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8961   ins_pipe(ialu_reg_mem);
8962 %}
8963 
8964 // Xor Memory with Register
// Read-modify-write form: matches StoreI dst ((LoadI dst) ^ src), where the
// load and the store name the same memory operand. The encoding passes
// (src, dst): the register is src and the memory operand is dst.
8965 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8966 %{
8967   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8968   effect(KILL cr);
8969 
8970   ins_cost(150);
8971   format %{ "xorl    $dst, $src\t# int" %}
8972   opcode(0x31); /* Opcode 31 /r */
8973   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8974   ins_pipe(ialu_mem_reg);
8975 %}
8976 
8977 // Xor Memory with Immediate
// Read-modify-write form with an immI operand: matches
// StoreI dst ((LoadI dst) ^ src). The secondary opcode (0x6) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
8978 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
8979 %{
8980   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8981   effect(KILL cr);
8982 
8983   ins_cost(125);
8984   format %{ "xorl    $dst, $src\t# int" %}
8985   opcode(0x81, 0x6); /* Opcode 81 /6 id */
8986   ins_encode(REX_mem(dst), OpcSE(src),
8987              RM_opc_mem(secondary, dst), Con8or32(src));
8988   ins_pipe(ialu_mem_imm);
8989 %}
8990 
8991 
8992 // Long Logical Instructions
8993 
8994 // And Instructions
8995 // And Register with Register
// Long form: dst = dst & src on long registers; printed as "andq". Same
// primary opcode (0x23) as the int rule, but with the REX_reg_reg_wide
// prefix encoding. cr is declared killed.
8996 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8997 %{
8998   match(Set dst (AndL dst src));
8999   effect(KILL cr);
9000 
9001   format %{ "andq    $dst, $src\t# long" %}
9002   opcode(0x23);
9003   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9004   ins_pipe(ialu_reg_reg);
9005 %}
9006 
9007 // And Register with Immediate 255
// Long form: matches dst & src when src is an immL_255 operand and encodes it
// as "movzbq $dst, $dst" (opcode bytes 0x0F 0xB6 with the wide REX prefix).
// No flags operand or KILL effect is declared.
9008 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9009 %{
9010   match(Set dst (AndL dst src));
9011 
9012   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9013   opcode(0x0F, 0xB6);
9014   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9015   ins_pipe(ialu_reg);
9016 %}
9017 
9018 // And Register with Immediate 65535
// Long form: matches dst & src when src is an immL_65535 operand and encodes
// it as "movzwq $dst, $dst" (opcode bytes 0x0F 0xB7 with the wide REX prefix).
// No flags operand or KILL effect is declared.
9019 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9020 %{
9021   match(Set dst (AndL dst src));
9022 
9023   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9024   opcode(0x0F, 0xB7);
9025   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9026   ins_pipe(ialu_reg);
9027 %}
9028 
9029 // And Register with Immediate
// Long form restricted to immL32 operands: dst = dst & src. Encoded with
// OpcSErm_wide and Con8or32 using primary opcode 0x81 and secondary 0x04.
// cr is declared killed.
9030 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9031 %{
9032   match(Set dst (AndL dst src));
9033   effect(KILL cr);
9034 
9035   format %{ "andq    $dst, $src\t# long" %}
9036   opcode(0x81, 0x04); /* Opcode 81 /4 */
9037   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9038   ins_pipe(ialu_reg);
9039 %}
9040 
9041 // And Register with Memory
// dst = dst & LoadL(src): the long load is folded into the AND as a memory
// operand. Cost is set to 125; cr is declared killed.
9042 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9043 %{
9044   match(Set dst (AndL dst (LoadL src)));
9045   effect(KILL cr);
9046 
9047   ins_cost(125);
9048   format %{ "andq    $dst, $src\t# long" %}
9049   opcode(0x23);
9050   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9051   ins_pipe(ialu_reg_mem);
9052 %}
9053 
9054 // And Memory with Register
// Read-modify-write form: matches StoreL dst ((LoadL dst) & src), where the
// load and the store name the same memory operand. The encoding passes
// (src, dst): the register is src and the memory operand is dst.
9055 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9056 %{
9057   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9058   effect(KILL cr);
9059 
9060   ins_cost(150);
9061   format %{ "andq    $dst, $src\t# long" %}
9062   opcode(0x21); /* Opcode 21 /r */
9063   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9064   ins_pipe(ialu_mem_reg);
9065 %}
9066 
9067 // And Memory with Immediate
// Read-modify-write form with an immL32 operand: matches
// StoreL dst ((LoadL dst) & src). The secondary opcode (0x4) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
9068 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9069 %{
9070   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9071   effect(KILL cr);
9072 
9073   ins_cost(125);
9074   format %{ "andq    $dst, $src\t# long" %}
9075   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9076   ins_encode(REX_mem_wide(dst), OpcSE(src),
9077              RM_opc_mem(secondary, dst), Con8or32(src));
9078   ins_pipe(ialu_mem_imm);
9079 %}
9080 
9081 // Or Instructions
9082 // Or Register with Register
// Long form: dst = dst | src on long registers; printed as "orq" (primary
// opcode 0x0B with the wide REX prefix encoding). cr is declared killed.
9083 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9084 %{
9085   match(Set dst (OrL dst src));
9086   effect(KILL cr);
9087 
9088   format %{ "orq     $dst, $src\t# long" %}
9089   opcode(0x0B);
9090   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9091   ins_pipe(ialu_reg_reg);
9092 %}
9093 
9094 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches dst = dst | CastP2X(src): the pointer-class source register is ORed
// directly into the long dst with the same "orq" encoding as orL_rReg, so no
// separate instruction is emitted for the cast. cr is declared killed.
9095 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9096   match(Set dst (OrL dst (CastP2X src)));
9097   effect(KILL cr);
9098 
9099   format %{ "orq     $dst, $src\t# long" %}
9100   opcode(0x0B);
9101   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9102   ins_pipe(ialu_reg_reg);
9103 %}
9104 
9105 
9106 // Or Register with Immediate
// Long form restricted to immL32 operands: dst = dst | src. Encoded with
// OpcSErm_wide and Con8or32 using primary opcode 0x81 and secondary 0x01.
// cr is declared killed.
9107 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9108 %{
9109   match(Set dst (OrL dst src));
9110   effect(KILL cr);
9111 
9112   format %{ "orq     $dst, $src\t# long" %}
9113   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9114   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9115   ins_pipe(ialu_reg);
9116 %}
9117 
9118 // Or Register with Memory
// dst = dst | LoadL(src): the long load is folded into the OR as a memory
// operand. Cost is set to 125; cr is declared killed.
9119 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9120 %{
9121   match(Set dst (OrL dst (LoadL src)));
9122   effect(KILL cr);
9123 
9124   ins_cost(125);
9125   format %{ "orq     $dst, $src\t# long" %}
9126   opcode(0x0B);
9127   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9128   ins_pipe(ialu_reg_mem);
9129 %}
9130 
9131 // Or Memory with Register
// Read-modify-write form: matches StoreL dst ((LoadL dst) | src), where the
// load and the store name the same memory operand. The encoding passes
// (src, dst): the register is src and the memory operand is dst.
9132 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9133 %{
9134   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9135   effect(KILL cr);
9136 
9137   ins_cost(150);
9138   format %{ "orq     $dst, $src\t# long" %}
9139   opcode(0x09); /* Opcode 09 /r */
9140   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9141   ins_pipe(ialu_mem_reg);
9142 %}
9143 
9144 // Or Memory with Immediate
// Read-modify-write form with an immL32 operand: matches
// StoreL dst ((LoadL dst) | src). The secondary opcode (0x1) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
9145 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9146 %{
9147   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9148   effect(KILL cr);
9149 
9150   ins_cost(125);
9151   format %{ "orq     $dst, $src\t# long" %}
9152   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9153   ins_encode(REX_mem_wide(dst), OpcSE(src),
9154              RM_opc_mem(secondary, dst), Con8or32(src));
9155   ins_pipe(ialu_mem_imm);
9156 %}
9157 
9158 // Xor Instructions
9159 // Xor Register with Register
// Long form: dst = dst ^ src on long registers; printed as "xorq" (primary
// opcode 0x33 with the wide REX prefix encoding). cr is declared killed.
9160 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9161 %{
9162   match(Set dst (XorL dst src));
9163   effect(KILL cr);
9164 
9165   format %{ "xorq    $dst, $src\t# long" %}
9166   opcode(0x33);
9167   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9168   ins_pipe(ialu_reg_reg);
9169 %}
9170 
9171 // Xor Register with Immediate -1
// Matches dst ^ imm when imm is an immL_M1 operand and emits a single notq on
// dst through the macro assembler. No flags operand or KILL effect is
// declared.
9172 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9173   match(Set dst (XorL dst imm));
9174 
9175   format %{ "notq   $dst" %}
9176   ins_encode %{
9177      __ notq($dst$$Register);
9178   %}
9179   ins_pipe(ialu_reg);
9180 %}
9181 
9182 // Xor Register with Immediate
// Long form restricted to immL32 operands: dst = dst ^ src. Encoded with
// OpcSErm_wide and Con8or32 using primary opcode 0x81 and secondary 0x06.
// cr is declared killed.
9183 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9184 %{
9185   match(Set dst (XorL dst src));
9186   effect(KILL cr);
9187 
9188   format %{ "xorq    $dst, $src\t# long" %}
9189   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9190   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9191   ins_pipe(ialu_reg);
9192 %}
9193 
9194 // Xor Register with Memory
// dst = dst ^ LoadL(src): the long load is folded into the XOR as a memory
// operand. Cost is set to 125; cr is declared killed.
9195 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9196 %{
9197   match(Set dst (XorL dst (LoadL src)));
9198   effect(KILL cr);
9199 
9200   ins_cost(125);
9201   format %{ "xorq    $dst, $src\t# long" %}
9202   opcode(0x33);
9203   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9204   ins_pipe(ialu_reg_mem);
9205 %}
9206 
9207 // Xor Memory with Register
// Read-modify-write form: matches StoreL dst ((LoadL dst) ^ src), where the
// load and the store name the same memory operand. The encoding passes
// (src, dst): the register is src and the memory operand is dst.
9208 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9209 %{
9210   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9211   effect(KILL cr);
9212 
9213   ins_cost(150);
9214   format %{ "xorq    $dst, $src\t# long" %}
9215   opcode(0x31); /* Opcode 31 /r */
9216   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9217   ins_pipe(ialu_mem_reg);
9218 %}
9219 
9220 // Xor Memory with Immediate
// Read-modify-write form with an immL32 operand: matches
// StoreL dst ((LoadL dst) ^ src). The secondary opcode (0x6) is placed in the
// ModRM byte via RM_opc_mem(secondary, dst). cr is declared killed.
9221 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9222 %{
9223   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9224   effect(KILL cr);
9225 
9226   ins_cost(125);
9227   format %{ "xorq    $dst, $src\t# long" %}
9228   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9229   ins_encode(REX_mem_wide(dst), OpcSE(src),
9230              RM_opc_mem(secondary, dst), Con8or32(src));
9231   ins_pipe(ialu_mem_imm);
9232 %}
9233 
9234 // Convert Int to Boolean
// Conv2B on an int register. Three-instruction sequence, in encoding order:
// testl src,src (opcode 0x85); setnz dst; movzbl dst,dst (0x0F 0xB6) to
// zero-extend the byte result. cr is declared killed.
9235 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9236 %{
9237   match(Set dst (Conv2B src));
9238   effect(KILL cr);
9239 
9240   format %{ "testl   $src, $src\t# ci2b\n\t"
9241             "setnz   $dst\n\t"
9242             "movzbl  $dst, $dst" %}
9243   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9244              setNZ_reg(dst),
9245              REX_reg_breg(dst, dst), // movzbl
9246              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9247   ins_pipe(pipe_slow); // XXX
9248 %}
9249 
9250 // Convert Pointer to Boolean
// Conv2B on a pointer register. Same sequence as convI2B except that the test
// uses the wide REX prefix (testq src,src); then setnz dst and
// movzbl dst,dst. cr is declared killed.
9251 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9252 %{
9253   match(Set dst (Conv2B src));
9254   effect(KILL cr);
9255 
9256   format %{ "testq   $src, $src\t# cp2b\n\t"
9257             "setnz   $dst\n\t"
9258             "movzbl  $dst, $dst" %}
9259   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9260              setNZ_reg(dst),
9261              REX_reg_breg(dst, dst), // movzbl
9262              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9263   ins_pipe(pipe_slow); // XXX
9264 %}
9265 
// General CmpLTMask p q into dst. Sequence, in encoding order: cmpl p,q
// (opcode 0x3B); setlt dst; movzbl dst,dst; negl dst -- i.e. the zero-extended
// setlt result is negated. Cost is set to 400; cr is declared killed.
9266 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9267 %{
9268   match(Set dst (CmpLTMask p q));
9269   effect(KILL cr);
9270 
9271   ins_cost(400);
9272   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9273             "setlt   $dst\n\t"
9274             "movzbl  $dst, $dst\n\t"
9275             "negl    $dst" %}
9276   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9277              setLT_reg(dst),
9278              REX_reg_breg(dst, dst), // movzbl
9279              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9280              neg_reg(dst));
9281   ins_pipe(pipe_slow);
9282 %}
9283 
// CmpLTMask of dst against an immI0 operand, done in place with a single
// arithmetic right shift of dst by 31. Cost is set to 100, well below the
// general cmpLTMask rule (400); cr is declared killed.
9284 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9285 %{
9286   match(Set dst (CmpLTMask dst zero));
9287   effect(KILL cr);
9288 
9289   ins_cost(100);
9290   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9291   ins_encode %{
9292   __ sarl($dst$$Register, 31);
9293   %}
9294   ins_pipe(ialu_reg);
9295 %}
9296 
9297 /* Better to save a register than avoid a branch */
// Matches p = ((CmpLTMask p q) & y) + (p - q) and emits a short branch instead
// of materializing the mask: subl p,q; skip the add when the subtract's flags
// say greaterEqual; otherwise addl p,y. Cost is set to 300; cr is declared
// killed.
9298 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9299 %{
9300   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9301   effect(KILL cr);
9302   ins_cost(300);
9303   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
9304             "jge    done\n\t"
9305             "addl   $p,$y\n"
9306             "done:  " %}
9307   ins_encode %{
9308     Register Rp = $p$$Register;
9309     Register Rq = $q$$Register;
9310     Register Ry = $y$$Register;
9311     Label done;
9312     __ subl(Rp, Rq);
9313     __ jccb(Assembler::greaterEqual, done);
9314     __ addl(Rp, Ry);
9315     __ bind(done);
9316   %}
9317   ins_pipe(pipe_cmplt);
9318 %}
9319 
9320 /* Better to save a register than avoid a branch */
// Matches y = (CmpLTMask p q) & y and emits a short branch instead of
// materializing the mask: cmpl p,q; when the compare says "less" y is left
// unchanged, otherwise y is cleared with xorl y,y. Cost is set to 300; cr is
// declared killed.
9321 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9322 %{
9323   match(Set y (AndI (CmpLTMask p q) y));
9324   effect(KILL cr);
9325 
9326   ins_cost(300);
9327 
9328   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
9329             "jlt      done\n\t"
9330             "xorl     $y, $y\n"
9331             "done:  " %}
9332   ins_encode %{
9333     Register Rp = $p$$Register;
9334     Register Rq = $q$$Register;
9335     Register Ry = $y$$Register;
9336     Label done;
9337     __ cmpl(Rp, Rq);
9338     __ jccb(Assembler::less, done);
9339     __ xorl(Ry, Ry);
9340     __ bind(done);
9341   %}
9342   ins_pipe(pipe_cmplt);
9343 %}
9344 
9345 
9346 //---------- FP Instructions------------------------------------------------
9347 
// Float compare of two registers into an rFlagsRegU result. Emits ucomiss
// followed by emit_cmpfp_fixup(_masm) (defined elsewhere in this file); the
// format text shows the fixup as a flags rewrite that is skipped via
// "jnp,s exit" unless the compare saw a NaN. Cost is set to 145.
9348 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9349 %{
9350   match(Set cr (CmpF src1 src2));
9351 
9352   ins_cost(145);
9353   format %{ "ucomiss $src1, $src2\n\t"
9354             "jnp,s   exit\n\t"
9355             "pushfq\t# saw NaN, set CF\n\t"
9356             "andq    [rsp], #0xffffff2b\n\t"
9357             "popfq\n"
9358     "exit:" %}
9359   ins_encode %{
9360     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9361     emit_cmpfp_fixup(_masm);
9362   %}
9363   ins_pipe(pipe_slow);
9364 %}
9365 
// Same float register compare, but producing an rFlagsRegUCF result: only
// the bare ucomiss is emitted, with no fixup sequence. Cost is set to 100,
// lower than the rFlagsRegU variant (145).
9366 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9367   match(Set cr (CmpF src1 src2));
9368 
9369   ins_cost(100);
9370   format %{ "ucomiss $src1, $src2" %}
9371   ins_encode %{
9372     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9373   %}
9374   ins_pipe(pipe_slow);
9375 %}
9376 
// Float compare of a register against LoadF(src2) into an rFlagsRegU result;
// the load is folded into ucomiss as a memory operand. Followed by
// emit_cmpfp_fixup(_masm), as in the register variant. Cost is set to 145.
9377 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9378 %{
9379   match(Set cr (CmpF src1 (LoadF src2)));
9380 
9381   ins_cost(145);
9382   format %{ "ucomiss $src1, $src2\n\t"
9383             "jnp,s   exit\n\t"
9384             "pushfq\t# saw NaN, set CF\n\t"
9385             "andq    [rsp], #0xffffff2b\n\t"
9386             "popfq\n"
9387     "exit:" %}
9388   ins_encode %{
9389     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9390     emit_cmpfp_fixup(_masm);
9391   %}
9392   ins_pipe(pipe_slow);
9393 %}
9394 
// Float register-vs-memory compare producing an rFlagsRegUCF result: only the
// bare ucomiss with a memory operand is emitted, no fixup. Cost is set to 100.
9395 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9396   match(Set cr (CmpF src1 (LoadF src2)));
9397 
9398   ins_cost(100);
9399   format %{ "ucomiss $src1, $src2" %}
9400   ins_encode %{
9401     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9402   %}
9403   ins_pipe(pipe_slow);
9404 %}
9405 
// Float compare of a register against an immF constant into an rFlagsRegU
// result. The constant is read through $constantaddress($con) (the format
// describes it as a constant-table load), then emit_cmpfp_fixup(_masm) is
// emitted. Cost is set to 145.
9406 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
9407   match(Set cr (CmpF src con));
9408 
9409   ins_cost(145);
9410   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9411             "jnp,s   exit\n\t"
9412             "pushfq\t# saw NaN, set CF\n\t"
9413             "andq    [rsp], #0xffffff2b\n\t"
9414             "popfq\n"
9415     "exit:" %}
9416   ins_encode %{
9417     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9418     emit_cmpfp_fixup(_masm);
9419   %}
9420   ins_pipe(pipe_slow);
9421 %}
9422 
// Float register-vs-constant compare producing an rFlagsRegUCF result: only
// the bare ucomiss against $constantaddress($con) is emitted, no fixup.
// Cost is set to 100.
9423 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
9424   match(Set cr (CmpF src con));
9425   ins_cost(100);
9426   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9427   ins_encode %{
9428     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9429   %}
9430   ins_pipe(pipe_slow);
9431 %}
9432 
// Double compare of two registers into an rFlagsRegU result. Emits ucomisd
// followed by emit_cmpfp_fixup(_masm) (defined elsewhere in this file); the
// format text shows the fixup as a flags rewrite that is skipped via
// "jnp,s exit" unless the compare saw a NaN. Cost is set to 145.
9433 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9434 %{
9435   match(Set cr (CmpD src1 src2));
9436 
9437   ins_cost(145);
9438   format %{ "ucomisd $src1, $src2\n\t"
9439             "jnp,s   exit\n\t"
9440             "pushfq\t# saw NaN, set CF\n\t"
9441             "andq    [rsp], #0xffffff2b\n\t"
9442             "popfq\n"
9443     "exit:" %}
9444   ins_encode %{
9445     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9446     emit_cmpfp_fixup(_masm);
9447   %}
9448   ins_pipe(pipe_slow);
9449 %}
9450 
// Double compare of two registers producing an rFlagsRegUCF result: only the
// bare ucomisd is emitted, with no fixup sequence. Cost is set to 100, lower
// than the rFlagsRegU variant (145). The format string prints exactly the
// emitted instruction, matching the other *_CF compare rules.
9451 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
9452   match(Set cr (CmpD src1 src2));
9453 
9454   ins_cost(100);
9455   format %{ "ucomisd $src1, $src2" %}
9456   ins_encode %{
9457     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9458   %}
9459   ins_pipe(pipe_slow);
9460 %}
9461 
// Double compare of a register against LoadD(src2) into an rFlagsRegU result;
// the load is folded into ucomisd as a memory operand. Followed by
// emit_cmpfp_fixup(_masm), as in the register variant. Cost is set to 145.
9462 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9463 %{
9464   match(Set cr (CmpD src1 (LoadD src2)));
9465 
9466   ins_cost(145);
9467   format %{ "ucomisd $src1, $src2\n\t"
9468             "jnp,s   exit\n\t"
9469             "pushfq\t# saw NaN, set CF\n\t"
9470             "andq    [rsp], #0xffffff2b\n\t"
9471             "popfq\n"
9472     "exit:" %}
9473   ins_encode %{
9474     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9475     emit_cmpfp_fixup(_masm);
9476   %}
9477   ins_pipe(pipe_slow);
9478 %}
9479 
// Double register-vs-memory compare producing an rFlagsRegUCF result: only
// the bare ucomisd with a memory operand is emitted, no fixup. Cost is set
// to 100.
9480 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
9481   match(Set cr (CmpD src1 (LoadD src2)));
9482 
9483   ins_cost(100);
9484   format %{ "ucomisd $src1, $src2" %}
9485   ins_encode %{
9486     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9487   %}
9488   ins_pipe(pipe_slow);
9489 %}
9490 
// Double compare of a register against an immD constant into an rFlagsRegU
// result. The constant is read through $constantaddress($con) (the format
// describes it as a constant-table load), then emit_cmpfp_fixup(_masm) is
// emitted. Cost is set to 145.
9491 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
9492   match(Set cr (CmpD src con));
9493 
9494   ins_cost(145);
9495   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9496             "jnp,s   exit\n\t"
9497             "pushfq\t# saw NaN, set CF\n\t"
9498             "andq    [rsp], #0xffffff2b\n\t"
9499             "popfq\n"
9500     "exit:" %}
9501   ins_encode %{
9502     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9503     emit_cmpfp_fixup(_masm);
9504   %}
9505   ins_pipe(pipe_slow);
9506 %}
9507 
// Double register-vs-constant compare producing an rFlagsRegUCF result: only
// the bare ucomisd against $constantaddress($con) is emitted, no fixup.
// Cost is set to 100.
9508 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
9509   match(Set cr (CmpD src con));
9510   ins_cost(100);
9511   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
9512   ins_encode %{
9513     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9514   %}
9515   ins_pipe(pipe_slow);
9516 %}
9517 
9518 // Compare into -1,0,1
// CmpF3 of two float registers into an int register. Emits ucomiss, then
// emit_cmpfp3(_masm, dst) (defined elsewhere in this file); the format text
// lists the sequence that helper is expected to produce. Cost is set to 275;
// cr is declared killed.
9519 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9520 %{
9521   match(Set dst (CmpF3 src1 src2));
9522   effect(KILL cr);
9523 
9524   ins_cost(275);
9525   format %{ "ucomiss $src1, $src2\n\t"
9526             "movl    $dst, #-1\n\t"
9527             "jp,s    done\n\t"
9528             "jb,s    done\n\t"
9529             "setne   $dst\n\t"
9530             "movzbl  $dst, $dst\n"
9531     "done:" %}
9532   ins_encode %{
9533     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9534     emit_cmpfp3(_masm, $dst$$Register);
9535   %}
9536   ins_pipe(pipe_slow);
9537 %}
9538 
9539 // Compare into -1,0,1
// CmpF3 of a float register against LoadF(src2); the load is folded into
// ucomiss as a memory operand, then emit_cmpfp3(_masm, dst) produces the int
// result. Cost is set to 275; cr is declared killed.
9540 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9541 %{
9542   match(Set dst (CmpF3 src1 (LoadF src2)));
9543   effect(KILL cr);
9544 
9545   ins_cost(275);
9546   format %{ "ucomiss $src1, $src2\n\t"
9547             "movl    $dst, #-1\n\t"
9548             "jp,s    done\n\t"
9549             "jb,s    done\n\t"
9550             "setne   $dst\n\t"
9551             "movzbl  $dst, $dst\n"
9552     "done:" %}
9553   ins_encode %{
9554     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9555     emit_cmpfp3(_masm, $dst$$Register);
9556   %}
9557   ins_pipe(pipe_slow);
9558 %}
9559 
9560 // Compare into -1,0,1
// CmpF3 of a float register against an immF constant read through
// $constantaddress($con); then emit_cmpfp3(_masm, dst) produces the int
// result. Cost is set to 275; cr is declared killed.
9561 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
9562   match(Set dst (CmpF3 src con));
9563   effect(KILL cr);
9564 
9565   ins_cost(275);
9566   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9567             "movl    $dst, #-1\n\t"
9568             "jp,s    done\n\t"
9569             "jb,s    done\n\t"
9570             "setne   $dst\n\t"
9571             "movzbl  $dst, $dst\n"
9572     "done:" %}
9573   ins_encode %{
9574     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9575     emit_cmpfp3(_masm, $dst$$Register);
9576   %}
9577   ins_pipe(pipe_slow);
9578 %}
9579 
9580 // Compare into -1,0,1
// CmpD3 of two double registers into an int register. Emits ucomisd, then
// emit_cmpfp3(_masm, dst) (defined elsewhere in this file); the format text
// lists the sequence that helper is expected to produce. Cost is set to 275;
// cr is declared killed.
9581 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9582 %{
9583   match(Set dst (CmpD3 src1 src2));
9584   effect(KILL cr);
9585 
9586   ins_cost(275);
9587   format %{ "ucomisd $src1, $src2\n\t"
9588             "movl    $dst, #-1\n\t"
9589             "jp,s    done\n\t"
9590             "jb,s    done\n\t"
9591             "setne   $dst\n\t"
9592             "movzbl  $dst, $dst\n"
9593     "done:" %}
9594   ins_encode %{
9595     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9596     emit_cmpfp3(_masm, $dst$$Register);
9597   %}
9598   ins_pipe(pipe_slow);
9599 %}
9600 
9601 // Compare into -1,0,1
// CmpD3 of a double register against LoadD(src2); the load is folded into
// ucomisd as a memory operand, then emit_cmpfp3(_masm, dst) produces the int
// result. Cost is set to 275; cr is declared killed.
9602 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9603 %{
9604   match(Set dst (CmpD3 src1 (LoadD src2)));
9605   effect(KILL cr);
9606 
9607   ins_cost(275);
9608   format %{ "ucomisd $src1, $src2\n\t"
9609             "movl    $dst, #-1\n\t"
9610             "jp,s    done\n\t"
9611             "jb,s    done\n\t"
9612             "setne   $dst\n\t"
9613             "movzbl  $dst, $dst\n"
9614     "done:" %}
9615   ins_encode %{
9616     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9617     emit_cmpfp3(_masm, $dst$$Register);
9618   %}
9619   ins_pipe(pipe_slow);
9620 %}
9621 
9622 // Compare into -1,0,1
// CmpD3 of a double register against an immD constant read through
// $constantaddress($con); then emit_cmpfp3(_masm, dst) produces the int
// result. Cost is set to 275; cr is declared killed.
9623 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
9624   match(Set dst (CmpD3 src con));
9625   effect(KILL cr);
9626 
9627   ins_cost(275);
9628   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9629             "movl    $dst, #-1\n\t"
9630             "jp,s    done\n\t"
9631             "jb,s    done\n\t"
9632             "setne   $dst\n\t"
9633             "movzbl  $dst, $dst\n"
9634     "done:" %}
9635   ins_encode %{
9636     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9637     emit_cmpfp3(_masm, $dst$$Register);
9638   %}
9639   ins_pipe(pipe_slow);
9640 %}
9641 
9642 // -----------Trig and Transcendental Instructions-----------------------------
// CosD computed in place on dst. Encoding order: Push_SrcXD(dst), the two
// opcode bytes 0xD9 0xFF, then Push_ResultXD(dst).
// NOTE(review): Push_SrcXD / Push_ResultXD are encoding classes defined
// elsewhere; presumably they move the value between the XMM register and the
// x87 stack -- confirm against their definitions.
9643 instruct cosD_reg(regD dst) %{
9644   match(Set dst (CosD dst));
9645 
9646   format %{ "dcos   $dst\n\t" %}
9647   opcode(0xD9, 0xFF);
9648   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9649   ins_pipe( pipe_slow );
9650 %}
9651 
// SinD computed in place on dst. Encoding order: Push_SrcXD(dst), the two
// opcode bytes 0xD9 0xFE, then Push_ResultXD(dst) -- the same shape as
// cosD_reg with a different second opcode byte.
9652 instruct sinD_reg(regD dst) %{
9653   match(Set dst (SinD dst));
9654 
9655   format %{ "dsin   $dst\n\t" %}
9656   opcode(0xD9, 0xFE);
9657   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9658   ins_pipe( pipe_slow );
9659 %}
9660 
// TanD computed in place on dst. Encoding order: Push_SrcXD(dst), fptan
// (0xD9 0xF2), an "fstp st" (0xDD 0xD8) emitted right after fptan, then
// Push_ResultXD(dst).
// NOTE(review): the fstp presumably discards the extra value fptan pushes --
// confirm against the x87 instruction reference.
9661 instruct tanD_reg(regD dst) %{
9662   match(Set dst (TanD dst));
9663 
9664   format %{ "dtan   $dst\n\t" %}
9665   ins_encode( Push_SrcXD(dst),
9666               Opcode(0xD9), Opcode(0xF2),   //fptan
9667               Opcode(0xDD), Opcode(0xD8),   //fstp st
9668               Push_ResultXD(dst) );
9669   ins_pipe( pipe_slow );
9670 %}
9671 
// Log10D computed in place on dst. Note the encoding order: the constant is
// pushed first (fldlg2), then the source via Push_SrcXD(dst), then fyl2x, and
// finally Push_ResultXD(dst) writes the result back to dst.
9672 instruct log10D_reg(regD dst) %{
9673   // The source and result Double operands in XMM registers
9674   match(Set dst (Log10D dst));
9675   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9676   // fyl2x        ; compute log_10(2) * log_2(x)
9677   format %{ "fldlg2\t\t\t#Log10\n\t"
9678             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
9679          %}
9680    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
9681               Push_SrcXD(dst),
9682               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9683               Push_ResultXD(dst));
9684 
9685   ins_pipe( pipe_slow );
9686 %}
9687 
// LogD (natural log) computed in place on dst. Same shape as log10D_reg but
// with fldln2 as the constant: fldln2, Push_SrcXD(dst), fyl2x, then
// Push_ResultXD(dst).
9688 instruct logD_reg(regD dst) %{
9689   // The source and result Double operands in XMM registers
9690   match(Set dst (LogD dst));
9691   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9692   // fyl2x        ; compute log_e(2) * log_2(x)
9693   format %{ "fldln2\t\t\t#Log_e\n\t"
9694             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
9695          %}
9696   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9697               Push_SrcXD(dst),
9698               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9699               Push_ResultXD(dst));
9700   ins_pipe( pipe_slow );
9701 %}
9702 
// PowD src0 src1 into dst via the macro assembler's fast_pow(). An 8-byte
// stack slot is reserved and used to transfer values: src1 is stored and
// loaded with fld_d first, then src0, then fast_pow() runs, and the result is
// stored with fstp_d and reloaded into dst before the slot is released.
// rax, rdx, rcx and cr are declared killed.
9703 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9704   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9705   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9706   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9707   ins_encode %{
9708     __ subptr(rsp, 8);
9709     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9710     __ fld_d(Address(rsp, 0));
9711     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9712     __ fld_d(Address(rsp, 0));
9713     __ fast_pow();
9714     __ fstp_d(Address(rsp, 0));
9715     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9716     __ addptr(rsp, 8);
9717   %}
9718   ins_pipe( pipe_slow );
9719 %}
9720 
// ExpD src into dst via the macro assembler's fast_exp(). An 8-byte stack
// slot is reserved and used to transfer values: src is stored and loaded with
// fld_d, fast_exp() runs, and the result is stored with fstp_d and reloaded
// into dst before the slot is released. rax, rcx, rdx and cr are declared
// killed. The format prints "$src -> $dst" so the arrow points from input to
// result, as in powD_reg.
9721 instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
9722   match(Set dst (ExpD src));
9723   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9724   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
9725   ins_encode %{
9726     __ subptr(rsp, 8);
9727     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9728     __ fld_d(Address(rsp, 0));
9729     __ fast_exp();
9730     __ fstp_d(Address(rsp, 0));
9731     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9732     __ addptr(rsp, 8);
9733   %}
9734   ins_pipe( pipe_slow );
9735 %}
9736 
9737 //----------Arithmetic Conversion Instructions---------------------------------
9738 
// RoundFloat on a float register is matched in place with an empty encoding,
// zero cost and the "empty" pipeline class, so this rule emits no code.
9739 instruct roundFloat_nop(regF dst)
9740 %{
9741   match(Set dst (RoundFloat dst));
9742 
9743   ins_cost(0);
9744   ins_encode();
9745   ins_pipe(empty);
9746 %}
9747 
// RoundDouble on a double register is matched in place with an empty
// encoding, zero cost and the "empty" pipeline class, so this rule emits no
// code.
9748 instruct roundDouble_nop(regD dst)
9749 %{
9750   match(Set dst (RoundDouble dst));
9751 
9752   ins_cost(0);
9753   ins_encode();
9754   ins_pipe(empty);
9755 %}
9756 
// ConvF2D from a float register to a double register: a single cvtss2sd
// emitted through the macro assembler. No flags operand is declared.
9757 instruct convF2D_reg_reg(regD dst, regF src)
9758 %{
9759   match(Set dst (ConvF2D src));
9760 
9761   format %{ "cvtss2sd $dst, $src" %}
9762   ins_encode %{
9763     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
9764   %}
9765   ins_pipe(pipe_slow); // XXX
9766 %}
9767 
// ConvF2D of LoadF(src): the float load is folded into cvtss2sd as a memory
// operand. No flags operand is declared.
9768 instruct convF2D_reg_mem(regD dst, memory src)
9769 %{
9770   match(Set dst (ConvF2D (LoadF src)));
9771 
9772   format %{ "cvtss2sd $dst, $src" %}
9773   ins_encode %{
9774     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
9775   %}
9776   ins_pipe(pipe_slow); // XXX
9777 %}
9778 
// ConvD2F, register source: converts the double in $src to a float in $dst
// with a single cvtsd2ss.
9779 instruct convD2F_reg_reg(regF dst, regD src)
9780 %{
9781   match(Set dst (ConvD2F src));
9782 
9783   format %{ "cvtsd2ss $dst, $src" %}
9784   ins_encode %{
9785     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
9786   %}
9787   ins_pipe(pipe_slow); // XXX
9788 %}
9789 
// ConvD2F with the LoadD folded in: cvtsd2ss reads the double directly from
// the memory operand instead of requiring a separate load.
9790 instruct convD2F_reg_mem(regF dst, memory src)
9791 %{
9792   match(Set dst (ConvD2F (LoadD src)));
9793 
9794   format %{ "cvtsd2ss $dst, $src" %}
9795   ins_encode %{
9796     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
9797   %}
9798   ins_pipe(pipe_slow); // XXX
9799 %}
9800 
9801 // XXX do mem variants
// ConvF2I: truncating float -> int conversion with an out-of-line fixup.
// cvttss2sil yields 0x80000000 when the input is NaN or out of int range
// (the x86 "integer indefinite" value). Only when $dst equals that value is
// the slow path taken: $src is stored to a fresh 8-byte stack slot, the
// f2i_fixup stub is called, and the slot is popped into $dst.
// NOTE(review): the stub is defined elsewhere; presumably it replaces the
// slot contents with the Java-specified result -- confirm against the stub.
9802 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
9803 %{
9804   match(Set dst (ConvF2I src));
9805   effect(KILL cr);
9806 
9807   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
9808             "cmpl    $dst, #0x80000000\n\t"
9809             "jne,s   done\n\t"
9810             "subq    rsp, #8\n\t"
9811             "movss   [rsp], $src\n\t"
9812             "call    f2i_fixup\n\t"
9813             "popq    $dst\n"
9814     "done:   "%}
9815   ins_encode %{
9816     Label done;
9817     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
9818     __ cmpl($dst$$Register, 0x80000000);       // sentinel produced for unrepresentable inputs
9819     __ jccb(Assembler::notEqual, done);        // common case: result is already final
9820     __ subptr(rsp, 8);                         // slot used both for the argument and the result
9821     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9822     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
9823     __ pop($dst$$Register);                    // reads the slot and restores rsp
9824     __ bind(done);
9825   %}
9826   ins_pipe(pipe_slow);
9827 %}
9828 
// ConvF2L: truncating float -> long conversion with an out-of-line fixup.
// After cvttss2siq, $dst is compared with the 64-bit value stored at
// StubRoutines::x86::double_sign_flip() (shown as 0x8000000000000000 in the
// format). Only on equality is the slow path taken: $src is stored to a
// fresh 8-byte stack slot, the f2l_fixup stub is called, and the slot is
// popped into $dst.
// NOTE(review): the stub is defined elsewhere; presumably it replaces the
// slot contents with the Java-specified result -- confirm against the stub.
9829 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
9830 %{
9831   match(Set dst (ConvF2L src));
9832   effect(KILL cr);
9833 
9834   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
9835             "cmpq    $dst, [0x8000000000000000]\n\t"
9836             "jne,s   done\n\t"
9837             "subq    rsp, #8\n\t"
9838             "movss   [rsp], $src\n\t"
9839             "call    f2l_fixup\n\t"
9840             "popq    $dst\n"
9841     "done:   "%}
9842   ins_encode %{
9843     Label done;
9844     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
9845     __ cmp64($dst$$Register,
9846              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
9847     __ jccb(Assembler::notEqual, done);        // common case: result is already final
9848     __ subptr(rsp, 8);                         // slot used both for the argument and the result
9849     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9850     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
9851     __ pop($dst$$Register);                    // reads the slot and restores rsp
9852     __ bind(done);
9853   %}
9854   ins_pipe(pipe_slow);
9855 %}
9856 
// ConvD2I: truncating double -> int conversion with an out-of-line fixup.
// cvttsd2sil yields 0x80000000 when the input is NaN or out of int range
// (the x86 "integer indefinite" value). Only when $dst equals that value is
// the slow path taken: $src is stored to a fresh 8-byte stack slot, the
// d2i_fixup stub is called, and the slot is popped into $dst.
// NOTE(review): the stub is defined elsewhere; presumably it replaces the
// slot contents with the Java-specified result -- confirm against the stub.
9857 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
9858 %{
9859   match(Set dst (ConvD2I src));
9860   effect(KILL cr);
9861 
9862   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
9863             "cmpl    $dst, #0x80000000\n\t"
9864             "jne,s   done\n\t"
9865             "subq    rsp, #8\n\t"
9866             "movsd   [rsp], $src\n\t"
9867             "call    d2i_fixup\n\t"
9868             "popq    $dst\n"
9869     "done:   "%}
9870   ins_encode %{
9871     Label done;
9872     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
9873     __ cmpl($dst$$Register, 0x80000000);       // sentinel produced for unrepresentable inputs
9874     __ jccb(Assembler::notEqual, done);        // common case: result is already final
9875     __ subptr(rsp, 8);                         // slot used both for the argument and the result
9876     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9877     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
9878     __ pop($dst$$Register);                    // reads the slot and restores rsp
9879     __ bind(done);
9880   %}
9881   ins_pipe(pipe_slow);
9882 %}
9883 
// ConvD2L: truncating double -> long conversion with an out-of-line fixup.
// After cvttsd2siq, $dst is compared with the 64-bit value stored at
// StubRoutines::x86::double_sign_flip() (shown as 0x8000000000000000 in the
// format). Only on equality is the slow path taken: $src is stored to a
// fresh 8-byte stack slot, the d2l_fixup stub is called, and the slot is
// popped into $dst.
// NOTE(review): the stub is defined elsewhere; presumably it replaces the
// slot contents with the Java-specified result -- confirm against the stub.
9884 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
9885 %{
9886   match(Set dst (ConvD2L src));
9887   effect(KILL cr);
9888 
9889   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
9890             "cmpq    $dst, [0x8000000000000000]\n\t"
9891             "jne,s   done\n\t"
9892             "subq    rsp, #8\n\t"
9893             "movsd   [rsp], $src\n\t"
9894             "call    d2l_fixup\n\t"
9895             "popq    $dst\n"
9896     "done:   "%}
9897   ins_encode %{
9898     Label done;
9899     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
9900     __ cmp64($dst$$Register,
9901              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
9902     __ jccb(Assembler::notEqual, done);        // common case: result is already final
9903     __ subptr(rsp, 8);                         // slot used both for the argument and the result
9904     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9905     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
9906     __ pop($dst$$Register);                    // reads the slot and restores rsp
9907     __ bind(done);
9908   %}
9909   ins_pipe(pipe_slow);
9910 %}
9911 
// ConvI2F from a general register using cvtsi2ssl.
// Selected only when UseXmmI2F is off; convXI2F_reg matches the same node
// when the flag is on.
9912 instruct convI2F_reg_reg(regF dst, rRegI src)
9913 %{
9914   predicate(!UseXmmI2F);
9915   match(Set dst (ConvI2F src));
9916 
9917   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
9918   ins_encode %{
9919     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
9920   %}
9921   ins_pipe(pipe_slow); // XXX
9922 %}
9923 
// ConvI2F with the LoadI folded in: cvtsi2ssl converts the int read directly
// from memory. Unlike the register form, this rule carries no UseXmmI2F
// predicate.
9924 instruct convI2F_reg_mem(regF dst, memory src)
9925 %{
9926   match(Set dst (ConvI2F (LoadI src)));
9927 
9928   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
9929   ins_encode %{
9930     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
9931   %}
9932   ins_pipe(pipe_slow); // XXX
9933 %}
9934 
// ConvI2D from a general register using cvtsi2sdl.
// Selected only when UseXmmI2D is off; convXI2D_reg matches the same node
// when the flag is on.
9935 instruct convI2D_reg_reg(regD dst, rRegI src)
9936 %{
9937   predicate(!UseXmmI2D);
9938   match(Set dst (ConvI2D src));
9939 
9940   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
9941   ins_encode %{
9942     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
9943   %}
9944   ins_pipe(pipe_slow); // XXX
9945 %}
9946 
// ConvI2D with the LoadI folded in: cvtsi2sdl converts the int read directly
// from memory. Unlike the register form, this rule carries no UseXmmI2D
// predicate.
9947 instruct convI2D_reg_mem(regD dst, memory src)
9948 %{
9949   match(Set dst (ConvI2D (LoadI src)));
9950 
9951   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
9952   ins_encode %{
9953     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
9954   %}
9955   ins_pipe(pipe_slow); // XXX
9956 %}
9957 
// Alternative ConvI2F used when UseXmmI2F is on: the int is first moved into
// the XMM register with movdl and then converted in place with cvtdq2ps,
// rather than using cvtsi2ssl on the general register.
9958 instruct convXI2F_reg(regF dst, rRegI src)
9959 %{
9960   predicate(UseXmmI2F);
9961   match(Set dst (ConvI2F src));
9962 
9963   format %{ "movdl $dst, $src\n\t"
9964             "cvtdq2psl $dst, $dst\t# i2f" %}
9965   ins_encode %{
9966     __ movdl($dst$$XMMRegister, $src$$Register);
9967     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
9968   %}
9969   ins_pipe(pipe_slow); // XXX
9970 %}
9971 
// Alternative ConvI2D used when UseXmmI2D is on: the int is first moved into
// the XMM register with movdl and then converted in place with cvtdq2pd,
// rather than using cvtsi2sdl on the general register.
9972 instruct convXI2D_reg(regD dst, rRegI src)
9973 %{
9974   predicate(UseXmmI2D);
9975   match(Set dst (ConvI2D src));
9976 
9977   format %{ "movdl $dst, $src\n\t"
9978             "cvtdq2pdl $dst, $dst\t# i2d" %}
9979   ins_encode %{
9980     __ movdl($dst$$XMMRegister, $src$$Register);
9981     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
9982   %}
9983   ins_pipe(pipe_slow); // XXX
9984 %}
9985 
// ConvL2F from a 64-bit general register using cvtsi2ssq.
9986 instruct convL2F_reg_reg(regF dst, rRegL src)
9987 %{
9988   match(Set dst (ConvL2F src));
9989 
9990   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
9991   ins_encode %{
9992     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
9993   %}
9994   ins_pipe(pipe_slow); // XXX
9995 %}
9996 
// ConvL2F with the LoadL folded in: cvtsi2ssq converts the long read
// directly from memory.
9997 instruct convL2F_reg_mem(regF dst, memory src)
9998 %{
9999   match(Set dst (ConvL2F (LoadL src)));
10000 
10001   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10002   ins_encode %{
10003     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10004   %}
10005   ins_pipe(pipe_slow); // XXX
10006 %}
10007 
// ConvL2D from a 64-bit general register using cvtsi2sdq.
10008 instruct convL2D_reg_reg(regD dst, rRegL src)
10009 %{
10010   match(Set dst (ConvL2D src));
10011 
10012   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10013   ins_encode %{
10014     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10015   %}
10016   ins_pipe(pipe_slow); // XXX
10017 %}
10018 
// ConvL2D with the LoadL folded in: cvtsi2sdq converts the long read
// directly from memory.
10019 instruct convL2D_reg_mem(regD dst, memory src)
10020 %{
10021   match(Set dst (ConvL2D (LoadL src)));
10022 
10023   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10024   ins_encode %{
10025     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10026   %}
10027   ins_pipe(pipe_slow); // XXX
10028 %}
10029 
// ConvI2L: sign-extends the 32-bit $src into the 64-bit $dst with movslq.
// The zero-extending variants that follow match the more specific
// (AndL (ConvI2L ...) mask) shape instead.
10030 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10031 %{
10032   match(Set dst (ConvI2L src));
10033 
10034   ins_cost(125);
10035   format %{ "movslq  $dst, $src\t# i2l" %}
10036   ins_encode %{
10037     __ movslq($dst$$Register, $src$$Register);
10038   %}
10039   ins_pipe(ialu_reg_reg);
10040 %}
10041 
10042 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10043 // %{
10044 //   match(Set dst (ConvI2L src));
10045 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10046 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10047 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10048 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10049 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10050 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10051 
10052 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10053 //   ins_encode(enc_copy(dst, src));
10054 // //   opcode(0x63); // needs REX.W
10055 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10056 //   ins_pipe(ialu_reg_reg);
10057 // %}
10058 
10059 // Zero-extend convert int to long
// Folds (AndL (ConvI2L src) mask) into one 32-bit movl; on x86-64 a 32-bit
// register write clears the upper 32 bits of the destination.
// NOTE(review): immL_32bits is defined elsewhere; presumably it only matches
// the constant 0xFFFFFFFF -- confirm in the operand definitions.
// No instruction is emitted when $dst and $src share an encoding.
// The format string no longer ends in a dangling "\n\t".
10060 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10061 %{
10062   match(Set dst (AndL (ConvI2L src) mask));
10063 
10064   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10065   ins_encode %{
10066     if ($dst$$reg != $src$$reg) {
10067       __ movl($dst$$Register, $src$$Register);
10068     }
10069   %}
10070   ins_pipe(ialu_reg_reg);
10071 %}
10072 
10073 // Zero-extend convert int to long
// Folds (AndL (ConvI2L (LoadI src)) mask) into one 32-bit load; on x86-64 a
// 32-bit register write clears the upper 32 bits of the destination.
// NOTE(review): immL_32bits is defined elsewhere; presumably it only matches
// the constant 0xFFFFFFFF -- confirm in the operand definitions.
// The format string no longer ends in a dangling "\n\t".
10074 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10075 %{
10076   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10077 
10078   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10079   ins_encode %{
10080     __ movl($dst$$Register, $src$$Address);
10081   %}
10082   ins_pipe(ialu_reg_mem);
10083 %}
10084 
// Replaces (AndL src mask) on a long with a 32-bit movl, which keeps the low
// 32 bits and clears the upper 32 bits of $dst.
// NOTE(review): immL_32bits is defined elsewhere; presumably it only matches
// the constant 0xFFFFFFFF -- confirm in the operand definitions.
10085 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10086 %{
10087   match(Set dst (AndL src mask));
10088 
10089   format %{ "movl    $dst, $src\t# zero-extend long" %}
10090   ins_encode %{
10091     __ movl($dst$$Register, $src$$Register);
10092   %}
10093   ins_pipe(ialu_reg_reg);
10094 %}
10095 
// ConvL2I: narrows a long to an int by copying the low 32 bits with movl.
10096 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10097 %{
10098   match(Set dst (ConvL2I src));
10099 
10100   format %{ "movl    $dst, $src\t# l2i" %}
10101   ins_encode %{
10102     __ movl($dst$$Register, $src$$Register);
10103   %}
10104   ins_pipe(ialu_reg_reg);
10105 %}
10106 
10107 
// MoveF2I, stack source: loads the 32 bits of a float stack slot into a
// general register unchanged (plain movl, no value conversion).
10108 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10109   match(Set dst (MoveF2I src));
10110   effect(DEF dst, USE src);
10111 
10112   ins_cost(125);
10113   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10114   ins_encode %{
10115     __ movl($dst$$Register, Address(rsp, $src$$disp));  // slot addressed as rsp + disp
10116   %}
10117   ins_pipe(ialu_reg_mem);
10118 %}
10119 
// MoveI2F, stack source: loads the 32 bits of an int stack slot into an XMM
// register unchanged (movflt, no value conversion).
10120 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10121   match(Set dst (MoveI2F src));
10122   effect(DEF dst, USE src);
10123 
10124   ins_cost(125);
10125   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10126   ins_encode %{
10127     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot addressed as rsp + disp
10128   %}
10129   ins_pipe(pipe_slow);
10130 %}
10131 
// MoveD2L, stack source: loads the 64 bits of a double stack slot into a
// general register unchanged (plain movq, no value conversion).
10132 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10133   match(Set dst (MoveD2L src));
10134   effect(DEF dst, USE src);
10135 
10136   ins_cost(125);
10137   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10138   ins_encode %{
10139     __ movq($dst$$Register, Address(rsp, $src$$disp));  // slot addressed as rsp + disp
10140   %}
10141   ins_pipe(ialu_reg_mem);
10142 %}
10143 
// MoveL2D, stack source, variant used when UseXmmLoadAndClearUpper is off.
// Emits the same movdbl call as MoveL2D_stack_reg; the two rules differ only
// in predicate and in the mnemonic shown by the format (movlpd here).
// NOTE(review): presumably movdbl picks movlpd vs. movsd from the same flag
// -- confirm in the MacroAssembler.
10144 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10145   predicate(!UseXmmLoadAndClearUpper);
10146   match(Set dst (MoveL2D src));
10147   effect(DEF dst, USE src);
10148 
10149   ins_cost(125);
10150   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10151   ins_encode %{
10152     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10153   %}
10154   ins_pipe(pipe_slow);
10155 %}
10156 
// MoveL2D, stack source, variant used when UseXmmLoadAndClearUpper is on.
// Emits the same movdbl call as MoveL2D_stack_reg_partial; the two rules
// differ only in predicate and in the mnemonic shown by the format (movsd
// here).
// NOTE(review): presumably movdbl picks movlpd vs. movsd from the same flag
// -- confirm in the MacroAssembler.
10157 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10158   predicate(UseXmmLoadAndClearUpper);
10159   match(Set dst (MoveL2D src));
10160   effect(DEF dst, USE src);
10161 
10162   ins_cost(125);
10163   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10164   ins_encode %{
10165     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10166   %}
10167   ins_pipe(pipe_slow);
10168 %}
10169 
10170 
// MoveF2I, stack destination: stores the 32 bits of the float in $src into
// an int stack slot unchanged (movflt, no value conversion).
10171 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10172   match(Set dst (MoveF2I src));
10173   effect(DEF dst, USE src);
10174 
10175   ins_cost(95); // XXX
10176   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10177   ins_encode %{
10178     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);  // slot addressed as rsp + disp
10179   %}
10180   ins_pipe(pipe_slow);
10181 %}
10182 
// MoveI2F, stack destination: stores the 32 bits of the int in $src into a
// float stack slot unchanged (plain movl, no value conversion).
10183 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10184   match(Set dst (MoveI2F src));
10185   effect(DEF dst, USE src);
10186 
10187   ins_cost(100);
10188   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10189   ins_encode %{
10190     __ movl(Address(rsp, $dst$$disp), $src$$Register);  // slot addressed as rsp + disp
10191   %}
10192   ins_pipe( ialu_mem_reg );
10193 %}
10194 
// MoveD2L, stack destination: stores the 64 bits of the double in $src into
// a long stack slot unchanged (movdbl, no value conversion).
// The format tag now names this rule (MoveD2L_reg_stack); it previously
// carried the name of the opposite-direction rule.
10195 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10196   match(Set dst (MoveD2L src));
10197   effect(DEF dst, USE src);
10198 
10199   ins_cost(95); // XXX
10200   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10201   ins_encode %{
10202     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);  // slot addressed as rsp + disp
10203   %}
10204   ins_pipe(pipe_slow);
10205 %}
10206 
// MoveL2D, stack destination: stores the 64 bits of the long in $src into a
// double stack slot unchanged (plain movq, no value conversion).
10207 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10208   match(Set dst (MoveL2D src));
10209   effect(DEF dst, USE src);
10210 
10211   ins_cost(100);
10212   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10213   ins_encode %{
10214     __ movq(Address(rsp, $dst$$disp), $src$$Register);  // slot addressed as rsp + disp
10215   %}
10216   ins_pipe(ialu_mem_reg);
10217 %}
10218 
// MoveF2I, register to register: copies the low 32 bits of the XMM register
// straight into a general register with movdl. Costed at 85, below the
// stack-based MoveF2I forms (95 and 125).
10219 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10220   match(Set dst (MoveF2I src));
10221   effect(DEF dst, USE src);
10222   ins_cost(85);
10223   format %{ "movd    $dst,$src\t# MoveF2I" %}
10224   ins_encode %{
10225     __ movdl($dst$$Register, $src$$XMMRegister);
10226   %}
10227   ins_pipe( pipe_slow );
10228 %}
10229 
// MoveD2L, register to register: copies the low 64 bits of the XMM register
// straight into a general register with movdq (printed as "movd" in the
// format). Costed at 85, below the stack-based MoveD2L forms (95 and 125).
10230 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10231   match(Set dst (MoveD2L src));
10232   effect(DEF dst, USE src);
10233   ins_cost(85);
10234   format %{ "movd    $dst,$src\t# MoveD2L" %}
10235   ins_encode %{
10236     __ movdq($dst$$Register, $src$$XMMRegister);
10237   %}
10238   ins_pipe( pipe_slow );
10239 %}
10240 
// MoveI2F, register to register: copies the 32-bit general register straight
// into an XMM register with movdl (no value conversion).
10241 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10242   match(Set dst (MoveI2F src));
10243   effect(DEF dst, USE src);
10244   ins_cost(100);
10245   format %{ "movd    $dst,$src\t# MoveI2F" %}
10246   ins_encode %{
10247     __ movdl($dst$$XMMRegister, $src$$Register);
10248   %}
10249   ins_pipe( pipe_slow );
10250 %}
10251 
// MoveL2D, register to register: copies the 64-bit general register straight
// into an XMM register with movdq (printed as "movd" in the format; no value
// conversion).
10252 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10253   match(Set dst (MoveL2D src));
10254   effect(DEF dst, USE src);
10255   ins_cost(100);
10256   format %{ "movd    $dst,$src\t# MoveL2D" %}
10257   ins_encode %{
10258      __ movdq($dst$$XMMRegister, $src$$Register);
10259   %}
10260   ins_pipe( pipe_slow );
10261 %}
10262 
10263 
10264 // =======================================================================
10265 // fast clearing of an array
// ClearArray variant selected when UseFastStosb is off.
// Operands are pinned by their classes: count in rcx, base in rdi, zero in
// rax. Count and base are consumed (USE_KILL); rax and the flags are KILLed.
// "dummy" only gives the matcher a result to Set.
// NOTE(review): this rule and rep_fast_stosb emit the identical clear_mem
// call; presumably clear_mem itself picks the stosq or stosb sequence from
// UseFastStosb -- confirm in the MacroAssembler.
10266 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10267                   rFlagsReg cr)
10268 %{
10269   predicate(!UseFastStosb);
10270   match(Set dummy (ClearArray cnt base));
10271   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10272 
10273   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10274             "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
10275   ins_encode %{ 
10276     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10277   %}
10278   ins_pipe(pipe_slow);
10279 %}
10280 
// ClearArray variant selected when UseFastStosb is on.
// Operands are pinned by their classes: count in rcx, base in rdi, zero in
// rax. Count and base are consumed (USE_KILL); rax and the flags are KILLed.
// The format shows the count being scaled by 8 (shlq 3) before rep stosb.
// NOTE(review): this rule and rep_stos emit the identical clear_mem call;
// presumably clear_mem itself picks the stosq or stosb sequence from
// UseFastStosb -- confirm in the MacroAssembler.
10281 instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10282                         rFlagsReg cr)
10283 %{
10284   predicate(UseFastStosb);
10285   match(Set dummy (ClearArray cnt base));
10286   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10287   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10288             "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10289             "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
10290   ins_encode %{ 
10291     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
10292   %}
10293   ins_pipe( pipe_slow );
10294 %}
10295 
// StrComp intrinsic: delegates to MacroAssembler::string_compare.
// Operands are pinned by their classes: str1=rdi, cnt1=rcx, str2=rsi,
// cnt2=rdx, result=rax. All four inputs are consumed (USE_KILL), one XMM
// register is reserved as a TEMP, and the flags are KILLed.
10296 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10297                         rax_RegI result, regD tmp1, rFlagsReg cr)
10298 %{
10299   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10300   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10301 
10302   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10303   ins_encode %{
10304     __ string_compare($str1$$Register, $str2$$Register,
10305                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10306                       $tmp1$$XMMRegister);
10307   %}
10308   ins_pipe( pipe_slow );
10309 %}
10310 
10311 // fast search of substring with known size.
// StrIndexOf where the pattern length is a compile-time constant
// (immI int_cnt2). Only available with UseSSE42Intrinsics. The constant
// selects the helper: string_indexofC8 for patterns of 8 or more elements,
// the generic string_indexof otherwise. rax (cnt2) and rcx (tmp) are scratch
// registers here and are KILLed; str1, str2 and cnt1 are consumed.
10312 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
10313                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
10314 %{
10315   predicate(UseSSE42Intrinsics);
10316   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
10317   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
10318 
10319   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
10320   ins_encode %{
10321     const int needle_len = (int)$int_cnt2$$constant;
10322     if (needle_len < 8) {
10323       // Short constant pattern: generic routine, which loads small
10324       // strings through the stack if they cross a page boundary.
10325       __ string_indexof($str1$$Register, $str2$$Register,
10326                         $cnt1$$Register, $cnt2$$Register,
10327                         needle_len, $result$$Register,
10328                         $vec$$XMMRegister, $tmp$$Register);
10329     } else {
10330       // Constant pattern of 8 or more elements: no stack staging needed.
10331       __ string_indexofC8($str1$$Register, $str2$$Register,
10332                           $cnt1$$Register, $cnt2$$Register,
10333                           needle_len, $result$$Register,
10334                           $vec$$XMMRegister, $tmp$$Register);
10335     }
10336   %}
10337   ins_pipe( pipe_slow );
10338 %}
10339 
// StrIndexOf where the pattern length is only known at run time (cnt2 in
// rax). Only available with UseSSE42Intrinsics. Passes -1 in the
// constant-length position of MacroAssembler::string_indexof.
// All four inputs are consumed (USE_KILL); rcx and the flags are KILLed.
10340 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
10341                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
10342 %{
10343   predicate(UseSSE42Intrinsics);
10344   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
10345   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
10346 
10347   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
10348   ins_encode %{
10349     __ string_indexof($str1$$Register, $str2$$Register,
10350                       $cnt1$$Register, $cnt2$$Register,
10351                       (-1), $result$$Register,   // -1: no compile-time pattern length
10352                       $vec$$XMMRegister, $tmp$$Register);
10353   %}
10354   ins_pipe( pipe_slow );
10355 %}
10356 
10357 // fast string equals
// StrEquals intrinsic: delegates to MacroAssembler::char_arrays_equals with
// its first argument false (array_equals passes true and no length).
// Operands are pinned: str1=rdi, str2=rsi, cnt=rcx, result=rax, tmp3=rbx.
// The three inputs are consumed (USE_KILL), two XMM TEMPs are reserved, and
// rbx and the flags are KILLed.
10358 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
10359                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
10360 %{
10361   match(Set result (StrEquals (Binary str1 str2) cnt));
10362   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
10363 
10364   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
10365   ins_encode %{
10366     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
10367                           $cnt$$Register, $result$$Register, $tmp3$$Register,
10368                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10369   %}
10370   ins_pipe( pipe_slow );
10371 %}
10372 
10373 // fast array equals
// AryEq intrinsic: delegates to MacroAssembler::char_arrays_equals with its
// first argument true. No length operand is matched; rcx (tmp3) is passed in
// the position where string_equals passes its count.
// Operands are pinned: ary1=rdi, ary2=rsi, result=rax, tmp3=rcx, tmp4=rbx.
// Both arrays are consumed (USE_KILL); rcx, rbx and the flags are KILLed.
10374 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
10375                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
10376 %{
10377   match(Set result (AryEq ary1 ary2));
10378   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
10379   //ins_cost(300);
10380 
10381   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
10382   ins_encode %{
10383     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
10384                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
10385                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10386   %}
10387   ins_pipe( pipe_slow );
10388 %}
10389 
10390 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: delegates to MacroAssembler::encode_iso_array.
// Operands are pinned: src=rsi, dst=rdi, len=rdx, tmp5=rcx, result=rax.
// src, dst and len are consumed (USE_KILL), four XMM TEMPs are reserved, and
// rcx and the flags are KILLed.
10391 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
10392                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
10393                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
10394   match(Set result (EncodeISOArray src (Binary dst len)));
10395   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
10396 
10397   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
10398   ins_encode %{
10399     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
10400                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
10401                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
10402   %}
10403   ins_pipe( pipe_slow );
10404 %}
10405 
10406 //----------Overflow Math Instructions-----------------------------------------
10407 
// OverflowAddI, register operands: the rule's result is the flags register.
// The flags are produced by actually executing addl, which overwrites $op1
// (pinned to rax by rax_RegI), hence USE_KILL on op1.
10408 instruct addofI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
10409 %{
10410   match(Set cr (OverflowAddI op1 op2));
10411   effect(DEF cr, USE_KILL op1, USE op2);
10412 
10413   format %{ "addl    $op1, $op2 #overflow check int" %}
10414 
10415   ins_encode %{
10416     __ addl($op1$$Register, $op2$$Register);
10417   %}
10418   ins_pipe(ialu_reg_reg);
10419 %}
10420 
// OverflowAddI with an immediate second operand: the rule's result is the
// flags register. addl overwrites $op1 (pinned to rax by rax_RegI), hence
// USE_KILL on op1.
10421 instruct addofI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
10422 %{
10423   match(Set cr (OverflowAddI op1 op2));
10424   effect(DEF cr, USE_KILL op1, USE op2);
10425 
10426   format %{ "addl    $op1, $op2 #overflow check int" %}
10427 
10428   ins_encode %{
10429     __ addl($op1$$Register, $op2$$constant);
10430   %}
10431   ins_pipe(ialu_reg_reg);
10432 %}
10433 
// OverflowAddL, register operands: the rule's result is the flags register.
// addq overwrites $op1 (pinned to rax by rax_RegL), hence USE_KILL on op1.
10434 instruct addofL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
10435 %{
10436   match(Set cr (OverflowAddL op1 op2));
10437   effect(DEF cr, USE_KILL op1, USE op2);
10438 
10439   format %{ "addq    $op1, $op2 #overflow check long" %}
10440   ins_encode %{
10441     __ addq($op1$$Register, $op2$$Register);
10442   %}
10443   ins_pipe(ialu_reg_reg);
10444 %}
10445 
// OverflowAddL with a 32-bit immediate second operand: the rule's result is
// the flags register. addq overwrites $op1 (pinned to rax by rax_RegL),
// hence USE_KILL on op1.
// A format clause has been added; this was the only overflow rule without
// one, so it now prints like its siblings.
10446 instruct addofL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
10447 %{
10448   match(Set cr (OverflowAddL op1 op2));
10449   effect(DEF cr, USE_KILL op1, USE op2);

        format %{ "addq    $op1, $op2 #overflow check long" %}
10450   ins_encode %{
10451     __ addq($op1$$Register, $op2$$constant);
10452   %}
10453   ins_pipe(ialu_reg_reg);
10454 %}
10455 
// OverflowSubI, register operands: the rule's result is the flags register.
// Uses cmpl, which sets the flags of op1 - op2 without writing either
// operand, so both are plain USEs (unlike the add/mul overflow rules).
10456 instruct subofI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
10457 %{
10458   match(Set cr (OverflowSubI op1 op2));
10459   effect(DEF cr, USE op1, USE op2);
10460 
10461   format %{ "cmpl    $op1, $op2 #overflow check int" %}
10462   ins_encode %{
10463     __ cmpl($op1$$Register, $op2$$Register);
10464   %}
10465   ins_pipe(ialu_reg_reg);
10466 %}
10467 
// OverflowSubI with an immediate second operand: the rule's result is the
// flags register. Uses cmpl, which leaves $op1 unmodified, so op1 is a
// plain USE.
10468 instruct subofI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
10469 %{
10470   match(Set cr (OverflowSubI op1 op2));
10471   effect(DEF cr, USE op1, USE op2);
10472 
10473   format %{ "cmpl    $op1, $op2 #overflow check int" %}
10474   ins_encode %{
10475     __ cmpl($op1$$Register, $op2$$constant);
10476   %}
10477   ins_pipe(ialu_reg_reg);
10478 %}
10479 
// OverflowSubL, register operands: the rule's result is the flags register.
// Uses cmpq, which sets the flags of op1 - op2 without writing either
// operand, so both are plain USEs.
10480 instruct subofL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
10481 %{
10482   match(Set cr (OverflowSubL op1 op2));
10483   effect(DEF cr, USE op1, USE op2);
10484 
10485   format %{ "cmpq    $op1, $op2 #overflow check long" %}
10486   ins_encode %{
10487     __ cmpq($op1$$Register, $op2$$Register);
10488   %}
10489   ins_pipe(ialu_reg_reg);
10490 %}
10491 
// OverflowSubL with a 32-bit immediate second operand: the rule's result is
// the flags register. Uses cmpq, which leaves $op1 unmodified, so op1 is a
// plain USE.
10492 instruct subofL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
10493 %{
10494   match(Set cr (OverflowSubL op1 op2));
10495   effect(DEF cr, USE op1, USE op2);
10496 
10497   format %{ "cmpq    $op1, $op2 #overflow check long" %}
10498   ins_encode %{
10499     __ cmpq($op1$$Register, $op2$$constant);
10500   %}
10501   ins_pipe(ialu_reg_reg);
10502 %}
10503 
// OverflowSubI with a constant-zero first operand (0 - op2), i.e. an int
// negation overflow check. Implemented with negl, which overwrites $op2
// (pinned to rax by rax_RegI), hence USE_KILL on op2.
10504 instruct negofI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
10505 %{
10506   match(Set cr (OverflowSubI zero op2));
10507   effect(DEF cr, USE_KILL op2);
10508 
10509   format %{ "negl    $op2 #overflow check int" %}
10510   ins_encode %{
10511     __ negl($op2$$Register);
10512   %}
10513   ins_pipe(ialu_reg_reg);
10514 %}
10515 
// OverflowSubL with a constant-zero first operand (0 - op2), i.e. a long
// negation overflow check. Implemented with negq, which overwrites $op2
// (pinned to rax by rax_RegL), hence USE_KILL on op2.
10516 instruct negofL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
10517 %{
10518   match(Set cr (OverflowSubL zero op2));
10519   effect(DEF cr, USE_KILL op2);
10520 
10521   format %{ "negq    $op2 #overflow check long" %}
10522   ins_encode %{
10523     __ negq($op2$$Register);
10524   %}
10525   ins_pipe(ialu_reg_reg);
10526 %}
10527 
// OverflowMulI, register operands: the rule's result is the flags register.
// The two-operand imull overwrites $op1 (pinned to rax by rax_RegI), hence
// USE_KILL on op1.
10528 instruct mulofI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
10529 %{
10530   match(Set cr (OverflowMulI op1 op2));
10531   effect(DEF cr, USE_KILL op1, USE op2);
10532 
10533   format %{ "imull    $op1, $op2 #overflow check int" %}
10534   ins_encode %{
10535     __ imull($op1$$Register, $op2$$Register);
10536   %}
10537   ins_pipe(ialu_reg_reg_alu0);
10538 %}
10539 
// OverflowMulI with an immediate second operand. Note the operand roles:
// the matched inputs are op2 (register) and op3 (immediate); op1 (rax) is
// not an input but the scratch destination of the three-operand imull, so
// it is only KILLed and op2 is left intact.
10540 instruct mulofI_rReg_imm(rFlagsReg cr, rax_RegI op1, rRegI op2, immI op3)
10541 %{
10542   match(Set cr (OverflowMulI op2 op3));
10543   effect(DEF cr, KILL op1, USE op2, USE op3);
10544 
10545   format %{ "imull    $op1, $op2, $op3 #overflow check int" %}
10546   ins_encode %{
10547     __ imull($op1$$Register, $op2$$Register, $op3$$constant);
10548   %}
10549   ins_pipe(ialu_reg_reg_alu0);
10550 %}
10551 
// OverflowMulL, register operands: the rule's result is the flags register.
// The two-operand imulq overwrites $op1 (pinned to rax by rax_RegL), hence
// USE_KILL on op1.
10552 instruct mulofL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
10553 %{
10554   match(Set cr (OverflowMulL op1 op2));
10555   effect(DEF cr, USE_KILL op1, USE op2);
10556 
10557   format %{ "imulq    $op1, $op2 #overflow check long" %}
10558   ins_encode %{
10559     __ imulq($op1$$Register, $op2$$Register);
10560   %}
10561   ins_pipe(ialu_reg_reg_alu0);
10562 %}
10563 
// OverflowMulL with a 32-bit immediate second operand. Note the operand
// roles: the matched inputs are op2 (register) and op3 (immediate); op1
// (rax) is not an input but the scratch destination of the three-operand
// imulq, so it is only KILLed and op2 is left intact.
// The format now prints all three operands, matching the instruction that is
// emitted and the int variant of this rule.
10564 instruct mulofL_rReg_imm(rFlagsReg cr, rax_RegL op1, rRegL op2, immL32 op3)
10565 %{
10566   match(Set cr (OverflowMulL op2 op3));
10567   effect(DEF cr, KILL op1, USE op2, USE op3);
10568 
10569   format %{ "imulq    $op1, $op2, $op3 #overflow check long" %}
10570   ins_encode %{
10571     __ imulq($op1$$Register, $op2$$Register, $op3$$constant);
10572   %}
10573   ins_pipe(ialu_reg_reg_alu0);
10574 %}
10575 
10576 
10577 //----------Control Flow Instructions------------------------------------------
10578 // Signed compare Instructions
10579 
10580 // XXX more variants!!
// CmpI of two int registers, producing signed flags (rFlagsReg).
// Encoded from pieces: optional REX prefix, primary opcode 0x3B (CMP r32,
// r/m32), then the ModRM byte for the two registers.
10581 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10582 %{
10583   match(Set cr (CmpI op1 op2));
10584   effect(DEF cr, USE op1, USE op2);
10585 
10586   format %{ "cmpl    $op1, $op2" %}
10587   opcode(0x3B);  /* Opcode 3B /r */
10588   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10589   ins_pipe(ialu_cr_reg_reg);
10590 %}
10591 
// CmpI of an int register against an immediate, producing signed flags.
// Uses opcode 0x81 with /7 in the ModRM reg field (CMP r/m32, imm).
// NOTE(review): OpcSErm/Con8or32 are defined elsewhere; presumably they
// switch to the sign-extended 8-bit immediate form when the constant fits
// -- confirm in the encoding classes.
10592 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10593 %{
10594   match(Set cr (CmpI op1 op2));
10595 
10596   format %{ "cmpl    $op1, $op2" %}
10597   opcode(0x81, 0x07); /* Opcode 81 /7 */
10598   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10599   ins_pipe(ialu_cr_reg_imm);
10600 %}
10601 
// CmpI of an int register against an int loaded from memory; the LoadI is
// folded into the compare (opcode 0x3B with a memory ModRM operand).
// Produces signed flags.
10602 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10603 %{
10604   match(Set cr (CmpI op1 (LoadI op2)));
10605 
10606   ins_cost(500); // XXX
10607   format %{ "cmpl    $op1, $op2" %}
10608   opcode(0x3B); /* Opcode 3B /r */
10609   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10610   ins_pipe(ialu_cr_reg_mem);
10611 %}
10612 
// CmpI of an int register against constant zero, implemented as
// "testl src, src" (opcode 0x85) so no immediate needs to be encoded.
10613 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10614 %{
10615   match(Set cr (CmpI src zero));
10616 
10617   format %{ "testl   $src, $src" %}
10618   opcode(0x85);
10619   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10620   ins_pipe(ialu_cr_reg_imm);
10621 %}
10622 
// Folds (CmpI (AndI src con) 0) into a single "testl src, imm32"
// (opcode 0xF7 with /0 in the ModRM reg field); the AND result is not
// written to any register, only the flags are produced.
10623 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10624 %{
10625   match(Set cr (CmpI (AndI src con) zero));
10626 
10627   format %{ "testl   $src, $con" %}
10628   opcode(0xF7, 0x00);
10629   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10630   ins_pipe(ialu_cr_reg_imm);
10631 %}
10632 
// Folds (CmpI (AndI src (LoadI mem)) 0) into a single "testl src, [mem]"
// (opcode 0x85 with a memory ModRM operand); both the load and the AND are
// absorbed and only the flags are produced.
10633 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10634 %{
10635   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10636 
10637   format %{ "testl   $src, $mem" %}
10638   opcode(0x85);
10639   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10640   ins_pipe(ialu_cr_reg_mem);
10641 %}
10642 
10643 // Unsigned compare Instructions; really, same as signed except they
10644 // produce an rFlagsRegU instead of rFlagsReg.
// CmpU of two int registers. Same encoding as compI_rReg (opcode 0x3B); the
// only difference is that the result is typed rFlagsRegU.
10645 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10646 %{
10647   match(Set cr (CmpU op1 op2));
10648 
10649   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10650   opcode(0x3B); /* Opcode 3B /r */
10651   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10652   ins_pipe(ialu_cr_reg_reg);
10653 %}
10654 
// CmpU of an int register against an immediate. Same encoding as
// compI_rReg_imm (opcode 0x81 /7); the result is typed rFlagsRegU.
10655 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10656 %{
10657   match(Set cr (CmpU op1 op2));
10658 
10659   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10660   opcode(0x81,0x07); /* Opcode 81 /7 */
10661   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10662   ins_pipe(ialu_cr_reg_imm);
10663 %}
10664 
// CmpU of an int register against an int loaded from memory; the LoadI is
// folded into the compare. Same encoding as compI_rReg_mem (opcode 0x3B);
// the result is typed rFlagsRegU.
10665 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10666 %{
10667   match(Set cr (CmpU op1 (LoadI op2)));
10668 
10669   ins_cost(500); // XXX
10670   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10671   opcode(0x3B); /* Opcode 3B /r */
10672   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10673   ins_pipe(ialu_cr_reg_mem);
10674 %}
10675 
10676 // // // Cisc-spilled version of compU_rReg
10677 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10678 // //%{
10679 // //  match(Set cr (CmpU (LoadI op1) op2));
10680 // //
10681 // //  format %{ "CMPu   $op1,$op2" %}
10682 // //  ins_cost(500);
10683 // //  opcode(0x39);  /* Opcode 39 /r */
10684 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10685 // //%}
10686 
// CmpU of an int register against constant zero, implemented as
// "testl src, src" (opcode 0x85). Same encoding as testI_reg; the result is
// typed rFlagsRegU.
10687 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10688 %{
10689   match(Set cr (CmpU src zero));
10690 
10691   format %{ "testl  $src, $src\t# unsigned" %}
10692   opcode(0x85);
10693   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10694   ins_pipe(ialu_cr_reg_imm);
10695 %}
10696 
// CmpP of two pointer registers: a 64-bit compare (REX.W-prefixed opcode
// 0x3B) whose result is typed rFlagsRegU, i.e. pointers compare as unsigned.
10697 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10698 %{
10699   match(Set cr (CmpP op1 op2));
10700 
10701   format %{ "cmpq    $op1, $op2\t# ptr" %}
10702   opcode(0x3B); /* Opcode 3B /r */
10703   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10704   ins_pipe(ialu_cr_reg_reg);
10705 %}
10706 
// CmpP of a pointer register against a pointer loaded from memory; the LoadP
// is folded into a 64-bit compare (REX.W-prefixed opcode 0x3B). The result
// is typed rFlagsRegU. compP_mem_rReg matches the same shape without the
// cost penalty when its predicate holds.
10707 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10708 %{
10709   match(Set cr (CmpP op1 (LoadP op2)));
10710 
10711   ins_cost(500); // XXX
10712   format %{ "cmpq    $op1, $op2\t# ptr" %}
10713   opcode(0x3B); /* Opcode 3B /r */
10714   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10715   ins_pipe(ialu_cr_reg_mem);
10716 %}
10717 
10718 // // // Cisc-spilled version of cmpP_rReg
10719 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10720 // //%{
10721 // //  match(Set cr (CmpP (LoadP op1) op2));
10722 // //
10723 // //  format %{ "CMPu   $op1,$op2" %}
10724 // //  ins_cost(500);
10725 // //  opcode(0x39);  /* Opcode 39 /r */
10726 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10727 // //%}
10728 
10729 // XXX this is generalized by compP_rReg_mem???
10730 // Compare raw pointer (used in out-of-heap check).
10731 // Only works because non-oop pointers must be raw pointers
10732 // and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but restricted by a
// predicate to loads whose type carries no relocation, and declared without
// an explicit ins_cost.
// NOTE(review): despite the name, op1 is the register and op2 the memory operand.
10733 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10734 %{
        // n->in(2) is the LoadP input of the CmpP; the check is on the bottom
        // type of that load's own in(2).
10735   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
10736   match(Set cr (CmpP op1 (LoadP op2)));
10737 
10738   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10739   opcode(0x3B); /* Opcode 3B /r */
10740   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10741   ins_pipe(ialu_cr_reg_mem);
10742 %}
10743 
10744 // This will generate a signed flags result. This should be OK since
10745 // any compare to a zero should be eq/neq.
// Compare of a pointer register with the null pointer constant (immP0),
// implemented by testing the register against itself (testq, opcode 0x85).
// Defines a plain rFlagsReg rather than rFlagsRegU.
10746 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10747 %{
10748   match(Set cr (CmpP src zero));
10749 
10750   format %{ "testq   $src, $src\t# ptr" %}
10751   opcode(0x85);
10752   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10753   ins_pipe(ialu_cr_reg_imm);
10754 %}
10755 
10756 // This will generate a signed flags result. This should be OK since
10757 // any compare to a zero should be eq/neq.
// Compare of a pointer loaded from memory with null, without loading it into
// a register: encoded as F7 /0 (test) on the memory operand with an
// immediate. Selected only when compressed oops are off or the narrow-oop
// base is non-NULL; testP_mem_reg0 handles the complementary case.
10758 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
10759 %{
10760   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
10761   match(Set cr (CmpP (LoadP op) zero));
10762 
10763   ins_cost(500); // XXX
10764   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
10765   opcode(0xF7); /* Opcode F7 /0 */
        // The emitted immediate is the 32-bit value 0xFFFFFFFF; the format string
        // shows it as a 64-bit all-ones mask (presumably sign-extended by the
        // wide form -- TODO confirm).
10766   ins_encode(REX_mem_wide(op),
10767              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
10768   ins_pipe(ialu_cr_reg_imm);
10769 %}
10770 
// Compare of a pointer loaded from memory with null when compressed oops are
// enabled and both the narrow-oop base and narrow-klass base are NULL.
// Per the format string, R12 (the heap-base register) then holds zero, so
// the memory operand is compared against r12 directly.
10771 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
10772 %{
10773   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
10774   match(Set cr (CmpP (LoadP mem) zero));
10775 
10776   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
10777   ins_encode %{
10778     __ cmpq(r12, $mem$$Address);
10779   %}
10780   ins_pipe(ialu_cr_reg_mem);
10781 %}
10782 
// Compare of two compressed-pointer registers: matches CmpN and emits a
// 32-bit cmpl of the two registers.
10783 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
10784 %{
10785   match(Set cr (CmpN op1 op2));
10786 
10787   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10788   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
10789   ins_pipe(ialu_cr_reg_reg);
10790 %}
10791 
// Compare of a compressed-pointer register with a compressed pointer loaded
// from memory: matches CmpN whose second input is a LoadN and emits cmpl
// with a memory operand.
10792 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
10793 %{
10794   match(Set cr (CmpN src (LoadN mem)));
10795 
10796   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
10797   ins_encode %{
10798     __ cmpl($src$$Register, $mem$$Address);
10799   %}
10800   ins_pipe(ialu_cr_reg_mem);
10801 %}
10802 
// Compare of a compressed-pointer register with a narrow-oop constant (immN).
// The constant is passed to cmp_narrow_oop as a jobject.
10803 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
10804   match(Set cr (CmpN op1 op2));
10805 
10806   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10807   ins_encode %{
10808     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
10809   %}
10810   ins_pipe(ialu_cr_reg_imm);
10811 %}
10812 
// Compare of a narrow-oop constant with a compressed pointer loaded from
// memory, emitted as a single compare of the memory operand against the
// constant.
// NOTE(review): the match rule has the constant as the first CmpN input, while
// the encoding compares memory against the constant (operands reversed).
// Harmless for eq/ne tests; confirm no ordered condition consumes this.
10813 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
10814 %{
10815   match(Set cr (CmpN src (LoadN mem)));
10816 
10817   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
10818   ins_encode %{
10819     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
10820   %}
10821   ins_pipe(ialu_cr_reg_mem);
10822 %}
10823 
// Compare of a compressed-pointer register with a narrow-klass constant
// (immNKlass). The constant is passed to cmp_narrow_klass as a Klass*.
10824 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
10825   match(Set cr (CmpN op1 op2));
10826 
10827   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
10828   ins_encode %{
10829     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
10830   %}
10831   ins_pipe(ialu_cr_reg_imm);
10832 %}
10833 
// Compare of a narrow-klass constant with a compressed klass pointer loaded
// from memory (LoadNKlass), emitted as one compare of the memory operand
// against the constant.
// NOTE(review): as in compN_mem_imm, the encoding's operand order is the
// reverse of the match rule's; confirm only eq/ne conditions use it.
10834 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
10835 %{
10836   match(Set cr (CmpN src (LoadNKlass mem)));
10837 
10838   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
10839   ins_encode %{
10840     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
10841   %}
10842   ins_pipe(ialu_cr_reg_mem);
10843 %}
10844 
// Compare of a compressed-pointer register with the narrow null constant
// (immN0), implemented by testing the register against itself with testl.
10845 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
10846   match(Set cr (CmpN src zero));
10847 
10848   format %{ "testl   $src, $src\t# compressed ptr" %}
10849   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
10850   ins_pipe(ialu_cr_reg_imm);
10851 %}
10852 
// Compare of a compressed pointer loaded from memory with the narrow null
// constant (CmpN (LoadN mem) zero), without loading it into a register.
// Selected only when the narrow-oop base is non-NULL.
//
// The previous encoding emitted "cmpl mem, 0xFFFFFFFF", which sets ZF only
// when the loaded value is all ones; that does not implement a compare
// against zero and disagreed with the "testl mem, 0xffffffff" form the
// format string documented. Comparing the memory operand against 0 produces
// the flags of the intended test (ZF set iff the loaded value is zero) while
// using the same cmpl(Address, int) form the block already relied on.
10853 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
10854 %{
10855   predicate(Universe::narrow_oop_base() != NULL);
10856   match(Set cr (CmpN (LoadN mem) zero));
10857 
10858   ins_cost(500); // XXX
        // Format kept in step with the instruction actually emitted below.
10859   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
10860   ins_encode %{
10861     __ cmpl($mem$$Address, (int)0);
10862   %}
10863   ins_pipe(ialu_cr_reg_mem);
10864 %}
10865 
// Compare of a compressed pointer loaded from memory with narrow null when
// both the narrow-oop base and the narrow-klass base are NULL. Per the
// format string, R12 (the heap-base register) then holds zero, so the memory
// operand is compared against r12 with a 32-bit cmpl.
10866 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
10867 %{
10868   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
10869   match(Set cr (CmpN (LoadN mem) zero));
10870 
10871   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
10872   ins_encode %{
10873     __ cmpl(r12, $mem$$Address);
10874   %}
10875   ins_pipe(ialu_cr_reg_mem);
10876 %}
10877 
10878 // Yanked all unsigned pointer compare operations.
10879 // Pointer compares are done with CmpP which is already unsigned.
10880 
// 64-bit compare of two long registers: matches CmpL and defines the
// rFlagsReg operand cr (cmpq, opcode 3B /r with the wide REX class).
10881 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10882 %{
10883   match(Set cr (CmpL op1 op2));
10884 
10885   format %{ "cmpq    $op1, $op2" %}
10886   opcode(0x3B);  /* Opcode 3B /r */
10887   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10888   ins_pipe(ialu_cr_reg_reg);
10889 %}
10890 
// 64-bit compare of a long register with an immL32 constant (opcode 81 /7,
// wide form).
10891 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10892 %{
10893   match(Set cr (CmpL op1 op2));
10894 
10895   format %{ "cmpq    $op1, $op2" %}
10896   opcode(0x81, 0x07); /* Opcode 81 /7 */
        // OpcSErm_wide/Con8or32 are encoding classes defined outside this section;
        // presumably they select the 8- or 32-bit immediate form -- TODO confirm.
10897   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
10898   ins_pipe(ialu_cr_reg_imm);
10899 %}
10900 
// 64-bit compare of a long register with a long loaded from memory: matches
// CmpL whose second input is a LoadL.
10901 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10902 %{
10903   match(Set cr (CmpL op1 (LoadL op2)));
10904 
10905   format %{ "cmpq    $op1, $op2" %}
10906   opcode(0x3B); /* Opcode 3B /r */
10907   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10908   ins_pipe(ialu_cr_reg_mem);
10909 %}
10910 
// Compare of a long register with the constant zero (immL0), implemented by
// testing the register against itself (testq, opcode 0x85).
10911 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10912 %{
10913   match(Set cr (CmpL src zero));
10914 
10915   format %{ "testq   $src, $src" %}
10916   opcode(0x85);
10917   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10918   ins_pipe(ialu_cr_reg_imm);
10919 %}
10920 
// Folds "(src & con) compared with zero" into one test instruction: matches
// CmpL of an AndL (register, immL32 constant) against immL0 and emits
// F7 /0 with a 32-bit immediate.
10921 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
10922 %{
10923   match(Set cr (CmpL (AndL src con) zero));
10924 
10925   format %{ "testq   $src, $con\t# long" %}
10926   opcode(0xF7, 0x00);
10927   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
10928   ins_pipe(ialu_cr_reg_imm);
10929 %}
10930 
// Folds "(src & loaded long) compared with zero" into one test instruction:
// matches CmpL of an AndL (register, LoadL) against immL0 and emits opcode
// 0x85 with a memory operand.
10931 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
10932 %{
10933   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
10934 
10935   format %{ "testq   $src, $mem" %}
10936   opcode(0x85);
10937   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
10938   ins_pipe(ialu_cr_reg_mem);
10939 %}
10940 
10941 // Manifest a CmpL result in an integer register.  Very painful.
10942 // This is the test to avoid.
// Three-way compare of two long registers producing an int in dst (CmpL3).
// Per the format string: dst is preset to -1 and kept if src1 < src2;
// otherwise setne/movzbl leave 0 (equal) or 1 (greater) in dst.
// The flags register is only clobbered (KILL), not produced as a result.
10943 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
10944 %{
10945   match(Set dst (CmpL3 src1 src2));
10946   effect(KILL flags);
10947 
10948   ins_cost(275); // XXX
10949   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
10950             "movl    $dst, -1\n\t"
10951             "jl,s    done\n\t"
10952             "setne   $dst\n\t"
10953             "movzbl  $dst, $dst\n\t"
10954     "done:" %}
        // The whole sequence is emitted by the cmpl3_flag encoding class
        // (defined outside this section).
10955   ins_encode(cmpl3_flag(src1, src2, dst));
10956   ins_pipe(pipe_slow);
10957 %}
10958 
10959 //----------Max and Min--------------------------------------------------------
10960 // Min Instructions
10961 
// Helper with no match rule: conditional move "if greater" (0F 4F) of src
// into dst, reading previously computed flags. It is instantiated from the
// expand block of minI_rReg.
10962 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
10963 %{
        // dst is both read and written; src and cr are inputs only.
10964   effect(USE_DEF dst, USE src, USE cr);
10965 
10966   format %{ "cmovlgt $dst, $src\t# min" %}
10967   opcode(0x0F, 0x4F);
10968   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10969   ins_pipe(pipe_cmov_reg);
10970 %}
10971 
10972 
// Integer minimum, dst = min(dst, src): matches MinI and expands into a
// compare followed by a conditional move.
10973 instruct minI_rReg(rRegI dst, rRegI src)
10974 %{
10975   match(Set dst (MinI dst src));
10976 
10977   ins_cost(200);
10978   expand %{
          // Temporary flags operand linking the compare to the cmov.
10979     rFlagsReg cr;
10980     compI_rReg(cr, dst, src);
          // dst > src  =>  dst = src
10981     cmovI_reg_g(dst, src, cr);
10982   %}
10983 %}
10984 
// Helper with no match rule: conditional move "if less" (0F 4C) of src into
// dst, reading previously computed flags. It is instantiated from the
// expand block of maxI_rReg.
10985 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
10986 %{
        // dst is both read and written; src and cr are inputs only.
10987   effect(USE_DEF dst, USE src, USE cr);
10988 
10989   format %{ "cmovllt $dst, $src\t# max" %}
10990   opcode(0x0F, 0x4C);
10991   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10992   ins_pipe(pipe_cmov_reg);
10993 %}
10994 
10995 
// Integer maximum, dst = max(dst, src): matches MaxI and expands into a
// compare followed by a conditional move.
10996 instruct maxI_rReg(rRegI dst, rRegI src)
10997 %{
10998   match(Set dst (MaxI dst src));
10999 
11000   ins_cost(200);
11001   expand %{
          // Temporary flags operand linking the compare to the cmov.
11002     rFlagsReg cr;
11003     compI_rReg(cr, dst, src);
          // dst < src  =>  dst = src
11004     cmovI_reg_l(dst, src, cr);
11005   %}
11006 %}
11007 
11008 // ============================================================================
11009 // Branch Instructions
11010 
11011 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump to a label (matches Goto), long form.
11012 instruct jmpDir(label labl)
11013 %{
11014   match(Goto);
11015   effect(USE labl);
11016 
11017   ins_cost(300);
11018   format %{ "jmp     $labl" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11019   size(5);
11020   ins_encode %{
11021     Label* L = $labl$$label;
11022     __ jmp(*L, false); // Always long jump
11023   %}
11024   ins_pipe(pipe_jmp);
11025 %}
11026 
11027 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional jump on signed-compare flags (matches If with a cmpOp
// condition and an rFlagsReg input), long form.
11028 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11029 %{
11030   match(If cop cr);
11031   effect(USE labl);
11032 
11033   ins_cost(300);
11034   format %{ "j$cop     $labl" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11035   size(6);
11036   ins_encode %{
11037     Label* L = $labl$$label;
          // The operand's condition code is used directly as the assembler condition.
11038     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11039   %}
11040   ins_pipe(pipe_jcc);
11041 %}
11042 
11043 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional back-branch closing a counted loop (matches CountedLoopEnd
// with a cmpOp condition and an rFlagsReg input), long form.
11044 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
11045 %{
11046   match(CountedLoopEnd cop cr);
11047   effect(USE labl);
11048 
11049   ins_cost(300);
11050   format %{ "j$cop     $labl\t# loop end" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11051   size(6);
11052   ins_encode %{
11053     Label* L = $labl$$label;
11054     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11055   %}
11056   ins_pipe(pipe_jcc);
11057 %}
11058 
11059 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop-end branch for unsigned compares: same encoding as
// jmpLoopEnd, but takes a cmpOpU condition and an rFlagsRegU input.
11060 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11061   match(CountedLoopEnd cop cmp);
11062   effect(USE labl);
11063 
11064   ins_cost(300);
11065   format %{ "j$cop,u   $labl\t# loop end" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11066   size(6);
11067   ins_encode %{
11068     Label* L = $labl$$label;
11069     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11070   %}
11071   ins_pipe(pipe_jcc);
11072 %}
11073 
// Counted-loop-end branch taking a cmpOpUCF condition and an rFlagsRegUCF
// input. Same encoding as jmpLoopEndU, but with a lower cost (200 vs 300).
11074 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11075   match(CountedLoopEnd cop cmp);
11076   effect(USE labl);
11077 
11078   ins_cost(200);
11079   format %{ "j$cop,u   $labl\t# loop end" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11080   size(6);
11081   ins_encode %{
11082     Label* L = $labl$$label;
11083     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11084   %}
11085   ins_pipe(pipe_jcc);
11086 %}
11087 
11088 // Jump Direct Conditional - using unsigned comparison
// Conditional jump on unsigned-compare flags (matches If with a cmpOpU
// condition and an rFlagsRegU input), long form.
11089 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11090   match(If cop cmp);
11091   effect(USE labl);
11092 
11093   ins_cost(300);
11094   format %{ "j$cop,u  $labl" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11095   size(6);
11096   ins_encode %{
11097     Label* L = $labl$$label;
11098     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11099   %}
11100   ins_pipe(pipe_jcc);
11101 %}
11102 
// Conditional jump taking a cmpOpUCF condition and an rFlagsRegUCF input.
// Same encoding as jmpConU, but with a lower cost (200 vs 300).
11103 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11104   match(If cop cmp);
11105   effect(USE labl);
11106 
11107   ins_cost(200);
11108   format %{ "j$cop,u  $labl" %}
        // Declared encoded length in bytes; must agree with what the encoding emits.
11109   size(6);
11110   ins_encode %{
11111     Label* L = $labl$$label;
11112     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11113   %}
11114   ins_pipe(pipe_jcc);
11115 %}
11116 
// Conditional jump for cmpOpUCF2 conditions, which need the parity flag
// examined in addition to eq/ne. Emits two branches:
//   notEqual: jump to the target if parity is set OR if not-equal;
//   equal:    skip the jump if parity is set, otherwise jump if equal.
// Any other condition code is a fatal error (ShouldNotReachHere).
// No size() is declared for this long form.
11117 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11118   match(If cop cmp);
11119   effect(USE labl);
11120 
11121   ins_cost(200);
11122   format %{ $$template
11123     if ($cop$$cmpcode == Assembler::notEqual) {
11124       $$emit$$"jp,u   $labl\n\t"
11125       $$emit$$"j$cop,u   $labl"
11126     } else {
11127       $$emit$$"jp,u   done\n\t"
11128       $$emit$$"j$cop,u   $labl\n\t"
11129       $$emit$$"done:"
11130     }
11131   %}
11132   ins_encode %{
11133     Label* l = $labl$$label;
11134     if ($cop$$cmpcode == Assembler::notEqual) {
11135       __ jcc(Assembler::parity, *l, false);
11136       __ jcc(Assembler::notEqual, *l, false);
11137     } else if ($cop$$cmpcode == Assembler::equal) {
11138       Label done;
            // The parity skip only hops over the following jcc, so a short branch is used.
11139       __ jccb(Assembler::parity, done);
11140       __ jcc(Assembler::equal, *l, false);
11141       __ bind(done);
11142     } else {
11143        ShouldNotReachHere();
11144     }
11145   %}
11146   ins_pipe(pipe_jcc);
11147 %}
11148 
11149 // ============================================================================
11150 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11151 // superklass array for an instance of the superklass.  Set a hidden
11152 // internal cache on a hit (cache is checked with exposed code in
11153 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11154 // encoding ALSO sets flags.
11155 
// Slow-path subtype check producing its result in RDI (result): matches
// PartialSubtypeCheck with sub pinned to RSI and super to RAX. RCX and the
// flags are clobbered (KILL). Per the format string, the secondary-supers
// array of $sub is scanned with "repne scasq" for $super; on a hit the
// secondary-super cache is updated and the result register is zeroed, on a
// miss the result is left non-zero.
// Fix: the last format line lacked the '#' that introduces the trailing
// comment on every other line, so "Hit: rdi zero" printed as if it were part
// of the operand list.
11156 instruct partialSubtypeCheck(rdi_RegP result,
11157                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11158                              rFlagsReg cr)
11159 %{
11160   match(Set result (PartialSubtypeCheck sub super));
11161   effect(KILL rcx, KILL cr);
11162 
11163   ins_cost(1100);  // slightly larger than the next version
11164   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11165             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11166             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11167             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11168             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11169             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11170             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11171     "miss:\t" %}
11172 
        // The primary opcode value is used as a flag by the shared encoding class.
11173   opcode(0x1); // Force a XOR of RDI
11174   ins_encode(enc_PartialSubtypeCheck());
11175   ins_pipe(pipe_slow);
11176 %}
11177 
// Variant of partialSubtypeCheck for when the check's result is immediately
// compared with null: matches CmpP (PartialSubtypeCheck sub super) zero and
// produces only flags. RDI is used as scratch and clobbered (KILL result),
// as is RCX. Slightly cheaper (1000 vs 1100), so it is preferred when it
// applies.
11178 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11179                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11180                                      immP0 zero,
11181                                      rdi_RegP result)
11182 %{
11183   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11184   effect(KILL rcx, KILL result);
11185 
11186   ins_cost(1000);
11187   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11188             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11189             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11190             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11191             "jne,s   miss\t\t# Missed: flags nz\n\t"
11192             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11193     "miss:\t" %}
11194 
        // Same encoding class as partialSubtypeCheck; the primary opcode value
        // selects the variant without the final XOR.
11195   opcode(0x0); // No need to XOR RDI
11196   ins_encode(enc_PartialSubtypeCheck());
11197   ins_pipe(pipe_slow);
11198 %}
11199 
11200 // ============================================================================
11201 // Branch Instructions -- short offset versions
11202 //
11203 // These instructions are used to replace jumps of a long offset (the default
11204 // match) with jumps of a shorter offset.  These instructions are all tagged
11205 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11206 // match rules in general matching.  Instead, the ADLC generates a conversion
11207 // method in the MachNode which can be used to do in-place replacement of the
11208 // long variant with the shorter variant.  The compiler will determine if a
11209 // branch can be taken by the is_short_branch_offset() predicate in the machine
11210 // specific code section of the file.
11211 
11212 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: unconditional jump emitted with jmpb.
11213 instruct jmpDir_short(label labl) %{
11214   match(Goto);
11215   effect(USE labl);
11216 
11217   ins_cost(300);
11218   format %{ "jmp,s   $labl" %}
        // Declared encoded length in bytes (the long form declares 5).
11219   size(2);
11220   ins_encode %{
11221     Label* L = $labl$$label;
11222     __ jmpb(*L);
11223   %}
11224   ins_pipe(pipe_jmp);
        // Marks this as a short-branch variant, excluded from general matching.
11225   ins_short_branch(1);
11226 %}
11227 
11228 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: conditional jump emitted with jccb.
11229 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
11230   match(If cop cr);
11231   effect(USE labl);
11232 
11233   ins_cost(300);
11234   format %{ "j$cop,s   $labl" %}
        // Declared encoded length in bytes (the long form declares 6).
11235   size(2);
11236   ins_encode %{
11237     Label* L = $labl$$label;
11238     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11239   %}
11240   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11241   ins_short_branch(1);
11242 %}
11243 
11244 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: counted-loop-end branch emitted
// with jccb.
11245 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
11246   match(CountedLoopEnd cop cr);
11247   effect(USE labl);
11248 
11249   ins_cost(300);
11250   format %{ "j$cop,s   $labl\t# loop end" %}
        // Declared encoded length in bytes (the long form declares 6).
11251   size(2);
11252   ins_encode %{
11253     Label* L = $labl$$label;
11254     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11255   %}
11256   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11257   ins_short_branch(1);
11258 %}
11259 
11260 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: unsigned counted-loop-end branch
// emitted with jccb.
11261 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11262   match(CountedLoopEnd cop cmp);
11263   effect(USE labl);
11264 
11265   ins_cost(300);
11266   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared encoded length in bytes (the long form declares 6).
11267   size(2);
11268   ins_encode %{
11269     Label* L = $labl$$label;
11270     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11271   %}
11272   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11273   ins_short_branch(1);
11274 %}
11275 
// Short-offset replacement for jmpLoopEndUCF, emitted with jccb.
// NOTE(review): cost is 300 here while the long form jmpLoopEndUCF uses 200;
// confirm whether the difference is intentional.
11276 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11277   match(CountedLoopEnd cop cmp);
11278   effect(USE labl);
11279 
11280   ins_cost(300);
11281   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared encoded length in bytes (the long form declares 6).
11282   size(2);
11283   ins_encode %{
11284     Label* L = $labl$$label;
11285     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11286   %}
11287   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11288   ins_short_branch(1);
11289 %}
11290 
11291 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: unsigned conditional jump emitted
// with jccb.
11292 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11293   match(If cop cmp);
11294   effect(USE labl);
11295 
11296   ins_cost(300);
11297   format %{ "j$cop,us  $labl" %}
        // Declared encoded length in bytes (the long form declares 6).
11298   size(2);
11299   ins_encode %{
11300     Label* L = $labl$$label;
11301     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11302   %}
11303   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11304   ins_short_branch(1);
11305 %}
11306 
// Short-offset replacement for jmpConUCF, emitted with jccb.
// NOTE(review): cost is 300 here while the long form jmpConUCF uses 200;
// confirm whether the difference is intentional.
11307 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11308   match(If cop cmp);
11309   effect(USE labl);
11310 
11311   ins_cost(300);
11312   format %{ "j$cop,us  $labl" %}
        // Declared encoded length in bytes (the long form declares 6).
11313   size(2);
11314   ins_encode %{
11315     Label* L = $labl$$label;
11316     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11317   %}
11318   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11319   ins_short_branch(1);
11320 %}
11321 
// Short-offset replacement for jmpConUCF2. Same two-branch scheme (parity
// check plus eq/ne branch), with every branch emitted via jccb.
// Any condition other than equal/notEqual is a fatal error.
11322 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11323   match(If cop cmp);
11324   effect(USE labl);
11325 
11326   ins_cost(300);
11327   format %{ $$template
11328     if ($cop$$cmpcode == Assembler::notEqual) {
11329       $$emit$$"jp,u,s   $labl\n\t"
11330       $$emit$$"j$cop,u,s   $labl"
11331     } else {
11332       $$emit$$"jp,u,s   done\n\t"
11333       $$emit$$"j$cop,u,s  $labl\n\t"
11334       $$emit$$"done:"
11335     }
11336   %}
        // Declared encoded length in bytes: both paths emit exactly two jccb
        // instructions (single-branch short forms in this section declare 2).
11337   size(4);
11338   ins_encode %{
11339     Label* l = $labl$$label;
11340     if ($cop$$cmpcode == Assembler::notEqual) {
            // Taken if parity is set OR if not-equal.
11341       __ jccb(Assembler::parity, *l);
11342       __ jccb(Assembler::notEqual, *l);
11343     } else if ($cop$$cmpcode == Assembler::equal) {
11344       Label done;
            // Parity set: skip the jump; otherwise jump if equal.
11345       __ jccb(Assembler::parity, done);
11346       __ jccb(Assembler::equal, *l);
11347       __ bind(done);
11348     } else {
11349        ShouldNotReachHere();
11350     }
11351   %}
11352   ins_pipe(pipe_jcc);
        // Marks this as a short-branch variant, excluded from general matching.
11353   ins_short_branch(1);
11354 %}
11355 
11356 // ============================================================================
11357 // inlined locking and unlocking
11358 
// Inlined monitor-enter fast path: matches FastLock and produces its outcome
// in the flags register. box is pinned to RBX and tmp to RAX; tmp and scr
// are scratch (TEMP) and box is consumed and clobbered (USE_KILL).
// The actual instruction sequence lives in the Fast_Lock encoding class.
11359 instruct cmpFastLock(rFlagsReg cr,
11360                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11361 %{
11362   match(Set cr (FastLock object box));
11363   effect(TEMP tmp, TEMP scr, USE_KILL box);
11364 
11365   ins_cost(300);
11366   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
11367   ins_encode(Fast_Lock(object, box, tmp, scr));
11368   ins_pipe(pipe_slow);
11369 %}
11370 
// Inlined monitor-exit fast path: matches FastUnlock and produces its outcome
// in the flags register. box is pinned to RAX and is consumed and clobbered
// (USE_KILL); tmp is scratch (TEMP).
// The actual instruction sequence lives in the Fast_Unlock encoding class.
11371 instruct cmpFastUnlock(rFlagsReg cr,
11372                        rRegP object, rax_RegP box, rRegP tmp)
11373 %{
11374   match(Set cr (FastUnlock object box));
11375   effect(TEMP tmp, USE_KILL box);
11376 
11377   ins_cost(300);
11378   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
11379   ins_encode(Fast_Unlock(object, box, tmp));
11380   ins_pipe(pipe_slow);
11381 %}
11382 
11383 
11384 // ============================================================================
11385 // Safepoint Instructions
// Safepoint poll used when the polling page is not "far": a testl of rax
// against the polling page through an AddressLiteral tagged with poll_type
// relocation. Flags are clobbered.
11386 instruct safePoint_poll(rFlagsReg cr)
11387 %{
11388   predicate(!Assembler::is_polling_page_far());
11389   match(SafePoint);
11390   effect(KILL cr);
11391 
11392   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11393             "# Safepoint: poll for GC" %}
11394   ins_cost(125);
11395   ins_encode %{
11396     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
11397     __ testl(rax, addr);
11398   %}
11399   ins_pipe(ialu_reg_mem);
11400 %}
11401 
// Safepoint poll used when the polling page is "far": the page address is
// supplied in a register operand (poll) and the test reads [$poll + 0].
// Flags are clobbered.
11402 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11403 %{
11404   predicate(Assembler::is_polling_page_far());
11405   match(SafePoint poll);
11406   effect(KILL cr, USE poll);
11407 
11408   format %{ "testl  rax, [$poll]\t"
11409             "# Safepoint: poll for GC" %}
11410   ins_cost(125);
11411   ins_encode %{
          // Tag the following instruction with poll_type relocation.
11412     __ relocate(relocInfo::poll_type);
11413     __ testl(rax, Address($poll$$Register, 0));
11414   %}
11415   ins_pipe(ialu_reg_mem);
11416 %}
11417 
11418 // ============================================================================
11419 // Procedure Call/Return Instructions
11420 // Call Java Static Instruction
11421 // Note: If this code changes, the corresponding ret_addr_offset() and
11422 //       compute_padding() functions will have to be adjusted.
// Static Java call for call sites that are NOT method-handle invokes
// (the complementary predicate is on CallStaticJavaHandle).
11423 instruct CallStaticJavaDirect(method meth) %{
11424   match(CallStaticJava);
11425   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
11426   effect(USE meth);
11427 
11428   ins_cost(300);
11429   format %{ "call,static " %}
11430   opcode(0xE8); /* E8 cd */
        // Encoding classes run in order: clear_avx, the call itself, call_epilog.
11431   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
11432   ins_pipe(pipe_slow);
11433   ins_alignment(4);
11434 %}
11435 
11436 // Call Java Static Instruction (method handle version)
11437 // Note: If this code changes, the corresponding ret_addr_offset() and
11438 //       compute_padding() functions will have to be adjusted.
// Static Java call for method-handle invoke call sites. Differs from
// CallStaticJavaDirect by wrapping the call in preserve_SP / restore_SP and
// by taking an RBP-pinned operand.
11439 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
11440   match(CallStaticJava);
11441   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
11442   effect(USE meth);
11443   // RBP is saved by all callees (for interpreter stack correction).
11444   // We use it here for a similar purpose, in {preserve,restore}_SP.
11445 
11446   ins_cost(300);
11447   format %{ "call,static/MethodHandle " %}
11448   opcode(0xE8); /* E8 cd */
11449   ins_encode(clear_avx, preserve_SP,
11450              Java_Static_Call(meth),
11451              restore_SP,
11452              call_epilog);
11453   ins_pipe(pipe_slow);
11454   ins_alignment(4);
11455 %}
11456 
11457 // Call Java Dynamic Instruction
11458 // Note: If this code changes, the corresponding ret_addr_offset() and
11459 //       compute_padding() functions will have to be adjusted.
// Dynamically dispatched Java call (matches CallDynamicJava). Per the format
// string, rax is loaded with Universe::non_oop_word() before the call.
11460 instruct CallDynamicJavaDirect(method meth)
11461 %{
11462   match(CallDynamicJava);
11463   effect(USE meth);
11464 
11465   ins_cost(300);
11466   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11467             "call,dynamic " %}
11468   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
11469   ins_pipe(pipe_slow);
11470   ins_alignment(4);
11471 %}
11472 
11473 // Call Runtime Instruction
// Call into the runtime (matches CallRuntime): clear_avx followed by the
// Java_To_Runtime encoding. No call_epilog and no alignment are declared.
11474 instruct CallRuntimeDirect(method meth)
11475 %{
11476   match(CallRuntime);
11477   effect(USE meth);
11478 
11479   ins_cost(300);
11480   format %{ "call,runtime " %}
11481   ins_encode(clear_avx, Java_To_Runtime(meth));
11482   ins_pipe(pipe_slow);
11483 %}
11484 
11485 // Call runtime without safepoint
// Leaf runtime call (matches CallLeaf). Encoded identically to
// CallRuntimeDirect: clear_avx followed by Java_To_Runtime.
11486 instruct CallLeafDirect(method meth)
11487 %{
11488   match(CallLeaf);
11489   effect(USE meth);
11490 
11491   ins_cost(300);
11492   format %{ "call_leaf,runtime " %}
11493   ins_encode(clear_avx, Java_To_Runtime(meth));
11494   ins_pipe(pipe_slow);
11495 %}
11496 
11497 // Call runtime without safepoint
// Leaf runtime call for CallLeafNoFP. Unlike CallLeafDirect, the encoding
// omits clear_avx and consists of Java_To_Runtime only.
// NOTE(review): presumably safe because the callee uses no floating point --
// confirm.
11498 instruct CallLeafNoFPDirect(method meth)
11499 %{
11500   match(CallLeafNoFP);
11501   effect(USE meth);
11502 
11503   ins_cost(300);
11504   format %{ "call_leaf_nofp,runtime " %}
11505   ins_encode(Java_To_Runtime(meth));
11506   ins_pipe(pipe_slow);
11507 %}
11508 
11509 // Return Instruction
11510 // Remove the return address & jump to it.
11511 // Notice: We always emit a nop after a ret to make sure there is room
11512 // for safepoint patching
// Method return (matches Return), encoded as the single primary opcode 0xC3.
// NOTE(review): the comment preceding this definition says a nop is always
// emitted after the ret, but the encoding listed here contains only OpcP;
// confirm whether that nop is produced elsewhere or the comment is stale.
11513 instruct Ret()
11514 %{
11515   match(Return);
11516 
11517   format %{ "ret" %}
11518   opcode(0xC3);
11519   ins_encode(OpcP);
11520   ins_pipe(pipe_jmp);
11521 %}
11522 
11523 // Tail Call; Jump from runtime stub to Java code.
11524 // Also known as an 'interprocedural jump'.
11525 // Target of jump will eventually return to caller.
11526 // TailJump below removes the return address.
// Tail call: indirect jump through a register (FF /4). The target may be any
// pointer register of class no_rbp_RegP; method_oop is pinned to RBX and is
// not part of the encoding.
11527 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11528 %{
11529   match(TailCall jump_target method_oop);
11530 
11531   ins_cost(300);
11532   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11533   opcode(0xFF, 0x4); /* Opcode FF /4 */
11534   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11535   ins_pipe(pipe_jmp);
11536 %}
11537 
11538 // Tail Jump; remove the return address; jump to target.
11539 // TailCall above leaves the return address around.
// Tail jump: pops the return address into rdx, then jumps indirectly through
// jump_target (FF /4). ex_oop is pinned to RAX and is not part of the
// encoding.
11540 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11541 %{
11542   match(TailJump jump_target ex_oop);
11543 
11544   ins_cost(300);
11545   format %{ "popq    rdx\t# pop return address\n\t"
11546             "jmp     $jump_target" %}
11547   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // The pop is emitted as a literal opcode byte ahead of the jump encoding.
11548   ins_encode(Opcode(0x5a), // popq rdx
11549              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11550   ins_pipe(pipe_jmp);
11551 %}
11552 
11553 // Create exception oop: created by stack-crawling runtime code.
11554 // Created exception is now available to this handler, and is setup
11555 // just prior to jumping to this handler.  No code emitted.
// Placeholder that defines the exception oop as living in RAX (matches
// CreateEx). Declares size 0 and an empty encoding, so no bytes are emitted.
11556 instruct CreateException(rax_RegP ex_oop)
11557 %{
11558   match(Set ex_oop (CreateEx));
11559 
11560   size(0);
11561   // use the following format syntax
11562   format %{ "# exception oop is in rax; no code emitted" %}
11563   ins_encode();
11564   ins_pipe(empty);
11565 %}
11566 
11567 // Rethrow exception:
11568 // The exception oop will come in the first argument position.
11569 // Then JUMP (not call) to the rethrow stub code.
// Rethrow (matches Rethrow): per the format string, a jump to the rethrow
// stub. The sequence is produced by the enc_rethrow encoding class.
11570 instruct RethrowException()
11571 %{
11572   match(Rethrow);
11573 
11574   // use the following format syntax
11575   format %{ "jmp     rethrow_stub" %}
11576   ins_encode(enc_rethrow);
11577   ins_pipe(pipe_jmp);
11578 %}
11579 
11580 
11581 // ============================================================================
11582 // This name is KNOWN by the ADLC and cannot be changed.
11583 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
11584 // for this guy.
// Materializes the current-thread pointer (matches ThreadLocal). The result
// operand is pinned to R15, so size is 0 and the encoding is empty: no code
// is emitted.
11585 instruct tlsLoadP(r15_RegP dst) %{
11586   match(Set dst (ThreadLocal));
11587   effect(DEF dst);
11588 
11589   size(0);
11590   format %{ "# TLS is in R15" %}
11591   ins_encode( /*empty encoding*/ );
11592   ins_pipe(ialu_reg_reg);
11593 %}
11594 
11595 
11596 //----------PEEPHOLE RULES-----------------------------------------------------
11597 // These must follow all instruction definitions as they use the names
11598 // defined in the instructions definitions.
11599 //
11600 // peepmatch ( root_instr_name [preceding_instruction]* );
11601 //
11602 // peepconstraint %{
11603 // (instruction_number.operand_name relational_op instruction_number.operand_name
11604 //  [, ...] );
11605 // // instruction numbers are zero-based using left to right order in peepmatch
11606 //
11607 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11608 // // provide an instruction_number.operand_name for each operand that appears
11609 // // in the replacement instruction's match rule
11610 //
11611 // ---------VM FLAGS---------------------------------------------------------
11612 //
11613 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11614 //
11615 // Each peephole rule is given an identifying number starting with zero and
11616 // increasing by one in the order seen by the parser.  An individual peephole
11617 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11618 // on the command-line.
11619 //
11620 // ---------CURRENT LIMITATIONS----------------------------------------------
11621 //
11622 // Only match adjacent instructions in same basic block
11623 // Only equality constraints
11624 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11625 // Only one replacement instruction
11626 //
11627 // ---------EXAMPLE----------------------------------------------------------
11628 //
11629 // // pertinent parts of existing instructions in architecture description
11630 // instruct movI(rRegI dst, rRegI src)
11631 // %{
11632 //   match(Set dst (CopyI src));
11633 // %}
11634 //
11635 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11636 // %{
11637 //   match(Set dst (AddI dst src));
11638 //   effect(KILL cr);
11639 // %}
11640 //
11641 // // Change (inc mov) to lea
11642 // peephole %{
11643 //   // increment preceded by register-register move
11644 //   peepmatch ( incI_rReg movI );
11645 //   // require that the destination register of the increment
11646 //   // match the destination register of the move
11647 //   peepconstraint ( 0.dst == 1.dst );
11648 //   // construct a replacement instruction that sets
11649 //   // the destination to ( move's source register + one )
11650 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11651 // %}
11652 //
11653 
11654 // The implementation no longer uses movX instructions since the
11655 // machine-independent system no longer uses CopyX nodes.
11656 //
11657 // peephole
11658 // %{
11659 //   peepmatch (incI_rReg movI);
11660 //   peepconstraint (0.dst == 1.dst);
11661 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11662 // %}
11663 
11664 // peephole
11665 // %{
11666 //   peepmatch (decI_rReg movI);
11667 //   peepconstraint (0.dst == 1.dst);
11668 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11669 // %}
11670 
11671 // peephole
11672 // %{
11673 //   peepmatch (addI_rReg_imm movI);
11674 //   peepconstraint (0.dst == 1.dst);
11675 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11676 // %}
11677 
11678 // peephole
11679 // %{
11680 //   peepmatch (incL_rReg movL);
11681 //   peepconstraint (0.dst == 1.dst);
11682 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11683 // %}
11684 
11685 // peephole
11686 // %{
11687 //   peepmatch (decL_rReg movL);
11688 //   peepconstraint (0.dst == 1.dst);
11689 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11690 // %}
11691 
11692 // peephole
11693 // %{
11694 //   peepmatch (addL_rReg_imm movL);
11695 //   peepconstraint (0.dst == 1.dst);
11696 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11697 // %}
11698 
11699 // peephole
11700 // %{
11701 //   peepmatch (addP_rReg_imm movP);
11702 //   peepconstraint (0.dst == 1.dst);
11703 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11704 // %}
11705 
11706 // // Change load of spilled value to only a spill
11707 // instruct storeI(memory mem, rRegI src)
11708 // %{
11709 //   match(Set mem (StoreI mem src));
11710 // %}
11711 //
11712 // instruct loadI(rRegI dst, memory mem)
11713 // %{
11714 //   match(Set dst (LoadI mem));
11715 // %}
11716 //
11717 
// Redundant int reload elimination.  Instruction numbers are zero-based in
// peepmatch order: 0 is the root loadI, 1 is the storeI that precedes it.
// When the load reads the same memory operand the store wrote (1.mem == 0.mem)
// into the same register the store took its value from (1.src == 0.dst),
// the pair is replaced by the store alone.
11718 peephole
11719 %{
11720   peepmatch (loadI storeI);
11721   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11722   peepreplace (storeI(1.mem 1.mem 1.src));  // one entry per operand occurrence in storeI's match rule: Set mem (StoreI mem src)
11723 %}
11724 
// Redundant long reload elimination.  Instruction numbers are zero-based in
// peepmatch order: 0 is the root loadL, 1 is the storeL that precedes it.
// When the load reads the same memory operand the store wrote (1.mem == 0.mem)
// into the same register the store took its value from (1.src == 0.dst),
// the pair is replaced by the store alone.
11725 peephole
11726 %{
11727   peepmatch (loadL storeL);
11728   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11729   peepreplace (storeL(1.mem 1.mem 1.src));  // operands given in storeL match-rule order; presumably Set mem (StoreL mem src) -- confirm
11730 %}
11731 
11732 //----------SMARTSPILL RULES---------------------------------------------------
11733 // These must follow all instruction definitions as they use the names
11734 // defined in the instruction definitions.