1 //
   2 // Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
// Each 64-bit general register is described by two reg_def entries: the
// register itself and a "_H" companion whose VMReg is the ->next() slot of
// the same register.  Both entries of a pair share the same save types and
// the same hardware encoding.
//
// Column order: (Java save type, C convention save type, ideal type,
//                encoding, VMReg).

// RAX, RCX, RDX: save-on-call under both conventions.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call for Java code, save-on-entry in the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP: no-save under both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in their C convention save type: save-on-entry
// when _WIN64 is defined, save-on-call otherwise.  For Java code they are
// save-on-call in both configurations.
// NOTE(review): the history comment preceding these definitions mentions
// RSI/RDI as SOE registers, which does not match the first column here.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call under both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for Java code, save-on-entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
// Floating Point Registers
// (no floating point registers are defined in this register block)
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
// Allocation order for the general registers, highest priority first.
// Every register is immediately followed by its "_H" companion, and RSP
// is placed last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
// Class for all pointer registers (including RSP)
// Lists every one of the sixteen general registers together with its
// "_H" companion.
reg_class any_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
 186 
// Class for all pointer registers except RSP
// Note: R12 and R15 are also absent from this class and from every
// ptr_no_* class below.  (R15 has its own singleton class as the TLS
// pointer; R12 is presumably reserved as well -- TODO confirm.)
reg_class ptr_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R13, R13_H,
                  R14, R14_H);

// Class for all pointer registers except RAX and RSP
reg_class ptr_no_rax_reg(RDX, RDX_H,
                         RBP, RBP_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);

// Class for all pointer registers except RBP and RSP
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);

// Class for all pointer registers except RAX, RBX and RSP
reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
                             RBP, RBP_H,
                             RDI, RDI_H,
                             RSI, RSI_H,
                             RCX, RCX_H,
                             R8,  R8_H,
                             R9,  R9_H,
                             R10, R10_H,
                             R11, R11_H,
                             R13, R13_H,
                             R14, R14_H);
 241 
// Single-register pointer classes.  Each one names exactly one 64-bit
// register (the register plus its "_H" companion).

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);
 262 
// Class for all long registers (except RSP)
// Note: R12 and R15 are also absent from this class and from every
// long_no_* class below.
reg_class long_reg(RAX, RAX_H,
                   RDX, RDX_H,
                   RBP, RBP_H,
                   RDI, RDI_H,
                   RSI, RSI_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
                   R13, R13_H,
                   R14, R14_H);

// Class for all long registers except RAX, RDX (and RSP)
reg_class long_no_rax_rdx_reg(RBP, RBP_H,
                              RDI, RDI_H,
                              RSI, RSI_H,
                              RCX, RCX_H,
                              RBX, RBX_H,
                              R8,  R8_H,
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
                              R13, R13_H,
                              R14, R14_H);

// Class for all long registers except RCX (and RSP)
reg_class long_no_rcx_reg(RBP, RBP_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RAX, RAX_H,
                          RDX, RDX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);

// Class for all long registers except RAX (and RSP)
reg_class long_no_rax_reg(RBP, RBP_H,
                          RDX, RDX_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 318 
// Single-register long classes (register plus its "_H" companion).

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);
 327 
// Class for all int registers (except RSP)
// Int classes list only the base register, without the "_H" companion.
// Note: R12 and R15 are also absent from this class and from every
// int_no_* class below.
reg_class int_reg(RAX,
                  RDX,
                  RBP,
                  RDI,
                  RSI,
                  RCX,
                  RBX,
                  R8,
                  R9,
                  R10,
                  R11,
                  R13,
                  R14);

// Class for all int registers except RCX (and RSP)
reg_class int_no_rcx_reg(RAX,
                         RDX,
                         RBP,
                         RDI,
                         RSI,
                         RBX,
                         R8,
                         R9,
                         R10,
                         R11,
                         R13,
                         R14);

// Class for all int registers except RAX, RDX (and RSP)
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R13,
                             R14);
 369 
// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer (disabled: no RIP reg_def exists
// in this register block)
// reg_class ip_reg(RIP);
 387 
 388 %}
 389 
 390 //----------SOURCE BLOCK-------------------------------------------------------
 391 // This is a block of C++ code which provides values, functions, and
 392 // definitions necessary in the rest of the architecture description
 393 source %{
// Short names for the Assembler operand kinds that are passed as the
// "format" argument of the relocating emit helpers in this block.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand so that "__ insn(...)" expands to "_masm.insn(...)".  A
// MacroAssembler named _masm must be in scope wherever it is used.
#define __ _masm.
 398 
 399 static int preserve_SP_size() {
 400   return 3;  // rex.w, op, rm(reg/reg)
 401 }
 402 static int clear_avx_size() {
 403   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 404 }
 405 
 406 // !!!!! Special hack to get all types of calls to specify the byte offset
 407 //       from the start of the call to the point where the return address
 408 //       will point.
 409 int MachCallStaticJavaNode::ret_addr_offset()
 410 {
 411   int offset = 5; // 5 bytes from start of call to where return address points
 412   offset += clear_avx_size();
 413   if (_method_handle_invoke)
 414     offset += preserve_SP_size();
 415   return offset;
 416 }
 417 
 418 int MachCallDynamicJavaNode::ret_addr_offset()
 419 {
 420   int offset = 15; // 15 bytes from start of call to where return address points
 421   offset += clear_avx_size();
 422   return offset;
 423 }
 424 
 425 int MachCallRuntimeNode::ret_addr_offset() {
 426   int offset = 13; // movq r10,#addr; callq (r10)
 427   offset += clear_avx_size();
 428   return offset;
 429 }
 430 
 431 // Indicate if the safepoint node needs the polling page as an input,
 432 // it does if the polling page is more than disp32 away.
 433 bool SafePointNode::needs_polling_address_input()
 434 {
 435   return Assembler::is_polling_page_far();
 436 }
 437 
 438 //
 439 // Compute padding required for nodes which need alignment
 440 //
 441 
 442 // The address of the call instruction needs to be 4-byte aligned to
 443 // ensure that it does not span a cache line so that it can be patched.
 444 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 445 {
 446   current_offset += clear_avx_size(); // skip vzeroupper
 447   current_offset += 1; // skip call opcode byte
 448   return round_to(current_offset, alignment_required()) - current_offset;
 449 }
 450 
 451 // The address of the call instruction needs to be 4-byte aligned to
 452 // ensure that it does not span a cache line so that it can be patched.
 453 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 454 {
 455   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 456   current_offset += clear_avx_size(); // skip vzeroupper
 457   current_offset += 1; // skip call opcode byte
 458   return round_to(current_offset, alignment_required()) - current_offset;
 459 }
 460 
 461 // The address of the call instruction needs to be 4-byte aligned to
 462 // ensure that it does not span a cache line so that it can be patched.
 463 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 464 {
 465   current_offset += clear_avx_size(); // skip vzeroupper
 466   current_offset += 11; // skip movq instruction + call opcode byte
 467   return round_to(current_offset, alignment_required()) - current_offset;
 468 }
 469 
 470 // EMIT_RM()
 471 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 472   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 473   cbuf.insts()->emit_int8(c);
 474 }
 475 
 476 // EMIT_CC()
 477 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 478   unsigned char c = (unsigned char) (f1 | f2);
 479   cbuf.insts()->emit_int8(c);
 480 }
 481 
 482 // EMIT_OPCODE()
 483 void emit_opcode(CodeBuffer &cbuf, int code) {
 484   cbuf.insts()->emit_int8((unsigned char) code);
 485 }
 486 
 487 // EMIT_OPCODE() w/ relocation information
 488 void emit_opcode(CodeBuffer &cbuf,
 489                  int code, relocInfo::relocType reloc, int offset, int format)
 490 {
 491   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 492   emit_opcode(cbuf, code);
 493 }
 494 
 495 // EMIT_D8()
 496 void emit_d8(CodeBuffer &cbuf, int d8) {
 497   cbuf.insts()->emit_int8((unsigned char) d8);
 498 }
 499 
 500 // EMIT_D16()
 501 void emit_d16(CodeBuffer &cbuf, int d16) {
 502   cbuf.insts()->emit_int16(d16);
 503 }
 504 
 505 // EMIT_D32()
 506 void emit_d32(CodeBuffer &cbuf, int d32) {
 507   cbuf.insts()->emit_int32(d32);
 508 }
 509 
 510 // EMIT_D64()
 511 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 512   cbuf.insts()->emit_int64(d64);
 513 }
 514 
 515 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 516 void emit_d32_reloc(CodeBuffer& cbuf,
 517                     int d32,
 518                     relocInfo::relocType reloc,
 519                     int format)
 520 {
 521   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 522   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 523   cbuf.insts()->emit_int32(d32);
 524 }
 525 
 526 // emit 32 bit value and construct relocation entry from RelocationHolder
 527 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 528 #ifdef ASSERT
 529   if (rspec.reloc()->type() == relocInfo::oop_type &&
 530       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 531     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 532     assert(cast_to_oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 533   }
 534 #endif
 535   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 536   cbuf.insts()->emit_int32(d32);
 537 }
 538 
 539 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 540   address next_ip = cbuf.insts_end() + 4;
 541   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 542                  external_word_Relocation::spec(addr),
 543                  RELOC_DISP32);
 544 }
 545 
 546 
 547 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 548 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 549   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 550   cbuf.insts()->emit_int64(d64);
 551 }
 552 
 553 // emit 64 bit value and construct relocation entry from RelocationHolder
 554 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 555 #ifdef ASSERT
 556   if (rspec.reloc()->type() == relocInfo::oop_type &&
 557       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 558     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 559     assert(cast_to_oop(d64)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d64)->is_scavengable()),
 560            "cannot embed scavengable oops in code");
 561   }
 562 #endif
 563   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 564   cbuf.insts()->emit_int64(d64);
 565 }
 566 
 567 // Access stack slot for load or store
 568 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 569 {
 570   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 571   if (-0x80 <= disp && disp < 0x80) {
 572     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 573     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 574     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 575   } else {
 576     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 577     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 578     emit_d32(cbuf, disp);     // Displacement // R/M byte
 579   }
 580 }
 581 
 582    // rRegI ereg, memory mem) %{    // emit_reg_mem
 583 void encode_RegMem(CodeBuffer &cbuf,
 584                    int reg,
 585                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 586 {
 587   assert(disp_reloc == relocInfo::none, "cannot have disp");
 588   int regenc = reg & 7;
 589   int baseenc = base & 7;
 590   int indexenc = index & 7;
 591 
 592   // There is no index & no scale, use form without SIB byte
 593   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 594     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 595     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 596       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 597     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 598       // If 8-bit displacement, mode 0x1
 599       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 600       emit_d8(cbuf, disp);
 601     } else {
 602       // If 32-bit displacement
 603       if (base == -1) { // Special flag for absolute address
 604         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 605         if (disp_reloc != relocInfo::none) {
 606           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 607         } else {
 608           emit_d32(cbuf, disp);
 609         }
 610       } else {
 611         // Normal base + offset
 612         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 613         if (disp_reloc != relocInfo::none) {
 614           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 615         } else {
 616           emit_d32(cbuf, disp);
 617         }
 618       }
 619     }
 620   } else {
 621     // Else, encode with the SIB byte
 622     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 623     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 624       // If no displacement
 625       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 626       emit_rm(cbuf, scale, indexenc, baseenc);
 627     } else {
 628       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 629         // If 8-bit displacement, mode 0x1
 630         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 631         emit_rm(cbuf, scale, indexenc, baseenc);
 632         emit_d8(cbuf, disp);
 633       } else {
 634         // If 32-bit displacement
 635         if (base == 0x04 ) {
 636           emit_rm(cbuf, 0x2, regenc, 0x4);
 637           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 638         } else {
 639           emit_rm(cbuf, 0x2, regenc, 0x4);
 640           emit_rm(cbuf, scale, indexenc, baseenc); // *
 641         }
 642         if (disp_reloc != relocInfo::none) {
 643           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 644         } else {
 645           emit_d32(cbuf, disp);
 646         }
 647       }
 648     }
 649   }
 650 }
 651 
// This could be in MacroAssembler but it's fairly C2 specific
//
// Emits a flag fixup to be placed after a floating point compare.  When
// the parity flag is clear the whole sequence is skipped; otherwise the
// flags are pushed, masked in place on the stack, and popped back.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  // Parity clear: nothing to fix, jump over the sequence.
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  // The mask is applied directly to the flags image at the top of stack.
  __ andq(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
 672 
// Emits code that turns the flags of a preceding floating point compare
// into an integer in 'dst'.  dst is preloaded with -1 and keeps that value
// when the parity or the 'below' condition holds; otherwise it is set from
// the notEqual condition and zero-extended from a byte (presumably giving
// 1 for not-equal and 0 for equal -- confirm against setb's definition).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  // Parity set or below: the result stays -1.
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  // Remaining cases: materialize the notEqual condition in dst.
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}
 682 
 683 
 684 //=============================================================================
// The constant base node's output register mask is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 686 
 687 int Compile::ConstantTable::calculate_table_base_offset() const {
 688   return 0;  // absolute addressing, no offset
 689 }
 690 
 691 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 692 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 693   ShouldNotReachHere();
 694 }
 695 
 696 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 697   // Empty encoding
 698 }
 699 
 700 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 701   return 0;
 702 }
 703 
 704 #ifndef PRODUCT
 705 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 706   st->print("# MachConstantBaseNode (empty encoding)");
 707 }
 708 #endif
 709 
 710 
 711 //=============================================================================
 712 #ifndef PRODUCT
 713 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 714   Compile* C = ra_->C;
 715 
 716   int framesize = C->frame_slots() << LogBytesPerInt;
 717   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 718   // Remove wordSize for return addr which is already pushed.
 719   framesize -= wordSize;
 720 
 721   if (C->need_stack_bang(framesize)) {
 722     framesize -= wordSize;
 723     st->print("# stack bang");
 724     st->print("\n\t");
 725     st->print("pushq   rbp\t# Save rbp");
 726     if (framesize) {
 727       st->print("\n\t");
 728       st->print("subq    rsp, #%d\t# Create frame",framesize);
 729     }
 730   } else {
 731     st->print("subq    rsp, #%d\t# Create frame",framesize);
 732     st->print("\n\t");
 733     framesize -= wordSize;
 734     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 735   }
 736 
 737   if (VerifyStackAtCalls) {
 738     st->print("\n\t");
 739     framesize -= wordSize;
 740     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 741 #ifdef ASSERT
 742     st->print("\n\t");
 743     st->print("# stack alignment check");
 744 #endif
 745   }
 746   st->cr();
 747 }
 748 #endif
 749 
 750 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 751   Compile* C = ra_->C;
 752   MacroAssembler _masm(&cbuf);
 753 
 754   int framesize = C->frame_slots() << LogBytesPerInt;
 755 
 756   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 757 
 758   C->set_frame_complete(cbuf.insts_size());
 759 
 760   if (C->has_mach_constant_base_node()) {
 761     // NOTE: We set the table base offset here because users might be
 762     // emitted before MachConstantBaseNode.
 763     Compile::ConstantTable& constant_table = C->constant_table();
 764     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 765   }
 766 }
 767 
 768 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 769 {
 770   return MachNode::size(ra_); // too many variables; just compute it
 771                               // the hard way
 772 }
 773 
 774 int MachPrologNode::reloc() const
 775 {
 776   return 0; // a large enough number
 777 }
 778 
 779 //=============================================================================
 780 #ifndef PRODUCT
 781 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 782 {
 783   Compile* C = ra_->C;
 784   if (C->max_vector_size() > 16) {
 785     st->print("vzeroupper");
 786     st->cr(); st->print("\t");
 787   }
 788 
 789   int framesize = C->frame_slots() << LogBytesPerInt;
 790   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 791   // Remove word for return adr already pushed
 792   // and RBP
 793   framesize -= 2*wordSize;
 794 
 795   if (framesize) {
 796     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 797     st->print("\t");
 798   }
 799 
 800   st->print_cr("popq   rbp");
 801   if (do_polling() && C->is_method_compilation()) {
 802     st->print("\t");
 803     if (Assembler::is_polling_page_far()) {
 804       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 805                    "testl  rax, [rscratch1]\t"
 806                    "# Safepoint: poll for GC");
 807     } else {
 808       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 809                    "# Safepoint: poll for GC");
 810     }
 811   }
 812 }
 813 #endif
 814 
 815 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 816 {
 817   Compile* C = ra_->C;
 818   if (C->max_vector_size() > 16) {
 819     // Clear upper bits of YMM registers when current compiled code uses
 820     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 821     MacroAssembler _masm(&cbuf);
 822     __ vzeroupper();
 823   }
 824 
 825   int framesize = C->frame_slots() << LogBytesPerInt;
 826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 827   // Remove word for return adr already pushed
 828   // and RBP
 829   framesize -= 2*wordSize;
 830 
 831   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 832 
 833   if (framesize) {
 834     emit_opcode(cbuf, Assembler::REX_W);
 835     if (framesize < 0x80) {
 836       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 837       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 838       emit_d8(cbuf, framesize);
 839     } else {
 840       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 841       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 842       emit_d32(cbuf, framesize);
 843     }
 844   }
 845 
 846   // popq rbp
 847   emit_opcode(cbuf, 0x58 | RBP_enc);
 848 
 849   if (do_polling() && C->is_method_compilation()) {
 850     MacroAssembler _masm(&cbuf);
 851     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 852     if (Assembler::is_polling_page_far()) {
 853       __ lea(rscratch1, polling_page);
 854       __ relocate(relocInfo::poll_return_type);
 855       __ testl(rax, Address(rscratch1, 0));
 856     } else {
 857       __ testl(rax, polling_page);
 858     }
 859   }
 860 }
 861 
 862 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 863 {
 864   return MachNode::size(ra_); // too many variables; just compute it
 865                               // the hard way
 866 }
 867 
 868 int MachEpilogNode::reloc() const
 869 {
 870   return 2; // a large enough number
 871 }
 872 
 873 const Pipeline* MachEpilogNode::pipeline() const
 874 {
 875   return MachNode::pipeline_class();
 876 }
 877 
 878 int MachEpilogNode::safepoint_offset() const
 879 {
 880   return 0;
 881 }
 882 
 883 //=============================================================================
 884 
// Coarse class of an allocated location, as computed by rc_class() and used
// by the spill-copy code to choose the kind of move to emit.
enum RC {
  rc_bad,    // the OptoReg name is not valid
  rc_int,    // VMReg for which is_Register() holds (general register)
  rc_float,  // XMM register
  rc_stack   // stack slot
};
 891 
 892 static enum RC rc_class(OptoReg::Name reg)
 893 {
 894   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 895 
 896   if (OptoReg::is_stack(reg)) return rc_stack;
 897 
 898   VMReg r = OptoReg::as_VMReg(reg);
 899 
 900   if (r->is_Register()) return rc_int;
 901 
 902   assert(r->is_XMMRegister(), "must be");
 903   return rc_float;
 904 }
 905 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Both are used by MachSpillCopyNode::implementation() for vector copies:
// 'cbuf' and 'st' are the code buffer and the output stream handed to that
// function, and 'ireg' is the ideal register kind of the vector (Op_VecS,
// Op_VecD, Op_VecX or Op_VecY).

// Vector register -> vector register move.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between a register and the stack slot at 'stack_offset'.
// The caller passes is_load == true for stack -> register and false for
// register -> stack.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
 912 
 913 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 914                                       int dst_offset, uint ireg, outputStream* st) {
 915   if (cbuf) {
 916     MacroAssembler _masm(cbuf);
 917     switch (ireg) {
 918     case Op_VecS:
 919       __ movq(Address(rsp, -8), rax);
 920       __ movl(rax, Address(rsp, src_offset));
 921       __ movl(Address(rsp, dst_offset), rax);
 922       __ movq(rax, Address(rsp, -8));
 923       break;
 924     case Op_VecD:
 925       __ pushq(Address(rsp, src_offset));
 926       __ popq (Address(rsp, dst_offset));
 927       break;
 928     case Op_VecX:
 929       __ pushq(Address(rsp, src_offset));
 930       __ popq (Address(rsp, dst_offset));
 931       __ pushq(Address(rsp, src_offset+8));
 932       __ popq (Address(rsp, dst_offset+8));
 933       break;
 934     case Op_VecY:
 935       __ vmovdqu(Address(rsp, -32), xmm0);
 936       __ vmovdqu(xmm0, Address(rsp, src_offset));
 937       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 938       __ vmovdqu(xmm0, Address(rsp, -32));
 939       break;
 940     default:
 941       ShouldNotReachHere();
 942     }
 943 #ifndef PRODUCT
 944   } else {
 945     switch (ireg) {
 946     case Op_VecS:
 947       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 948                 "movl    rax, [rsp + #%d]\n\t"
 949                 "movl    [rsp + #%d], rax\n\t"
 950                 "movq    rax, [rsp - #8]",
 951                 src_offset, dst_offset);
 952       break;
 953     case Op_VecD:
 954       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 955                 "popq    [rsp + #%d]",
 956                 src_offset, dst_offset);
 957       break;
 958      case Op_VecX:
 959       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 960                 "popq    [rsp + #%d]\n\t"
 961                 "pushq   [rsp + #%d]\n\t"
 962                 "popq    [rsp + #%d]",
 963                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 964       break;
 965     case Op_VecY:
 966       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 967                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 968                 "vmovdqu [rsp + #%d], xmm0\n\t"
 969                 "vmovdqu xmm0, [rsp - #32]",
 970                 src_offset, dst_offset);
 971       break;
 972     default:
 973       ShouldNotReachHere();
 974     }
 975 #endif
 976   }
 977 }
 978 
 979 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 980                                        PhaseRegAlloc* ra_,
 981                                        bool do_size,
 982                                        outputStream* st) const {
 983   assert(cbuf != NULL || st  != NULL, "sanity");
 984   // Get registers to move
 985   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 986   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 987   OptoReg::Name dst_second = ra_->get_reg_second(this);
 988   OptoReg::Name dst_first = ra_->get_reg_first(this);
 989 
 990   enum RC src_second_rc = rc_class(src_second);
 991   enum RC src_first_rc = rc_class(src_first);
 992   enum RC dst_second_rc = rc_class(dst_second);
 993   enum RC dst_first_rc = rc_class(dst_first);
 994 
 995   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 996          "must move at least 1 register" );
 997 
 998   if (src_first == dst_first && src_second == dst_second) {
 999     // Self copy, no move
1000     return 0;
1001   }
1002   if (bottom_type()->isa_vect() != NULL) {
1003     uint ireg = ideal_reg();
1004     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1005     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
1006     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1007       // mem -> mem
1008       int src_offset = ra_->reg2offset(src_first);
1009       int dst_offset = ra_->reg2offset(dst_first);
1010       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1011     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1012       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1013     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1014       int stack_offset = ra_->reg2offset(dst_first);
1015       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1016     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1017       int stack_offset = ra_->reg2offset(src_first);
1018       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1019     } else {
1020       ShouldNotReachHere();
1021     }
1022     return 0;
1023   }
1024   if (src_first_rc == rc_stack) {
1025     // mem ->
1026     if (dst_first_rc == rc_stack) {
1027       // mem -> mem
1028       assert(src_second != dst_first, "overlap");
1029       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1030           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1031         // 64-bit
1032         int src_offset = ra_->reg2offset(src_first);
1033         int dst_offset = ra_->reg2offset(dst_first);
1034         if (cbuf) {
1035           MacroAssembler _masm(cbuf);
1036           __ pushq(Address(rsp, src_offset));
1037           __ popq (Address(rsp, dst_offset));
1038 #ifndef PRODUCT
1039         } else {
1040           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1041                     "popq    [rsp + #%d]",
1042                      src_offset, dst_offset);
1043 #endif
1044         }
1045       } else {
1046         // 32-bit
1047         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1048         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1049         // No pushl/popl, so:
1050         int src_offset = ra_->reg2offset(src_first);
1051         int dst_offset = ra_->reg2offset(dst_first);
1052         if (cbuf) {
1053           MacroAssembler _masm(cbuf);
1054           __ movq(Address(rsp, -8), rax);
1055           __ movl(rax, Address(rsp, src_offset));
1056           __ movl(Address(rsp, dst_offset), rax);
1057           __ movq(rax, Address(rsp, -8));
1058 #ifndef PRODUCT
1059         } else {
1060           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1061                     "movl    rax, [rsp + #%d]\n\t"
1062                     "movl    [rsp + #%d], rax\n\t"
1063                     "movq    rax, [rsp - #8]",
1064                      src_offset, dst_offset);
1065 #endif
1066         }
1067       }
1068       return 0;
1069     } else if (dst_first_rc == rc_int) {
1070       // mem -> gpr
1071       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1072           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1073         // 64-bit
1074         int offset = ra_->reg2offset(src_first);
1075         if (cbuf) {
1076           MacroAssembler _masm(cbuf);
1077           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1078 #ifndef PRODUCT
1079         } else {
1080           st->print("movq    %s, [rsp + #%d]\t# spill",
1081                      Matcher::regName[dst_first],
1082                      offset);
1083 #endif
1084         }
1085       } else {
1086         // 32-bit
1087         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1088         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1089         int offset = ra_->reg2offset(src_first);
1090         if (cbuf) {
1091           MacroAssembler _masm(cbuf);
1092           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1093 #ifndef PRODUCT
1094         } else {
1095           st->print("movl    %s, [rsp + #%d]\t# spill",
1096                      Matcher::regName[dst_first],
1097                      offset);
1098 #endif
1099         }
1100       }
1101       return 0;
1102     } else if (dst_first_rc == rc_float) {
1103       // mem-> xmm
1104       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1105           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1106         // 64-bit
1107         int offset = ra_->reg2offset(src_first);
1108         if (cbuf) {
1109           MacroAssembler _masm(cbuf);
1110           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1111 #ifndef PRODUCT
1112         } else {
1113           st->print("%s  %s, [rsp + #%d]\t# spill",
1114                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1115                      Matcher::regName[dst_first],
1116                      offset);
1117 #endif
1118         }
1119       } else {
1120         // 32-bit
1121         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1122         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1123         int offset = ra_->reg2offset(src_first);
1124         if (cbuf) {
1125           MacroAssembler _masm(cbuf);
1126           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1127 #ifndef PRODUCT
1128         } else {
1129           st->print("movss   %s, [rsp + #%d]\t# spill",
1130                      Matcher::regName[dst_first],
1131                      offset);
1132 #endif
1133         }
1134       }
1135       return 0;
1136     }
1137   } else if (src_first_rc == rc_int) {
1138     // gpr ->
1139     if (dst_first_rc == rc_stack) {
1140       // gpr -> mem
1141       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1142           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1143         // 64-bit
1144         int offset = ra_->reg2offset(dst_first);
1145         if (cbuf) {
1146           MacroAssembler _masm(cbuf);
1147           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1148 #ifndef PRODUCT
1149         } else {
1150           st->print("movq    [rsp + #%d], %s\t# spill",
1151                      offset,
1152                      Matcher::regName[src_first]);
1153 #endif
1154         }
1155       } else {
1156         // 32-bit
1157         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1158         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1159         int offset = ra_->reg2offset(dst_first);
1160         if (cbuf) {
1161           MacroAssembler _masm(cbuf);
1162           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1163 #ifndef PRODUCT
1164         } else {
1165           st->print("movl    [rsp + #%d], %s\t# spill",
1166                      offset,
1167                      Matcher::regName[src_first]);
1168 #endif
1169         }
1170       }
1171       return 0;
1172     } else if (dst_first_rc == rc_int) {
1173       // gpr -> gpr
1174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1176         // 64-bit
1177         if (cbuf) {
1178           MacroAssembler _masm(cbuf);
1179           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1180                   as_Register(Matcher::_regEncode[src_first]));
1181 #ifndef PRODUCT
1182         } else {
1183           st->print("movq    %s, %s\t# spill",
1184                      Matcher::regName[dst_first],
1185                      Matcher::regName[src_first]);
1186 #endif
1187         }
1188         return 0;
1189       } else {
1190         // 32-bit
1191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1193         if (cbuf) {
1194           MacroAssembler _masm(cbuf);
1195           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1196                   as_Register(Matcher::_regEncode[src_first]));
1197 #ifndef PRODUCT
1198         } else {
1199           st->print("movl    %s, %s\t# spill",
1200                      Matcher::regName[dst_first],
1201                      Matcher::regName[src_first]);
1202 #endif
1203         }
1204         return 0;
1205       }
1206     } else if (dst_first_rc == rc_float) {
1207       // gpr -> xmm
1208       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1209           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1210         // 64-bit
1211         if (cbuf) {
1212           MacroAssembler _masm(cbuf);
1213           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1214 #ifndef PRODUCT
1215         } else {
1216           st->print("movdq   %s, %s\t# spill",
1217                      Matcher::regName[dst_first],
1218                      Matcher::regName[src_first]);
1219 #endif
1220         }
1221       } else {
1222         // 32-bit
1223         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1224         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1225         if (cbuf) {
1226           MacroAssembler _masm(cbuf);
1227           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1228 #ifndef PRODUCT
1229         } else {
1230           st->print("movdl   %s, %s\t# spill",
1231                      Matcher::regName[dst_first],
1232                      Matcher::regName[src_first]);
1233 #endif
1234         }
1235       }
1236       return 0;
1237     }
1238   } else if (src_first_rc == rc_float) {
1239     // xmm ->
1240     if (dst_first_rc == rc_stack) {
1241       // xmm -> mem
1242       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1243           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1244         // 64-bit
1245         int offset = ra_->reg2offset(dst_first);
1246         if (cbuf) {
1247           MacroAssembler _masm(cbuf);
1248           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1249 #ifndef PRODUCT
1250         } else {
1251           st->print("movsd   [rsp + #%d], %s\t# spill",
1252                      offset,
1253                      Matcher::regName[src_first]);
1254 #endif
1255         }
1256       } else {
1257         // 32-bit
1258         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1259         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1260         int offset = ra_->reg2offset(dst_first);
1261         if (cbuf) {
1262           MacroAssembler _masm(cbuf);
1263           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1264 #ifndef PRODUCT
1265         } else {
1266           st->print("movss   [rsp + #%d], %s\t# spill",
1267                      offset,
1268                      Matcher::regName[src_first]);
1269 #endif
1270         }
1271       }
1272       return 0;
1273     } else if (dst_first_rc == rc_int) {
1274       // xmm -> gpr
1275       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1276           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1277         // 64-bit
1278         if (cbuf) {
1279           MacroAssembler _masm(cbuf);
1280           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1281 #ifndef PRODUCT
1282         } else {
1283           st->print("movdq   %s, %s\t# spill",
1284                      Matcher::regName[dst_first],
1285                      Matcher::regName[src_first]);
1286 #endif
1287         }
1288       } else {
1289         // 32-bit
1290         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1291         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1292         if (cbuf) {
1293           MacroAssembler _masm(cbuf);
1294           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1295 #ifndef PRODUCT
1296         } else {
1297           st->print("movdl   %s, %s\t# spill",
1298                      Matcher::regName[dst_first],
1299                      Matcher::regName[src_first]);
1300 #endif
1301         }
1302       }
1303       return 0;
1304     } else if (dst_first_rc == rc_float) {
1305       // xmm -> xmm
1306       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1307           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1308         // 64-bit
1309         if (cbuf) {
1310           MacroAssembler _masm(cbuf);
1311           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1312 #ifndef PRODUCT
1313         } else {
1314           st->print("%s  %s, %s\t# spill",
1315                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1316                      Matcher::regName[dst_first],
1317                      Matcher::regName[src_first]);
1318 #endif
1319         }
1320       } else {
1321         // 32-bit
1322         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1323         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1324         if (cbuf) {
1325           MacroAssembler _masm(cbuf);
1326           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1327 #ifndef PRODUCT
1328         } else {
1329           st->print("%s  %s, %s\t# spill",
1330                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1331                      Matcher::regName[dst_first],
1332                      Matcher::regName[src_first]);
1333 #endif
1334         }
1335       }
1336       return 0;
1337     }
1338   }
1339 
1340   assert(0," foo ");
1341   Unimplemented();
1342   return 0;
1343 }
1344 
1345 #ifndef PRODUCT
1346 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1347   implementation(NULL, ra_, false, st);
1348 }
1349 #endif
1350 
1351 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1352   implementation(&cbuf, ra_, false, NULL);
1353 }
1354 
1355 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1356   return MachNode::size(ra_);
1357 }
1358 
1359 //=============================================================================
1360 #ifndef PRODUCT
1361 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1362 {
1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1364   int reg = ra_->get_reg_first(this);
1365   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1366             Matcher::regName[reg], offset);
1367 }
1368 #endif
1369 
1370 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1371 {
1372   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1373   int reg = ra_->get_encode(this);
1374   if (offset >= 0x80) {
1375     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1376     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1377     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1378     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1379     emit_d32(cbuf, offset);
1380   } else {
1381     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1382     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1383     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1384     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1385     emit_d8(cbuf, offset);
1386   }
1387 }
1388 
1389 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1390 {
1391   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1392   return (offset < 0x80) ? 5 : 8; // REX
1393 }
1394 
1395 //=============================================================================
1396 #ifndef PRODUCT
1397 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1398 {
1399   if (UseCompressedClassPointers) {
1400     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1401     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1402     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1403   } else {
1404     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1405                  "# Inline cache check");
1406   }
1407   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1408   st->print_cr("\tnop\t# nops to align entry point");
1409 }
1410 #endif
1411 
1412 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1413 {
1414   MacroAssembler masm(&cbuf);
1415   uint insts_size = cbuf.insts_size();
1416   if (UseCompressedClassPointers) {
1417     masm.load_klass(rscratch1, j_rarg0);
1418     masm.cmpptr(rax, rscratch1);
1419   } else {
1420     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1421   }
1422 
1423   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1424 
1425   /* WARNING these NOPs are critical so that verified entry point is properly
1426      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1427   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1428   if (OptoBreakpoint) {
1429     // Leave space for int3
1430     nops_cnt -= 1;
1431   }
1432   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1433   if (nops_cnt > 0)
1434     masm.nop(nops_cnt);
1435 }
1436 
1437 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1438 {
1439   return MachNode::size(ra_); // too many variables; just compute it
1440                               // the hard way
1441 }
1442 
1443 
1444 //=============================================================================
1445 uint size_exception_handler()
1446 {
1447   // NativeCall instruction size is the same as NativeJump.
1448   // Note that this value is also credited (in output.cpp) to
1449   // the size of the code section.
1450   return NativeJump::instruction_size;
1451 }
1452 
1453 // Emit exception handler code.
1454 int emit_exception_handler(CodeBuffer& cbuf)
1455 {
1456 
1457   // Note that the code buffer's insts_mark is always relative to insts.
1458   // That's why we must use the macroassembler to generate a handler.
1459   MacroAssembler _masm(&cbuf);
1460   address base =
1461   __ start_a_stub(size_exception_handler());
1462   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1463   int offset = __ offset();
1464   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1465   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1466   __ end_a_stub();
1467   return offset;
1468 }
1469 
1470 uint size_deopt_handler()
1471 {
1472   // three 5 byte instructions
1473   return 15;
1474 }
1475 
1476 // Emit deopt handler code.
1477 int emit_deopt_handler(CodeBuffer& cbuf)
1478 {
1479 
1480   // Note that the code buffer's insts_mark is always relative to insts.
1481   // That's why we must use the macroassembler to generate a handler.
1482   MacroAssembler _masm(&cbuf);
1483   address base =
1484   __ start_a_stub(size_deopt_handler());
1485   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1486   int offset = __ offset();
1487   address the_pc = (address) __ pc();
1488   Label next;
1489   // push a "the_pc" on the stack without destroying any registers
1490   // as they all may be live.
1491 
1492   // push address of "next"
1493   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1494   __ bind(next);
1495   // adjust it so it matches "the_pc"
1496   __ subptr(Address(rsp, 0), __ offset() - offset);
1497   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1498   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1499   __ end_a_stub();
1500   return offset;
1501 }
1502 
1503 int Matcher::regnum_to_fpu_offset(int regnum)
1504 {
1505   return regnum - 32; // The FP registers are in the second chunk
1506 }
1507 
1508 // This is UltraSparc specific, true just means we have fast l2f conversion
1509 const bool Matcher::convL2FSupported(void) {
1510   return true;
1511 }
1512 
1513 // Is this branch offset short enough that a short branch can be used?
1514 //
1515 // NOTE: If the platform does not provide any short branch variants, then
1516 //       this method should return false for offset 0.
1517 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1518   // The passed offset is relative to address of the branch.
1519   // On 86 a branch displacement is calculated relative to address
1520   // of a next instruction.
1521   offset -= br_size;
1522 
1523   // the short version of jmpConUCF2 contains multiple branches,
1524   // making the reach slightly less
1525   if (rule == jmpConUCF2_rule)
1526     return (-126 <= offset && offset <= 125);
1527   return (-128 <= offset && offset <= 127);
1528 }
1529 
1530 const bool Matcher::isSimpleConstant64(jlong value) {
1531   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1532   //return value == (int) value;  // Cf. storeImmL and immL32.
1533 
1534   // Probably always true, even if a temp register is required.
1535   return true;
1536 }
1537 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// so the count is not expressed in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: eight longs' worth of bytes.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
1543 
1544 // No additional cost for CMOVL.
1545 const int Matcher::long_cmove_cost() { return 0; }
1546 
1547 // No CMOVF/CMOVD with SSE2
1548 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1549 
// Does the CPU require late expand (see block.cpp for description of late expand)?
// Not on amd64.
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// No explicit masking is requested here.
const bool Matcher::need_masked_shift_count = false;
1561 
1562 bool Matcher::narrow_oop_use_complex_address() {
1563   assert(UseCompressedOops, "only for compressed oops code");
1564   return (LogMinObjAlignmentInBytes <= 3);
1565 }
1566 
1567 bool Matcher::narrow_klass_use_complex_address() {
1568   assert(UseCompressedClassPointers, "only for compressed klass code");
1569   return (LogKlassAlignmentInBytes <= 3);
1570 }
1571 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
// Here constants are loaded directly (rematerialized).
const bool Matcher::rematerialize_float_constants = true; // XXX

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// No fixup is requested on amd64.
const bool Matcher::misaligned_doubles_ok = true;
1584 
1585 // No-op on amd64
1586 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1587 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  This file advertises that it does.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1591 
1592 // Are floats conerted to double when stored to stack during deoptimization?
1593 // On x64 it is stored without convertion so we can use normal access.
1594 bool Matcher::float_in_double() { return false; }
1595 
// Do ints take an entire long register or just half?
// Here: an entire long register.
const bool Matcher::int_in_long = true;
1598 
1599 // Tells whether a register is ever used as an argument.  Called at
1600 // startup while the trampoline stubs are built in generateOptoStub:
1601 // any register not listed here will be killed by the VM call in the
1602 // trampoline, so an argument left in it will not be available to
1603 // the callee.
1604 bool Matcher::can_be_java_arg(int reg)
1605 {
1606   const bool gpr_arg = reg ==  RDI_num || reg == RDI_H_num ||
1607                        reg ==  RSI_num || reg == RSI_H_num ||
1608                        reg ==  RDX_num || reg == RDX_H_num ||
1609                        reg ==  RCX_num || reg == RCX_H_num ||
1610                        reg ==   R8_num || reg ==  R8_H_num ||
1611                        reg ==   R9_num || reg ==  R9_H_num ||
1612                        reg ==  R12_num || reg == R12_H_num;
1613   const bool xmm_arg = reg == XMM0_num || reg == XMM0b_num ||
1614                        reg == XMM1_num || reg == XMM1b_num ||
1615                        reg == XMM2_num || reg == XMM2b_num ||
1616                        reg == XMM3_num || reg == XMM3b_num ||
1617                        reg == XMM4_num || reg == XMM4b_num ||
1618                        reg == XMM5_num || reg == XMM5b_num ||
1619                        reg == XMM6_num || reg == XMM6b_num ||
1620                        reg == XMM7_num || reg == XMM7b_num;
1621   return gpr_arg || xmm_arg;
1622 }
1623 
1624 bool Matcher::is_spillable_arg(int reg)
1625 {
1626   return can_be_java_arg(reg);  // exactly the register set accepted by can_be_java_arg
1627 }
1628 
1629 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1630   // In 64-bit mode, code that uses a multiply when the
1631   // divisor is constant is faster than the hardware
1632   // DIV instruction (it uses MulHiL), so always answer false.
1633   return false;
1634 }
1635 
1636 // Register mask for the DIVI projection of divmodI (INT_RAX_REG)
1637 RegMask Matcher::divI_proj_mask() {
1638   return INT_RAX_REG_mask();
1639 }
1640 
1641 // Register mask for the MODI projection of divmodI (INT_RDX_REG)
1642 RegMask Matcher::modI_proj_mask() {
1643   return INT_RDX_REG_mask();
1644 }
1645 
1646 // Register mask for the DIVL projection of divmodL (LONG_RAX_REG)
1647 RegMask Matcher::divL_proj_mask() {
1648   return LONG_RAX_REG_mask();
1649 }
1650 
1651 // Register mask for the MODL projection of divmodL (LONG_RDX_REG)
1652 RegMask Matcher::modL_proj_mask() {
1653   return LONG_RDX_REG_mask();
1654 }
1655 
1656 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1657   return PTR_RBP_REG_mask();  // mask of the PTR_RBP_REG register class
1658 }
1659 
1660 const RegMask Matcher::mathExactI_result_proj_mask() {
1661   return INT_RAX_REG_mask();  // same mask as divI_proj_mask
1662 }
1663 
1664 const RegMask Matcher::mathExactL_result_proj_mask() {
1665   return LONG_RAX_REG_mask();  // same mask as divL_proj_mask
1666 }
1667 
1668 const RegMask Matcher::mathExactI_flags_proj_mask() {
1669   return INT_FLAGS_mask();  // mask of the INT_FLAGS register class
1670 }
1671 
1672 %}
1673 
1674 //----------ENCODING BLOCK-----------------------------------------------------
1675 // This block specifies the encoding classes used by the compiler to
1676 // output byte streams.  Encoding classes are parameterized macros
1677 // used by Machine Instruction Nodes in order to generate the bit
1678 // encoding of the instruction.  Operands specify their base encoding
1679 // interface with the interface keyword.  Four interfaces are
1680 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
1681 // COND_INTER.  REG_INTER causes an operand to generate a function
1682 // which returns its register number when queried.  CONST_INTER causes
1683 // an operand to generate a function which returns the value of the
1684 // constant when queried.  MEMORY_INTER causes an operand to generate
1685 // four functions which return the Base Register, the Index Register,
1686 // the Scale Value, and the Offset Value of the operand when queried.
1687 // COND_INTER causes an operand to generate six functions which return
1688 // the encoding code (ie - encoding bits for the instruction)
1689 // associated with each basic boolean condition for a conditional
1690 // instruction.
1691 //
1692 // Instructions specify two basic values for encoding.  Again, a
1693 // function is available to check if the constant displacement is an
1694 // oop. They use the ins_encode keyword to specify their encoding
1695 // classes (which must be a sequence of enc_class names, and their
1696 // parameters, specified in the encoding block), and they use the
1697 // opcode keyword to specify, in order, their primary, secondary, and
1698 // tertiary opcode.  Only the opcode sections which a particular
1699 // instruction needs for encoding need to be specified.
1700 encode %{
1701   // Build emit functions for each basic byte or larger field in the
1702   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1703   // from C++ code in the enc_class source block.  Emit functions will
1704   // live in the main source block for now.  In future, we can
1705   // generalize this by adding a syntax that specifies the sizes of
1706   // fields in an order, so that the adlc can build the emit functions
1707   // automagically
1708 
1709   // Emit the primary opcode of the instruction that uses this enc_class
1710   enc_class OpcP
1711   %{
1712     emit_opcode(cbuf, $primary);
1713   %}
1714 
1715   // Emit the secondary opcode of the instruction that uses this enc_class
1716   enc_class OpcS
1717   %{
1718     emit_opcode(cbuf, $secondary);
1719   %}
1720 
1721   // Emit the tertiary opcode of the instruction that uses this enc_class
1722   enc_class OpcT
1723   %{
1724     emit_opcode(cbuf, $tertiary);
1725   %}
1726 
1727   // Emit an opcode given directly as the constant operand d8
1728   enc_class Opcode(immI d8)
1729   %{
1730     emit_opcode(cbuf, $d8$$constant);
1731   %}
1732 
1733   // Emit size prefix (the single byte 0x66)
1734   enc_class SizePrefix
1735   %{
1736     emit_opcode(cbuf, 0x66);
1737   %}
1738 
1739   enc_class reg(rRegI reg)
1740   %{
1741     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);  // r/m byte: fields 0x3, 0, and the low three bits of the register encoding
1742   %}
1743 
1744   enc_class reg_reg(rRegI dst, rRegI src)
1745   %{
1746     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);  // r/m byte: 0x3, low three bits of dst, low three bits of src
1747   %}
1748 
1749   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1750   %{
1751     emit_opcode(cbuf, $opcode$$constant);              // opcode supplied as an operand
1752     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);  // r/m byte: 0x3, low three bits of dst, low three bits of src
1753   %}
1754 
1755   enc_class cdql_enc(no_rax_rdx_RegI div)
1756   %{
1757     // Full implementation of Java idiv and irem; checks for
1758     // special case as described in JVM spec., p.243 & p.271.
1759     //
1760     //         normal case                           special case
1761     //
1762     // input : rax: dividend                         min_int
1763     //         reg: divisor                          -1
1764     //
1765     // output: rax: quotient  (= rax idiv reg)       min_int
1766     //         rdx: remainder (= rax irem reg)       0
1767     //
1768     //  Code sequence:
1769     //
1770     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1771     //    5:   75 07/08                jne    e <normal>
1772     //    7:   33 d2                   xor    %edx,%edx
1773     //  [div >= 8 -> offset + 1]
1774     //  [REX_B]
1775     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1776     //    c:   74 03/04                je     11 <done>
1777     // 000000000000000e <normal>:
1778     //    e:   99                      cltd
1779     //  [div >= 8 -> offset + 1]
1780     //  [REX_B]
1781     //    f:   f7 f9                   idiv   $div
1782     // 0000000000000011 <done>:
1783 
1784     // cmp    $0x80000000,%eax
1785     emit_opcode(cbuf, 0x3d);
1786     emit_d8(cbuf, 0x00);
1787     emit_d8(cbuf, 0x00);
1788     emit_d8(cbuf, 0x00);
1789     emit_d8(cbuf, 0x80);
1790 
1791     // jne    e <normal>
1792     emit_opcode(cbuf, 0x75);
1793     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);  // one byte further when the cmp below carries REX_B
1794 
1795     // xor    %edx,%edx
1796     emit_opcode(cbuf, 0x33);
1797     emit_d8(cbuf, 0xD2);
1798 
1799     // cmp    $0xffffffffffffffff,$div
1800     if ($div$$reg >= 8) {
1801       emit_opcode(cbuf, Assembler::REX_B);
1802     }
1803     emit_opcode(cbuf, 0x83);
1804     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1805     emit_d8(cbuf, 0xFF);
1806 
1807     // je     11 <done>
1808     emit_opcode(cbuf, 0x74);
1809     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);  // skips cltd plus the idiv emitted by the user (REX_B adds a byte)
1810 
1811     // <normal>
1812     // cltd
1813     emit_opcode(cbuf, 0x99);
1814 
1815     // idivl (note: must be emitted by the user of this rule)
1816     // <done>
1817   %}
1818 
1819   enc_class cdqq_enc(no_rax_rdx_RegL div)
1820   %{
1821     // Full implementation of Java ldiv and lrem; checks for
1822     // special case as described in JVM spec., p.243 & p.271.
1823     //
1824     //         normal case                           special case
1825     //
1826     // input : rax: dividend                         min_long
1827     //         reg: divisor                          -1
1828     //
1829     // output: rax: quotient  (= rax idiv reg)       min_long
1830     //         rdx: remainder (= rax irem reg)       0
1831     //
1832     //  Code sequence:
1833     //
1834     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1835     //    7:   00 00 80
1836     //    a:   48 39 d0                cmp    %rdx,%rax
1837     //    d:   75 08                   jne    17 <normal>
1838     //    f:   33 d2                   xor    %edx,%edx
1839     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1840     //   15:   74 05                   je     1c <done>
1841     // 0000000000000017 <normal>:
1842     //   17:   48 99                   cqto
1843     //   19:   48 f7 f9                idiv   $div
1844     // 000000000000001c <done>:
1845 
1846     // mov    $0x8000000000000000,%rdx
1847     emit_opcode(cbuf, Assembler::REX_W);
1848     emit_opcode(cbuf, 0xBA);
1849     emit_d8(cbuf, 0x00);
1850     emit_d8(cbuf, 0x00);
1851     emit_d8(cbuf, 0x00);
1852     emit_d8(cbuf, 0x00);
1853     emit_d8(cbuf, 0x00);
1854     emit_d8(cbuf, 0x00);
1855     emit_d8(cbuf, 0x00);
1856     emit_d8(cbuf, 0x80);
1857 
1858     // cmp    %rdx,%rax
1859     emit_opcode(cbuf, Assembler::REX_W);
1860     emit_opcode(cbuf, 0x39);
1861     emit_d8(cbuf, 0xD0);
1862 
1863     // jne    17 <normal>
1864     emit_opcode(cbuf, 0x75);
1865     emit_d8(cbuf, 0x08);  // fixed: the cmp below always carries a one-byte prefix
1866 
1867     // xor    %edx,%edx
1868     emit_opcode(cbuf, 0x33);
1869     emit_d8(cbuf, 0xD2);
1870 
1871     // cmp    $0xffffffffffffffff,$div
1872     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1873     emit_opcode(cbuf, 0x83);
1874     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1875     emit_d8(cbuf, 0xFF);
1876 
1877     // je     1c <done>
1878     emit_opcode(cbuf, 0x74);
1879     emit_d8(cbuf, 0x05);  // skips cqto plus the idiv emitted by the user
1880 
1881     // <normal>
1882     // cqto
1883     emit_opcode(cbuf, Assembler::REX_W);
1884     emit_opcode(cbuf, 0x99);
1885 
1886     // idivq (note: must be emitted by the user of this rule)
1887     // <done>
1888   %}
1889 
1890   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1891   enc_class OpcSE(immI imm)
1892   %{
1893     // An immediate that fits in a signed byte selects the primary opcode
1894     // with the sign-extend bit (0x02) set; anything wider uses the
1895     // primary opcode unchanged (32-bit immediate form).
1896     const bool imm_is_8bit = -0x80 <= $imm$$constant && $imm$$constant < 0x80;
1897     emit_opcode(cbuf, imm_is_8bit ? ($primary | 0x02) : $primary);
1898   %}
1902 
1903   enc_class OpcSErm(rRegI dst, immI imm)
1904   %{
1905     // REX_B prefix first when dst is an extended register (encoding 8 or
1906     // above); only the low three bits of the encoding go into the r/m byte.
1907     const int dst_enc = $dst$$reg;
1908     if (dst_enc >= 8) {
1909       emit_opcode(cbuf, Assembler::REX_B);
1910     }
1911     // Primary opcode; the sign-extend bit (0x02) is set when the immediate
1912     // fits in a signed byte, otherwise the 32-bit immediate form is used.
1913     const bool imm_is_8bit = -0x80 <= $imm$$constant && $imm$$constant < 0x80;
1914     emit_opcode(cbuf, imm_is_8bit ? ($primary | 0x02) : $primary);
1915     // r/m byte with the secondary opcode, after the primary opcode.
1916     emit_rm(cbuf, 0x3, $secondary, dst_enc & 7);
1917   %}
1922 
1923   enc_class OpcSErm_wide(rRegL dst, immI imm)
1924   %{
1925     // Wide variant: a prefix is always emitted, REX_W for encodings below
1926     // 8 and REX_WB for extended registers.
1927     const int dst_enc = $dst$$reg;
1928     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
1929     // Primary opcode; the sign-extend bit (0x02) is set when the immediate
1930     // fits in a signed byte, otherwise the 32-bit immediate form is used.
1931     const bool imm_is_8bit = -0x80 <= $imm$$constant && $imm$$constant < 0x80;
1932     emit_opcode(cbuf, imm_is_8bit ? ($primary | 0x02) : $primary);
1933     // r/m byte with the secondary opcode, after the primary opcode.
1934     emit_rm(cbuf, 0x3, $secondary, dst_enc & 7);
1935   %}
1944 
1945   enc_class Con8or32(immI imm)
1946   %{
1947     // Emit the immediate itself: the 8-bit form when the value fits in a
1948     // signed byte, the 32-bit form otherwise.
1949     if ($imm$$constant < -0x80 || $imm$$constant >= 0x80) {
1950       $$$emit32$imm$$constant;
1951     } else {
1952       $$$emit8$imm$$constant;
1953     }
1954   %}
1955 
1956   enc_class opc2_reg(rRegI dst)
1957   %{
1958     // BSWAP
1959     emit_cc(cbuf, $secondary, $dst$$reg);  // emit_cc is given the secondary opcode and the encoding of dst
1960   %}
1961 
1962   enc_class opc3_reg(rRegI dst)
1963   %{
1964     // BSWAP
1965     emit_cc(cbuf, $tertiary, $dst$$reg);  // emit_cc is given the tertiary opcode and the encoding of dst
1966   %}
1967 
1968   enc_class reg_opc(rRegI div)
1969   %{
1970     // INC, DEC, IDIV, IMOD, JMP indirect, ...
1971     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);  // r/m byte: 0x3, secondary opcode, low three bits of the register
1972   %}
1973 
1974   enc_class enc_cmov(cmpOp cop)
1975   %{
1976     // CMOV
1977     $$$emit8$primary;                          // primary opcode byte first
1978     emit_cc(cbuf, $secondary, $cop$$cmpcode);  // emit_cc is given the secondary opcode and the condition code of cop
1979   %}
1980 
1981   enc_class enc_PartialSubtypeCheck()
1982   %{
1983     Register Rrdi = as_Register(RDI_enc); // result register
1984     Register Rrax = as_Register(RAX_enc); // super class
1985     Register Rrcx = as_Register(RCX_enc); // killed
1986     Register Rrsi = as_Register(RSI_enc); // sub class
1987     Label miss;
1988     const bool set_cond_codes = true;     // handed to the slow path below
1989 
1990     MacroAssembler _masm(&cbuf);
1991     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
1992                                      NULL, &miss,
1993                                      set_cond_codes);
1994     if ($primary) {
1995       __ xorptr(Rrdi, Rrdi);  // zero the result register; not reached when the slow path branches to miss
1996     }
1997     __ bind(miss);
1998   %}
1999 
2000   enc_class clear_avx %{
2001     debug_only(int off0 = cbuf.insts_size());  // debug builds only: instruction size before
2002     if (ra_->C->max_vector_size() > 16) {
2003       // Clear upper bits of YMM registers when current compiled code uses
2004       // wide vectors to avoid AVX <-> SSE transition penalty during call.
2005       MacroAssembler _masm(&cbuf);
2006       __ vzeroupper();
2007     }
2008     debug_only(int off1 = cbuf.insts_size());  // debug builds only: instruction size after
2009     assert(off1 - off0 == clear_avx_size(), "correct size prediction");  // emitted size must match clear_avx_size()
2010   %}
2011 
2012   enc_class Java_To_Runtime(method meth) %{
2013     // No relocation needed
2014     MacroAssembler _masm(&cbuf);
2015     __ mov64(r10, (int64_t) $meth$$method);  // target address goes into r10 as a 64-bit value
2016     __ call(r10);                             // call through r10
2017   %}
2018 
2019   enc_class Java_To_Interpreter(method meth)
2020   %{
2021     // CALL Java_To_Interpreter
2022     // This is the instruction starting address for relocation info.
2023     cbuf.set_insts_mark();
2024     $$$emit8$primary;
2025     // CALL directly to the runtime
2026     emit_d32_reloc(cbuf,
2027                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),  // target minus current end of code minus 4 (size of this d32 field)
2028                    runtime_call_Relocation::spec(),
2029                    RELOC_DISP32);
2030   %}
2031 
2032   enc_class Java_Static_Call(method meth)
2033   %{
2034     // JAVA STATIC CALL
2035     // The call goes to a fixup routine, which uses ScopeDesc info to
2036     // determine who we intended to call.
2037     cbuf.set_insts_mark();
2038     $$$emit8$primary;
2039     if (!_method) {
2040       // No method attached: runtime-call relocation and no stub.
2041       emit_d32_reloc(cbuf,
2042                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2043                      runtime_call_Relocation::spec(),
2044                      RELOC_DISP32);
2045     } else {
2046       if (_optimized_virtual) {
2047         emit_d32_reloc(cbuf,
2048                        (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2049                        opt_virtual_call_Relocation::spec(),
2050                        RELOC_DISP32);
2051       } else {
2052         emit_d32_reloc(cbuf,
2053                        (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2054                        static_call_Relocation::spec(),
2055                        RELOC_DISP32);
2056       }
2057       // Emit stub for static call.
2058       CompiledStaticCall::emit_to_interp_stub(cbuf);
2059     }
2060   %}
2061 
2062   enc_class Java_Dynamic_Call(method meth) %{
2063     MacroAssembler _masm(&cbuf);
2064     __ ic_call((address)$meth$$method);  // the whole call sequence is delegated to MacroAssembler::ic_call
2065   %}
2066 
2067   enc_class Java_Compiled_Call(method meth)
2068   %{
2069     // JAVA COMPILED CALL: indirect call through the address found
2070     // Method::from_compiled_offset() bytes past the pointer in RAX.
2071     int disp = in_bytes(Method:: from_compiled_offset());
2072     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2073     // so the displacement is not asserted to be small; both forms are handled.
2074 
2075     // callq *disp(%rax)
2076     cbuf.set_insts_mark();
2077     $$$emit8$primary;
2078     if (-0x80 <= disp && disp < 0x80) {
2079       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte, 8-bit displacement form
2080       emit_d8(cbuf, disp); // Displacement (must fit a signed byte, hence both bounds)
2081     } else {
2082       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte, 32-bit displacement form
2083       emit_d32(cbuf, disp); // Displacement
2084     }
2085   %}
2086 
2087   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2088   %{
2089     // SAL, SAR, SHR
2090     // Layout: optional REX_B (extended dst), primary opcode, r/m byte with
2091     // the secondary opcode, then the 8-bit shift count.
2092     const int dst_enc = $dst$$reg;
2093     if (dst_enc >= 8) {
2094       emit_opcode(cbuf, Assembler::REX_B);
2095     }
2096     $$$emit8$primary;
2097     emit_rm(cbuf, 0x3, $secondary, dst_enc & 7);
2098     $$$emit8$shift$$constant;
2099   %}
2099 
2100   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2101   %{
2102     // SAL, SAR, SHR
2103     // Layout: REX_W or REX_WB (extended dst), primary opcode, r/m byte
2104     // with the secondary opcode, then the 8-bit shift count.
2105     const int dst_enc = $dst$$reg;
2106     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2107     $$$emit8$primary;
2108     emit_rm(cbuf, 0x3, $secondary, dst_enc & 7);
2109     $$$emit8$shift$$constant;
2110   %}
2114 
2115   enc_class load_immI(rRegI dst, immI src)
2116   %{
2117     // Optional REX_B, opcode 0xB8 plus the low three bits of the register
2118     // encoding, then the 32-bit constant.
2119     const int dst_enc = $dst$$reg;
2120     if (dst_enc >= 8) {
2121       emit_opcode(cbuf, Assembler::REX_B);
2122     }
2123     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
2124     $$$emit32$src$$constant;
2125   %}
2125 
2126   enc_class load_immL(rRegL dst, immL src)
2127   %{
2128     // REX_W or REX_WB, opcode 0xB8 plus the low three bits of the
2129     // register encoding, then the full 64-bit constant.
2130     const int dst_enc = $dst$$reg;
2131     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2132     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
2133     emit_d64(cbuf, $src$$constant);
2134   %}
2138 
2139   enc_class load_immUL32(rRegL dst, immUL32 src)
2140   %{
2141     // Same byte sequence as load_immI; here the zeroes in the high word
2142     // of the destination matter.
2143     const int dst_enc = $dst$$reg;
2144     if (dst_enc >= 8) {
2145       emit_opcode(cbuf, Assembler::REX_B);
2146     }
2147     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
2148     $$$emit32$src$$constant;
2149   %}
2150 
2151   enc_class load_immL32(rRegL dst, immL32 src)
2152   %{
2153     // REX_W or REX_WB, opcode 0xC7 with an r/m byte (fields 0x03, 0x00 and
2154     // the low three bits of the register), then the 32-bit constant.
2155     const int dst_enc = $dst$$reg;
2156     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2157     emit_opcode(cbuf, 0xC7);
2158     emit_rm(cbuf, 0x03, 0x00, dst_enc & 7);
2159     $$$emit32$src$$constant;
2160   %}
2164 
2165   enc_class load_immP31(rRegP dst, immP32 src)
2166   %{
2167     // Same byte sequence as load_immI; here the zeroes in the high word
2168     // of the destination matter.
2169     const int dst_enc = $dst$$reg;
2170     if (dst_enc >= 8) {
2171       emit_opcode(cbuf, Assembler::REX_B);
2172     }
2173     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
2174     $$$emit32$src$$constant;
2175   %}
2176 
2177   enc_class load_immP(rRegP dst, immP src)
2178   %{
2179     // REX_W or REX_WB, then opcode 0xB8 plus the low three bits of the
2180     // register encoding, then the 64-bit pointer constant.
2181     const int dst_enc = $dst$$reg;
2182     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2183     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
2184     // This next line should be generated from ADLC
2185     if ($src->constant_reloc() == relocInfo::none) {
2186       emit_d64(cbuf, $src$$constant);
2187     } else {
2188       // The constant carries a relocation type: emit it with that type.
2189       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2190     }
2191   %}
2194 
2195   enc_class Con32(immI src)
2196   %{
2197     // Output the constant as a 32-bit immediate
2198     $$$emit32$src$$constant;
2199   %}
2200 
2201   enc_class Con32F_as_bits(immF src)
2202   %{
2203     // Emit the float constant's bit pattern (via jint_cast) as a
2204     // 32-bit immediate.
2205     const jfloat value = $src$$constant;
2206     emit_d32(cbuf, jint_cast(value));
2207   %}
2208 
2209   enc_class Con16(immI src)
2210   %{
2211     // Output the constant as a 16-bit immediate
2212     $$$emit16$src$$constant;
2213   %}
2214 
2215   // How is this different from Con32??? XXX
2216   enc_class Con_d32(immI src)
2217   %{
2218     emit_d32(cbuf,$src$$constant);  // calls emit_d32 directly, where Con32 uses the emit32 shorthand
2219   %}
2220 
2221   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2222     // Output immediate memory reference
2223     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );  // r/m byte: fields 0x00, the encoding of t1, 0x05
2224     emit_d32(cbuf, 0x00);                  // followed by a 32-bit zero
2225   %}
2226 
2227   enc_class lock_prefix()
2228   %{
2229     if (os::is_MP()) {         // nothing is emitted when os::is_MP() is false
2230       emit_opcode(cbuf, 0xF0); // lock
2231     }
2232   %}
2233 
2234   enc_class REX_mem(memory mem)
2235   %{
2236     // A prefix is emitted only when the base and/or the index register
2237     // has an encoding of 8 or above.
2238     const bool ext_base  = $mem$$base >= 8;
2239     const bool ext_index = $mem$$index >= 8;
2240     if (ext_base && ext_index) {
2241       emit_opcode(cbuf, Assembler::REX_XB);
2242     } else if (ext_base) {
2243       emit_opcode(cbuf, Assembler::REX_B);
2244     } else if (ext_index) {
2245       emit_opcode(cbuf, Assembler::REX_X);
2246     }
2247   %}
2248 
2249   enc_class REX_mem_wide(memory mem)
2250   %{
2251     // Always emits one of the REX_W* prefixes; which one depends on
2252     // whether the base and the index register have encodings of 8 or above.
2253     const bool ext_base  = $mem$$base >= 8;
2254     const bool ext_index = $mem$$index >= 8;
2255     if (ext_base) {
2256       emit_opcode(cbuf, ext_index ? Assembler::REX_WXB : Assembler::REX_WB);
2257     } else {
2258       emit_opcode(cbuf, ext_index ? Assembler::REX_WX : Assembler::REX_W);
2259     }
2260   %}
2265 
2266   // for byte regs
2267   enc_class REX_breg(rRegI reg)
2268   %{
2269     const int enc = $reg$$reg;  // below 4: no prefix at all
2270     if (enc >= 8)      { emit_opcode(cbuf, Assembler::REX_B); }
2271     else if (enc >= 4) { emit_opcode(cbuf, Assembler::REX); }
2272   %}
2273 
2274   // for byte regs
2275   enc_class REX_reg_breg(rRegI dst, rRegI src)
2276   %{
2277     const bool ext_dst = $dst$$reg >= 8;
2278     const bool ext_src = $src$$reg >= 8;
2279     if (ext_dst) {
2280       emit_opcode(cbuf, ext_src ? Assembler::REX_RB : Assembler::REX_R);
2281     } else if (ext_src) {
2282       emit_opcode(cbuf, Assembler::REX_B);
2283     } else if ($src$$reg >= 4) {
2284       // low dst, src encoding 4-7: bare REX
2285       emit_opcode(cbuf, Assembler::REX);
2286     }
2287   %}
2289 
2290   // for byte regs
2291   enc_class REX_breg_mem(rRegI reg, memory mem)
2292   %{
2293     // Pick the prefix from three facts: is the register operand, the base,
2294     // or the index an extended register (encoding 8 or above)?
2295     const bool ext_reg   = $reg$$reg >= 8;
2296     const bool ext_base  = $mem$$base >= 8;
2297     const bool ext_index = $mem$$index >= 8;
2298 
2299     if (ext_reg) {
2300       if (ext_base) {
2301         emit_opcode(cbuf, ext_index ? Assembler::REX_RXB : Assembler::REX_RB);
2302       } else {
2303         emit_opcode(cbuf, ext_index ? Assembler::REX_RX : Assembler::REX_R);
2304       }
2305     } else if (ext_base) {
2306       emit_opcode(cbuf, ext_index ? Assembler::REX_XB : Assembler::REX_B);
2307     } else if (ext_index) {
2308       emit_opcode(cbuf, Assembler::REX_X);
2309     } else if ($reg$$reg >= 4) {
2310       // nothing extended, register encoding 4-7: bare REX
2311       emit_opcode(cbuf, Assembler::REX);
2312     }
2313   %}
2323 
2324   enc_class REX_reg(rRegI reg)
2325   %{
2326     if ($reg$$reg >= 8) {  // encodings below 8 get no prefix
2327       emit_opcode(cbuf, Assembler::REX_B);
2328     }
2329   %}
2330 
2331   enc_class REX_reg_wide(rRegI reg)
2332   %{
2333     // REX_W for encodings below 8, REX_WB otherwise.
2334     emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2335   %}
2339 
2340   enc_class REX_reg_reg(rRegI dst, rRegI src)
2341   %{
2342     // No prefix when both encodings are below 8.
2343     const bool ext_dst = $dst$$reg >= 8;
2344     const bool ext_src = $src$$reg >= 8;
2345     if (ext_dst) {
2346       emit_opcode(cbuf, ext_src ? Assembler::REX_RB : Assembler::REX_R);
2347     } else if (ext_src) {
2348       emit_opcode(cbuf, Assembler::REX_B);
2349     }
2350   %}
2354 
2355   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2356   %{
2357     // Always emits one of the REX_W* prefixes, chosen by whether dst
2358     // and src have encodings of 8 or above.
2359     const bool ext_dst = $dst$$reg >= 8;
2360     const bool ext_src = $src$$reg >= 8;
2361     if (ext_dst) {
2362       emit_opcode(cbuf, ext_src ? Assembler::REX_WRB : Assembler::REX_WR);
2363     } else {
2364       emit_opcode(cbuf, ext_src ? Assembler::REX_WB : Assembler::REX_W);
2365     }
2366   %}
2371 
2372   enc_class REX_reg_mem(rRegI reg, memory mem)
2373   %{
2374     // Pick the prefix from three facts: is the register operand, the base,
2375     // or the index an extended register (encoding 8 or above)?  Nothing is
2376     // emitted when none of them is.
2377     const bool ext_reg   = $reg$$reg >= 8;
2378     const bool ext_base  = $mem$$base >= 8;
2379     const bool ext_index = $mem$$index >= 8;
2380 
2381     if (ext_reg) {
2382       if (ext_base) {
2383         emit_opcode(cbuf, ext_index ? Assembler::REX_RXB : Assembler::REX_RB);
2384       } else {
2385         emit_opcode(cbuf, ext_index ? Assembler::REX_RX : Assembler::REX_R);
2386       }
2387     } else if (ext_base) {
2388       emit_opcode(cbuf, ext_index ? Assembler::REX_XB : Assembler::REX_B);
2389     } else if (ext_index) {
2390       emit_opcode(cbuf, Assembler::REX_X);
2391     }
2392   %}
2402 
2403   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2404   %{
2405     // Always emits one of the REX_W* prefixes, chosen by whether the
2406     // register operand, the base and the index have encodings of 8 or above.
2407     const bool ext_reg   = $reg$$reg >= 8;
2408     const bool ext_base  = $mem$$base >= 8;
2409     const bool ext_index = $mem$$index >= 8;
2410 
2411     if (ext_reg) {
2412       if (ext_base) {
2413         emit_opcode(cbuf, ext_index ? Assembler::REX_WRXB : Assembler::REX_WRB);
2414       } else {
2415         emit_opcode(cbuf, ext_index ? Assembler::REX_WRX : Assembler::REX_WR);
2416       }
2417     } else {
2418       if (ext_base) {
2419         emit_opcode(cbuf, ext_index ? Assembler::REX_WXB : Assembler::REX_WB);
2420       } else {
2421         emit_opcode(cbuf, ext_index ? Assembler::REX_WX : Assembler::REX_W);
2422       }
2423     }
2424   %}
2435 
2436   enc_class reg_mem(rRegI ereg, memory mem)
2437   %{
2438     // Hand register, address parts and displacement relocation straight
2439     // to encode_RegMem; high registers are handled in there.
2440     encode_RegMem(cbuf, $ereg$$reg,
2441                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2442                   $mem->disp_reloc());
2443   %}
2448 
2449   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2450   %{
2451     // The constant rm_opcode takes the place of the register operand.
2452     // High registers are handled in encode_RegMem.  The relocation
2453     // type matters for disp-as-oop when working with static globals.
2454     encode_RegMem(cbuf, $rm_opcode$$constant,
2455                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2456                   $mem->disp_reloc());
2457   %}
2465 
2466   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2467   %{
2468     // Address form: base = src0, index 0x04 (no index), scale 0x00
2469     // (no scale), displacement = src1, no relocation on the displacement.
2470     encode_RegMem(cbuf, $dst$$reg, $src0$$reg, 0x04, 0x00, $src1$$constant,
2471                   relocInfo::none);
2472   %}
2477 
2478   enc_class neg_reg(rRegI dst)
2479   %{
2480     // NEG dst: optional REX_B, opcode 0xF7, r/m byte with fields 0x3, 0x03
2481     // and the low three bits of the register encoding.
2482     const int dst_enc = $dst$$reg;
2483     if (dst_enc >= 8) {
2484       emit_opcode(cbuf, Assembler::REX_B);
2485     }
2486     emit_opcode(cbuf, 0xF7);
2487     emit_rm(cbuf, 0x3, 0x03, dst_enc & 7);
2488   %}
2489 
2490   enc_class neg_reg_wide(rRegI dst)
2491   %{
2492     // NEG dst, wide form: REX_W or REX_WB, opcode 0xF7, r/m byte with
2493     // fields 0x3, 0x03 and the low three bits of the register encoding.
2494     const int dst_enc = $dst$$reg;
2495     emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2496     emit_opcode(cbuf, 0xF7);
2497     emit_rm(cbuf, 0x3, 0x03, dst_enc & 7);
2498   %}
2503 
2504   enc_class setLT_reg(rRegI dst)
2505   %{
2506     const int dst_enc = $dst$$reg;
2507     if (dst_enc >= 4) {
2508       // encodings 4-7 take a bare REX, 8 and above take REX_B
2509       emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX : Assembler::REX_B);
2510     }
2511     // SETLT dst
2512     emit_opcode(cbuf, 0x0F);
2513     emit_opcode(cbuf, 0x9C);
2514     emit_rm(cbuf, 0x3, 0x0, dst_enc & 7);
2515   %}
2518 
2519   enc_class setNZ_reg(rRegI dst)
2520   %{
2521     const int dst_enc = $dst$$reg;
2522     if (dst_enc >= 4) {
2523       // encodings 4-7 take a bare REX, 8 and above take REX_B
2524       emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX : Assembler::REX_B);
2525     }
2526     // SETNZ dst
2527     emit_opcode(cbuf, 0x0F);
2528     emit_opcode(cbuf, 0x95);
2529     emit_rm(cbuf, 0x3, 0x0, dst_enc & 7);
2530   %}
2533 
2534 
2535   // Compare the longs and set -1, 0, or 1 into dst
2536   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2537   %{
2538     int src1enc = $src1$$reg;
2539     int src2enc = $src2$$reg;
2540     int dstenc = $dst$$reg;
2541 
2542     // cmpq $src1, $src2
2543     if (src1enc < 8) {
2544       if (src2enc < 8) {
2545         emit_opcode(cbuf, Assembler::REX_W);
2546       } else {
2547         emit_opcode(cbuf, Assembler::REX_WB);
2548       }
2549     } else {
2550       if (src2enc < 8) {
2551         emit_opcode(cbuf, Assembler::REX_WR);
2552       } else {
2553         emit_opcode(cbuf, Assembler::REX_WRB);
2554       }
2555     }
2556     emit_opcode(cbuf, 0x3B);
2557     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2558 
2559     // movl $dst, -1
2560     if (dstenc >= 8) {
2561       emit_opcode(cbuf, Assembler::REX_B);
2562     }
2563     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2564     emit_d32(cbuf, -1);
2565 
2566     // jl,s done
2567     emit_opcode(cbuf, 0x7C);
2568     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);  // skips setne + movzbl; each gains a prefix byte when dstenc >= 4
2569 
2570     // setne $dst
2571     if (dstenc >= 4) {
2572       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2573     }
2574     emit_opcode(cbuf, 0x0F);
2575     emit_opcode(cbuf, 0x95);
2576     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2577 
2578     // movzbl $dst, $dst
2579     if (dstenc >= 4) {
2580       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);  // dst fills both register fields, hence REX_RB
2581     }
2582     emit_opcode(cbuf, 0x0F);
2583     emit_opcode(cbuf, 0xB6);
2584     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2585   %}
2586 
2587   enc_class Push_ResultXD(regD dst) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ fstp_d(Address(rsp, 0));                     // fstp_d to the stack slot at [rsp]
2590     __ movdbl($dst$$XMMRegister, Address(rsp, 0));  // load that slot into the XMM register dst
2591     __ addptr(rsp, 8);                              // rsp += 8
2592   %}
2593 
2594   enc_class Push_SrcXD(regD src) %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);                              // rsp -= 8
2597     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // store the XMM register src to the slot at [rsp]
2598     __ fld_d(Address(rsp, 0));                      // fld_d from that slot; rsp is not restored here
2599   %}
2600 
2601 
2602   // obj: object to lock
2603   // box: box address (header location) -- killed
2604   // tmp: rax -- killed
2605   // scr: rbx -- killed
2606   //
2607   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2608   // from i486.ad.  See that file for comments.
2609   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2610   // use the shorter encoding.  (Movl clears the high-order 32-bits).
2611 
2612 
2613   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2614   %{  // Emits the inline lock sequence for obj. EmitSync bit 1 or bit 2 selects an alternate code shape; otherwise the full triage below is emitted.
2615     Register objReg = as_Register((int)$obj$$reg);
2616     Register boxReg = as_Register((int)$box$$reg);
2617     Register tmpReg = as_Register($tmp$$reg);
2618     Register scrReg = as_Register($scr$$reg);
2619     MacroAssembler masm(&cbuf);
2620 
2621     // Verify uniqueness of register assignments -- necessary but not sufficient (box is not compared against tmp or scr)
2622     assert (objReg != boxReg && objReg != tmpReg &&
2623             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2624 
2625     if (_counters != NULL) {
2626       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2627     }
2628     if (EmitSync & 1) {
2629         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2630         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2631         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;  // compares rsp with 0; presumably leaves ZF=0 to force the slow path -- confirm
2632     } else
2633     if (EmitSync & 2) {
2634         Label DONE_LABEL;
2635         if (UseBiasedLocking) {
2636            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2637           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2638         }
2639         // QQQ was movl...
2640         masm.movptr(tmpReg, 0x1);
2641         masm.orptr(tmpReg, Address(objReg, 0));   // tmp = word at [obj] with bit 0 set
2642         masm.movptr(Address(boxReg, 0), tmpReg);  // save that value in the box
2643         if (os::is_MP()) {
2644           masm.lock();
2645         }
2646         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2647         masm.jcc(Assembler::equal, DONE_LABEL);
2648 
2649         // Recursive locking
2650         masm.subptr(tmpReg, rsp);
2651         masm.andptr(tmpReg, 7 - os::vm_page_size());
2652         masm.movptr(Address(boxReg, 0), tmpReg);
2653 
2654         masm.bind(DONE_LABEL);
2655         masm.nop(); // avoid branch to branch
2656     } else {
2657         Label DONE_LABEL, IsInflated;  // the never-referenced Egress label was dropped
2658 
2659         masm.movptr(tmpReg, Address(objReg, 0)) ;  // fetch the markword of obj
2660         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2661         masm.jcc   (Assembler::notZero, IsInflated) ;
2662 
2663         // it's stack-locked, biased or neutral
2664         // TODO: optimize markword triage order to reduce the number of
2665         // conditional branches in the most common cases.
2666         // Beware -- there's a subtle invariant that fetch of the markword
2667         // at [FETCH], below, will never observe a biased encoding (*101b).
2668         // If this invariant is not held we'll suffer exclusion (safety) failure.
2669 
2670         if (UseBiasedLocking && !UseOptoBiasInlining) {
2671           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2672           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2673         }
2674 
2675         // was q will it destroy high?
2676         masm.orl   (tmpReg, 1) ;                    // set bit 0 in the fetched markword
2677         masm.movptr(Address(boxReg, 0), tmpReg) ;   // save it in the box before the CAS
2678         if (os::is_MP()) { masm.lock(); }
2679         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2680         if (_counters != NULL) {
2681            masm.cond_inc32(Assembler::equal,
2682                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2683         }
2684         masm.jcc   (Assembler::equal, DONE_LABEL);
2685 
2686         // Recursive locking
2687         masm.subptr(tmpReg, rsp);
2688         masm.andptr(tmpReg, 7 - os::vm_page_size());  // the flags set here are what callers see at DONE_LABEL
2689         masm.movptr(Address(boxReg, 0), tmpReg);
2690         if (_counters != NULL) {
2691            masm.cond_inc32(Assembler::equal,
2692                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2693         }
2694         masm.jmp   (DONE_LABEL) ;
2695 
2696         masm.bind  (IsInflated) ;
2697         // It's inflated
2698 
2699         // TODO: someday avoid the ST-before-CAS penalty by
2700         // relocating (deferring) the following ST.
2701         // We should also think about trying a CAS without having
2702         // fetched _owner.  If the CAS is successful we may
2703         // avoid an RTO->RTS upgrade on the $line.
2704         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2705         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2706 
2707         masm.mov    (boxReg, tmpReg) ;  // box now holds the markword value; tmp is reused for the owner field
2708         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // the -2 presumably cancels the 0x02 tag bit tested above
2709         masm.testptr(tmpReg, tmpReg) ;
2710         masm.jcc    (Assembler::notZero, DONE_LABEL) ;  // owner field is non-zero: leave with ZF=0
2711 
2712         // It's inflated and appears unlocked
2713         if (os::is_MP()) { masm.lock(); }
2714         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2715         // Intentional fall-through into DONE_LABEL ...
2716 
2717         masm.bind  (DONE_LABEL) ;
2718         masm.nop   () ;                 // avoid jmp to jmp
2719     }
2720   %}
2721 
2722   // obj: object to unlock
2723   // box: box address (displaced header location), killed; allocated as rax_RegP
2724   // tmp: killed temporary (RBX per the original note); cannot be obj nor box
2725   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2726   %{
2727 
2728     Register objReg = as_Register($obj$$reg);
2729     Register boxReg = as_Register($box$$reg);
2730     Register tmpReg = as_Register($tmp$$reg);
2731     MacroAssembler masm(&cbuf);
2732 
2733     if (EmitSync & 4) {
2734        masm.cmpptr(rsp, 0) ;  // compares rsp with 0; presumably leaves ZF=0 to force the slow path -- confirm
2735     } else
2736     if (EmitSync & 8) {
2737        Label DONE_LABEL;
2738        if (UseBiasedLocking) {
2739          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2740        }
2741 
2742        // Check whether the displaced header is 0
2743        //(=> recursive unlock)
2744        masm.movptr(tmpReg, Address(boxReg, 0));
2745        masm.testptr(tmpReg, tmpReg);
2746        masm.jcc(Assembler::zero, DONE_LABEL);
2747 
2748        // If not recursive lock, reset the header to displaced header
2749        if (os::is_MP()) {
2750          masm.lock();
2751        }
2752        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2753        masm.bind(DONE_LABEL);
2754        masm.nop(); // avoid branch to branch
2755     } else {
2756        Label DONE_LABEL, Stacked, CheckSucc ;
2757 
2758        if (UseBiasedLocking && !UseOptoBiasInlining) {
2759          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2760        }
2761 
2762        masm.movptr(tmpReg, Address(objReg, 0)) ;                // tmp = word at [obj]
2763        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;    // zero word in the box: done, with ZF=1
2764        masm.jcc   (Assembler::zero, DONE_LABEL) ;
2765        masm.testl (tmpReg, 0x02) ;                              // bit 0x02 clear => take the Stacked path
2766        masm.jcc   (Assembler::zero, Stacked) ;
2767 
2768        // It's inflated
2769        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // the -2 presumably cancels the 0x02 tag bit in tmp
2770        masm.xorptr(boxReg, r15_thread) ;                                                  // zero iff the owner field equals r15_thread
2771        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
2772        masm.jcc   (Assembler::notZero, DONE_LABEL) ;                                      // different owner or non-zero recursions: leave with ZF=0
2773        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
2774        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
2775        masm.jcc   (Assembler::notZero, CheckSucc) ;                                       // cxq or EntryList is non-zero
2776        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;  // clear the owner field
2777        masm.jmp   (DONE_LABEL) ;
2778 
2779        if ((EmitSync & 65536) == 0) {
2780          Label LSuccess, LGoSlowPath ;
2781          masm.bind  (CheckSucc) ;
2782          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2783          masm.jcc   (Assembler::zero, LGoSlowPath) ;
2784 
2785          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
2786          // explicit ST;MEMBAR combination, but masm doesn't currently support
2787          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
2788          // are all faster when the write buffer is populated.
2789          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2790          if (os::is_MP()) {
2791             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;  // locked add of 0 to [rsp], used as the fence described above
2792          }
2793          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
2794          masm.jcc   (Assembler::notZero, LSuccess) ;
2795 
2796          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX
2797          if (os::is_MP()) { masm.lock(); }
2798          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
2799          masm.jcc   (Assembler::notEqual, LSuccess) ;
2800          // Intentional fall-through into slow-path
2801 
2802          masm.bind  (LGoSlowPath) ;
2803          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
2804          masm.jmp   (DONE_LABEL) ;
2805 
2806          masm.bind  (LSuccess) ;
2807          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
2808          masm.jmp   (DONE_LABEL) ;
2809        }
2810 
2811        masm.bind  (Stacked) ;
2812        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
2813        if (os::is_MP()) { masm.lock(); }
2814        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2815 
2816        if (EmitSync & 65536) {
2817           masm.bind (CheckSucc) ;                      // CheckSucc block not emitted in this mode; bind the label at the exit
2818        }
2819        masm.bind(DONE_LABEL);
2820        if (EmitSync & 32768) {
2821           masm.nop();                      // avoid branch to branch
2822        }
2823     }
2824   %}
2825 
2826 
2827   enc_class enc_rethrow()
2828   %{  // Emits a 5-byte jmp (opcode 0xE9 + 32-bit displacement) to OptoRuntime::rethrow_stub().
2829     cbuf.set_insts_mark();
2830     emit_opcode(cbuf, 0xE9); // jmp entry
2831     emit_d32_reloc(cbuf,
2832                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4), // target minus the address just past the 4-byte displacement
2833                    runtime_call_Relocation::spec(),
2834                    RELOC_DISP32);
2835   %}
2836 
2837 %}
2838 
2839 
2840 
2841 //----------FRAME--------------------------------------------------------------
2842 // Definition of frame structure and management information.
2843 //
2844 //  S T A C K   L A Y O U T    Allocators stack-slot number
2845 //                             |   (to get allocators register number
2846 //  G  Owned by    |        |  v    add OptoReg::stack0())
2847 //  r   CALLER     |        |
2848 //  o     |        +--------+      pad to even-align allocators stack-slot
2849 //  w     V        |  pad0  |        numbers; owned by CALLER
2850 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2851 //  h     ^        |   in   |  5
2852 //        |        |  args  |  4   Holes in incoming args owned by SELF
2853 //  |     |        |        |  3
2854 //  |     |        +--------+
2855 //  V     |        | old out|      Empty on Intel, window on Sparc
2856 //        |    old |preserve|      Must be even aligned.
2857 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2858 //        |        |   in   |  3   area for Intel ret address
2859 //     Owned by    |preserve|      Empty on Sparc.
2860 //       SELF      +--------+
2861 //        |        |  pad2  |  2   pad to align old SP
2862 //        |        +--------+  1
2863 //        |        | locks  |  0
2864 //        |        +--------+----> OptoReg::stack0(), even aligned
2865 //        |        |  pad1  | 11   pad to align new SP
2866 //        |        +--------+
2867 //        |        |        | 10
2868 //        |        | spills |  9   spills
2869 //        V        |        |  8   (pad0 slot for callee)
2870 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2871 //        ^        |  out   |  7
2872 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2873 //     Owned by    +--------+
2874 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2875 //        |    new |preserve|      Must be even-aligned.
2876 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2877 //        |        |        |
2878 //
2879 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2880 //         known from SELF's arguments and the Java calling convention.
2881 //         Region 6-7 is determined per call site.
2882 // Note 2: If the calling convention leaves holes in the incoming argument
2883 //         area, those holes are owned by SELF.  Holes in the outgoing area
2884 //         are owned by the CALLEE.  Holes should not be necessary in the
2885 //         incoming area, as the Java calling convention is completely under
2886 //         the control of the AD file.  Doubles can be sorted and packed to
2887 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2888 //         varargs C calling conventions.
2889 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2890 //         even aligned with pad0 as needed.
2891 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2892 //         region 6-11 is even aligned; it may be padded out more so that
2893 //         the region from SP to FP meets the minimum stack alignment.
2894 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2895 //         alignment.  Region 11, pad1, may be dynamically extended so that
2896 //         SP meets the minimum alignment.
2897 
2898 frame
2899 %{
2900   // What direction does stack grow in (assumed to be same for C & Java)
2901   stack_direction(TOWARDS_LOW);
2902 
2903   // These registers define part of the calling convention
2904   // between compiled code and the interpreter.
2905   inline_cache_reg(RAX);                // Inline Cache Register
2906   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2907                                         // calling interpreter
2908 
2909   // Optional: name the operand used by cisc-spilling to access
2910   // [stack_pointer + offset]
2911   cisc_spilling_operand_name(indOffset32);
2912 
2913   // Number of stack slots consumed by locking an object
2914   sync_stack_slots(2);
2915 
2916   // Compiled code's Frame Pointer
2917   frame_pointer(RSP);
2918 
2919   // Interpreter stores its frame pointer in a register which is
2920   // stored to the stack by I2CAdaptors.
2921   // I2CAdaptors convert from interpreted java to compiled java.
2922   interpreter_frame_pointer(RBP);
2923 
2924   // Stack alignment requirement
2925   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2926 
2927   // Number of stack slots between incoming argument block and the start of
2928   // a new frame.  The PROLOG must add this many slots to the stack.  The
2929   // EPILOG must remove this many slots.  amd64 needs two slots for
2930   // return address.
2931   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);  // 4 slots, plus 2 more per unit of VerifyStackAtCalls
2932 
2933   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2934   // for calls to C.  Supports the var-args backing area for register parms.
2935   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);  // byte count converted to BytesPerInt-sized slots
2936 
2937   // The after-PROLOG location of the return address.  Location of
2938   // return address specifies a type (REG or STACK) and a number
2939   // representing the register number (i.e. - use a register name) or
2940   // stack slot.
2941   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2942   // Otherwise, it is above the locks and verification slot and alignment word
2943   return_addr(STACK - 2 +
2944               round_to((Compile::current()->in_preserve_stack_slots() +
2945                         Compile::current()->fixed_slots()),
2946                        stack_alignment_in_slots()));  // (preserve + fixed slots) rounded up to the stack alignment, minus 2
2947 
2948   // Body of function which returns an integer array locating
2949   // arguments either in registers or in stack slots.  Passed an array
2950   // of ideal registers called "sig" and a "length" count.  Stack-slot
2951   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2952   // arguments for a CALLEE.  Incoming stack arguments are
2953   // automatically biased by the preserve_stack_slots field above.
2954 
2955   calling_convention
2956   %{
2957     // No difference between ingoing/outgoing just pass false
2958     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2959   %}
2960 
2961   c_calling_convention
2962   %{
2963     // This is obviously always outgoing
2964     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);  // return value deliberately discarded
2965   %}
2966 
2967   // Location of compiled Java return values.  Same as C for now.
2968   return_value
2969   %{
2970     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2971            "only return normal values");  // NOTE(review): the tables list Op_RegN before Op_RegI, so this range presumably excludes Op_RegN -- confirm intended
2972 
2973     static const int lo[Op_RegL + 1] = {  // low half of the return register, indexed by ideal_reg
2974       0,
2975       0,
2976       RAX_num,  // Op_RegN
2977       RAX_num,  // Op_RegI
2978       RAX_num,  // Op_RegP
2979       XMM0_num, // Op_RegF
2980       XMM0_num, // Op_RegD
2981       RAX_num   // Op_RegL
2982     };
2983     static const int hi[Op_RegL + 1] = {  // high half, indexed by ideal_reg; OptoReg::Bad where only the low half is used
2984       0,
2985       0,
2986       OptoReg::Bad, // Op_RegN
2987       OptoReg::Bad, // Op_RegI
2988       RAX_H_num,    // Op_RegP
2989       OptoReg::Bad, // Op_RegF
2990       XMM0b_num,    // Op_RegD
2991       RAX_H_num     // Op_RegL
2992     };
2993     // Excluded flags and vector registers.
2994     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
2995     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
2996   %}
2997 %}
2998 
2999 //----------ATTRIBUTES---------------------------------------------------------
3000 //----------Operand Attributes-------------------------------------------------
3001 op_attrib op_cost(0);        // Required cost attribute, declared with value 0; operands below set their own via op_cost(N)
3002 
3003 //----------Instruction Attributes---------------------------------------------
3004 ins_attrib ins_cost(100);       // Required cost attribute, declared with value 100
3005 ins_attrib ins_size(8);         // Required size attribute (in bits), declared with value 8
3006 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3007                                 // a non-matching short branch variant
3008                                 // of some long branch?  Declared as 0.
3009 ins_attrib ins_alignment(1);    // Required alignment attribute (must
3010                                 // be a power of 2) specifies the
3011                                 // alignment that some part of the
3012                                 // instruction (not necessarily the
3013                                 // start) requires.  If > 1, a
3014                                 // compute_padding() function must be
3015                                 // provided for the instruction
3016 
3017 //----------OPERANDS-----------------------------------------------------------
3018 // Operand definitions must precede instruction definitions for correct parsing
3019 // in the ADLC because operands constitute user defined types which are used in
3020 // instruction definitions.
3021 
3022 //----------Simple Operands----------------------------------------------------
3023 // Immediate Operands
3024 // Integer Immediate: any int constant (ConI), no value restriction
3025 operand immI()
3026 %{
3027   match(ConI);
3028 
3029   op_cost(10);
3030   format %{ %}
3031   interface(CONST_INTER);
3032 %}
3033 
3034 // Constant for test vs zero: int constant whose value is exactly 0
3035 operand immI0()
3036 %{
3037   predicate(n->get_int() == 0);
3038   match(ConI);
3039 
3040   op_cost(0);
3041   format %{ %}
3042   interface(CONST_INTER);
3043 %}
3044 
3045 // Constant for increment: int constant whose value is exactly 1
3046 operand immI1()
3047 %{
3048   predicate(n->get_int() == 1);
3049   match(ConI);
3050 
3051   op_cost(0);
3052   format %{ %}
3053   interface(CONST_INTER);
3054 %}
3055 
3056 // Constant for decrement: int constant whose value is exactly -1
3057 operand immI_M1()
3058 %{
3059   predicate(n->get_int() == -1);
3060   match(ConI);
3061 
3062   op_cost(0);
3063   format %{ %}
3064   interface(CONST_INTER);
3065 %}
3066 
3067 // Valid scale values for addressing modes: int constants 0 through 3 inclusive
3068 operand immI2()
3069 %{
3070   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3071   match(ConI);
3072 
3073   format %{ %}
3074   interface(CONST_INTER);
3075 %}
3076 
3077 operand immI8()
3078 %{
3079   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));  // int constant in the signed 8-bit range [-128, 127]
3080   match(ConI);
3081 
3082   op_cost(5);
3083   format %{ %}
3084   interface(CONST_INTER);
3085 %}
3086 
3087 operand immI16()
3088 %{
3089   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));  // int constant in the signed 16-bit range
3090   match(ConI);
3091 
3092   op_cost(10);
3093   format %{ %}
3094   interface(CONST_INTER);
3095 %}
3096 
3097 // Int Immediate non-negative: int constant with value >= 0
3098 operand immU31()
3099 %{
3100   predicate(n->get_int() >= 0);
3101   match(ConI);
3102 
3103   op_cost(0);
3104   format %{ %}
3105   interface(CONST_INTER);
3106 %}
3107 
3108 // Constant for long shifts: int constant whose value is exactly 32
3109 operand immI_32()
3110 %{
3111   predicate( n->get_int() == 32 );
3112   match(ConI);
3113 
3114   op_cost(0);
3115   format %{ %}
3116   interface(CONST_INTER);
3117 %}
3118 
3119 // Constant for long shifts: int constant whose value is exactly 64
3120 operand immI_64()
3121 %{
3122   predicate( n->get_int() == 64 );
3123   match(ConI);
3124 
3125   op_cost(0);
3126   format %{ %}
3127   interface(CONST_INTER);
3128 %}
3129 
3130 // Pointer Immediate: any pointer constant (ConP), no value restriction
3131 operand immP()
3132 %{
3133   match(ConP);
3134 
3135   op_cost(10);
3136   format %{ %}
3137   interface(CONST_INTER);
3138 %}
3139 
3140 // NULL Pointer Immediate: pointer constant whose get_ptr() value is 0
3141 operand immP0()
3142 %{
3143   predicate(n->get_ptr() == 0);
3144   match(ConP);
3145 
3146   op_cost(5);
3147   format %{ %}
3148   interface(CONST_INTER);
3149 %}
3150 
3151 // Narrow (compressed) Pointer Immediate: any ConN constant, no value restriction
3152 operand immN() %{
3153   match(ConN);
3154 
3155   op_cost(10);
3156   format %{ %}
3157   interface(CONST_INTER);
3158 %}
3159 
3160 operand immNKlass() %{
3161   match(ConNKlass);  // any ConNKlass constant (presumably a narrow klass pointer -- confirm)
3162 
3163   op_cost(10);
3164   format %{ %}
3165   interface(CONST_INTER);
3166 %}
3167 
3168 // NULL Narrow Pointer Immediate: ConN constant whose get_narrowcon() value is 0
3169 operand immN0() %{
3170   predicate(n->get_narrowcon() == 0);
3171   match(ConN);
3172 
3173   op_cost(5);
3174   format %{ %}
3175   interface(CONST_INTER);
3176 %}
3177 
3178 operand immP31()
3179 %{
3180   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3181             && (n->get_ptr() >> 31) == 0);  // pointer constant with no relocation and no bits set at or above bit 31
3182   match(ConP);
3183 
3184   op_cost(5);
3185   format %{ %}
3186   interface(CONST_INTER);
3187 %}
3188 
3189 
3190 // Long Immediate: any long constant (ConL), no value restriction
3191 operand immL()
3192 %{
3193   match(ConL);
3194 
3195   op_cost(20);
3196   format %{ %}
3197   interface(CONST_INTER);
3198 %}
3199 
3200 // Long Immediate 8-bit: long constant in the signed 8-bit range [-128, 127]
3201 operand immL8()
3202 %{
3203   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3204   match(ConL);
3205 
3206   op_cost(5);
3207   format %{ %}
3208   interface(CONST_INTER);
3209 %}
3210 
3211 // Long Immediate 32-bit unsigned: long constant unchanged by truncation to unsigned int
3212 operand immUL32()
3213 %{
3214   predicate(n->get_long() == (unsigned int) (n->get_long()));
3215   match(ConL);
3216 
3217   op_cost(10);
3218   format %{ %}
3219   interface(CONST_INTER);
3220 %}
3221 
3222 // Long Immediate 32-bit signed: long constant unchanged by truncation to int
3223 operand immL32()
3224 %{
3225   predicate(n->get_long() == (int) (n->get_long()));
3226   match(ConL);
3227 
3228   op_cost(15);
3229   format %{ %}
3230   interface(CONST_INTER);
3231 %}
3232 
3233 // Long Immediate zero: long constant whose value is exactly 0
3234 operand immL0()
3235 %{
3236   predicate(n->get_long() == 0L);
3237   match(ConL);
3238 
3239   op_cost(10);
3240   format %{ %}
3241   interface(CONST_INTER);
3242 %}
3243 
3244 // Constant for increment: long constant whose value is exactly 1
3245 operand immL1()
3246 %{
3247   predicate(n->get_long() == 1);
3248   match(ConL);
3249 
3250   format %{ %}
3251   interface(CONST_INTER);
3252 %}
3253 
3254 // Constant for decrement: long constant whose value is exactly -1
3255 operand immL_M1()
3256 %{
3257   predicate(n->get_long() == -1);
3258   match(ConL);
3259 
3260   format %{ %}
3261   interface(CONST_INTER);
3262 %}
3263 
3264 // Long Immediate: the value 10 (long constant whose value is exactly 10)
3265 operand immL10()
3266 %{
3267   predicate(n->get_long() == 10);
3268   match(ConL);
3269 
3270   format %{ %}
3271   interface(CONST_INTER);
3272 %}
3273 
3274 // Long immediate from 0 to 127 inclusive.
3275 // Used for a shorter form of long mul by 10.
3276 operand immL_127()
3277 %{
3278   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3279   match(ConL);
3280 
3281   op_cost(10);
3282   format %{ %}
3283   interface(CONST_INTER);
3284 %}
3285 
3286 // Long Immediate: low 32-bit mask (long constant equal to 0xFFFFFFFF)
3287 operand immL_32bits()
3288 %{
3289   predicate(n->get_long() == 0xFFFFFFFFL);
3290   match(ConL);
3291   op_cost(20);
3292 
3293   format %{ %}
3294   interface(CONST_INTER);
3295 %}
3296 
3297 // Float Immediate zero: float constant for which jint_cast() of the value is 0
3298 operand immF0()
3299 %{
3300   predicate(jint_cast(n->getf()) == 0);  // presumably compares the raw bit pattern, so -0.0f would not match -- confirm
3301   match(ConF);
3302 
3303   op_cost(5);
3304   format %{ %}
3305   interface(CONST_INTER);
3306 %}
3307 
3308 // Float Immediate: any float constant (ConF), no value restriction
3309 operand immF()
3310 %{
3311   match(ConF);
3312 
3313   op_cost(15);
3314   format %{ %}
3315   interface(CONST_INTER);
3316 %}
3317 
3318 // Double Immediate zero: double constant for which jlong_cast() of the value is 0
3319 operand immD0()
3320 %{
3321   predicate(jlong_cast(n->getd()) == 0);  // presumably compares the raw bit pattern, so -0.0 would not match -- confirm
3322   match(ConD);
3323 
3324   op_cost(5);
3325   format %{ %}
3326   interface(CONST_INTER);
3327 %}
3328 
3329 // Double Immediate: any double constant (ConD), no value restriction
3330 operand immD()
3331 %{
3332   match(ConD);
3333 
3334   op_cost(15);
3335   format %{ %}
3336   interface(CONST_INTER);
3337 %}
3338 
3339 // Immediates for special shifts (sign extend)
3340 
3341 // Int constant whose value is exactly 16 (one of the special shift counts named in the section heading)
3342 operand immI_16()
3343 %{
3344   predicate(n->get_int() == 16);
3345   match(ConI);
3346 
3347   format %{ %}
3348   interface(CONST_INTER);
3349 %}
3350 
3351 operand immI_24()
3352 %{
3353   predicate(n->get_int() == 24);  // int constant whose value is exactly 24
3354   match(ConI);
3355 
3356   format %{ %}
3357   interface(CONST_INTER);
3358 %}
3359 
3360 // Constant for byte-wide masking: int constant whose value is exactly 255 (0xFF)
3361 operand immI_255()
3362 %{
3363   predicate(n->get_int() == 255);
3364   match(ConI);
3365 
3366   format %{ %}
3367   interface(CONST_INTER);
3368 %}
3369 
3370 // Constant for short-wide masking: int constant whose value is exactly 65535 (0xFFFF)
3371 operand immI_65535()
3372 %{
3373   predicate(n->get_int() == 65535);
3374   match(ConI);
3375 
3376   format %{ %}
3377   interface(CONST_INTER);
3378 %}
3379 
3380 // Constant for byte-wide masking: long constant whose value is exactly 255 (0xFF)
3381 operand immL_255()
3382 %{
3383   predicate(n->get_long() == 255);
3384   match(ConL);
3385 
3386   format %{ %}
3387   interface(CONST_INTER);
3388 %}
3389 
3390 // Constant for short-wide masking: long constant whose value is exactly 65535 (0xFFFF)
3391 operand immL_65535()
3392 %{
3393   predicate(n->get_long() == 65535);
3394   match(ConL);
3395 
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 // Register Operands
3401 // Integer Register: RegI allocated from register class int_reg
3402 operand rRegI()
3403 %{
3404   constraint(ALLOC_IN_RC(int_reg));
3405   match(RegI);
3406 
3407   match(rax_RegI);  // the single-register int operands are also accepted as inputs
3408   match(rbx_RegI);
3409   match(rcx_RegI);
3410   match(rdx_RegI);
3411   match(rdi_RegI);
3412 
3413   format %{ %}
3414   interface(REG_INTER);
3415 %}
3416 
3417 // Special Registers: int operand constrained to register class int_rax_reg
3418 operand rax_RegI()
3419 %{
3420   constraint(ALLOC_IN_RC(int_rax_reg));
3421   match(RegI);
3422   match(rRegI);
3423 
3424   format %{ "RAX" %}
3425   interface(REG_INTER);
3426 %}
3427 
3428 // Special Registers: int operand constrained to register class int_rbx_reg
3429 operand rbx_RegI()
3430 %{
3431   constraint(ALLOC_IN_RC(int_rbx_reg));
3432   match(RegI);
3433   match(rRegI);
3434 
3435   format %{ "RBX" %}
3436   interface(REG_INTER);
3437 %}
3438 
3439 operand rcx_RegI()
3440 %{
3441   constraint(ALLOC_IN_RC(int_rcx_reg));  // int operand constrained to register class int_rcx_reg
3442   match(RegI);
3443   match(rRegI);
3444 
3445   format %{ "RCX" %}
3446   interface(REG_INTER);
3447 %}
3448 
3449 operand rdx_RegI()
3450 %{
3451   constraint(ALLOC_IN_RC(int_rdx_reg));  // int operand constrained to register class int_rdx_reg
3452   match(RegI);
3453   match(rRegI);
3454 
3455   format %{ "RDX" %}
3456   interface(REG_INTER);
3457 %}
3458 
3459 operand rdi_RegI()
3460 %{
3461   constraint(ALLOC_IN_RC(int_rdi_reg));  // int operand constrained to register class int_rdi_reg
3462   match(RegI);
3463   match(rRegI);
3464 
3465   format %{ "RDI" %}
3466   interface(REG_INTER);
3467 %}
3468 
3469 operand no_rcx_RegI()
3470 %{
3471   constraint(ALLOC_IN_RC(int_no_rcx_reg));  // int operand allocated from int_no_rcx_reg
3472   match(RegI);
3473   match(rax_RegI);  // accepted single-register inputs; rcx_RegI is not among them
3474   match(rbx_RegI);
3475   match(rdx_RegI);
3476   match(rdi_RegI);
3477 
3478   format %{ %}
3479   interface(REG_INTER);
3480 %}
3481 
3482 operand no_rax_rdx_RegI()
3483 %{
3484   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // int operand allocated from int_no_rax_rdx_reg
3485   match(RegI);
3486   match(rbx_RegI);  // accepted single-register inputs; rax_RegI and rdx_RegI are not among them
3487   match(rcx_RegI);
3488   match(rdi_RegI);
3489 
3490   format %{ %}
3491   interface(REG_INTER);
3492 %}
3493 
3494 // Pointer Register: RegP allocated from register class any_reg
3495 operand any_RegP()
3496 %{
3497   constraint(ALLOC_IN_RC(any_reg));
3498   match(RegP);
3499   match(rax_RegP);  // every pointer operand listed here is also accepted as an input
3500   match(rbx_RegP);
3501   match(rdi_RegP);
3502   match(rsi_RegP);
3503   match(rbp_RegP);
3504   match(r15_RegP);
3505   match(rRegP);
3506 
3507   format %{ %}
3508   interface(REG_INTER);
3509 %}
3510 
3511 operand rRegP()
3512 %{
3513   constraint(ALLOC_IN_RC(ptr_reg));  // general pointer operand: RegP allocated from ptr_reg
3514   match(RegP);
3515   match(rax_RegP);
3516   match(rbx_RegP);
3517   match(rdi_RegP);
3518   match(rsi_RegP);
3519   match(rbp_RegP);
3520   match(r15_RegP);  // See Q&A below about r15_RegP.
3521 
3522   format %{ %}
3523   interface(REG_INTER);
3524 %}
3525 
3526 operand rRegN() %{
3527   constraint(ALLOC_IN_RC(int_reg));  // RegN values are allocated from int_reg, the same class rRegI uses
3528   match(RegN);
3529 
3530   format %{ %}
3531   interface(REG_INTER);
3532 %}
3533 
3534 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3535 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3536 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3537 // The output of an instruction is controlled by the allocator, which respects
3538 // register class masks, not match rules.  Unless an instruction mentions
3539 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3540 // by the allocator as an input.
3541 
3542 operand no_rax_RegP()
3543 %{
3544   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // pointer operand allocated from ptr_no_rax_reg
3545   match(RegP);
3546   match(rbx_RegP);  // accepted single-register inputs; rax_RegP is not among them
3547   match(rsi_RegP);
3548   match(rdi_RegP);
3549 
3550   format %{ %}
3551   interface(REG_INTER);
3552 %}
3553 
3554 operand no_rbp_RegP()
3555 %{
3556   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // pointer operand allocated from ptr_no_rbp_reg
3557   match(RegP);
3558   match(rbx_RegP);  // accepted single-register inputs; rbp_RegP is not among them
3559   match(rsi_RegP);
3560   match(rdi_RegP);
3561 
3562   format %{ %}
3563   interface(REG_INTER);
3564 %}
3565 
3566 operand no_rax_rbx_RegP()
3567 %{
3568   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // pointer operand allocated from ptr_no_rax_rbx_reg
3569   match(RegP);
3570   match(rsi_RegP);  // accepted single-register inputs; rax_RegP and rbx_RegP are not among them
3571   match(rdi_RegP);
3572 
3573   format %{ %}
3574   interface(REG_INTER);
3575 %}
3576 
3577 // Special Registers: pointer operand constrained to register class ptr_rax_reg
3578 // Return a pointer value
3579 operand rax_RegP()
3580 %{
3581   constraint(ALLOC_IN_RC(ptr_rax_reg));
3582   match(RegP);
3583   match(rRegP);
3584 
3585   format %{ %}
3586   interface(REG_INTER);
3587 %}
3588 
3589 // Special Registers: RegN operand constrained to register class int_rax_reg
3590 // Return a compressed pointer value
3591 operand rax_RegN()
3592 %{
3593   constraint(ALLOC_IN_RC(int_rax_reg));
3594   match(RegN);
3595   match(rRegN);
3596 
3597   format %{ %}
3598   interface(REG_INTER);
3599 %}
3600 
3601 // Used in AtomicAdd: pointer operand constrained to register class ptr_rbx_reg
3602 operand rbx_RegP()
3603 %{
3604   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3605   match(RegP);
3606   match(rRegP);
3607 
3608   format %{ %}
3609   interface(REG_INTER);
3610 %}
3611 
3612 operand rsi_RegP()
3613 %{
3614   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand constrained to register class ptr_rsi_reg
3615   match(RegP);
3616   match(rRegP);
3617 
3618   format %{ %}
3619   interface(REG_INTER);
3620 %}
3621 
3622 // Used in rep stosq: pointer operand constrained to register class ptr_rdi_reg
3623 operand rdi_RegP()
3624 %{
3625   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3626   match(RegP);
3627   match(rRegP);
3628 
3629   format %{ %}
3630   interface(REG_INTER);
3631 %}
3632 
// Pointer operand pinned to register class ptr_rbp_reg.
// Accepts the ideal RegP type as well as the general rRegP operand.
3633 operand rbp_RegP()
3634 %{
3635   constraint(ALLOC_IN_RC(ptr_rbp_reg));
3636   match(RegP);
3637   match(rRegP);
3638 
3639   format %{ %}
3640   interface(REG_INTER);
3641 %}
3642 
// Pointer operand pinned to register class ptr_r15_reg.  Per the note that
// precedes the no_*_RegP operands, r15 only reaches the allocator through
// instructions that name r15_RegP or any_RegP explicitly.
3643 operand r15_RegP()
3644 %{
3645   constraint(ALLOC_IN_RC(ptr_r15_reg));
3646   match(RegP);
3647   match(rRegP);
3648 
3649   format %{ %}
3650   interface(REG_INTER);
3651 %}
3652 
// General long (64-bit integer) register operand, allocated from register
// class long_reg.  Also accepts the fixed-register operands rax_RegL and
// rdx_RegL.
3653 operand rRegL()
3654 %{
3655   constraint(ALLOC_IN_RC(long_reg));
3656   match(RegL);
3657   match(rax_RegL);
3658   match(rdx_RegL);
3659 
3660   format %{ %}
3661   interface(REG_INTER);
3662 %}
3663 
3664 // Special Registers
// Long register operand allocated from register class long_no_rax_rdx_reg
// (defined outside this section; by its name it excludes RAX and RDX).
3665 operand no_rax_rdx_RegL()
3666 %{
3667   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3668   match(RegL);
3669   match(rRegL);
3670 
3671   format %{ %}
3672   interface(REG_INTER);
3673 %}
3674 
// Long register operand intended (per its name) to exclude only RAX.
// NOTE(review): the allocation constraint below is long_no_rax_rdx_reg, the
// same class used by no_rax_rdx_RegL, even though this operand additionally
// matches rdx_RegL.  Confirm against the register class definitions whether
// a class excluding only RAX was intended here.
3675 operand no_rax_RegL()
3676 %{
3677   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3678   match(RegL);
3679   match(rRegL);
3680   match(rdx_RegL);
3681 
3682   format %{ %}
3683   interface(REG_INTER);
3684 %}
3685 
// Long register operand allocated from register class long_no_rcx_reg
// (defined outside this section; by its name it excludes RCX).
3686 operand no_rcx_RegL()
3687 %{
3688   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3689   match(RegL);
3690   match(rRegL);
3691 
3692   format %{ %}
3693   interface(REG_INTER);
3694 %}
3695 
// Long operand pinned to register class long_rax_reg.  Unlike the sibling
// fixed-register long operands, its format string is the literal "RAX".
3696 operand rax_RegL()
3697 %{
3698   constraint(ALLOC_IN_RC(long_rax_reg));
3699   match(RegL);
3700   match(rRegL);
3701 
3702   format %{ "RAX" %}
3703   interface(REG_INTER);
3704 %}
3705 
// Long operand pinned to register class long_rcx_reg.
// Accepts the ideal RegL type as well as the general rRegL operand.
3706 operand rcx_RegL()
3707 %{
3708   constraint(ALLOC_IN_RC(long_rcx_reg));
3709   match(RegL);
3710   match(rRegL);
3711 
3712   format %{ %}
3713   interface(REG_INTER);
3714 %}
3715 
// Long operand pinned to register class long_rdx_reg.
// Accepts the ideal RegL type as well as the general rRegL operand.
3716 operand rdx_RegL()
3717 %{
3718   constraint(ALLOC_IN_RC(long_rdx_reg));
3719   match(RegL);
3720   match(rRegL);
3721 
3722   format %{ %}
3723   interface(REG_INTER);
3724 %}
3725 
3726 // Flags register, used as output of compare instructions.
// Allocated from register class int_flags; printed as "RFLAGS".
3727 operand rFlagsReg()
3728 %{
3729   constraint(ALLOC_IN_RC(int_flags));
3730   match(RegFlags);
3731 
3732   format %{ "RFLAGS" %}
3733   interface(REG_INTER);
3734 %}
3735 
3736 // Flags register, used as output of FLOATING POINT compare instructions.
// Same register class (int_flags) as rFlagsReg; the distinct operand type
// lets instructions tell the two kinds of flag results apart.  Printed as
// "RFLAGS_U".
3737 operand rFlagsRegU()
3738 %{
3739   constraint(ALLOC_IN_RC(int_flags));
3740   match(RegFlags);
3741 
3742   format %{ "RFLAGS_U" %}
3743   interface(REG_INTER);
3744 %}
3745 
// Flags register variant printed as "RFLAGS_U_CF".  The predicate is the
// constant false, so this operand is never selected through its own match
// rule; presumably it is only produced by instructions that name it
// explicitly as a result -- confirm against the instruction definitions.
3746 operand rFlagsRegUCF() %{
3747   constraint(ALLOC_IN_RC(int_flags));
3748   match(RegFlags);
3749   predicate(false);
3750 
3751   format %{ "RFLAGS_U_CF" %}
3752   interface(REG_INTER);
3753 %}
3754 
3755 // Float register operand: matches ideal type RegF, allocated from class float_reg.
3756 operand regF()
3757 %{
3758   constraint(ALLOC_IN_RC(float_reg));
3759   match(RegF);
3760 
3761   format %{ %}
3762   interface(REG_INTER);
3763 %}
3764 
3765 // Double register operand: matches ideal type RegD, allocated from class double_reg.
3766 operand regD()
3767 %{
3768   constraint(ALLOC_IN_RC(double_reg));
3769   match(RegD);
3770 
3771   format %{ %}
3772   interface(REG_INTER);
3773 %}
3774 
3775 //----------Memory Operands----------------------------------------------------
3776 // Direct Memory Operand
3777 // operand direct(immP addr)
3778 // %{
3779 //   match(addr);
3780 
3781 //   format %{ "[$addr]" %}
3782 //   interface(MEMORY_INTER) %{
3783 //     base(0xFFFFFFFF);
3784 //     index(0x4);
3785 //     scale(0x0);
3786 //     disp($addr);
3787 //   %}
3788 // %}
3789 
3790 // Indirect Memory Operand: address is the pointer register itself, [reg].
3791 operand indirect(any_RegP reg)
3792 %{
3793   constraint(ALLOC_IN_RC(ptr_reg));
3794   match(reg);
3795 
3796   format %{ "[$reg]" %}
3797   interface(MEMORY_INTER) %{
3798     base($reg);
3799     index(0x4);  // No Index
3800     scale(0x0);  // No Scale
3801     disp(0x0);   // No Displacement
3802   %}
3803 %}
3804 
3805 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8.
3806 operand indOffset8(any_RegP reg, immL8 off)
3807 %{
3808   constraint(ALLOC_IN_RC(ptr_reg));
3809   match(AddP reg off);
3810 
3811   format %{ "[$reg + $off (8-bit)]" %}
3812   interface(MEMORY_INTER) %{
3813     base($reg);
3814     index(0x4);  // No Index
3815     scale(0x0);  // No Scale
3816     disp($off);
3817   %}
3818 %}
3819 
3820 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32.
3821 operand indOffset32(any_RegP reg, immL32 off)
3822 %{
3823   constraint(ALLOC_IN_RC(ptr_reg));
3824   match(AddP reg off);
3825 
3826   format %{ "[$reg + $off (32-bit)]" %}
3827   interface(MEMORY_INTER) %{
3828     base($reg);
3829     index(0x4);  // No Index
3830     scale(0x0);  // No Scale
3831     disp($off);
3832   %}
3833 %}
3834 
3835 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off].
3836 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3837 %{
3838   constraint(ALLOC_IN_RC(ptr_reg));
3839   match(AddP (AddP reg lreg) off);
3840 
3841   op_cost(10);
3842   format %{"[$reg + $off + $lreg]" %}
3843   interface(MEMORY_INTER) %{
3844     base($reg);
3845     index($lreg);
3846     scale(0x0);  // No Scale
3847     disp($off);
3848   %}
3849 %}
3850 
3851 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset.
3852 operand indIndex(any_RegP reg, rRegL lreg)
3853 %{
3854   constraint(ALLOC_IN_RC(ptr_reg));
3855   match(AddP reg lreg);
3856 
3857   op_cost(10);
3858   format %{"[$reg + $lreg]" %}
3859   interface(MEMORY_INTER) %{
3860     base($reg);
3861     index($lreg);
3862     scale(0x0);  // No Scale
3863     disp(0x0);   // No Displacement
3864   %}
3865 %}
3866 
3867 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)].
// The shift amount of the matched LShiftL becomes the addressing-mode scale.
3868 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3869 %{
3870   constraint(ALLOC_IN_RC(ptr_reg));
3871   match(AddP reg (LShiftL lreg scale));
3872 
3873   op_cost(10);
3874   format %{"[$reg + $lreg << $scale]" %}
3875   interface(MEMORY_INTER) %{
3876     base($reg);
3877     index($lreg);
3878     scale($scale);
3879     disp(0x0);   // No Displacement
3880   %}
3881 %}
3882 
3883 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + (lreg << scale) + off].
3884 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3885 %{
3886   constraint(ALLOC_IN_RC(ptr_reg));
3887   match(AddP (AddP reg (LShiftL lreg scale)) off);
3888 
3889   op_cost(10);
3890   format %{"[$reg + $off + $lreg << $scale]" %}
3891   interface(MEMORY_INTER) %{
3892     base($reg);
3893     index($lreg);
3894     scale($scale);
3895     disp($off);
3896   %}
3897 %}
3898 
3899 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand:
// [reg + (ConvI2L(idx) << scale) + off], where idx is a 32-bit int register.
// The predicate follows the matched tree from the outer AddP down to a node
// whose long type has a lower bound >= 0 (presumably the ConvI2L node, i.e.
// the index is known to be non-negative -- confirm the in(...) numbering
// against the ideal AddP/LShiftL node layout).
3900 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3901 %{
3902   constraint(ALLOC_IN_RC(ptr_reg));
3903   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3904   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3905 
3906   op_cost(10);
3907   format %{"[$reg + $off + $idx << $scale]" %}
3908   interface(MEMORY_INTER) %{
3909     base($reg);
3910     index($idx);
3911     scale($scale);
3912     disp($off);
3913   %}
3914 %}
3915 
3916 // Indirect Narrow Oop Plus Offset Operand: [R12 + (reg << 3) + off].
3917 // Note: the x86 architecture doesn't support "scale * index + offset" without
3918 // a base, so we can't free r12 even with Universe::narrow_oop_base() == NULL.
// Only used when compressed oops are enabled and the narrow oop shift equals
// Address::times_8, so the DecodeN folds into the addressing mode.
3919 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3920   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3921   constraint(ALLOC_IN_RC(ptr_reg));
3922   match(AddP (DecodeN reg) off);
3923 
3924   op_cost(10);
3925   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3926   interface(MEMORY_INTER) %{
3927     base(0xc); // R12
3928     index($reg);
3929     scale(0x3);
3930     disp($off);
3931   %}
3932 %}
3933 
3934 // Indirect Memory Operand, narrow-oop variant: [reg] where reg is a narrow oop.
// Only used when Universe::narrow_oop_shift() == 0; the matched DecodeN is
// absorbed and the narrow register is used directly as the base.
3935 operand indirectNarrow(rRegN reg)
3936 %{
3937   predicate(Universe::narrow_oop_shift() == 0);
3938   constraint(ALLOC_IN_RC(ptr_reg));
3939   match(DecodeN reg);
3940 
3941   format %{ "[$reg]" %}
3942   interface(MEMORY_INTER) %{
3943     base($reg);
3944     index(0x4);  // No Index
3945     scale(0x0);  // No Scale
3946     disp(0x0);   // No Displacement
3947   %}
3948 %}
3949 
3950 // Indirect Memory Plus Short Offset Operand, narrow-oop variant:
// [DecodeN(reg) + off] with an immL8 offset.  Only used when
// Universe::narrow_oop_shift() == 0.
3951 operand indOffset8Narrow(rRegN reg, immL8 off)
3952 %{
3953   predicate(Universe::narrow_oop_shift() == 0);
3954   constraint(ALLOC_IN_RC(ptr_reg));
3955   match(AddP (DecodeN reg) off);
3956 
3957   format %{ "[$reg + $off (8-bit)]" %}
3958   interface(MEMORY_INTER) %{
3959     base($reg);
3960     index(0x4);  // No Index
3961     scale(0x0);  // No Scale
3962     disp($off);
3963   %}
3964 %}
3965 
3966 // Indirect Memory Plus Long Offset Operand, narrow-oop variant:
// [DecodeN(reg) + off] with an immL32 offset.  Only used when
// Universe::narrow_oop_shift() == 0.
3967 operand indOffset32Narrow(rRegN reg, immL32 off)
3968 %{
3969   predicate(Universe::narrow_oop_shift() == 0);
3970   constraint(ALLOC_IN_RC(ptr_reg));
3971   match(AddP (DecodeN reg) off);
3972 
3973   format %{ "[$reg + $off (32-bit)]" %}
3974   interface(MEMORY_INTER) %{
3975     base($reg);
3976     index(0x4);  // No Index
3977     scale(0x0);  // No Scale
3978     disp($off);
3979   %}
3980 %}
3981 
3982 // Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop variant:
// [DecodeN(reg) + lreg + off].  Only used when
// Universe::narrow_oop_shift() == 0.
3983 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3984 %{
3985   predicate(Universe::narrow_oop_shift() == 0);
3986   constraint(ALLOC_IN_RC(ptr_reg));
3987   match(AddP (AddP (DecodeN reg) lreg) off);
3988 
3989   op_cost(10);
3990   format %{"[$reg + $off + $lreg]" %}
3991   interface(MEMORY_INTER) %{
3992     base($reg);
3993     index($lreg);
3994     scale(0x0);  // No Scale
3995     disp($off);
3996   %}
3997 %}
3998 
3999 // Indirect Memory Plus Index Register Operand, narrow-oop variant:
// [DecodeN(reg) + lreg], no offset.  Only used when
// Universe::narrow_oop_shift() == 0.
4000 operand indIndexNarrow(rRegN reg, rRegL lreg)
4001 %{
4002   predicate(Universe::narrow_oop_shift() == 0);
4003   constraint(ALLOC_IN_RC(ptr_reg));
4004   match(AddP (DecodeN reg) lreg);
4005 
4006   op_cost(10);
4007   format %{"[$reg + $lreg]" %}
4008   interface(MEMORY_INTER) %{
4009     base($reg);
4010     index($lreg);
4011     scale(0x0);  // No Scale
4012     disp(0x0);   // No Displacement
4013   %}
4014 %}
4015 
4016 // Indirect Memory Times Scale Plus Index Register, narrow-oop variant:
// [DecodeN(reg) + (lreg << scale)].  Only used when
// Universe::narrow_oop_shift() == 0.
4017 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4018 %{
4019   predicate(Universe::narrow_oop_shift() == 0);
4020   constraint(ALLOC_IN_RC(ptr_reg));
4021   match(AddP (DecodeN reg) (LShiftL lreg scale));
4022 
4023   op_cost(10);
4024   format %{"[$reg + $lreg << $scale]" %}
4025   interface(MEMORY_INTER) %{
4026     base($reg);
4027     index($lreg);
4028     scale($scale);
4029     disp(0x0);   // No Displacement
4030   %}
4031 %}
4032 
4033 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand,
// narrow-oop variant: [DecodeN(reg) + (lreg << scale) + off].  Only used when
// Universe::narrow_oop_shift() == 0.
4034 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4035 %{
4036   predicate(Universe::narrow_oop_shift() == 0);
4037   constraint(ALLOC_IN_RC(ptr_reg));
4038   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4039 
4040   op_cost(10);
4041   format %{"[$reg + $off + $lreg << $scale]" %}
4042   interface(MEMORY_INTER) %{
4043     base($reg);
4044     index($lreg);
4045     scale($scale);
4046     disp($off);
4047   %}
4048 %}
4049 
4050 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand,
// narrow-oop variant: [DecodeN(reg) + (ConvI2L(idx) << scale) + off].
// The predicate requires Universe::narrow_oop_shift() == 0 and, in addition,
// that the node reached via in(2)->in(3)->in(1) has a long type whose lower
// bound is >= 0 (presumably the ConvI2L of the index -- confirm the in(...)
// numbering against the ideal node layout).
4051 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4052 %{
4053   constraint(ALLOC_IN_RC(ptr_reg));
4054   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4055   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4056 
4057   op_cost(10);
4058   format %{"[$reg + $off + $idx << $scale]" %}
4059   interface(MEMORY_INTER) %{
4060     base($reg);
4061     index($idx);
4062     scale($scale);
4063     disp($off);
4064   %}
4065 %}
4066 
4067 //----------Special Memory Operands--------------------------------------------
4068 // Stack Slot Operand - This operand is used for loading and storing temporary
4069 //                      values on the stack where a match requires a value to
4070 //                      flow through memory.
// Stack slot holding a pointer value (sRegP): addressed as RSP + stack offset.
4071 operand stackSlotP(sRegP reg)
4072 %{
4073   constraint(ALLOC_IN_RC(stack_slots));
4074   // No match rule because this operand is only generated in matching
4075 
4076   format %{ "[$reg]" %}
4077   interface(MEMORY_INTER) %{
4078     base(0x4);   // RSP
4079     index(0x4);  // No Index
4080     scale(0x0);  // No Scale
4081     disp($reg);  // Stack Offset
4082   %}
4083 %}
4084 
// Stack slot holding an int value (sRegI): addressed as RSP + stack offset.
4085 operand stackSlotI(sRegI reg)
4086 %{
4087   constraint(ALLOC_IN_RC(stack_slots));
4088   // No match rule because this operand is only generated in matching
4089 
4090   format %{ "[$reg]" %}
4091   interface(MEMORY_INTER) %{
4092     base(0x4);   // RSP
4093     index(0x4);  // No Index
4094     scale(0x0);  // No Scale
4095     disp($reg);  // Stack Offset
4096   %}
4097 %}
4098 
// Stack slot holding a float value (sRegF): addressed as RSP + stack offset.
4099 operand stackSlotF(sRegF reg)
4100 %{
4101   constraint(ALLOC_IN_RC(stack_slots));
4102   // No match rule because this operand is only generated in matching
4103 
4104   format %{ "[$reg]" %}
4105   interface(MEMORY_INTER) %{
4106     base(0x4);   // RSP
4107     index(0x4);  // No Index
4108     scale(0x0);  // No Scale
4109     disp($reg);  // Stack Offset
4110   %}
4111 %}
4112 
// Stack slot holding a double value (sRegD): addressed as RSP + stack offset.
4113 operand stackSlotD(sRegD reg)
4114 %{
4115   constraint(ALLOC_IN_RC(stack_slots));
4116   // No match rule because this operand is only generated in matching
4117 
4118   format %{ "[$reg]" %}
4119   interface(MEMORY_INTER) %{
4120     base(0x4);   // RSP
4121     index(0x4);  // No Index
4122     scale(0x0);  // No Scale
4123     disp($reg);  // Stack Offset
4124   %}
4125 %}
// Stack slot holding a long value (sRegL): addressed as RSP + stack offset.
4126 operand stackSlotL(sRegL reg)
4127 %{
4128   constraint(ALLOC_IN_RC(stack_slots));
4129   // No match rule because this operand is only generated in matching
4130 
4131   format %{ "[$reg]" %}
4132   interface(MEMORY_INTER) %{
4133     base(0x4);   // RSP
4134     index(0x4);  // No Index
4135     scale(0x0);  // No Scale
4136     disp($reg);  // Stack Offset
4137   %}
4138 %}
4139 
4140 //----------Conditional Branch Operands----------------------------------------
4141 // Comparison Op  - This is the operation of the comparison, and is limited to
4142 //                  the following set of codes:
4143 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4144 //
4145 // Other attributes of the comparison, such as unsignedness, are specified
4146 // by the comparison instruction that sets a condition code flags register.
4147 // That result is represented by a flags operand whose subtype is appropriate
4148 // to the unsignedness (etc.) of the comparison.
4149 //
4150 // Later, the instruction which matches both the Comparison Op (a Bool) and
4151 // the flags (produced by the Cmp) specifies the coding of the comparison op
4152 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4153 
4154 // Comparison Code, signed compare.
// Each COND_INTER entry pairs a condition with its x86 condition-code value
// and the mnemonic suffix used when printing (e.g. less -> 0xC, "l").
4155 operand cmpOp()
4156 %{
4157   match(Bool);
4158 
4159   format %{ "" %}
4160   interface(COND_INTER) %{
4161     equal(0x4, "e");
4162     not_equal(0x5, "ne");
4163     less(0xC, "l");
4164     greater_equal(0xD, "ge");
4165     less_equal(0xE, "le");
4166     greater(0xF, "g");
4167     overflow(0x0, "o");
4168     no_overflow(0x1, "no");
4169   %}
4170 %}
4171 
4172 // Comparison Code, unsigned compare.  Used by FP also, with
4173 // C2 (unordered) turned into GT or LT already.  The other bits
4174 // C0 and C3 are turned into Carry & Zero flags.
// Differs from cmpOp only in the ordering conditions, which use the
// below/above codes (b, nb, be, nbe) instead of the signed ones.
4175 operand cmpOpU()
4176 %{
4177   match(Bool);
4178 
4179   format %{ "" %}
4180   interface(COND_INTER) %{
4181     equal(0x4, "e");
4182     not_equal(0x5, "ne");
4183     less(0x2, "b");
4184     greater_equal(0x3, "nb");
4185     less_equal(0x6, "be");
4186     greater(0x7, "nbe");
4187     overflow(0x0, "o");
4188     no_overflow(0x1, "no");
4189   %}
4190 %}
4191 
4192 
4193 // Floating comparisons that don't require any fixup for the unordered case.
// The predicate limits this operand to the lt, ge, le and gt tests; the
// condition encodings are the same unsigned codes as in cmpOpU.
4194 operand cmpOpUCF() %{
4195   match(Bool);
4196   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4197             n->as_Bool()->_test._test == BoolTest::ge ||
4198             n->as_Bool()->_test._test == BoolTest::le ||
4199             n->as_Bool()->_test._test == BoolTest::gt);
4200   format %{ "" %}
4201   interface(COND_INTER) %{
4202     equal(0x4, "e");
4203     not_equal(0x5, "ne");
4204     less(0x2, "b");
4205     greater_equal(0x3, "nb");
4206     less_equal(0x6, "be");
4207     greater(0x7, "nbe");
4208     overflow(0x0, "o");
4209     no_overflow(0x1, "no");
4210   %}
4211 %}
4212 
4213 
4214 // Floating comparisons that can be fixed up with extra conditional jumps.
// The predicate limits this operand to the ne and eq tests; the condition
// encodings are the same unsigned codes as in cmpOpU.
4215 operand cmpOpUCF2() %{
4216   match(Bool);
4217   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4218             n->as_Bool()->_test._test == BoolTest::eq);
4219   format %{ "" %}
4220   interface(COND_INTER) %{
4221     equal(0x4, "e");
4222     not_equal(0x5, "ne");
4223     less(0x2, "b");
4224     greater_equal(0x3, "nb");
4225     less_equal(0x6, "be");
4226     greater(0x7, "nbe");
4227     overflow(0x0, "o");
4228     no_overflow(0x1, "no");
4229   %}
4230 %}
4231 
4232 
4233 //----------OPERAND CLASSES----------------------------------------------------
4234 // Operand Classes are groups of operands that are used to simplify
4235 // instruction definitions by not requiring the AD writer to specify separate
4236 // instructions for every form of operand when the instruction accepts
4237 // multiple operand types with the same basic encoding and format.  The classic
4238 // case of this is memory operands.
4239 
// The "memory" operand class: every addressing-mode operand defined in the
// Memory Operands section (regular, compressed-oop and narrow-oop variants).
// The stackSlot* operands are not members of this class.
4240 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4241                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4242                indCompressedOopOffset,
4243                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4244                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4245                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
4246 
4247 //----------PIPELINE-----------------------------------------------------------
4248 // Rules which define the behavior of the target architectures pipeline.
4249 pipeline %{
4250 
4251 //----------ATTRIBUTES---------------------------------------------------------
4252 attributes %{
4253   variable_size_instructions;        // Fixed size instructions
4254   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4255   instruction_unit_size = 1;         // An instruction is 1 bytes long
4256   instruction_fetch_unit_size = 16;  // The processor fetches one line
4257   instruction_fetch_units = 1;       // of 16 bytes
4258 
4259   // List of nop instructions
4260   nops( MachNop );
4261 %}
4262 
4263 //----------RESOURCES----------------------------------------------------------
4264 // Resources are the functional units available to the machine
4265 
4266 // Generic P2/P3 pipeline
4267 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4268 // 3 instructions decoded per cycle.
4269 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4270 // 3 ALU op, only ALU0 handles mul instructions.
4271 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4272            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4273            BR, FPU,
4274            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4275 
4276 //----------PIPELINE DESCRIPTION-----------------------------------------------
4277 // Pipeline Description specifies the stages in the machine's pipeline
4278 
4279 // Generic P2/P3 pipeline
4280 pipe_desc(S0, S1, S2, S3, S4, S5);
4281 
4282 //----------PIPELINE CLASSES---------------------------------------------------
4283 // Pipeline Classes describe the stages in which input and output are
4284 // referenced by the hardware pipeline.
4285 
4286 // Naming convention: ialu or fpu
4287 // Then: _reg
4288 // Then: _reg if there is a 2nd register
4289 // Then: _long if it's a pair of instructions implementing a long
4290 // Then: _fat if it requires the big decoder
4291 //   Or: _mem if it requires the big decoder and a memory unit.
4292 
4293 // Integer ALU reg operation
4294 pipe_class ialu_reg(rRegI dst)
4295 %{
4296     single_instruction;
4297     dst    : S4(write);
4298     dst    : S3(read);
4299     DECODE : S0;        // any decoder
4300     ALU    : S3;        // any alu
4301 %}
4302 
4303 // Long ALU reg operation
4304 pipe_class ialu_reg_long(rRegL dst)
4305 %{
4306     instruction_count(2);
4307     dst    : S4(write);
4308     dst    : S3(read);
4309     DECODE : S0(2);     // any 2 decoders
4310     ALU    : S3(2);     // both alus
4311 %}
4312 
4313 // Integer ALU reg operation using big decoder
4314 pipe_class ialu_reg_fat(rRegI dst)
4315 %{
4316     single_instruction;
4317     dst    : S4(write);
4318     dst    : S3(read);
4319     D0     : S0;        // big decoder only
4320     ALU    : S3;        // any alu
4321 %}
4322 
4323 // Long ALU reg operation using big decoder
4324 pipe_class ialu_reg_long_fat(rRegL dst)
4325 %{
4326     instruction_count(2);
4327     dst    : S4(write);
4328     dst    : S3(read);
4329     D0     : S0(2);     // big decoder only; twice
4330     ALU    : S3(2);     // any 2 alus
4331 %}
4332 
4333 // Integer ALU reg-reg operation
4334 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4335 %{
4336     single_instruction;
4337     dst    : S4(write);
4338     src    : S3(read);
4339     DECODE : S0;        // any decoder
4340     ALU    : S3;        // any alu
4341 %}
4342 
4343 // Long ALU reg-reg operation
4344 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4345 %{
4346     instruction_count(2);
4347     dst    : S4(write);
4348     src    : S3(read);
4349     DECODE : S0(2);     // any 2 decoders
4350     ALU    : S3(2);     // both alus
4351 %}
4352 
4353 // Integer ALU reg-reg operation
4354 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4355 %{
4356     single_instruction;
4357     dst    : S4(write);
4358     src    : S3(read);
4359     D0     : S0;        // big decoder only
4360     ALU    : S3;        // any alu
4361 %}
4362 
4363 // Long ALU reg-reg operation
4364 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4365 %{
4366     instruction_count(2);
4367     dst    : S4(write);
4368     src    : S3(read);
4369     D0     : S0(2);     // big decoder only; twice
4370     ALU    : S3(2);     // both alus
4371 %}
4372 
4373 // Integer ALU reg-mem operation
4374 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4375 %{
4376     single_instruction;
4377     dst    : S5(write);
4378     mem    : S3(read);
4379     D0     : S0;        // big decoder only
4380     ALU    : S4;        // any alu
4381     MEM    : S3;        // any mem
4382 %}
4383 
4384 // Integer mem operation (prefetch)
4385 pipe_class ialu_mem(memory mem)
4386 %{
4387     single_instruction;
4388     mem    : S3(read);
4389     D0     : S0;        // big decoder only
4390     MEM    : S3;        // any mem
4391 %}
4392 
4393 // Integer Store to Memory
4394 pipe_class ialu_mem_reg(memory mem, rRegI src)
4395 %{
4396     single_instruction;
4397     mem    : S3(read);
4398     src    : S5(read);
4399     D0     : S0;        // big decoder only
4400     ALU    : S4;        // any alu
4401     MEM    : S3;
4402 %}
4403 
4404 // // Long Store to Memory
4405 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4406 // %{
4407 //     instruction_count(2);
4408 //     mem    : S3(read);
4409 //     src    : S5(read);
4410 //     D0     : S0(2);          // big decoder only; twice
4411 //     ALU    : S4(2);     // any 2 alus
4412 //     MEM    : S3(2);  // Both mems
4413 // %}
4414 
4415 // Integer Store to Memory
4416 pipe_class ialu_mem_imm(memory mem)
4417 %{
4418     single_instruction;
4419     mem    : S3(read);
4420     D0     : S0;        // big decoder only
4421     ALU    : S4;        // any alu
4422     MEM    : S3;
4423 %}
4424 
4425 // Integer ALU0 reg-reg operation
4426 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4427 %{
4428     single_instruction;
4429     dst    : S4(write);
4430     src    : S3(read);
4431     D0     : S0;        // Big decoder only
4432     ALU0   : S3;        // only alu0
4433 %}
4434 
4435 // Integer ALU0 reg-mem operation
4436 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4437 %{
4438     single_instruction;
4439     dst    : S5(write);
4440     mem    : S3(read);
4441     D0     : S0;        // big decoder only
4442     ALU0   : S4;        // ALU0 only
4443     MEM    : S3;        // any mem
4444 %}
4445 
4446 // Integer ALU reg-reg operation
4447 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4448 %{
4449     single_instruction;
4450     cr     : S4(write);
4451     src1   : S3(read);
4452     src2   : S3(read);
4453     DECODE : S0;        // any decoder
4454     ALU    : S3;        // any alu
4455 %}
4456 
4457 // Integer ALU reg-imm operation
4458 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4459 %{
4460     single_instruction;
4461     cr     : S4(write);
4462     src1   : S3(read);
4463     DECODE : S0;        // any decoder
4464     ALU    : S3;        // any alu
4465 %}
4466 
4467 // Integer ALU reg-mem operation
4468 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4469 %{
4470     single_instruction;
4471     cr     : S4(write);
4472     src1   : S3(read);
4473     src2   : S3(read);
4474     D0     : S0;        // big decoder only
4475     ALU    : S4;        // any alu
4476     MEM    : S3;
4477 %}
4478 
4479 // Conditional move reg-reg
4480 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4481 %{
4482     instruction_count(4);
4483     y      : S4(read);
4484     q      : S3(read);
4485     p      : S3(read);
4486     DECODE : S0(4);     // any decoder
4487 %}
4488 
4489 // Conditional move reg-reg
4490 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4491 %{
4492     single_instruction;
4493     dst    : S4(write);
4494     src    : S3(read);
4495     cr     : S3(read);
4496     DECODE : S0;        // any decoder
4497 %}
4498 
4499 // Conditional move reg-mem
4500 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4501 %{
4502     single_instruction;
4503     dst    : S4(write);
4504     src    : S3(read);
4505     cr     : S3(read);
4506     DECODE : S0;        // any decoder
4507     MEM    : S3;
4508 %}
4509 
4510 // Conditional move reg-reg long
4511 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4512 %{
4513     single_instruction;
4514     dst    : S4(write);
4515     src    : S3(read);
4516     cr     : S3(read);
4517     DECODE : S0(2);     // any 2 decoders
4518 %}
4519 
4520 // XXX
4521 // // Conditional move double reg-reg
4522 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4523 // %{
4524 //     single_instruction;
4525 //     dst    : S4(write);
4526 //     src    : S3(read);
4527 //     cr     : S3(read);
4528 //     DECODE : S0;     // any decoder
4529 // %}
4530 
4531 // Float reg-reg operation
4532 pipe_class fpu_reg(regD dst)
4533 %{
4534     instruction_count(2);
4535     dst    : S3(read);
4536     DECODE : S0(2);     // any 2 decoders
4537     FPU    : S3;
4538 %}
4539 
4540 // Float reg-reg operation
4541 pipe_class fpu_reg_reg(regD dst, regD src)
4542 %{
4543     instruction_count(2);
4544     dst    : S4(write);
4545     src    : S3(read);
4546     DECODE : S0(2);     // any 2 decoders
4547     FPU    : S3;
4548 %}
4549 
4550 // Float reg-reg operation
4551 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4552 %{
4553     instruction_count(3);
4554     dst    : S4(write);
4555     src1   : S3(read);
4556     src2   : S3(read);
4557     DECODE : S0(3);     // any 3 decoders
4558     FPU    : S3(2);
4559 %}
4560 
4561 // Float reg-reg operation
4562 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4563 %{
4564     instruction_count(4);
4565     dst    : S4(write);
4566     src1   : S3(read);
4567     src2   : S3(read);
4568     src3   : S3(read);
4569     DECODE : S0(4);     // any 3 decoders
4570     FPU    : S3(2);
4571 %}
4572 
4573 // Float reg-reg operation
4574 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4575 %{
4576     instruction_count(4);
4577     dst    : S4(write);
4578     src1   : S3(read);
4579     src2   : S3(read);
4580     src3   : S3(read);
4581     DECODE : S1(3);     // any 3 decoders
4582     D0     : S0;        // Big decoder only
4583     FPU    : S3(2);
4584     MEM    : S3;
4585 %}
4586 
4587 // Float reg-mem operation
4588 pipe_class fpu_reg_mem(regD dst, memory mem)
4589 %{
4590     instruction_count(2);
4591     dst    : S5(write);
4592     mem    : S3(read);
4593     D0     : S0;        // big decoder only
4594     DECODE : S1;        // any decoder for FPU POP
4595     FPU    : S4;
4596     MEM    : S3;        // any mem
4597 %}
4598 
4599 // Float reg-mem operation
4600 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4601 %{
4602     instruction_count(3);
4603     dst    : S5(write);
4604     src1   : S3(read);
4605     mem    : S3(read);
4606     D0     : S0;        // big decoder only
4607     DECODE : S1(2);     // any decoder for FPU POP
4608     FPU    : S4;
4609     MEM    : S3;        // any mem
4610 %}
4611 
4612 // Float mem-reg operation
4613 pipe_class fpu_mem_reg(memory mem, regD src)
4614 %{
4615     instruction_count(2);
4616     src    : S5(read);
4617     mem    : S3(read);
4618     DECODE : S0;        // any decoder for FPU PUSH
4619     D0     : S1;        // big decoder only
4620     FPU    : S4;
4621     MEM    : S3;        // any mem
4622 %}
4623 
4624 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4625 %{
4626     instruction_count(3);
4627     src1   : S3(read);
4628     src2   : S3(read);
4629     mem    : S3(read);
4630     DECODE : S0(2);     // any decoder for FPU PUSH
4631     D0     : S1;        // big decoder only
4632     FPU    : S4;
4633     MEM    : S3;        // any mem
4634 %}
4635 
4636 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4637 %{
4638     instruction_count(3);
4639     src1   : S3(read);
4640     src2   : S3(read);
4641     mem    : S4(read);
4642     DECODE : S0;        // any decoder for FPU PUSH
4643     D0     : S0(2);     // big decoder only
4644     FPU    : S4;
4645     MEM    : S3(2);     // any mem
4646 %}
4647 
4648 pipe_class fpu_mem_mem(memory dst, memory src1)
4649 %{
4650     instruction_count(2);
4651     src1   : S3(read);
4652     dst    : S4(read);
4653     D0     : S0(2);     // big decoder only
4654     MEM    : S3(2);     // any mem
4655 %}
4656 
4657 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4658 %{
4659     instruction_count(3);
4660     src1   : S3(read);
4661     src2   : S3(read);
4662     dst    : S4(read);
4663     D0     : S0(3);     // big decoder only
4664     FPU    : S4;
4665     MEM    : S3(3);     // any mem
4666 %}
4667 
4668 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4669 %{
4670     instruction_count(3);
4671     src1   : S4(read);
4672     mem    : S4(read);
4673     DECODE : S0;        // any decoder for FPU PUSH
4674     D0     : S0(2);     // big decoder only
4675     FPU    : S4;
4676     MEM    : S3(2);     // any mem
4677 %}
4678 
4679 // Float load constant
4680 pipe_class fpu_reg_con(regD dst)
4681 %{
4682     instruction_count(2);
4683     dst    : S5(write);
4684     D0     : S0;        // big decoder only for the load
4685     DECODE : S1;        // any decoder for FPU POP
4686     FPU    : S4;
4687     MEM    : S3;        // any mem
4688 %}
4689 
4690 // Float load constant
4691 pipe_class fpu_reg_reg_con(regD dst, regD src)
4692 %{
4693     instruction_count(3);
4694     dst    : S5(write);
4695     src    : S3(read);
4696     D0     : S0;        // big decoder only for the load
4697     DECODE : S1(2);     // any decoder for FPU POP
4698     FPU    : S4;
4699     MEM    : S3;        // any mem
4700 %}
4701 
4702 // UnConditional branch
4703 pipe_class pipe_jmp(label labl)
4704 %{
4705     single_instruction;
4706     BR   : S3;
4707 %}
4708 
4709 // Conditional branch
4710 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4711 %{
4712     single_instruction;
4713     cr    : S1(read);
4714     BR    : S3;
4715 %}
4716 
4717 // Allocation idiom
     // One serialized instruction with a fixed latency of 6. heap_ptr is read in
     // stage S3 and dst is written in stage S5; DECODE, D0, MEM, ALU and BR are
     // all reserved.
4718 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4719 %{
4720     instruction_count(1); force_serialization;
4721     fixed_latency(6);
4722     heap_ptr : S3(read);
4723     DECODE   : S0(3);
4724     D0       : S2;
4725     MEM      : S3;
4726     ALU      : S3(2);
4727     dst      : S5(write);
4728     BR       : S5;
4729 %}
4730 
4731 // Generic big/slow expanded idiom
     // Catch-all class: declared as 10 instructions spanning multiple bundles,
     // serialized, with a fixed latency of 100. Takes no operands.
4732 pipe_class pipe_slow()
4733 %{
4734     instruction_count(10); multiple_bundles; force_serialization;
4735     fixed_latency(100);
4736     D0  : S0(2);
4737     MEM : S3(2);
4738 %}
4739 
4740 // The real do-nothing guy
     // Pipeline class with zero instructions and no resource usage; it is the
     // class assigned to MachNop in the define block that follows.
4741 pipe_class empty()
4742 %{
4743     instruction_count(0);
4744 %}
4745 
4746 // Define the class for the Nop node
     // Binds MachNop to the zero-instruction 'empty' pipeline class.
4747 define
4748 %{
4749    MachNop = empty;
4750 %}
4751 
4752 %}
4753 
4754 //----------INSTRUCTIONS-------------------------------------------------------
4755 //
4756 // match      -- States which machine-independent subtree may be replaced
4757 //               by this instruction.
4758 // ins_cost   -- The estimated cost of this instruction is used by instruction
4759 //               selection to identify a minimum cost tree of machine
4760 //               instructions that matches a tree of machine-independent
4761 //               instructions.
4762 // format     -- A string providing the disassembly for this instruction.
4763 //               The value of an instruction's operand may be inserted
4764 //               by referring to it with a '$' prefix.
4765 // opcode     -- Three instruction opcodes may be provided.  These are referred
4766 //               to within an encode class as $primary, $secondary, and $tertiary
4767 //               respectively.  The primary opcode is commonly used to
4768 //               indicate the type of machine instruction, while secondary
4769 //               and tertiary are often used for prefix options or addressing
4770 //               modes.
4771 // ins_encode -- A list of encode classes with parameters. The encode class
4772 //               name must have been defined in an 'enc_class' specification
4773 //               in the encode section of the architecture description.
4774 
4775 
4776 //----------Load/Store/Move Instructions---------------------------------------
4777 //----------Load Instructions--------------------------------------------------
4778 
4779 // Load Byte (8 bit signed)
     // Matches (LoadB mem) and emits a single movsbl from $mem into the int
     // register $dst.
4780 instruct loadB(rRegI dst, memory mem)
4781 %{
4782   match(Set dst (LoadB mem));
4783 
4784   ins_cost(125);
4785   format %{ "movsbl  $dst, $mem\t# byte" %}
4786 
4787   ins_encode %{
4788     __ movsbl($dst$$Register, $mem$$Address);
4789   %}
4790 
4791   ins_pipe(ialu_reg_mem);
4792 %}
4793 
4794 // Load Byte (8 bit signed) into Long Register
     // Folds ConvI2L over LoadB into a single movsbq from $mem into the long
     // register $dst.
4795 instruct loadB2L(rRegL dst, memory mem)
4796 %{
4797   match(Set dst (ConvI2L (LoadB mem)));
4798 
4799   ins_cost(125);
4800   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4801 
4802   ins_encode %{
4803     __ movsbq($dst$$Register, $mem$$Address);
4804   %}
4805 
4806   ins_pipe(ialu_reg_mem);
4807 %}
4808 
4809 // Load Unsigned Byte (8 bit UNsigned)
     // Matches (LoadUB mem) and emits a single movzbl from $mem into the int
     // register $dst.
4810 instruct loadUB(rRegI dst, memory mem)
4811 %{
4812   match(Set dst (LoadUB mem));
4813 
4814   ins_cost(125);
4815   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4816 
4817   ins_encode %{
4818     __ movzbl($dst$$Register, $mem$$Address);
4819   %}
4820 
4821   ins_pipe(ialu_reg_mem);
4822 %}
4823 
4824 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds ConvI2L over LoadUB into a single movzbq from $mem into the long
     // register $dst.
4825 instruct loadUB2L(rRegL dst, memory mem)
4826 %{
4827   match(Set dst (ConvI2L (LoadUB mem)));
4828 
4829   ins_cost(125);
4830   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4831 
4832   ins_encode %{
4833     __ movzbq($dst$$Register, $mem$$Address);
4834   %}
4835 
4836   ins_pipe(ialu_reg_mem);
4837 %}
4838 
4839 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register
     // Matches ConvI2L(AndI(LoadUB mem, mask)) and emits two instructions:
     // movzbq from $mem, then andl of $dst with the mask constant. The flags
     // register cr is declared KILLed. No explicit ins_cost is given.
4840 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
4841   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
4842   effect(KILL cr);
4843 
4844   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
4845             "andl    $dst, $mask" %}
4846   ins_encode %{
4847     Register Rdst = $dst$$Register;
4848     __ movzbq(Rdst, $mem$$Address);
4849     __ andl(Rdst, $mask$$constant);
4850   %}
4851   ins_pipe(ialu_reg_mem);
4852 %}
4853 
4854 // Load Short (16 bit signed)
     // Matches (LoadS mem) and emits a single movswl from $mem into the int
     // register $dst.
4855 instruct loadS(rRegI dst, memory mem)
4856 %{
4857   match(Set dst (LoadS mem));
4858 
4859   ins_cost(125);
4860   format %{ "movswl $dst, $mem\t# short" %}
4861 
4862   ins_encode %{
4863     __ movswl($dst$$Register, $mem$$Address);
4864   %}
4865 
4866   ins_pipe(ialu_reg_mem);
4867 %}
4868 
4869 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Replaces the LShiftI/RShiftI pair (both by the immI_24 operand) applied
     // to a LoadS with a single movsbl from $mem.
4870 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4871   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
4872 
4873   ins_cost(125);
4874   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4875   ins_encode %{
4876     __ movsbl($dst$$Register, $mem$$Address);
4877   %}
4878   ins_pipe(ialu_reg_mem);
4879 %}
4880 
4881 // Load Short (16 bit signed) into Long Register
     // Folds ConvI2L over LoadS into a single movswq from $mem into the long
     // register $dst.
4882 instruct loadS2L(rRegL dst, memory mem)
4883 %{
4884   match(Set dst (ConvI2L (LoadS mem)));
4885 
4886   ins_cost(125);
4887   format %{ "movswq $dst, $mem\t# short -> long" %}
4888 
4889   ins_encode %{
4890     __ movswq($dst$$Register, $mem$$Address);
4891   %}
4892 
4893   ins_pipe(ialu_reg_mem);
4894 %}
4895 
4896 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches (LoadUS mem) and emits a single movzwl from $mem into the int
     // register $dst.
4897 instruct loadUS(rRegI dst, memory mem)
4898 %{
4899   match(Set dst (LoadUS mem));
4900 
4901   ins_cost(125);
4902   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
4903 
4904   ins_encode %{
4905     __ movzwl($dst$$Register, $mem$$Address);
4906   %}
4907 
4908   ins_pipe(ialu_reg_mem);
4909 %}
4910 
4911 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Replaces the LShiftI/RShiftI pair (both by the immI_24 operand) applied
     // to a LoadUS with a single movsbl from $mem.
4912 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4913   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
4914 
4915   ins_cost(125);
4916   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
4917   ins_encode %{
4918     __ movsbl($dst$$Register, $mem$$Address);
4919   %}
4920   ins_pipe(ialu_reg_mem);
4921 %}
4922 
4923 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds ConvI2L over LoadUS into a single movzwq from $mem into the long
     // register $dst.
4924 instruct loadUS2L(rRegL dst, memory mem)
4925 %{
4926   match(Set dst (ConvI2L (LoadUS mem)));
4927 
4928   ins_cost(125);
4929   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
4930 
4931   ins_encode %{
4932     __ movzwq($dst$$Register, $mem$$Address);
4933   %}
4934 
4935   ins_pipe(ialu_reg_mem);
4936 %}
4937 
4938 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // Matches ConvI2L(AndI(LoadUS mem, mask)) for the immI_255 operand and
     // emits only a movzbq from $mem; no separate AND is generated. No explicit
     // ins_cost is given.
4939 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
4940   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
4941 
4942   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
4943   ins_encode %{
4944     __ movzbq($dst$$Register, $mem$$Address);
4945   %}
4946   ins_pipe(ialu_reg_mem);
4947 %}
4948 
4949 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Matches ConvI2L(AndI(LoadUS mem, mask)) for an immI16 mask and emits two
     // instructions: movzwq from $mem, then andl of $dst with the mask constant.
     // The flags register cr is declared KILLed.
4950 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
4951   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
4952   effect(KILL cr);
4953 
4954   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
4955             "andl    $dst, $mask" %}
4956   ins_encode %{
4957     Register Rdst = $dst$$Register;
4958     __ movzwq(Rdst, $mem$$Address);
4959     __ andl(Rdst, $mask$$constant);
4960   %}
4961   ins_pipe(ialu_reg_mem);
4962 %}
4963 
4964 // Load Integer
     // Matches (LoadI mem) and emits a single movl from $mem into the int
     // register $dst.
4965 instruct loadI(rRegI dst, memory mem)
4966 %{
4967   match(Set dst (LoadI mem));
4968 
4969   ins_cost(125);
4970   format %{ "movl    $dst, $mem\t# int" %}
4971 
4972   ins_encode %{
4973     __ movl($dst$$Register, $mem$$Address);
4974   %}
4975 
4976   ins_pipe(ialu_reg_mem);
4977 %}
4978 
4979 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Replaces the LShiftI/RShiftI pair (both by the immI_24 operand) applied
     // to a LoadI with a single movsbl from $mem.
4980 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4981   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
4982 
4983   ins_cost(125);
4984   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
4985   ins_encode %{
4986     __ movsbl($dst$$Register, $mem$$Address);
4987   %}
4988   ins_pipe(ialu_reg_mem);
4989 %}
4990 
4991 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Replaces AndI(LoadI mem, mask) for the immI_255 operand with a single
     // movzbl from $mem.
4992 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
4993   match(Set dst (AndI (LoadI mem) mask));
4994 
4995   ins_cost(125);
4996   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
4997   ins_encode %{
4998     __ movzbl($dst$$Register, $mem$$Address);
4999   %}
5000   ins_pipe(ialu_reg_mem);
5001 %}
5002 
5003 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Replaces the LShiftI/RShiftI pair (both by the immI_16 operand) applied
     // to a LoadI with a single movswl from $mem.
5004 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5005   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5006 
5007   ins_cost(125);
5008   format %{ "movswl  $dst, $mem\t# int -> short" %}
5009   ins_encode %{
5010     __ movswl($dst$$Register, $mem$$Address);
5011   %}
5012   ins_pipe(ialu_reg_mem);
5013 %}
5014 
5015 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Replaces AndI(LoadI mem, mask) for the immI_65535 operand with a single
     // movzwl from $mem.
5016 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5017   match(Set dst (AndI (LoadI mem) mask));
5018 
5019   ins_cost(125);
5020   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5021   ins_encode %{
5022     __ movzwl($dst$$Register, $mem$$Address);
5023   %}
5024   ins_pipe(ialu_reg_mem);
5025 %}
5026 
5027 // Load Integer into Long Register
     // Folds ConvI2L over LoadI into a single movslq from $mem into the long
     // register $dst.
5028 instruct loadI2L(rRegL dst, memory mem)
5029 %{
5030   match(Set dst (ConvI2L (LoadI mem)));
5031 
5032   ins_cost(125);
5033   format %{ "movslq  $dst, $mem\t# int -> long" %}
5034 
5035   ins_encode %{
5036     __ movslq($dst$$Register, $mem$$Address);
5037   %}
5038 
5039   ins_pipe(ialu_reg_mem);
5040 %}
5041 
5042 // Load Integer with mask 0xFF into Long Register
     // Matches ConvI2L(AndI(LoadI mem, mask)) for the immI_255 operand and
     // emits only a movzbq from $mem. No explicit ins_cost is given.
5043 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5044   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5045 
5046   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5047   ins_encode %{
5048     __ movzbq($dst$$Register, $mem$$Address);
5049   %}
5050   ins_pipe(ialu_reg_mem);
5051 %}
5052 
5053 // Load Integer with mask 0xFFFF into Long Register
     // Matches ConvI2L(AndI(LoadI mem, mask)) for the immI_65535 operand and
     // emits only a movzwq from $mem. No explicit ins_cost is given.
5054 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5055   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5056 
5057   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5058   ins_encode %{
5059     __ movzwq($dst$$Register, $mem$$Address);
5060   %}
5061   ins_pipe(ialu_reg_mem);
5062 %}
5063 
5064 // Load Integer with a 31-bit mask into Long Register
     // Matches ConvI2L(AndI(LoadI mem, mask)) for an immU31 mask and emits two
     // instructions: movl from $mem, then andl of $dst with the mask constant.
     // The flags register cr is declared KILLed.
5065 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5066   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5067   effect(KILL cr);
5068 
5069   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5070             "andl    $dst, $mask" %}
5071   ins_encode %{
5072     Register Rdst = $dst$$Register;
5073     __ movl(Rdst, $mem$$Address);
5074     __ andl(Rdst, $mask$$constant);
5075   %}
5076   ins_pipe(ialu_reg_mem);
5077 %}
5078 
5079 // Load Unsigned Integer into Long Register
     // Matches AndL(ConvI2L(LoadI mem), mask) for the immL_32bits operand and
     // emits only a movl from $mem; no separate AND is generated.
5080 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) 
5081 %{
5082   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5083 
5084   ins_cost(125);
5085   format %{ "movl    $dst, $mem\t# uint -> long" %}
5086 
5087   ins_encode %{
5088     __ movl($dst$$Register, $mem$$Address);
5089   %}
5090 
5091   ins_pipe(ialu_reg_mem);
5092 %}
5093 
5094 // Load Long
     // Matches (LoadL mem) and emits a single movq from $mem into the long
     // register $dst.
5095 instruct loadL(rRegL dst, memory mem)
5096 %{
5097   match(Set dst (LoadL mem));
5098 
5099   ins_cost(125);
5100   format %{ "movq    $dst, $mem\t# long" %}
5101 
5102   ins_encode %{
5103     __ movq($dst$$Register, $mem$$Address);
5104   %}
5105 
5106   ins_pipe(ialu_reg_mem); // XXX
5107 %}
5108 
5109 // Load Range
     // Matches (LoadRange mem) into an int register. Encoded from pieces:
     // REX_reg_mem prefix, primary opcode 0x8B, then reg_mem operand bytes
     // (shown as movl in the format).
5110 instruct loadRange(rRegI dst, memory mem)
5111 %{
5112   match(Set dst (LoadRange mem));
5113 
5114   ins_cost(125); // XXX
5115   format %{ "movl    $dst, $mem\t# range" %}
5116   opcode(0x8B);
5117   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5118   ins_pipe(ialu_reg_mem);
5119 %}
5120 
5121 // Load Pointer
     // Matches (LoadP mem) into a pointer register. Encoded from pieces:
     // REX_reg_mem_wide prefix, primary opcode 0x8B, then reg_mem operand bytes
     // (shown as movq in the format).
5122 instruct loadP(rRegP dst, memory mem)
5123 %{
5124   match(Set dst (LoadP mem));
5125 
5126   ins_cost(125); // XXX
5127   format %{ "movq    $dst, $mem\t# ptr" %}
5128   opcode(0x8B);
5129   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5130   ins_pipe(ialu_reg_mem); // XXX
5131 %}
5132 
5133 // Load Compressed Pointer
     // Matches (LoadN mem) and emits a single movl from $mem into the narrow
     // pointer register $dst.
5134 instruct loadN(rRegN dst, memory mem)
5135 %{
5136    match(Set dst (LoadN mem));
5137 
5138    ins_cost(125); // XXX
5139    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5140    ins_encode %{
5141      __ movl($dst$$Register, $mem$$Address);
5142    %}
5143    ins_pipe(ialu_reg_mem); // XXX
5144 %}
5145 
5146 
5147 // Load Klass Pointer
     // Matches (LoadKlass mem) into a pointer register. Encoded from pieces:
     // REX_reg_mem_wide prefix, primary opcode 0x8B, then reg_mem operand bytes
     // (shown as movq in the format).
5148 instruct loadKlass(rRegP dst, memory mem)
5149 %{
5150   match(Set dst (LoadKlass mem));
5151 
5152   ins_cost(125); // XXX
5153   format %{ "movq    $dst, $mem\t# class" %}
5154   opcode(0x8B);
5155   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5156   ins_pipe(ialu_reg_mem); // XXX
5157 %}
5158 
5159 // Load narrow Klass Pointer
     // Matches (LoadNKlass mem) and emits a single movl from $mem into the
     // narrow register $dst.
5160 instruct loadNKlass(rRegN dst, memory mem)
5161 %{
5162   match(Set dst (LoadNKlass mem));
5163 
5164   ins_cost(125); // XXX
5165   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5166   ins_encode %{
5167     __ movl($dst$$Register, $mem$$Address);
5168   %}
5169   ins_pipe(ialu_reg_mem); // XXX
5170 %}
5171 
5172 // Load Float
     // Matches (LoadF mem) and calls the movflt assembler helper to load $mem
     // into the XMM register $dst (shown as movss in the format). Scheduled
     // with the generic pipe_slow class.
5173 instruct loadF(regF dst, memory mem)
5174 %{
5175   match(Set dst (LoadF mem));
5176 
5177   ins_cost(145); // XXX
5178   format %{ "movss   $dst, $mem\t# float" %}
5179   ins_encode %{
5180     __ movflt($dst$$XMMRegister, $mem$$Address);
5181   %}
5182   ins_pipe(pipe_slow); // XXX
5183 %}
5184 
5185 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is false. The encoding
     // calls the same movdbl helper as loadD; only the predicate and the format
     // text (movlpd here) differ.
5186 instruct loadD_partial(regD dst, memory mem)
5187 %{
5188   predicate(!UseXmmLoadAndClearUpper);
5189   match(Set dst (LoadD mem));
5190 
5191   ins_cost(145); // XXX
5192   format %{ "movlpd  $dst, $mem\t# double" %}
5193   ins_encode %{
5194     __ movdbl($dst$$XMMRegister, $mem$$Address);
5195   %}
5196   ins_pipe(pipe_slow); // XXX
5197 %}
5198 
5199 instruct loadD(regD dst, memory mem)
5200 %{
       // Load Double variant selected when UseXmmLoadAndClearUpper is true.
       // Calls the movdbl assembler helper (shown as movsd in the format).
5201   predicate(UseXmmLoadAndClearUpper);
5202   match(Set dst (LoadD mem));
5203 
5204   ins_cost(145); // XXX
5205   format %{ "movsd   $dst, $mem\t# double" %}
5206   ins_encode %{
5207     __ movdbl($dst$$XMMRegister, $mem$$Address);
5208   %}
5209   ins_pipe(pipe_slow); // XXX
5210 %}
5211 
5212 // Load Effective Address
     // Matches an indOffset8 address operand used as a value and materializes
     // it into $dst: REX_reg_mem_wide prefix, primary opcode 0x8D, reg_mem
     // operand bytes (shown as leaq in the format).
5213 instruct leaP8(rRegP dst, indOffset8 mem)
5214 %{
5215   match(Set dst mem);
5216 
5217   ins_cost(110); // XXX
5218   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5219   opcode(0x8D);
5220   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5221   ins_pipe(ialu_reg_reg_fat);
5222 %}
5223 
5224 instruct leaP32(rRegP dst, indOffset32 mem)
5225 %{
       // Load effective address of an indOffset32 operand into $dst using
       // opcode 0x8D with a wide REX prefix (shown as leaq in the format).
5226   match(Set dst mem);
5227 
5228   ins_cost(110);
5229   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5230   opcode(0x8D);
5231   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5232   ins_pipe(ialu_reg_reg_fat);
5233 %}
5234 
5235 // instruct leaPIdx(rRegP dst, indIndex mem)
5236 // %{
5237 //   match(Set dst mem);
5238 
5239 //   ins_cost(110);
5240 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5241 //   opcode(0x8D);
5242 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5243 //   ins_pipe(ialu_reg_reg_fat);
5244 // %}
5245 
5246 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5247 %{
       // Load effective address of an indIndexOffset operand into $dst using
       // opcode 0x8D with a wide REX prefix (shown as leaq in the format).
5248   match(Set dst mem);
5249 
5250   ins_cost(110);
5251   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5252   opcode(0x8D);
5253   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5254   ins_pipe(ialu_reg_reg_fat);
5255 %}
5256 
5257 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5258 %{
       // Load effective address of an indIndexScale operand into $dst using
       // opcode 0x8D with a wide REX prefix (shown as leaq in the format).
5259   match(Set dst mem);
5260 
5261   ins_cost(110);
5262   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5263   opcode(0x8D);
5264   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5265   ins_pipe(ialu_reg_reg_fat);
5266 %}
5267 
5268 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5269 %{
       // Load effective address of an indIndexScaleOffset operand into $dst
       // using opcode 0x8D with a wide REX prefix (shown as leaq in the format).
5270   match(Set dst mem);
5271 
5272   ins_cost(110);
5273   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5274   opcode(0x8D);
5275   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5276   ins_pipe(ialu_reg_reg_fat);
5277 %}
5278 
5279 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5280 %{
       // Load effective address of an indPosIndexScaleOffset operand into $dst
       // using opcode 0x8D with a wide REX prefix (shown as leaq in the format).
5281   match(Set dst mem);
5282 
5283   ins_cost(110);
5284   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5285   opcode(0x8D);
5286   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5287   ins_pipe(ialu_reg_reg_fat);
5288 %}
5289 
5290 // Load Effective Address which uses Narrow (32-bits) oop
     // Only enabled when UseCompressedOops is set and the narrow oop shift is
     // non-zero. Encoded with opcode 0x8D and a wide REX prefix.
5291 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5292 %{
5293   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5294   match(Set dst mem);
5295 
5296   ins_cost(110);
5297   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5298   opcode(0x8D);
5299   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5300   ins_pipe(ialu_reg_reg_fat);
5301 %}
5302 
5303 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5304 %{
       // Load effective address of an indOffset8Narrow operand into $dst.
       // Only enabled when the narrow oop shift is zero; opcode 0x8D with a
       // wide REX prefix.
5305   predicate(Universe::narrow_oop_shift() == 0);
5306   match(Set dst mem);
5307 
5308   ins_cost(110); // XXX
5309   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5310   opcode(0x8D);
5311   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5312   ins_pipe(ialu_reg_reg_fat);
5313 %}
5314 
5315 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5316 %{
       // Load effective address of an indOffset32Narrow operand into $dst.
       // Only enabled when the narrow oop shift is zero; opcode 0x8D with a
       // wide REX prefix.
5317   predicate(Universe::narrow_oop_shift() == 0);
5318   match(Set dst mem);
5319 
5320   ins_cost(110);
5321   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5322   opcode(0x8D);
5323   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5324   ins_pipe(ialu_reg_reg_fat);
5325 %}
5326 
5327 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5328 %{
       // Load effective address of an indIndexOffsetNarrow operand into $dst.
       // Only enabled when the narrow oop shift is zero; opcode 0x8D with a
       // wide REX prefix.
5329   predicate(Universe::narrow_oop_shift() == 0);
5330   match(Set dst mem);
5331 
5332   ins_cost(110);
5333   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5334   opcode(0x8D);
5335   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5336   ins_pipe(ialu_reg_reg_fat);
5337 %}
5338 
5339 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5340 %{
       // Load effective address of an indIndexScaleNarrow operand into $dst.
       // Only enabled when the narrow oop shift is zero; opcode 0x8D with a
       // wide REX prefix.
5341   predicate(Universe::narrow_oop_shift() == 0);
5342   match(Set dst mem);
5343 
5344   ins_cost(110);
5345   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5346   opcode(0x8D);
5347   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5348   ins_pipe(ialu_reg_reg_fat);
5349 %}
5350 
5351 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5352 %{
       // Load effective address of an indIndexScaleOffsetNarrow operand into
       // $dst. Only enabled when the narrow oop shift is zero; opcode 0x8D with
       // a wide REX prefix.
5353   predicate(Universe::narrow_oop_shift() == 0);
5354   match(Set dst mem);
5355 
5356   ins_cost(110);
5357   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5358   opcode(0x8D);
5359   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5360   ins_pipe(ialu_reg_reg_fat);
5361 %}
5362 
5363 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5364 %{
       // Load effective address of an indPosIndexScaleOffsetNarrow operand into
       // $dst. Only enabled when the narrow oop shift is zero; opcode 0x8D with
       // a wide REX prefix.
5365   predicate(Universe::narrow_oop_shift() == 0);
5366   match(Set dst mem);
5367 
5368   ins_cost(110);
5369   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5370   opcode(0x8D);
5371   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5372   ins_pipe(ialu_reg_reg_fat);
5373 %}
5374 
5375 instruct loadConI(rRegI dst, immI src)
5376 %{
       // Load an arbitrary int constant into $dst via the load_immI encode
       // class (shown as movl in the format). No explicit ins_cost is given.
5377   match(Set dst src);
5378 
5379   format %{ "movl    $dst, $src\t# int" %}
5380   ins_encode(load_immI(dst, src));
5381   ins_pipe(ialu_reg_fat); // XXX
5382 %}
5383 
5384 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5385 %{
       // Load the int constant matched by immI0 by xor-ing $dst with itself
       // (opcode 0x33, both operands $dst). Declares cr KILLed and carries a
       // lower cost (50) than the other int-constant rule shown in this file
       // section.
5386   match(Set dst src);
5387   effect(KILL cr);
5388 
5389   ins_cost(50);
5390   format %{ "xorl    $dst, $dst\t# int" %}
5391   opcode(0x33); /* + rd */
5392   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5393   ins_pipe(ialu_reg);
5394 %}
5395 
5396 instruct loadConL(rRegL dst, immL src)
5397 %{
       // Load an arbitrary long constant into $dst via the load_immL encode
       // class (shown as movq in the format). Cost 150, the highest of the
       // long-constant rules in this section.
5398   match(Set dst src);
5399 
5400   ins_cost(150);
5401   format %{ "movq    $dst, $src\t# long" %}
5402   ins_encode(load_immL(dst, src));
5403   ins_pipe(ialu_reg);
5404 %}
5405 
5406 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5407 %{
       // Load the long constant matched by immL0 by xor-ing $dst with itself
       // (opcode 0x33 with the non-wide REX_reg_reg prefix, shown as xorl).
       // Declares cr KILLed.
5408   match(Set dst src);
5409   effect(KILL cr);
5410 
5411   ins_cost(50);
5412   format %{ "xorl    $dst, $dst\t# long" %}
5413   opcode(0x33); /* + rd */
5414   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5415   ins_pipe(ialu_reg); // XXX
5416 %}
5417 
5418 instruct loadConUL32(rRegL dst, immUL32 src)
5419 %{
       // Load a long constant matched by immUL32 via the load_immUL32 encode
       // class (shown as movl in the format). Cost 60.
5420   match(Set dst src);
5421 
5422   ins_cost(60);
5423   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5424   ins_encode(load_immUL32(dst, src));
5425   ins_pipe(ialu_reg);
5426 %}
5427 
5428 instruct loadConL32(rRegL dst, immL32 src)
5429 %{
       // Load a long constant matched by immL32 via the load_immL32 encode
       // class (shown as movq in the format). Cost 70.
5430   match(Set dst src);
5431 
5432   ins_cost(70);
5433   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5434   ins_encode(load_immL32(dst, src));
5435   ins_pipe(ialu_reg);
5436 %}
5437 
5438 instruct loadConP(rRegP dst, immP con) %{
       // Load an arbitrary pointer constant into $dst via the load_immP encode
       // class (shown as movq in the format). No explicit ins_cost is given.
5439   match(Set dst con);
5440 
5441   format %{ "movq    $dst, $con\t# ptr" %}
5442   ins_encode(load_immP(dst, con));
5443   ins_pipe(ialu_reg_fat); // XXX
5444 %}
5445 
5446 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5447 %{
       // Load the pointer constant matched by immP0 by xor-ing $dst with itself
       // (opcode 0x33 with the non-wide REX_reg_reg prefix, shown as xorl).
       // Declares cr KILLed.
5448   match(Set dst src);
5449   effect(KILL cr);
5450 
5451   ins_cost(50);
5452   format %{ "xorl    $dst, $dst\t# ptr" %}
5453   opcode(0x33); /* + rd */
5454   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5455   ins_pipe(ialu_reg);
5456 %}
5457 
5458 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5459 %{
       // Load a pointer constant matched by immP31 via the load_immP31 encode
       // class (shown as movl in the format). Cost 60; cr is declared KILLed.
       // NOTE(review): the format shows a plain movl -- confirm whether the
       // KILL cr effect is actually required by load_immP31.
5460   match(Set dst src);
5461   effect(KILL cr);
5462 
5463   ins_cost(60);
5464   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5465   ins_encode(load_immP31(dst, src));
5466   ins_pipe(ialu_reg);
5467 %}
5468 
5469 instruct loadConF(regF dst, immF con) %{
       // Load a float constant from the constant table: movflt reads the
       // address produced by $constantaddress($con) into the XMM register $dst.
5470   match(Set dst con);
5471   ins_cost(125);
5472   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5473   ins_encode %{
5474     __ movflt($dst$$XMMRegister, $constantaddress($con));
5475   %}
5476   ins_pipe(pipe_slow);
5477 %}
5478 
5479 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL constant (immN0) by xor-ing $dst with itself;
       // cr is declared KILLed. The format prints $dst for both operands so the
       // listing agrees with the xorq that is actually emitted, as the other
       // xor-based zero-constant rules (loadConI0/L0/P0) do.
5480   match(Set dst src);
5481   effect(KILL cr);
5482   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5483   ins_encode %{
5484     __ xorq($dst$$Register, $dst$$Register);
5485   %}
5486   ins_pipe(ialu_reg);
5487 %}
5488 
5489 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-NULL compressed oop constant into $dst via set_narrow_oop.
       // A NULL constant is treated as a fatal error here (ShouldNotReachHere).
5490   match(Set dst src);
5491 
5492   ins_cost(125);
5493   format %{ "movl    $dst, $src\t# compressed ptr" %}
5494   ins_encode %{
5495     address con = (address)$src$$constant;
5496     if (con == NULL) {
5497       ShouldNotReachHere();
5498     } else {
5499       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5500     }
5501   %}
5502   ins_pipe(ialu_reg_fat); // XXX
5503 %}
5504 
5505 instruct loadConNKlass(rRegN dst, immNKlass src) %{
       // Load a non-NULL compressed klass constant into $dst via
       // set_narrow_klass. A NULL constant is treated as a fatal error here
       // (ShouldNotReachHere).
5506   match(Set dst src);
5507 
5508   ins_cost(125);
5509   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5510   ins_encode %{
5511     address con = (address)$src$$constant;
5512     if (con == NULL) {
5513       ShouldNotReachHere();
5514     } else {
5515       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5516     }
5517   %}
5518   ins_pipe(ialu_reg_fat); // XXX
5519 %}
5520 
5521 instruct loadConF0(regF dst, immF0 src)
5522 %{
       // Load the float constant matched by immF0 by xor-ing the XMM register
       // $dst with itself (xorps); cost 100, below the constant-table load.
5523   match(Set dst src);
5524   ins_cost(100);
5525 
5526   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5527   ins_encode %{
5528     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5529   %}
5530   ins_pipe(pipe_slow);
5531 %}
5532 
5533 // Use the same format since predicate() can not be used here.
     // Load a double constant from the constant table: movdbl reads the address
     // produced by $constantaddress($con) into the XMM register $dst.
5534 instruct loadConD(regD dst, immD con) %{
5535   match(Set dst con);
5536   ins_cost(125);
5537   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5538   ins_encode %{
5539     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5540   %}
5541   ins_pipe(pipe_slow);
5542 %}
5543 
5544 instruct loadConD0(regD dst, immD0 src)
5545 %{
       // Load the double constant matched by immD0 by xor-ing the XMM register
       // $dst with itself (xorpd); cost 100, below the constant-table load.
5546   match(Set dst src);
5547   ins_cost(100);
5548 
5549   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5550   ins_encode %{
5551     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5552   %}
5553   ins_pipe(pipe_slow);
5554 %}
5555 
5556 instruct loadSSI(rRegI dst, stackSlotI src)
5557 %{
       // Load an int from a stack slot operand into $dst: REX_reg_mem prefix,
       // opcode 0x8B, reg_mem operand bytes (shown as movl in the format).
5558   match(Set dst src);
5559 
5560   ins_cost(125);
5561   format %{ "movl    $dst, $src\t# int stk" %}
5562   opcode(0x8B);
5563   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5564   ins_pipe(ialu_reg_mem);
5565 %}
5566 
5567 instruct loadSSL(rRegL dst, stackSlotL src)
5568 %{
       // Load a long from a stack slot operand into $dst: REX_reg_mem_wide
       // prefix, opcode 0x8B, reg_mem operand bytes (shown as movq).
5569   match(Set dst src);
5570 
5571   ins_cost(125);
5572   format %{ "movq    $dst, $src\t# long stk" %}
5573   opcode(0x8B);
5574   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 instruct loadSSP(rRegP dst, stackSlotP src)
5579 %{
       // Load a pointer from a stack slot operand into $dst: REX_reg_mem_wide
       // prefix, opcode 0x8B, reg_mem operand bytes (shown as movq).
5580   match(Set dst src);
5581 
5582   ins_cost(125);
5583   format %{ "movq    $dst, $src\t# ptr stk" %}
5584   opcode(0x8B);
5585   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5586   ins_pipe(ialu_reg_mem);
5587 %}
5588 
5589 instruct loadSSF(regF dst, stackSlotF src)
5590 %{
       // Load a float from a stack slot into the XMM register $dst. The address
       // is built explicitly as rsp plus the slot's displacement ($src$$disp).
5591   match(Set dst src);
5592 
5593   ins_cost(125);
5594   format %{ "movss   $dst, $src\t# float stk" %}
5595   ins_encode %{
5596     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5597   %}
5598   ins_pipe(pipe_slow); // XXX
5599 %}
5600 
5601 // Use the same format since predicate() can not be used here.
     // Load a double from a stack slot into the XMM register $dst. The address
     // is built explicitly as rsp plus the slot's displacement ($src$$disp).
5602 instruct loadSSD(regD dst, stackSlotD src)
5603 %{
5604   match(Set dst src);
5605 
5606   ins_cost(125);
5607   format %{ "movsd   $dst, $src\t# double stk" %}
5608   ins_encode  %{
5609     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5610   %}
5611   ins_pipe(pipe_slow); // XXX
5612 %}
5613 
5614 // Prefetch instructions.
5615 // Must be safe to execute with invalid address (cannot fault).
5616 
5617 instruct prefetchr( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 3; emits
       // prefetchr on $mem.
5618   predicate(ReadPrefetchInstr==3);
5619   match(PrefetchRead mem);
5620   ins_cost(125);
5621 
5622   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5623   ins_encode %{
5624     __ prefetchr($mem$$Address);
5625   %}
5626   ins_pipe(ialu_mem);
5627 %}
5628 
5629 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 0; emits
       // prefetchnta on $mem.
5630   predicate(ReadPrefetchInstr==0);
5631   match(PrefetchRead mem);
5632   ins_cost(125);
5633 
5634   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5635   ins_encode %{
5636     __ prefetchnta($mem$$Address);
5637   %}
5638   ins_pipe(ialu_mem);
5639 %}
5640 
5641 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 1; emits
       // prefetcht0 on $mem.
5642   predicate(ReadPrefetchInstr==1);
5643   match(PrefetchRead mem);
5644   ins_cost(125);
5645 
5646   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5647   ins_encode %{
5648     __ prefetcht0($mem$$Address);
5649   %}
5650   ins_pipe(ialu_mem);
5651 %}
5652 
5653 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 2; emits
       // prefetcht2 on $mem.
5654   predicate(ReadPrefetchInstr==2);
5655   match(PrefetchRead mem);
5656   ins_cost(125);
5657 
5658   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5659   ins_encode %{
5660     __ prefetcht2($mem$$Address);
5661   %}
5662   ins_pipe(ialu_mem);
5663 %}
5664 
5665 instruct prefetchwNTA( memory mem ) %{
       // The only PrefetchWrite rule in this section and it has no predicate:
       // always emits prefetchnta on $mem.
5666   match(PrefetchWrite mem);
5667   ins_cost(125);
5668 
5669   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5670   ins_encode %{
5671     __ prefetchnta($mem$$Address);
5672   %}
5673   ins_pipe(ialu_mem);
5674 %}
5675 
5676 // Prefetch instructions for allocation.
5677 
5678 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 3;
       // emits prefetchw on $mem.
5679   predicate(AllocatePrefetchInstr==3);
5680   match(PrefetchAllocation mem);
5681   ins_cost(125);
5682 
5683   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5684   ins_encode %{
5685     __ prefetchw($mem$$Address);
5686   %}
5687   ins_pipe(ialu_mem);
5688 %}
5689 
5690 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 0;
       // emits prefetchnta on $mem.
5691   predicate(AllocatePrefetchInstr==0);
5692   match(PrefetchAllocation mem);
5693   ins_cost(125);
5694 
5695   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5696   ins_encode %{
5697     __ prefetchnta($mem$$Address);
5698   %}
5699   ins_pipe(ialu_mem);
5700 %}
5701 
5702 instruct prefetchAllocT0( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 1;
       // emits prefetcht0 on $mem.
5703   predicate(AllocatePrefetchInstr==1);
5704   match(PrefetchAllocation mem);
5705   ins_cost(125);
5706 
5707   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5708   ins_encode %{
5709     __ prefetcht0($mem$$Address);
5710   %}
5711   ins_pipe(ialu_mem);
5712 %}
5713 
5714 instruct prefetchAllocT2( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 2;
       // emits prefetcht2 on $mem.
5715   predicate(AllocatePrefetchInstr==2);
5716   match(PrefetchAllocation mem);
5717   ins_cost(125);
5718 
5719   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5720   ins_encode %{
5721     __ prefetcht2($mem$$Address);
5722   %}
5723   ins_pipe(ialu_mem);
5724 %}
5725 
5726 //----------Store Instructions-------------------------------------------------
5727 
5728 // Store Byte
     // Matches (StoreB mem src). Encoded from pieces: REX_breg_mem prefix,
     // opcode 0x88, reg_mem operand bytes (shown as movb in the format).
5729 instruct storeB(memory mem, rRegI src)
5730 %{
5731   match(Set mem (StoreB mem src));
5732 
5733   ins_cost(125); // XXX
5734   format %{ "movb    $mem, $src\t# byte" %}
5735   opcode(0x88);
5736   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5737   ins_pipe(ialu_mem_reg);
5738 %}
5739 
5740 // Store Char/Short
     // Matches (StoreC mem src). Encoded from pieces, in order: SizePrefix,
     // REX_reg_mem prefix, opcode 0x89, reg_mem operand bytes (shown as movw).
5741 instruct storeC(memory mem, rRegI src)
5742 %{
5743   match(Set mem (StoreC mem src));
5744 
5745   ins_cost(125); // XXX
5746   format %{ "movw    $mem, $src\t# char/short" %}
5747   opcode(0x89);
5748   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5749   ins_pipe(ialu_mem_reg);
5750 %}
5751 
5752 // Store Integer
     // Matches (StoreI mem src). Encoded from pieces: REX_reg_mem prefix,
     // opcode 0x89, reg_mem operand bytes (shown as movl in the format).
5753 instruct storeI(memory mem, rRegI src)
5754 %{
5755   match(Set mem (StoreI mem src));
5756 
5757   ins_cost(125); // XXX
5758   format %{ "movl    $mem, $src\t# int" %}
5759   opcode(0x89);
5760   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5761   ins_pipe(ialu_mem_reg);
5762 %}
5763 
5764 // Store Long
     // Matches (StoreL mem src). Encoded from pieces: REX_reg_mem_wide prefix,
     // opcode 0x89, reg_mem operand bytes (shown as movq in the format).
5765 instruct storeL(memory mem, rRegL src)
5766 %{
5767   match(Set mem (StoreL mem src));
5768 
5769   ins_cost(125); // XXX
5770   format %{ "movq    $mem, $src\t# long" %}
5771   opcode(0x89);
5772   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5773   ins_pipe(ialu_mem_reg); // XXX
5774 %}
5775 
5776 // Store Pointer
     // Matches (StoreP mem src) with src in the any_RegP class. Encoded from
     // pieces: REX_reg_mem_wide prefix, opcode 0x89, reg_mem operand bytes
     // (shown as movq in the format).
5777 instruct storeP(memory mem, any_RegP src)
5778 %{
5779   match(Set mem (StoreP mem src));
5780 
5781   ins_cost(125); // XXX
5782   format %{ "movq    $mem, $src\t# ptr" %}
5783   opcode(0x89);
5784   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5785   ins_pipe(ialu_mem_reg);
5786 %}
5787 
5788 instruct storeImmP0(memory mem, immP0 zero)
5789 %{
       // Store of the immP0 pointer constant implemented by storing r12 rather
       // than an immediate. Enabled only with UseCompressedOops and when both
       // the narrow oop base and narrow klass base are NULL; per the format
       // text, r12 (heap base) is zero in that configuration.
5790   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5791   match(Set mem (StoreP mem zero));
5792 
5793   ins_cost(125); // XXX
5794   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5795   ins_encode %{
5796     __ movq($mem$$Address, r12);
5797   %}
5798   ins_pipe(ialu_mem_reg);
5799 %}
5800 
5801 // Store NULL Pointer, mark word, or other simple pointer constant.
5802 instruct storeImmP(memory mem, immP31 src)
5803 %{
       // Rule for StoreP of an immP31 constant: opcode 0xC7 /0 with the
       // wide REX prefix, followed by the constant emitted through Con32.
       // Cost 150 is higher than the register forms (125).
5804   match(Set mem (StoreP mem src));
5805 
5806   ins_cost(150); // XXX
5807   format %{ "movq    $mem, $src\t# ptr" %}
5808   opcode(0xC7); /* C7 /0 */
5809   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5810   ins_pipe(ialu_mem_imm);
5811 %}
5812 
5813 // Store Compressed Pointer
5814 instruct storeN(memory mem, rRegN src)
5815 %{
       // Rule for StoreN: writes the compressed-pointer register src to
       // mem with a movl emitted through the macro assembler.
5816   match(Set mem (StoreN mem src));
5817 
5818   ins_cost(125); // XXX
5819   format %{ "movl    $mem, $src\t# compressed ptr" %}
5820   ins_encode %{
5821     __ movl($mem$$Address, $src$$Register);
5822   %}
5823   ins_pipe(ialu_mem_reg);
5824 %}
5825 
5826 instruct storeNKlass(memory mem, rRegN src)
5827 %{
       // Rule for StoreNKlass: same movl encoding as storeN, but matched
       // on the compressed klass pointer store node.
5828   match(Set mem (StoreNKlass mem src));
5829 
5830   ins_cost(125); // XXX
5831   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5832   ins_encode %{
5833     __ movl($mem$$Address, $src$$Register);
5834   %}
5835   ins_pipe(ialu_mem_reg);
5836 %}
5837 
5838 instruct storeImmN0(memory mem, immN0 zero)
5839 %{
       // Rule for StoreN of the immN0 constant when both narrow bases are
       // NULL: stores the low 32 bits of r12 (movl) instead of an
       // immediate. Unlike the other R12-based zero stores in this file,
       // the predicate does not test UseCompressedOops explicitly.
5840   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5841   match(Set mem (StoreN mem zero));
5842 
5843   ins_cost(125); // XXX
5844   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5845   ins_encode %{
5846     __ movl($mem$$Address, r12);
5847   %}
5848   ins_pipe(ialu_mem_reg);
5849 %}
5850 
5851 instruct storeImmN(memory mem, immN src)
5852 %{
       // Rule for StoreN of an immN constant. A NULL constant is written
       // as a plain 32-bit zero; any other constant goes through
       // set_narrow_oop on the destination address.
5853   match(Set mem (StoreN mem src));
5854 
5855   ins_cost(150); // XXX
5856   format %{ "movl    $mem, $src\t# compressed ptr" %}
5857   ins_encode %{
5858     address con = (address)$src$$constant;
5859     if (con == NULL) {
5860       __ movl($mem$$Address, (int32_t)0);
5861     } else {
5862       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5863     }
5864   %}
5865   ins_pipe(ialu_mem_imm);
5866 %}
5867 
5868 instruct storeImmNKlass(memory mem, immNKlass src)
5869 %{
       // Rule for StoreNKlass of an immNKlass constant: the constant is
       // cast to Klass* and written with set_narrow_klass. There is no
       // NULL special case here, unlike storeImmN.
5870   match(Set mem (StoreNKlass mem src));
5871 
5872   ins_cost(150); // XXX
5873   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5874   ins_encode %{
5875     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5876   %}
5877   ins_pipe(ialu_mem_imm);
5878 %}
5879 
5880 // Store Integer Immediate
5881 instruct storeImmI0(memory mem, immI0 zero)
5882 %{
       // Rule for StoreI of the immI0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores the
       // low 32 bits of r12 (movl) instead of an immediate. Cost 125 is
       // lower than storeImmI's 150.
5883   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5884   match(Set mem (StoreI mem zero));
5885 
5886   ins_cost(125); // XXX
5887   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
5888   ins_encode %{
5889     __ movl($mem$$Address, r12);
5890   %}
5891   ins_pipe(ialu_mem_reg);
5892 %}
5893 
5894 instruct storeImmI(memory mem, immI src)
5895 %{
       // Rule for StoreI of an immI constant: opcode 0xC7 /0 with the
       // non-wide REX prefix, then the constant emitted through Con32.
       // It has no predicate.
5896   match(Set mem (StoreI mem src));
5897 
5898   ins_cost(150);
5899   format %{ "movl    $mem, $src\t# int" %}
5900   opcode(0xC7); /* C7 /0 */
5901   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5902   ins_pipe(ialu_mem_imm);
5903 %}
5904 
5905 // Store Long Immediate
5906 instruct storeImmL0(memory mem, immL0 zero)
5907 %{
       // Rule for StoreL of the immL0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores all
       // of r12 (movq) instead of an immediate.
5908   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5909   match(Set mem (StoreL mem zero));
5910 
5911   ins_cost(125); // XXX
5912   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
5913   ins_encode %{
5914     __ movq($mem$$Address, r12);
5915   %}
5916   ins_pipe(ialu_mem_reg);
5917 %}
5918 
5919 instruct storeImmL(memory mem, immL32 src)
5920 %{
       // Rule for StoreL of an immL32 constant: opcode 0xC7 /0 with the
       // wide REX prefix; the constant itself is emitted through Con32.
5921   match(Set mem (StoreL mem src));
5922 
5923   ins_cost(150);
5924   format %{ "movq    $mem, $src\t# long" %}
5925   opcode(0xC7); /* C7 /0 */
5926   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5927   ins_pipe(ialu_mem_imm);
5928 %}
5929 
5930 // Store Short/Char Immediate
5931 instruct storeImmC0(memory mem, immI0 zero)
5932 %{
       // Rule for StoreC of the immI0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores r12
       // with movw instead of an immediate.
5933   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5934   match(Set mem (StoreC mem zero));
5935 
5936   ins_cost(125); // XXX
5937   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
5938   ins_encode %{
5939     __ movw($mem$$Address, r12);
5940   %}
5941   ins_pipe(ialu_mem_reg);
5942 %}
5943 
5944 instruct storeImmI16(memory mem, immI16 src)
5945 %{
       // Rule for StoreC of an immI16 constant, enabled only when
       // UseStoreImmI16 is set. It reuses opcode 0xC7 /0 behind SizePrefix
       // and emits the constant through Con16.
5946   predicate(UseStoreImmI16);
5947   match(Set mem (StoreC mem src));
5948 
5949   ins_cost(150);
5950   format %{ "movw    $mem, $src\t# short/char" %}
5951   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
5952   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
5953   ins_pipe(ialu_mem_imm);
5954 %}
5955 
5956 // Store Byte Immediate
5957 instruct storeImmB0(memory mem, immI0 zero)
5958 %{
       // Rule for StoreB of the immI0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores r12
       // with movb instead of an immediate.
       // The disassembly comment previously read "short/char" (copied from
       // the StoreC rule); it now says "byte" to match the StoreB node and
       // the movb that is emitted.
5959   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5960   match(Set mem (StoreB mem zero));
5961 
5962   ins_cost(125); // XXX
5963   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
5964   ins_encode %{
5965     __ movb($mem$$Address, r12);
5966   %}
5967   ins_pipe(ialu_mem_reg);
5968 %}
5969 
5970 instruct storeImmB(memory mem, immI8 src)
5971 %{
       // Rule for StoreB of an immI8 constant: opcode 0xC6 /0 with the
       // constant emitted through Con8or32.
5972   match(Set mem (StoreB mem src));
5973 
5974   ins_cost(150); // XXX
5975   format %{ "movb    $mem, $src\t# byte" %}
5976   opcode(0xC6); /* C6 /0 */
5977   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
5978   ins_pipe(ialu_mem_imm);
5979 %}
5980 
5981 // Store CMS card-mark Immediate
5982 instruct storeImmCM0_reg(memory mem, immI0 zero)
5983 %{
       // Rule for StoreCM (card-mark store) of the immI0 constant, selected
       // only when compressed oops are on and both narrow bases are NULL:
       // stores r12 with movb instead of an immediate.
5984   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5985   match(Set mem (StoreCM mem zero));
5986 
5987   ins_cost(125); // XXX
5988   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
5989   ins_encode %{
5990     __ movb($mem$$Address, r12);
5991   %}
5992   ins_pipe(ialu_mem_reg);
5993 %}
5994 
5995 instruct storeImmCM0(memory mem, immI0 src)
5996 %{
       // Unpredicated rule for StoreCM of the immI0 constant: opcode
       // 0xC6 /0 with the constant emitted through Con8or32. Cost 150 is
       // higher than storeImmCM0_reg's 125.
5997   match(Set mem (StoreCM mem src));
5998 
5999   ins_cost(150); // XXX
6000   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6001   opcode(0xC6); /* C6 /0 */
6002   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6003   ins_pipe(ialu_mem_imm);
6004 %}
6005 
6006 // Store Float
6007 instruct storeF(memory mem, regF src)
6008 %{
       // Rule for StoreF: writes the XMM register src to mem through
       // movflt (printed as movss).
6009   match(Set mem (StoreF mem src));
6010 
6011   ins_cost(95); // XXX
6012   format %{ "movss   $mem, $src\t# float" %}
6013   ins_encode %{
6014     __ movflt($mem$$Address, $src$$XMMRegister);
6015   %}
6016   ins_pipe(pipe_slow); // XXX
6017 %}
6018 
6019 // Store immediate Float value (it is faster than store from XMM register)
6020 instruct storeF0(memory mem, immF0 zero)
6021 %{
       // Rule for StoreF of the immF0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores the
       // low 32 bits of r12 with an integer movl. Cost 25 is lower than
       // both storeF (95) and storeF_imm (50).
6022   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6023   match(Set mem (StoreF mem zero));
6024 
6025   ins_cost(25); // XXX
6026   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6027   ins_encode %{
6028     __ movl($mem$$Address, r12);
6029   %}
6030   ins_pipe(ialu_mem_reg);
6031 %}
6032 
6033 instruct storeF_imm(memory mem, immF src)
6034 %{
       // Rule for StoreF of an immF constant: an integer store (opcode
       // 0xC7 /0, printed as movl) whose immediate is emitted through
       // Con32F_as_bits.
6035   match(Set mem (StoreF mem src));
6036 
6037   ins_cost(50);
6038   format %{ "movl    $mem, $src\t# float" %}
6039   opcode(0xC7); /* C7 /0 */
6040   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6041   ins_pipe(ialu_mem_imm);
6042 %}
6043 
6044 // Store Double
6045 instruct storeD(memory mem, regD src)
6046 %{
       // Rule for StoreD: writes the XMM register src to mem through
       // movdbl (printed as movsd).
6047   match(Set mem (StoreD mem src));
6048 
6049   ins_cost(95); // XXX
6050   format %{ "movsd   $mem, $src\t# double" %}
6051   ins_encode %{
6052     __ movdbl($mem$$Address, $src$$XMMRegister);
6053   %}
6054   ins_pipe(pipe_slow); // XXX
6055 %}
6056 
6057 // Store immediate double 0.0 (it is faster than store from XMM register)
6058 instruct storeD0_imm(memory mem, immD0 src)
6059 %{
       // Rule for StoreD of the immD0 constant using an immediate store:
       // opcode 0xC7 /0 with the wide REX prefix and the constant emitted
       // through Con32F_as_bits.
       // The predicate is the exact complement of storeD0's predicate
       // (compressed oops on AND narrow oop base NULL AND narrow klass base
       // NULL). Previously the narrow_klass_base() term was missing, so
       // with a NULL oop base and a non-NULL klass base neither immD0 rule
       // was eligible. This encoding does not read r12, so it does not
       // depend on either base being NULL.
6060   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL) || (Universe::narrow_klass_base() != NULL));
6061   match(Set mem (StoreD mem src));
6062 
6063   ins_cost(50);
6064   format %{ "movq    $mem, $src\t# double 0." %}
6065   opcode(0xC7); /* C7 /0 */
6066   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6067   ins_pipe(ialu_mem_imm);
6068 %}
6069 
6070 instruct storeD0(memory mem, immD0 zero)
6071 %{
       // Rule for StoreD of the immD0 constant, selected only when
       // compressed oops are on and both narrow bases are NULL: stores all
       // of r12 with an integer movq. Cost 25 is lower than storeD0_imm's
       // 50.
6072   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6073   match(Set mem (StoreD mem zero));
6074 
6075   ins_cost(25); // XXX
6076   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6077   ins_encode %{
6078     __ movq($mem$$Address, r12);
6079   %}
6080   ins_pipe(ialu_mem_reg);
6081 %}
6082 
6083 instruct storeSSI(stackSlotI dst, rRegI src)
6084 %{
       // Rule that moves an int register into an int stack slot (plain
       // "Set dst src" with a stackSlotI destination); opcode 0x89 with the
       // non-wide REX prefix.
6085   match(Set dst src);
6086 
6087   ins_cost(100);
6088   format %{ "movl    $dst, $src\t# int stk" %}
6089   opcode(0x89);
6090   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6091   ins_pipe( ialu_mem_reg );
6092 %}
6093 
6094 instruct storeSSL(stackSlotL dst, rRegL src)
6095 %{
       // Rule that moves a long register into a long stack slot; opcode
       // 0x89 with the wide REX prefix.
6096   match(Set dst src);
6097 
6098   ins_cost(100);
6099   format %{ "movq    $dst, $src\t# long stk" %}
6100   opcode(0x89);
6101   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6102   ins_pipe(ialu_mem_reg);
6103 %}
6104 
6105 instruct storeSSP(stackSlotP dst, rRegP src)
6106 %{
       // Rule that moves a pointer register into a pointer stack slot;
       // same encoding as storeSSL.
6107   match(Set dst src);
6108 
6109   ins_cost(100);
6110   format %{ "movq    $dst, $src\t# ptr stk" %}
6111   opcode(0x89);
6112   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6113   ins_pipe(ialu_mem_reg);
6114 %}
6115 
6116 instruct storeSSF(stackSlotF dst, regF src)
6117 %{
       // Rule that moves a float XMM register into a float stack slot. The
       // slot is addressed as rsp plus the operand's displacement.
6118   match(Set dst src);
6119 
6120   ins_cost(95); // XXX
6121   format %{ "movss   $dst, $src\t# float stk" %}
6122   ins_encode %{
6123     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6124   %}
6125   ins_pipe(pipe_slow); // XXX
6126 %}
6127 
6128 instruct storeSSD(stackSlotD dst, regD src)
6129 %{
       // Rule that moves a double XMM register into a double stack slot.
       // The slot is addressed as rsp plus the operand's displacement.
6130   match(Set dst src);
6131 
6132   ins_cost(95); // XXX
6133   format %{ "movsd   $dst, $src\t# double stk" %}
6134   ins_encode %{
6135     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6136   %}
6137   ins_pipe(pipe_slow); // XXX
6138 %}
6139 
6140 //----------BSWAP Instructions-------------------------------------------------
6141 instruct bytes_reverse_int(rRegI dst) %{
       // Rule for ReverseBytesI performed in place: dst is both the input
       // and the result. Printed as bswapl; opcode bytes 0x0F, 0xC8 with
       // the non-wide REX prefix.
6142   match(Set dst (ReverseBytesI dst));
6143 
6144   format %{ "bswapl  $dst" %}
6145   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6146   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6147   ins_pipe( ialu_reg );
6148 %}
6149 
6150 instruct bytes_reverse_long(rRegL dst) %{
       // Rule for ReverseBytesL performed in place on dst. Printed as
       // bswapq; same opcode bytes as the int form but with REX_reg_wide.
6151   match(Set dst (ReverseBytesL dst));
6152 
6153   format %{ "bswapq  $dst" %}
6154   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6155   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6156   ins_pipe( ialu_reg);
6157 %}
6158 
6159 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // Rule for ReverseBytesUS performed in place on dst: a 32-bit
       // byte swap followed by a logical right shift by 16. The flags
       // register is declared killed.
6160   match(Set dst (ReverseBytesUS dst));
6161   effect(KILL cr);
6162 
6163   format %{ "bswapl  $dst\n\t"
6164             "shrl    $dst,16\n\t" %}
6165   ins_encode %{
6166     __ bswapl($dst$$Register);
6167     __ shrl($dst$$Register, 16);
6168   %}
6169   ins_pipe( ialu_reg );
6170 %}
6171 
6172 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // Rule for ReverseBytesS performed in place on dst: a 32-bit
       // byte swap followed by an arithmetic right shift by 16. The flags
       // register is declared killed.
       // The printed mnemonic is "sarl" so that the disassembly text
       // matches the sarl actually emitted below (it previously read
       // "sar").
6173   match(Set dst (ReverseBytesS dst));
6174   effect(KILL cr);
6175 
6176   format %{ "bswapl  $dst\n\t"
6177             "sarl    $dst,16\n\t" %}
6178   ins_encode %{
6179     __ bswapl($dst$$Register);
6180     __ sarl($dst$$Register, 16);
6181   %}
6182   ins_pipe( ialu_reg );
6183 %}
6184 
6185 //---------- Zeros Count Instructions ------------------------------------------
6186 
6187 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Rule for CountLeadingZerosI, used when
       // UseCountLeadingZerosInstruction is set: a single lzcntl. Flags are
       // declared killed.
6188   predicate(UseCountLeadingZerosInstruction);
6189   match(Set dst (CountLeadingZerosI src));
6190   effect(KILL cr);
6191 
6192   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6193   ins_encode %{
6194     __ lzcntl($dst$$Register, $src$$Register);
6195   %}
6196   ins_pipe(ialu_reg);
6197 %}
6198 
6199 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback rule for CountLeadingZerosI when
       // UseCountLeadingZerosInstruction is off. It computes
       // (BitsPerInt - 1) - bsrl(src); flags are declared killed.
6200   predicate(!UseCountLeadingZerosInstruction);
6201   match(Set dst (CountLeadingZerosI src));
6202   effect(KILL cr);
6203 
6204   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6205             "jnz     skip\n\t"
6206             "movl    $dst, -1\n"
6207       "skip:\n\t"
6208             "negl    $dst\n\t"
6209             "addl    $dst, 31" %}
6210   ins_encode %{
6211     Label bit_found;
6212     Register result = $dst$$Register;
6213     Register input  = $src$$Register;
6214     __ bsrl(result, input);
6215     __ jccb(Assembler::notZero, bit_found);
         // Branch not taken: force the index to -1 so that the negate/add
         // below produces BitsPerInt.
6216     __ movl(result, -1);
6217     __ bind(bit_found);
6218     __ negl(result);
6219     __ addl(result, BitsPerInt - 1);
6220   %}
6221   ins_pipe(ialu_reg);
6222 %}
6223 
6224 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Rule for CountLeadingZerosL, used when
       // UseCountLeadingZerosInstruction is set: a single lzcntq from a
       // long source into an int destination. Flags are declared killed.
6225   predicate(UseCountLeadingZerosInstruction);
6226   match(Set dst (CountLeadingZerosL src));
6227   effect(KILL cr);
6228 
6229   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6230   ins_encode %{
6231     __ lzcntq($dst$$Register, $src$$Register);
6232   %}
6233   ins_pipe(ialu_reg);
6234 %}
6235 
6236 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback rule for CountLeadingZerosL when
       // UseCountLeadingZerosInstruction is off. It computes
       // (BitsPerLong - 1) - bsrq(src); flags are declared killed.
6237   predicate(!UseCountLeadingZerosInstruction);
6238   match(Set dst (CountLeadingZerosL src));
6239   effect(KILL cr);
6240 
6241   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6242             "jnz     skip\n\t"
6243             "movl    $dst, -1\n"
6244       "skip:\n\t"
6245             "negl    $dst\n\t"
6246             "addl    $dst, 63" %}
6247   ins_encode %{
6248     Label bit_found;
6249     Register result = $dst$$Register;
6250     Register input  = $src$$Register;
6251     __ bsrq(result, input);
6252     __ jccb(Assembler::notZero, bit_found);
         // Branch not taken: force the index to -1 so that the negate/add
         // below produces BitsPerLong.
6253     __ movl(result, -1);
6254     __ bind(bit_found);
6255     __ negl(result);
6256     __ addl(result, BitsPerLong - 1);
6257   %}
6258   ins_pipe(ialu_reg);
6259 %}
6260 
6261 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Rule for CountTrailingZerosI, used when
       // UseCountTrailingZerosInstruction is set: a single tzcntl. Flags
       // are declared killed.
6262   predicate(UseCountTrailingZerosInstruction);
6263   match(Set dst (CountTrailingZerosI src));
6264   effect(KILL cr);
6265 
6266   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
6267   ins_encode %{
6268     __ tzcntl($dst$$Register, $src$$Register);
6269   %}
6270   ins_pipe(ialu_reg);
6271 %}
6272 
6273 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback rule for CountTrailingZerosI when
       // UseCountTrailingZerosInstruction is off: bsfl, with the result
       // replaced by BitsPerInt when the notZero branch is not taken.
6274   predicate(!UseCountTrailingZerosInstruction);
6275   match(Set dst (CountTrailingZerosI src));
6276   effect(KILL cr);
6277 
6278   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6279             "jnz     done\n\t"
6280             "movl    $dst, 32\n"
6281       "done:" %}
6282   ins_encode %{
6283     Label have_index;
6284     Register result = $dst$$Register;
6285     __ bsfl(result, $src$$Register);
6286     __ jccb(Assembler::notZero, have_index);
6287     __ movl(result, BitsPerInt);
6288     __ bind(have_index);
6289   %}
6290   ins_pipe(ialu_reg);
6291 %}
6292 
6293 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Rule for CountTrailingZerosL, used when
       // UseCountTrailingZerosInstruction is set: a single tzcntq from a
       // long source into an int destination. Flags are declared killed.
6294   predicate(UseCountTrailingZerosInstruction);
6295   match(Set dst (CountTrailingZerosL src));
6296   effect(KILL cr);
6297 
6298   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
6299   ins_encode %{
6300     __ tzcntq($dst$$Register, $src$$Register);
6301   %}
6302   ins_pipe(ialu_reg);
6303 %}
6304 
6305 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback rule for CountTrailingZerosL when
       // UseCountTrailingZerosInstruction is off: bsfq, with the result
       // replaced by BitsPerLong when the notZero branch is not taken.
6306   predicate(!UseCountTrailingZerosInstruction);
6307   match(Set dst (CountTrailingZerosL src));
6308   effect(KILL cr);
6309 
6310   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6311             "jnz     done\n\t"
6312             "movl    $dst, 64\n"
6313       "done:" %}
6314   ins_encode %{
6315     Label have_index;
6316     Register result = $dst$$Register;
6317     __ bsfq(result, $src$$Register);
6318     __ jccb(Assembler::notZero, have_index);
6319     __ movl(result, BitsPerLong);
6320     __ bind(have_index);
6321   %}
6322   ins_pipe(ialu_reg);
6323 %}
6324 
6325 
6326 //---------- Population Count Instructions -------------------------------------
6327 
6328 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Rule for PopCountI on a register source, available only when
       // UsePopCountInstruction is set; emits popcntl. Flags are declared
       // killed.
6329   predicate(UsePopCountInstruction);
6330   match(Set dst (PopCountI src));
6331   effect(KILL cr);
6332 
6333   format %{ "popcnt  $dst, $src" %}
6334   ins_encode %{
6335     __ popcntl($dst$$Register, $src$$Register);
6336   %}
6337   ins_pipe(ialu_reg);
6338 %}
6339 
6340 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Rule for PopCountI whose input is a LoadI: the load is folded
       // into popcntl's memory operand. Requires UsePopCountInstruction.
6341   predicate(UsePopCountInstruction);
6342   match(Set dst (PopCountI (LoadI mem)));
6343   effect(KILL cr);
6344 
6345   format %{ "popcnt  $dst, $mem" %}
6346   ins_encode %{
6347     __ popcntl($dst$$Register, $mem$$Address);
6348   %}
6349   ins_pipe(ialu_reg);
6350 %}
6351 
6352 // Note: Long.bitCount(long) returns an int.
6353 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Rule for PopCountL on a register source: popcntq from a long
       // source into an int destination. Requires UsePopCountInstruction.
6354   predicate(UsePopCountInstruction);
6355   match(Set dst (PopCountL src));
6356   effect(KILL cr);
6357 
6358   format %{ "popcnt  $dst, $src" %}
6359   ins_encode %{
6360     __ popcntq($dst$$Register, $src$$Register);
6361   %}
6362   ins_pipe(ialu_reg);
6363 %}
6364 
6365 // Note: Long.bitCount(long) returns an int.
6366 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Rule for PopCountL whose input is a LoadL: the load is folded
       // into popcntq's memory operand. Requires UsePopCountInstruction.
6367   predicate(UsePopCountInstruction);
6368   match(Set dst (PopCountL (LoadL mem)));
6369   effect(KILL cr);
6370 
6371   format %{ "popcnt  $dst, $mem" %}
6372   ins_encode %{
6373     __ popcntq($dst$$Register, $mem$$Address);
6374   %}
6375   ins_pipe(ialu_reg);
6376 %}
6377 
6378 
6379 //----------MemBar Instructions-----------------------------------------------
6380 // Memory barrier flavors
6381 
6382 instruct membar_acquire()
6383 %{
       // Matches both MemBarAcquire and LoadFence. Nothing is emitted:
       // the encoding is empty, and size and cost are 0.
6384   match(MemBarAcquire);
6385   match(LoadFence);
6386   ins_cost(0);
6387 
6388   size(0);
6389   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6390   ins_encode();
6391   ins_pipe(empty);
6392 %}
6393 
6394 instruct membar_acquire_lock()
6395 %{
       // Matches MemBarAcquireLock and emits nothing. The format string
       // gives the rationale: a CMPXCHG in the preceding FastLock.
6396   match(MemBarAcquireLock);
6397   ins_cost(0);
6398 
6399   size(0);
6400   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6401   ins_encode();
6402   ins_pipe(empty);
6403 %}
6404 
6405 instruct membar_release()
6406 %{
       // Matches both MemBarRelease and StoreFence. Nothing is emitted:
       // the encoding is empty, and size and cost are 0.
6407   match(MemBarRelease);
6408   match(StoreFence);
6409   ins_cost(0);
6410 
6411   size(0);
6412   format %{ "MEMBAR-release ! (empty encoding)" %}
6413   ins_encode();
6414   ins_pipe(empty);
6415 %}
6416 
6417 instruct membar_release_lock()
6418 %{
       // Matches MemBarReleaseLock and emits nothing. The format string
       // gives the rationale: a FastUnlock follows.
6419   match(MemBarReleaseLock);
6420   ins_cost(0);
6421 
6422   size(0);
6423   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6424   ins_encode();
6425   ins_pipe(empty);
6426 %}
6427 
6428 instruct membar_volatile(rFlagsReg cr) %{
       // General rule for MemBarVolatile (cost 400): asks the macro
       // assembler for a StoreLoad barrier and declares the flags killed.
       // The printed text is chosen by a template on os::is_MP(): a locked
       // addl on [rsp] when it is true, otherwise an "empty encoding" note.
6429   match(MemBarVolatile);
6430   effect(KILL cr);
6431   ins_cost(400);
6432 
6433   format %{
6434     $$template
6435     if (os::is_MP()) {
6436       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6437     } else {
6438       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6439     }
6440   %}
6441   ins_encode %{
6442     __ membar(Assembler::StoreLoad);
6443   %}
6444   ins_pipe(pipe_slow);
6445 %}
6446 
6447 instruct unnecessary_membar_volatile()
6448 %{
       // Zero-cost, zero-size alternative for MemBarVolatile, eligible only
       // when Matcher::post_store_load_barrier(n) is true for the node.
       // It emits nothing.
6449   match(MemBarVolatile);
6450   predicate(Matcher::post_store_load_barrier(n));
6451   ins_cost(0);
6452 
6453   size(0);
6454   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6455   ins_encode();
6456   ins_pipe(empty);
6457 %}
6458 
6459 instruct membar_storestore() %{
       // Matches MemBarStoreStore and emits nothing (empty encoding,
       // size and cost 0).
6460   match(MemBarStoreStore);
6461   ins_cost(0);
6462 
6463   size(0);
6464   format %{ "MEMBAR-storestore (empty encoding)" %}
6465   ins_encode( );
6466   ins_pipe(empty);
6467 %}
6468 
6469 //----------Move Instructions--------------------------------------------------
6470 
6471 instruct castX2P(rRegP dst, rRegL src)
6472 %{
       // Rule for CastX2P (long register reinterpreted as a pointer).
       // A movptr is emitted only when dst and src are different
       // registers; otherwise nothing is emitted.
6473   match(Set dst (CastX2P src));
6474 
6475   format %{ "movq    $dst, $src\t# long->ptr" %}
6476   ins_encode %{
6477     if ($dst$$reg != $src$$reg) {
6478       __ movptr($dst$$Register, $src$$Register);
6479     }
6480   %}
6481   ins_pipe(ialu_reg_reg); // XXX
6482 %}
6483 
6484 instruct castP2X(rRegL dst, rRegP src)
6485 %{
       // Rule for CastP2X (pointer register reinterpreted as a long).
       // A movptr is emitted only when dst and src are different
       // registers; otherwise nothing is emitted.
6486   match(Set dst (CastP2X src));
6487 
6488   format %{ "movq    $dst, $src\t# ptr -> long" %}
6489   ins_encode %{
6490     if ($dst$$reg != $src$$reg) {
6491       __ movptr($dst$$Register, $src$$Register);
6492     }
6493   %}
6494   ins_pipe(ialu_reg_reg); // XXX
6495 %}
6496 
6497 // Convert oop into int for vectors alignment masking
6498 instruct convP2I(rRegI dst, rRegP src)
6499 %{
       // Collapses the two-node pattern ConvL2I(CastP2X(src)) into one
       // 32-bit movl from the pointer register. The move is emitted
       // unconditionally, even if dst and src are the same register.
6500   match(Set dst (ConvL2I (CastP2X src)));
6501 
6502   format %{ "movl    $dst, $src\t# ptr -> int" %}
6503   ins_encode %{
6504     __ movl($dst$$Register, $src$$Register);
6505   %}
6506   ins_pipe(ialu_reg_reg); // XXX
6507 %}
6508 
6509 // Convert compressed oop into int for vectors alignment masking
6510 // in case of 32bit oops (heap < 4Gb).
6511 instruct convN2I(rRegI dst, rRegN src)
6512 %{
       // Collapses ConvL2I(CastP2X(DecodeN(src))) into one 32-bit movl
       // from the compressed-pointer register, skipping the decode. Only
       // eligible when the narrow oop shift is 0.
6513   predicate(Universe::narrow_oop_shift() == 0);
6514   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6515 
6516   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6517   ins_encode %{
6518     __ movl($dst$$Register, $src$$Register);
6519   %}
6520   ins_pipe(ialu_reg_reg); // XXX
6521 %}
6522 
6523 // Convert oop pointer into compressed form
6524 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Rule for EncodeP when the node's pointer type is not NotNull.
       // The source is first copied into dst if the registers differ, then
       // dst is encoded in place. Flags are declared killed.
6525   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6526   match(Set dst (EncodeP src));
6527   effect(KILL cr);
6528   format %{ "encode_heap_oop $dst,$src" %}
6529   ins_encode %{
6530     Register dst_reg = $dst$$Register;
6531     Register src_reg = $src$$Register;
6532     if (dst_reg != src_reg) {
6533       __ movq(dst_reg, src_reg);
6534     }
6535     __ encode_heap_oop(dst_reg);
6536   %}
6537   ins_pipe(ialu_reg_long);
6538 %}
6539 
6540 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Rule for EncodeP when the node's pointer type is NotNull: calls
       // the two-register encode_heap_oop_not_null(dst, src). Flags are
       // declared killed.
6541   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6542   match(Set dst (EncodeP src));
6543   effect(KILL cr);
6544   format %{ "encode_heap_oop_not_null $dst,$src" %}
6545   ins_encode %{
6546     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6547   %}
6548   ins_pipe(ialu_reg_long);
6549 %}
6550 
6551 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Rule for DecodeN when the node's pointer type is neither NotNull
       // nor Constant. The source is first copied into dst if the
       // registers differ, then dst is decoded in place. Flags are
       // declared killed.
6552   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6553             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6554   match(Set dst (DecodeN src));
6555   effect(KILL cr);
6556   format %{ "decode_heap_oop $dst,$src" %}
6557   ins_encode %{
6558     Register dst_reg = $dst$$Register;
6559     Register src_reg = $src$$Register;
6560     if (dst_reg != src_reg) {
6561       __ movq(dst_reg, src_reg);
6562     }
6563     __ decode_heap_oop(dst_reg);
6564   %}
6565   ins_pipe(ialu_reg_long);
6566 %}
6567 
6568 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Rule for DecodeN when the node's pointer type is NotNull or
       // Constant. It uses the one-register decode when dst and src share
       // a register, and the two-register form otherwise. Flags are
       // declared killed.
6569   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6570             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6571   match(Set dst (DecodeN src));
6572   effect(KILL cr);
6573   format %{ "decode_heap_oop_not_null $dst,$src" %}
6574   ins_encode %{
6575     Register dst_reg = $dst$$Register;
6576     Register src_reg = $src$$Register;
6577     if (dst_reg == src_reg) {
6578       __ decode_heap_oop_not_null(dst_reg);
6579     } else {
6580       __ decode_heap_oop_not_null(dst_reg, src_reg);
6581     }
6582   %}
6583   ins_pipe(ialu_reg_long);
6584 %}
6585 
6586 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Unpredicated rule for EncodePKlass: calls the two-register
       // encode_klass_not_null(dst, src). Flags are declared killed.
6587   match(Set dst (EncodePKlass src));
6588   effect(KILL cr);
6589   format %{ "encode_klass_not_null $dst,$src" %}
6590   ins_encode %{
6591     __ encode_klass_not_null($dst$$Register, $src$$Register);
6592   %}
6593   ins_pipe(ialu_reg_long);
6594 %}
6595 
6596 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Unpredicated rule for DecodeNKlass. It uses the one-register
       // decode when dst and src share a register, and the two-register
       // form otherwise. Flags are declared killed.
6597   match(Set dst (DecodeNKlass src));
6598   effect(KILL cr);
6599   format %{ "decode_klass_not_null $dst,$src" %}
6600   ins_encode %{
6601     Register dst_reg = $dst$$Register;
6602     Register src_reg = $src$$Register;
6603     if (dst_reg == src_reg) {
6604       __ decode_klass_not_null(dst_reg);
6605     } else {
6606       __ decode_klass_not_null(dst_reg, src_reg);
6607     }
6608   %}
6609   ins_pipe(ialu_reg_long);
6610 %}
6611 
6612 
6613 //----------Conditional Move---------------------------------------------------
6614 // Jump
6615 // dummy instruction for generating temp registers
6616 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Jump through a table indexed by (switch_val << shift). The rule
       // is disabled: its predicate is the constant false. dest is a TEMP
       // register that receives $constantaddress via lea; the jump then
       // goes through [dest + switch_val * scale].
6617   match(Jump (LShiftL switch_val shift));
6618   ins_cost(350);
6619   predicate(false);
6620   effect(TEMP dest);
6621 
6622   format %{ "leaq    $dest, [$constantaddress]\n\t"
6623             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6624   ins_encode %{
6625     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6626     // to do that and the compiler is using that register as one it can allocate.
6627     // So we build it all by hand.
6628     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6629     // ArrayAddress dispatch(table, index);
6630     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6631     __ lea($dest$$Register, $constantaddress);
6632     __ jmp(dispatch);
6633   %}
6634   ins_pipe(pipe_jmp);
6635 %}
6636 
6637 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump through a table indexed by (switch_val << shift) + offset.
       // dest is a TEMP register that receives $constantaddress via lea;
       // the jump then goes through [dest + switch_val * scale + offset],
       // with the offset constant narrowed to int.
6638   match(Jump (AddL (LShiftL switch_val shift) offset));
6639   ins_cost(350);
6640   effect(TEMP dest);
6641 
6642   format %{ "leaq    $dest, [$constantaddress]\n\t"
6643             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6644   ins_encode %{
6645     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6646     // to do that and the compiler is using that register as one it can allocate.
6647     // So we build it all by hand.
6648     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6649     // ArrayAddress dispatch(table, index);
6650     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6651     __ lea($dest$$Register, $constantaddress);
6652     __ jmp(dispatch);
6653   %}
6654   ins_pipe(pipe_jmp);
6655 %}
6656 
6657 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Jump through a table indexed directly by switch_val. dest is a
       // TEMP register that receives $constantaddress via lea; the jump
       // then goes through [dest + switch_val] with scale times_1.
6658   match(Jump switch_val);
6659   ins_cost(350);
6660   effect(TEMP dest);
6661 
6662   format %{ "leaq    $dest, [$constantaddress]\n\t"
6663             "jmp     [$dest + $switch_val]\n\t" %}
6664   ins_encode %{
6665     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6666     // to do that and the compiler is using that register as one it can allocate.
6667     // So we build it all by hand.
6668     // Address index(noreg, switch_reg, Address::times_1);
6669     // ArrayAddress dispatch(table, index);
6670     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6671     __ lea($dest$$Register, $constantaddress);
6672     __ jmp(dispatch);
6673   %}
6674   ins_pipe(pipe_jmp);
6675 %}
6676 
6677 // Conditional move
6678 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6679 %{
       // Rule for CMoveI on int registers with a cmpOp condition on
       // rFlagsReg (printed as "signed"). dst is both an input and the
       // result. Opcode bytes are 0x0F, 0x40, with the condition supplied
       // through enc_cmov(cop).
6680   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6681 
6682   ins_cost(200); // XXX
6683   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6684   opcode(0x0F, 0x40);
6685   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6686   ins_pipe(pipe_cmov_reg);
6687 %}
6688 
6689 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Same as cmovI_reg but for a cmpOpU condition on rFlagsRegU
       // (printed as "unsigned"); the encoding is identical.
6690   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6691 
6692   ins_cost(200); // XXX
6693   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6694   opcode(0x0F, 0x40);
6695   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6696   ins_pipe(pipe_cmov_reg);
6697 %}
6698 
6699 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Variant for cmpOpUCF / rFlagsRegUCF operands. It has no encoding
       // of its own and expands to cmovI_regU with the same operands.
6700   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6701   ins_cost(200);
6702   expand %{
6703     cmovI_regU(cop, cr, dst, src);
6704   %}
6705 %}
6706 
6707 // Conditional move
6708 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // Rule for CMoveI whose second value is a LoadI: the load is
       // folded into the cmov's memory operand. cmpOp condition (printed
       // as "signed"); cost 250 versus 200 for the register form.
6709   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6710 
6711   ins_cost(250); // XXX
6712   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6713   opcode(0x0F, 0x40);
6714   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6715   ins_pipe(pipe_cmov_mem);
6716 %}
6717 
6718 // Conditional move
6719 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6720 %{
       // Same as cmovI_mem but for a cmpOpU condition on rFlagsRegU
       // (printed as "unsigned"); the encoding is identical.
6721   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6722 
6723   ins_cost(250); // XXX
6724   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6725   opcode(0x0F, 0x40);
6726   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6727   ins_pipe(pipe_cmov_mem);
6728 %}
6729 
6730 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Variant for cmpOpUCF / rFlagsRegUCF operands. It has no encoding
       // of its own and expands to cmovI_memU with the same operands.
6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6732   ins_cost(250);
6733   expand %{
6734     cmovI_memU(cop, cr, dst, src);
6735   %}
6736 %}
6737 
6738 // Conditional move
6739 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6740 %{
       // Rule for CMoveN on compressed-pointer registers with a cmpOp
       // condition. Printed as cmovl and encoded with the non-wide
       // REX_reg_reg prefix, like the int form.
6741   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6742 
6743   ins_cost(200); // XXX
6744   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6745   opcode(0x0F, 0x40);
6746   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6747   ins_pipe(pipe_cmov_reg);
6748 %}
6749 
6750 // Conditional move
6751 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6752 %{
       // Same as cmovN_reg but for a cmpOpU condition on rFlagsRegU
       // (printed as "unsigned"); the encoding is identical.
6753   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6754 
6755   ins_cost(200); // XXX
6756   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6757   opcode(0x0F, 0x40);
6758   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6759   ins_pipe(pipe_cmov_reg);
6760 %}
6761 
6762 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Variant for cmpOpUCF / rFlagsRegUCF operands. It has no encoding
       // of its own and expands to cmovN_regU with the same operands.
6763   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6764   ins_cost(200);
6765   expand %{
6766     cmovN_regU(cop, cr, dst, src);
6767   %}
6768 %}
6769 
6770 // Conditional move
6771 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6772 %{
       // Rule for CMoveP on pointer registers with a cmpOp condition.
       // Printed as cmovq; same opcode bytes as the int form but with the
       // wide REX_reg_reg_wide prefix.
6773   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6774 
6775   ins_cost(200); // XXX
6776   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6777   opcode(0x0F, 0x40);
6778   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6779   ins_pipe(pipe_cmov_reg);  // XXX
6780 %}
6781 
6782 // Conditional move
6783 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6784 %{
       // Same as cmovP_reg but for a cmpOpU condition on rFlagsRegU
       // (printed as "unsigned"); the encoding is identical.
6785   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6786 
6787   ins_cost(200); // XXX
6788   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6789   opcode(0x0F, 0x40);
6790   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6791   ins_pipe(pipe_cmov_reg); // XXX
6792 %}
6793 
6794 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Variant for cmpOpUCF / rFlagsRegUCF operands. It has no encoding
       // of its own and expands to cmovP_regU with the same operands.
6795   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6796   ins_cost(200);
6797   expand %{
6798     cmovP_regU(cop, cr, dst, src);
6799   %}
6800 %}
6801 
6802 // DISABLED: Requires the ADLC to emit a bottom_type call that
6803 // correctly meets the two pointer arguments; one is an incoming
6804 // register but the other is a memory operand.  ALSO appears to
6805 // be buggy with implicit null checks.
6806 //
6807 //// Conditional move
6808 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6809 //%{
6810 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6811 //  ins_cost(250);
6812 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6813 //  opcode(0x0F,0x40);
6814 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6815 //  ins_pipe( pipe_cmov_mem );
6816 //%}
6817 //
6818 //// Conditional move
6819 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6820 //%{
6821 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6822 //  ins_cost(250);
6823 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6824 //  opcode(0x0F,0x40);
6825 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6826 //  ins_pipe( pipe_cmov_mem );
6827 //%}
6828 
6829 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6830 %{ // Conditional move of a long, register source, signed-compare flags
6831   match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6832 
6833   ins_cost(200); // XXX
6834   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6835   opcode(0x0F, 0x40); // CMOVcc base opcode; presumably enc_cmov merges the condition of cop into 0x40
6836   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6837   ins_pipe(pipe_cmov_reg);  // XXX
6838 %}
6839 
6840 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6841 %{ // Conditional move of a long with the LoadL folded into the instruction, signed-compare flags
6842   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src)))); // second value comes straight from memory
6843 
6844   ins_cost(200); // XXX
6845   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6846   opcode(0x0F, 0x40); // CMOVcc base opcode; presumably enc_cmov merges the condition of cop into 0x40
6847   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6848   ins_pipe(pipe_cmov_mem);  // XXX
6849 %}
6850 
6851 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6852 %{ // Conditional move of a long, register source, unsigned-compare flags
6853   match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6854 
6855   ins_cost(200); // XXX
6856   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6857   opcode(0x0F, 0x40); // CMOVcc base opcode; presumably enc_cmov merges the condition of cop into 0x40
6858   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6859   ins_pipe(pipe_cmov_reg); // XXX
6860 %}
6861 
6862 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ // CMoveL for the cmpOpUCF/rFlagsRegUCF operand pair
6863   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6864   ins_cost(200);
6865   expand %{
6866     cmovL_regU(cop, cr, dst, src); // no encoding of its own: expands to cmovL_regU
6867   %}
6868 %}
6869 
6870 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6871 %{ // Conditional move of a long with the LoadL folded into the instruction, unsigned-compare flags
6872   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src)))); // second value comes straight from memory
6873 
6874   ins_cost(200); // XXX
6875   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6876   opcode(0x0F, 0x40); // CMOVcc base opcode; presumably enc_cmov merges the condition of cop into 0x40
6877   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6878   ins_pipe(pipe_cmov_mem); // XXX
6879 %}
6880 
6881 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ // memory-source CMoveL for the cmpOpUCF/rFlagsRegUCF operand pair
6882   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6883   ins_cost(200);
6884   expand %{
6885     cmovL_memU(cop, cr, dst, src); // no encoding of its own: expands to cmovL_memU
6886   %}
6887 %}
6888 
6889 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6890 %{ // Float conditional move (signed-compare flags), emitted as a short branch around a register move
6891   match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6892 
6893   ins_cost(200); // XXX
6894   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6895             "movss     $dst, $src\n"
6896     "skip:" %}
6897   ins_encode %{
6898     Label Lskip;
6899     // Invert sense of branch from sense of CMOV
6900     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // ^1 toggles the low bit of the condition code to get the opposite condition
6901     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // executed only when the original condition holds
6902     __ bind(Lskip);
6903   %}
6904   ins_pipe(pipe_slow);
6905 %}
6906 
6907 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
6908 // %{
6909 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
6910 
6911 //   ins_cost(200); // XXX
6912 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6913 //             "movss     $dst, $src\n"
6914 //     "skip:" %}
6915 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
6916 //   ins_pipe(pipe_slow);
6917 // %}
6918 
6919 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
6920 %{ // Float conditional move (unsigned-compare flags), emitted as a short branch around a register move
6921   match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6922 
6923   ins_cost(200); // XXX
6924   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
6925             "movss     $dst, $src\n"
6926     "skip:" %}
6927   ins_encode %{
6928     Label Lskip;
6929     // Invert sense of branch from sense of CMOV
6930     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // ^1 toggles the low bit of the condition code to get the opposite condition
6931     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // executed only when the original condition holds
6932     __ bind(Lskip);
6933   %}
6934   ins_pipe(pipe_slow);
6935 %}
6936 
6937 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ // CMoveF for the cmpOpUCF/rFlagsRegUCF operand pair
6938   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6939   ins_cost(200);
6940   expand %{
6941     cmovF_regU(cop, cr, dst, src); // no encoding of its own: expands to cmovF_regU
6942   %}
6943 %}
6944 
6945 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
6946 %{ // Double conditional move (signed-compare flags), emitted as a short branch around a register move
6947   match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6948 
6949   ins_cost(200); // XXX
6950   format %{ "jn$cop    skip\t# signed cmove double\n\t"
6951             "movsd     $dst, $src\n"
6952     "skip:" %}
6953   ins_encode %{
6954     Label Lskip;
6955     // Invert sense of branch from sense of CMOV
6956     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // ^1 toggles the low bit of the condition code to get the opposite condition
6957     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // executed only when the original condition holds
6958     __ bind(Lskip);
6959   %}
6960   ins_pipe(pipe_slow);
6961 %}
6962 
6963 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
6964 %{ // Double conditional move (unsigned-compare flags), emitted as a short branch around a register move
6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6966 
6967   ins_cost(200); // XXX
6968   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
6969             "movsd     $dst, $src\n"
6970     "skip:" %}
6971   ins_encode %{
6972     Label Lskip;
6973     // Invert sense of branch from sense of CMOV
6974     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // ^1 toggles the low bit of the condition code to get the opposite condition
6975     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // executed only when the original condition holds
6976     __ bind(Lskip);
6977   %}
6978   ins_pipe(pipe_slow);
6979 %}
6980 
6981 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ // CMoveD for the cmpOpUCF/rFlagsRegUCF operand pair
6982   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6983   ins_cost(200);
6984   expand %{
6985     cmovD_regU(cop, cr, dst, src); // no encoding of its own: expands to cmovD_regU
6986   %}
6987 %}
6988 
6989 //----------Arithmetic Instructions--------------------------------------------
6990 //----------Addition Instructions----------------------------------------------
6991 
6992 instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
6993 %{ // AddExactI, register + register; dst uses the rax_RegI operand class
6994   match(AddExactI dst src); // no "Set": the rule names no single result operand
6995   effect(DEF cr); // the flags register is declared as defined by this instruction
6996 
6997   format %{ "addl    $dst, $src\t# addExact int" %}
6998   ins_encode %{
6999     __ addl($dst$$Register, $src$$Register);
7000   %}
7001   ins_pipe(ialu_reg_reg);
7002 %}
7003 
7004 instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
7005 %{ // AddExactI, register + int immediate; dst uses the rax_RegI operand class
7006   match(AddExactI dst src); // no "Set": the rule names no single result operand
7007   effect(DEF cr); // the flags register is declared as defined by this instruction
7008 
7009   format %{ "addl    $dst, $src\t# addExact int" %}
7010   ins_encode %{
7011     __ addl($dst$$Register, $src$$constant);
7012   %}
7013   ins_pipe(ialu_reg_reg); // NOTE(review): addI_rReg_imm uses ialu_reg for the same operand shape
7014 %}
7015 
7016 instruct addExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
7017 %{ // AddExactI, register + memory: the LoadI is folded into the addl
7018   match(AddExactI dst (LoadI src)); // no "Set": the rule names no single result operand
7019   effect(DEF cr); // the flags register is declared as defined by this instruction
7020 
7021   ins_cost(125); // XXX
7022   format %{ "addl    $dst, $src\t# addExact int" %}
7023   ins_encode %{
7024     __ addl($dst$$Register, $src$$Address);
7025   %}
7026 
7027   ins_pipe(ialu_reg_mem);
7028 %}
7029 
7030 instruct addExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
7031 %{ // AddExactL, register + register; dst uses the rax_RegL operand class
7032   match(AddExactL dst src); // no "Set": the rule names no single result operand
7033   effect(DEF cr); // the flags register is declared as defined by this instruction
7034 
7035   format %{ "addq    $dst, $src\t# addExact long" %}
7036   ins_encode %{
7037     __ addq($dst$$Register, $src$$Register);
7038   %}
7039   ins_pipe(ialu_reg_reg);
7040 %}
7041 
7042 instruct addExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
7043 %{ // AddExactL, register + immL32 immediate; dst uses the rax_RegL operand class
7044   match(AddExactL dst src); // no "Set": the rule names no single result operand
7045   effect(DEF cr); // the flags register is declared as defined by this instruction
7046 
7047   format %{ "addq    $dst, $src\t# addExact long" %}
7048   ins_encode %{
7049     __ addq($dst$$Register, $src$$constant);
7050   %}
7051   ins_pipe(ialu_reg_reg); // NOTE(review): addL_rReg_imm uses ialu_reg for the same operand shape
7052 %}
7053 
7054 instruct addExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
7055 %{ // AddExactL, register + memory: the LoadL is folded into the addq
7056   match(AddExactL dst (LoadL src)); // no "Set": the rule names no single result operand
7057   effect(DEF cr); // the flags register is declared as defined by this instruction
7058 
7059   ins_cost(125); // XXX
7060   format %{ "addq    $dst, $src\t# addExact long" %}
7061   ins_encode %{
7062     __ addq($dst$$Register, $src$$Address);
7063   %}
7064 
7065   ins_pipe(ialu_reg_mem);
7066 %}
7067 
7068 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7069 %{ // Int add, register + register, two-address form (dst is also the left input)
7070   match(Set dst (AddI dst src));
7071   effect(KILL cr); // the flags register is declared as clobbered
7072 
7073   format %{ "addl    $dst, $src\t# int" %}
7074   opcode(0x03); // ADD r32, r/m32
7075   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7076   ins_pipe(ialu_reg_reg);
7077 %}
7078 
7079 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7080 %{ // Int add, register + immediate
7081   match(Set dst (AddI dst src));
7082   effect(KILL cr); // the flags register is declared as clobbered
7083 
7084   format %{ "addl    $dst, $src\t# int" %}
7085   opcode(0x81, 0x00); /* /0 id */
7086   ins_encode(OpcSErm(dst, src), Con8or32(src)); // presumably picks the 8-bit or 32-bit immediate form by value -- see OpcSErm/Con8or32
7087   ins_pipe( ialu_reg );
7088 %}
7089 
7090 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7091 %{ // Int add, register + memory: the LoadI is folded into the addl
7092   match(Set dst (AddI dst (LoadI src)));
7093   effect(KILL cr); // the flags register is declared as clobbered
7094 
7095   ins_cost(125); // XXX
7096   format %{ "addl    $dst, $src\t# int" %}
7097   opcode(0x03); // ADD r32, r/m32
7098   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7099   ins_pipe(ialu_reg_mem);
7100 %}
7101 
7102 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7103 %{ // Int read-modify-write add: load, add and store on the same address become one addl to memory
7104   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7105   effect(KILL cr); // the flags register is declared as clobbered
7106 
7107   ins_cost(150); // XXX
7108   format %{ "addl    $dst, $src\t# int" %}
7109   opcode(0x01); /* Opcode 01 /r */
7110   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand goes first in the helpers, hence (src, dst)
7111   ins_pipe(ialu_mem_reg);
7112 %}
7113 
7114 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7115 %{ // Int read-modify-write add of an immediate to memory
7116   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7117   effect(KILL cr); // the flags register is declared as clobbered
7118 
7119   ins_cost(125); // XXX
7120   format %{ "addl    $dst, $src\t# int" %}
7121   opcode(0x81); /* Opcode 81 /0 id */
7122   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension passed to RM_opc_mem
7123   ins_pipe(ialu_mem_imm);
7124 %}
7125 
7126 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7127 %{ // Int increment: AddI with an immI1 operand is emitted as incl
7128   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7129   match(Set dst (AddI dst src));
7130   effect(KILL cr); // the flags register is declared as clobbered
7131 
7132   format %{ "incl    $dst\t# int" %}
7133   opcode(0xFF, 0x00); // FF /0
7134   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7135   ins_pipe(ialu_reg);
7136 %}
7137 
7138 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7139 %{ // Int increment in memory: load/AddI(immI1)/store on the same address is emitted as incl to memory
7140   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7141   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7142   effect(KILL cr); // the flags register is declared as clobbered
7143 
7144   ins_cost(125); // XXX
7145   format %{ "incl    $dst\t# int" %}
7146   opcode(0xFF); /* Opcode FF /0 */
7147   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst)); // 0x00 is the /0 opcode extension
7148   ins_pipe(ialu_mem_imm);
7149 %}
7150 
7151 // Int decrement, matched as AddI with an immI_M1 operand (presumably the constant -1 -- confirm in the operand definition).
7152 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7153 %{
7154   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7155   match(Set dst (AddI dst src));
7156   effect(KILL cr); // the flags register is declared as clobbered
7157 
7158   format %{ "decl    $dst\t# int" %}
7159   opcode(0xFF, 0x01); // FF /1
7160   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7161   ins_pipe(ialu_reg);
7162 %}
7163 
7164 // Int decrement in memory, matched as load/AddI(immI_M1)/store (immI_M1 is presumably the constant -1 -- confirm).
7165 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7166 %{
7167   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7169   effect(KILL cr); // the flags register is declared as clobbered
7170 
7171   ins_cost(125); // XXX
7172   format %{ "decl    $dst\t# int" %}
7173   opcode(0xFF); /* Opcode FF /1 */
7174   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst)); // 0x01 is the /1 opcode extension
7175   ins_pipe(ialu_mem_imm);
7176 %}
7177 
7178 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7179 %{ // Three-address int add via lea: dst need not equal src0, and no flags operand/effect is declared
7180   match(Set dst (AddI src0 src1));
7181 
7182   ins_cost(110);
7183   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7184   opcode(0x8D); /* 0x8D /r */
7185   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  (0x67 is the prefix byte shown as "addr32" in the format)
7186   ins_pipe(ialu_reg_reg);
7187 %}
7188 
7189 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7190 %{ // Long add, register + register, two-address form (dst is also the left input)
7191   match(Set dst (AddL dst src));
7192   effect(KILL cr); // the flags register is declared as clobbered
7193 
7194   format %{ "addq    $dst, $src\t# long" %}
7195   opcode(0x03); // same ADD opcode as addI_rReg; the _wide REX helper makes it the 64-bit form
7196   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7197   ins_pipe(ialu_reg_reg);
7198 %}
7199 
7200 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7201 %{ // Long add, register + immL32 immediate
7202   match(Set dst (AddL dst src));
7203   effect(KILL cr); // the flags register is declared as clobbered
7204 
7205   format %{ "addq    $dst, $src\t# long" %}
7206   opcode(0x81, 0x00); /* /0 id */
7207   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // presumably picks the 8-bit or 32-bit immediate form by value -- see OpcSErm_wide/Con8or32
7208   ins_pipe( ialu_reg );
7209 %}
7210 
7211 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7212 %{ // Long add, register + memory: the LoadL is folded into the addq
7213   match(Set dst (AddL dst (LoadL src)));
7214   effect(KILL cr); // the flags register is declared as clobbered
7215 
7216   ins_cost(125); // XXX
7217   format %{ "addq    $dst, $src\t# long" %}
7218   opcode(0x03); // ADD r64, r/m64 together with the _wide REX helper
7219   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7220   ins_pipe(ialu_reg_mem);
7221 %}
7222 
7223 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7224 %{ // Long read-modify-write add: load, add and store on the same address become one addq to memory
7225   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7226   effect(KILL cr); // the flags register is declared as clobbered
7227 
7228   ins_cost(150); // XXX
7229   format %{ "addq    $dst, $src\t# long" %}
7230   opcode(0x01); /* Opcode 01 /r */
7231   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand goes first in the helpers, hence (src, dst)
7232   ins_pipe(ialu_mem_reg);
7233 %}
7234 
7235 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7236 %{ // Long read-modify-write add of an immL32 immediate to memory
7237   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7238   effect(KILL cr); // the flags register is declared as clobbered
7239 
7240   ins_cost(125); // XXX
7241   format %{ "addq    $dst, $src\t# long" %}
7242   opcode(0x81); /* Opcode 81 /0 id */
7243   ins_encode(REX_mem_wide(dst),
7244              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension
7245   ins_pipe(ialu_mem_imm);
7246 %}
7247 
7248 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7249 %{ // Long increment: AddL with an immL1 operand is emitted as incq
7250   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7251   match(Set dst (AddL dst src)); // dst holds a long, so it is rRegL (as in decL_rReg and addL_rReg), not rRegI
7252   effect(KILL cr); // the flags register is declared as clobbered
7253 
7254   format %{ "incq    $dst\t# long" %}
7255   opcode(0xFF, 0x00); // FF /0
7256   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // _wide REX helper: 64-bit form, matching incq in the format
7257   ins_pipe(ialu_reg);
7258 %}
7259 
7260 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7261 %{ // Long increment in memory: load/AddL(immL1)/store on the same address is emitted as incq to memory
7262   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7263   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7264   effect(KILL cr); // the flags register is declared as clobbered
7265 
7266   ins_cost(125); // XXX
7267   format %{ "incq    $dst\t# long" %}
7268   opcode(0xFF); /* Opcode FF /0 */
7269   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst)); // 0x00 is the /0 opcode extension
7270   ins_pipe(ialu_mem_imm);
7271 %}
7272 
7273 // Long decrement, matched as AddL with an immL_M1 operand (presumably the constant -1 -- confirm in the operand definition).
7274 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7275 %{
7276   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7277   match(Set dst (AddL dst src));
7278   effect(KILL cr); // the flags register is declared as clobbered
7279 
7280   format %{ "decq    $dst\t# long" %}
7281   opcode(0xFF, 0x01); // FF /1
7282   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7283   ins_pipe(ialu_reg);
7284 %}
7285 
7286 // Long decrement in memory, matched as load/AddL(immL_M1)/store (immL_M1 is presumably the constant -1 -- confirm).
7287 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7288 %{
7289   predicate(UseIncDec); // only selectable when the UseIncDec flag is set
7290   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7291   effect(KILL cr); // the flags register is declared as clobbered
7292 
7293   ins_cost(125); // XXX
7294   format %{ "decq    $dst\t# long" %}
7295   opcode(0xFF); /* Opcode FF /1 */
7296   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst)); // 0x01 is the /1 opcode extension
7297   ins_pipe(ialu_mem_imm);
7298 %}
7299 
7300 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7301 %{ // Three-address long add via leaq: dst need not equal src0, and no flags operand/effect is declared
7302   match(Set dst (AddL src0 src1));
7303 
7304   ins_cost(110);
7305   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7306   opcode(0x8D); /* 0x8D /r */
7307   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7308   ins_pipe(ialu_reg_reg);
7309 %}
7310 
7311 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7312 %{ // Pointer add: pointer register plus a long register offset, two-address form
7313   match(Set dst (AddP dst src));
7314   effect(KILL cr); // the flags register is declared as clobbered
7315 
7316   format %{ "addq    $dst, $src\t# ptr" %}
7317   opcode(0x03); // same ADD opcode and _wide REX helper as addL_rReg
7318   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7319   ins_pipe(ialu_reg_reg);
7320 %}
7321 
7322 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7323 %{ // Pointer add: pointer register plus an immL32 immediate offset
7324   match(Set dst (AddP dst src));
7325   effect(KILL cr); // the flags register is declared as clobbered
7326 
7327   format %{ "addq    $dst, $src\t# ptr" %}
7328   opcode(0x81, 0x00); /* /0 id */
7329   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7330   ins_pipe( ialu_reg );
7331 %}
7332 
7333 // XXX addP mem ops ????
7334 
7335 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7336 %{ // Three-address pointer add via leaq: dst need not equal src0, and no flags operand/effect is declared
7337   match(Set dst (AddP src0 src1));
7338 
7339   ins_cost(110);
7340   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7341   opcode(0x8D); /* 0x8D /r */
7342   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7343   ins_pipe(ialu_reg_reg);
7344 %}
7345 
7346 instruct checkCastPP(rRegP dst)
7347 %{ // CheckCastPP on a pointer register: matched in place and emits no machine code
7348   match(Set dst (CheckCastPP dst));
7349 
7350   size(0); // declared instruction size is zero bytes
7351   format %{ "# checkcastPP of $dst" %}
7352   ins_encode(/* empty encoding */);
7353   ins_pipe(empty);
7354 %}
7355 
7356 instruct castPP(rRegP dst)
7357 %{ // CastPP on a pointer register: matched in place and emits no machine code
7358   match(Set dst (CastPP dst));
7359 
7360   size(0); // declared instruction size is zero bytes
7361   format %{ "# castPP of $dst" %}
7362   ins_encode(/* empty encoding */);
7363   ins_pipe(empty);
7364 %}
7365 
7366 instruct castII(rRegI dst)
7367 %{ // CastII on an int register: matched in place and emits no machine code
7368   match(Set dst (CastII dst));
7369 
7370   size(0); // declared instruction size is zero bytes
7371   format %{ "# castII of $dst" %}
7372   ins_encode(/* empty encoding */);
7373   ins_cost(0); // explicitly free, unlike checkCastPP/castPP which leave the cost at its default
7374   ins_pipe(empty);
7375 %}
7376 
7377 // LoadP-locked same as a regular LoadP when used with compare-swap
7378 instruct loadPLocked(rRegP dst, memory mem)
7379 %{ // emitted as a plain 64-bit movq load; no lock prefix appears in the encoding
7380   match(Set dst (LoadPLocked mem));
7381 
7382   ins_cost(125); // XXX
7383   format %{ "movq    $dst, $mem\t# ptr locked" %}
7384   opcode(0x8B); // MOV r64, r/m64 together with the _wide REX helper
7385   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7386   ins_pipe(ialu_reg_mem); // XXX
7387 %}
7388 
7389 // Conditional-store of the updated heap-top.
7390 // Used during allocation of the shared heap.
7391 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7392 
7393 instruct storePConditional(memory heap_top_ptr,
7394                            rax_RegP oldval, rRegP newval,
7395                            rFlagsReg cr)
7396 %{ // the result of the rule is the flags register itself (Set cr ...)
7397   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7398 
7399   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7400             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7401   opcode(0x0F, 0xB1); // CMPXCHG r/m, r; NOTE(review): unlike storeI/LConditional no effect(KILL oldval) is declared -- confirm intentional
7402   ins_encode(lock_prefix,
7403              REX_reg_mem_wide(newval, heap_top_ptr),
7404              OpcP, OpcS,
7405              reg_mem(newval, heap_top_ptr));
7406   ins_pipe(pipe_cmpxchg);
7407 %}
7408 
7409 // Conditional-store of an int value.
7410 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7411 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7412 %{ // the result of the rule is the flags register itself (Set cr ...)
7413   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7414   effect(KILL oldval); // oldval (rax_RegI) is declared clobbered: CMPXCHG writes the accumulator when the compare fails
7415 
7416   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7417   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
7418   ins_encode(lock_prefix,
7419              REX_reg_mem(newval, mem),
7420              OpcP, OpcS,
7421              reg_mem(newval, mem));
7422   ins_pipe(pipe_cmpxchg);
7423 %}
7424 
7425 // Conditional-store of a long value.
7426 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7427 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7428 %{ // the result of the rule is the flags register itself (Set cr ...)
7429   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7430   effect(KILL oldval); // oldval (rax_RegL) is declared clobbered: CMPXCHG writes the accumulator when the compare fails
7431 
7432   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7433   opcode(0x0F, 0xB1); // CMPXCHG r/m, r; the _wide REX helper selects the 64-bit form
7434   ins_encode(lock_prefix,
7435              REX_reg_mem_wide(newval, mem),
7436              OpcP, OpcS,
7437              reg_mem(newval, mem));
7438   ins_pipe(pipe_cmpxchg);
7439 %}
7440 
7441 
7442 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7443 instruct compareAndSwapP(rRegI res,
7444                          memory mem_ptr,
7445                          rax_RegP oldval, rRegP newval,
7446                          rFlagsReg cr)
7447 %{ // Pointer compare-and-swap; res receives 1 on success and 0 on failure via sete + movzbl
7448   predicate(VM_Version::supports_cx8());
7449   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7450   effect(KILL cr, KILL oldval); // flags and the rax-class oldval operand are both declared clobbered
7451 
7452   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7453             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7454             "sete    $res\n\t"
7455             "movzbl  $res, $res" %}
7456   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
7457   ins_encode(lock_prefix,
7458              REX_reg_mem_wide(newval, mem_ptr),
7459              OpcP, OpcS,
7460              reg_mem(newval, mem_ptr),
7461              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7462              REX_reg_breg(res, res), // movzbl
7463              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7464   ins_pipe( pipe_cmpxchg );
7465 %}
7466 
7467 instruct compareAndSwapL(rRegI res,
7468                          memory mem_ptr,
7469                          rax_RegL oldval, rRegL newval,
7470                          rFlagsReg cr)
7471 %{ // Long compare-and-swap; res receives 1 on success and 0 on failure via sete + movzbl
7472   predicate(VM_Version::supports_cx8());
7473   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7474   effect(KILL cr, KILL oldval); // flags and the rax-class oldval operand are both declared clobbered
7475 
7476   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7477             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7478             "sete    $res\n\t"
7479             "movzbl  $res, $res" %}
7480   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
7481   ins_encode(lock_prefix,
7482              REX_reg_mem_wide(newval, mem_ptr),
7483              OpcP, OpcS,
7484              reg_mem(newval, mem_ptr),
7485              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7486              REX_reg_breg(res, res), // movzbl
7487              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7488   ins_pipe( pipe_cmpxchg );
7489 %}
7490 
7491 instruct compareAndSwapI(rRegI res,
7492                          memory mem_ptr,
7493                          rax_RegI oldval, rRegI newval,
7494                          rFlagsReg cr)
7495 %{ // Int compare-and-swap (32-bit cmpxchgl, no supports_cx8 predicate); res is 1 on success, 0 on failure
7496   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7497   effect(KILL cr, KILL oldval); // flags and the rax-class oldval operand are both declared clobbered
7498 
7499   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7500             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7501             "sete    $res\n\t"
7502             "movzbl  $res, $res" %}
7503   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
7504   ins_encode(lock_prefix,
7505              REX_reg_mem(newval, mem_ptr),
7506              OpcP, OpcS,
7507              reg_mem(newval, mem_ptr),
7508              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7509              REX_reg_breg(res, res), // movzbl
7510              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7511   ins_pipe( pipe_cmpxchg );
7512 %}
7513 
7514 
7515 instruct compareAndSwapN(rRegI res,
7516                           memory mem_ptr,
7517                           rax_RegN oldval, rRegN newval,
7518                           rFlagsReg cr) %{ // Compressed-pointer compare-and-swap (32-bit cmpxchgl); res is 1 on success, 0 on failure
7519   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7520   effect(KILL cr, KILL oldval); // flags and the rax-class oldval operand are both declared clobbered
7521 
7522   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7523             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7524             "sete    $res\n\t"
7525             "movzbl  $res, $res" %}
7526   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
7527   ins_encode(lock_prefix,
7528              REX_reg_mem(newval, mem_ptr),
7529              OpcP, OpcS,
7530              reg_mem(newval, mem_ptr),
7531              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7532              REX_reg_breg(res, res), // movzbl
7533              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7534   ins_pipe( pipe_cmpxchg );
7535 %}
7536 
7537 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ // GetAndAddI whose result is unused: a plain addl to memory suffices
7538   predicate(n->as_LoadStore()->result_not_used()); // only when nothing consumes the fetched value
7539   match(Set dummy (GetAndAddI mem add)); // dummy stands in for the unused result
7540   effect(KILL cr); // the flags register is declared as clobbered
7541   format %{ "ADDL  [$mem],$add" %}
7542   ins_encode %{
7543     if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
7544     __ addl($mem$$Address, $add$$constant);
7545   %}
7546   ins_pipe( pipe_cmpxchg );
7547 %}
7548 
7549 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ // GetAndAddI via xaddl: newval carries the addend in and is the result operand
7550   match(Set newval (GetAndAddI mem newval));
7551   effect(KILL cr); // the flags register is declared as clobbered
7552   format %{ "XADDL  [$mem],$newval" %}
7553   ins_encode %{
7554     if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
7555     __ xaddl($mem$$Address, $newval$$Register);
7556   %}
7557   ins_pipe( pipe_cmpxchg );
7558 %}
7559 
7560 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ // GetAndAddL whose result is unused: a plain addq to memory suffices
7561   predicate(n->as_LoadStore()->result_not_used()); // only when nothing consumes the fetched value
7562   match(Set dummy (GetAndAddL mem add)); // dummy stands in for the unused result
7563   effect(KILL cr); // the flags register is declared as clobbered
7564   format %{ "ADDQ  [$mem],$add" %}
7565   ins_encode %{
7566     if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
7567     __ addq($mem$$Address, $add$$constant);
7568   %}
7569   ins_pipe( pipe_cmpxchg );
7570 %}
7571 
7572 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ // GetAndAddL via xaddq: newval carries the addend in and is the result operand
7573   match(Set newval (GetAndAddL mem newval));
7574   effect(KILL cr); // the flags register is declared as clobbered
7575   format %{ "XADDQ  [$mem],$newval" %}
7576   ins_encode %{
7577     if (os::is_MP()) { __ lock(); } // lock prefix is emitted only when os::is_MP() is true
7578     __ xaddq($mem$$Address, $newval$$Register);
7579   %}
7580   ins_pipe( pipe_cmpxchg );
7581 %}
7582 
7583 instruct xchgI( memory mem, rRegI newval) %{ // GetAndSetI via xchgl: newval carries the new value in and is the result operand
7584   match(Set newval (GetAndSetI mem newval)); // no flags operand or effect is declared for this rule
7585   format %{ "XCHGL  $newval,[$mem]" %}
7586   ins_encode %{
7587     __ xchgl($newval$$Register, $mem$$Address); // no explicit lock(): XCHG with a memory operand is implicitly locked on x86
7588   %}
7589   ins_pipe( pipe_cmpxchg );
7590 %}
7591 
7592 instruct xchgL( memory mem, rRegL newval) %{ // GetAndSetL via xchgq: newval carries the new value in and is the result operand
7593   match(Set newval (GetAndSetL mem newval)); // no flags operand or effect is declared for this rule
7594   format %{ "XCHGQ  $newval,[$mem]" %}
7595   ins_encode %{
7596     __ xchgq($newval$$Register, $mem$$Address); // 64-bit exchange, so the format above prints XCHGQ; XCHG with memory is implicitly locked
7597   %}
7598   ins_pipe( pipe_cmpxchg );
7599 %}
7600 
7601 instruct xchgP( memory mem, rRegP newval) %{ // GetAndSetP via xchgq: newval carries the new value in and is the result operand
7602   match(Set newval (GetAndSetP mem newval)); // no flags operand or effect is declared for this rule
7603   format %{ "XCHGQ  $newval,[$mem]" %}
7604   ins_encode %{
7605     __ xchgq($newval$$Register, $mem$$Address); // no explicit lock(): XCHG with a memory operand is implicitly locked on x86
7606   %}
7607   ins_pipe( pipe_cmpxchg );
7608 %}
7609 
7610 instruct xchgN( memory mem, rRegN newval) %{ // GetAndSetN via xchgl: newval carries the new compressed pointer in and is the result operand
7611   match(Set newval (GetAndSetN mem newval)); // no flags operand or effect is declared for this rule
7612   format %{ "XCHGL  $newval,[$mem]" %}
7613   ins_encode %{
7614     __ xchgl($newval$$Register, $mem$$Address); // no explicit lock(): XCHG with a memory operand is implicitly locked on x86
7615   %}
7616   ins_pipe( pipe_cmpxchg );
7617 %}
7618 
7619 //----------Subtraction Instructions-------------------------------------------
7620 
7621 // Integer Subtraction Instructions
7622 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7623 %{ // Int subtract, register - register, two-address form (dst is also the left input)
7624   match(Set dst (SubI dst src));
7625   effect(KILL cr); // the flags register is declared as clobbered
7626 
7627   format %{ "subl    $dst, $src\t# int" %}
7628   opcode(0x2B); // SUB r32, r/m32
7629   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7630   ins_pipe(ialu_reg_reg);
7631 %}
7632 
7633 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7634 %{ // Int subtract, register - immediate
7635   match(Set dst (SubI dst src));
7636   effect(KILL cr); // the flags register is declared as clobbered
7637 
7638   format %{ "subl    $dst, $src\t# int" %}
7639   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7640   ins_encode(OpcSErm(dst, src), Con8or32(src)); // presumably picks the 8-bit or 32-bit immediate form by value -- see OpcSErm/Con8or32
7641   ins_pipe(ialu_reg);
7642 %}
7643 
7644 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7645 %{ // Int subtract, register - memory: the LoadI is folded into the subl
7646   match(Set dst (SubI dst (LoadI src)));
7647   effect(KILL cr); // the flags register is declared as clobbered
7648 
7649   ins_cost(125);
7650   format %{ "subl    $dst, $src\t# int" %}
7651   opcode(0x2B); // SUB r32, r/m32
7652   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7653   ins_pipe(ialu_reg_mem);
7654 %}
7655 
7656 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7657 %{ // Int read-modify-write subtract: load, sub and store on the same address become one subl to memory
7658   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7659   effect(KILL cr); // the flags register is declared as clobbered
7660 
7661   ins_cost(150);
7662   format %{ "subl    $dst, $src\t# int" %}
7663   opcode(0x29); /* Opcode 29 /r */
7664   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand goes first in the helpers, hence (src, dst)
7665   ins_pipe(ialu_mem_reg);
7666 %}
7667 
7668 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7669 %{ // Int read-modify-write subtract of an immediate from memory
7670   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7671   effect(KILL cr); // the flags register is declared as clobbered
7672 
7673   ins_cost(125); // XXX
7674   format %{ "subl    $dst, $src\t# int" %}
7675   opcode(0x81); /* Opcode 81 /5 id */
7676   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src)); // 0x05 is the /5 opcode extension
7677   ins_pipe(ialu_mem_imm);
7678 %}
7679 
7680 instruct subExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
7681 %{ // SubExactI, register - register; dst uses the rax_RegI operand class
7682   match(SubExactI dst src); // no "Set": the rule names no single result operand
7683   effect(DEF cr); // the flags register is declared as defined by this instruction
7684 
7685   format %{ "subl    $dst, $src\t# subExact int" %}
7686   ins_encode %{
7687     __ subl($dst$$Register, $src$$Register);
7688   %}
7689   ins_pipe(ialu_reg_reg);
7690 %}
7691 
7692 instruct subExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
7693 %{ // SubExactI, register - int immediate; dst uses the rax_RegI operand class
7694   match(SubExactI dst src); // no "Set": the rule names no single result operand
7695   effect(DEF cr); // the flags register is declared as defined by this instruction
7696 
7697   format %{ "subl    $dst, $src\t# subExact int" %}
7698   ins_encode %{
7699     __ subl($dst$$Register, $src$$constant);
7700   %}
7701   ins_pipe(ialu_reg_reg); // NOTE(review): subI_rReg_imm uses ialu_reg for the same operand shape
7702 %}
7703 
7704 instruct subExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
7705 %{ // SubExactI, register - memory: the LoadI is folded into the subl
7706   match(SubExactI dst (LoadI src)); // no "Set": the rule names no single result operand
7707   effect(DEF cr); // the flags register is declared as defined by this instruction
7708 
7709   ins_cost(125);
7710   format %{ "subl    $dst, $src\t# subExact int" %}
7711   ins_encode %{
7712     __ subl($dst$$Register, $src$$Address);
7713   %}
7714   ins_pipe(ialu_reg_mem);
7715 %}
7716 
7717 instruct subExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
7718 %{ // SubExactL, register - register; dst uses the rax_RegL operand class
7719   match(SubExactL dst src); // no "Set": the rule names no single result operand
7720   effect(DEF cr); // the flags register is declared as defined by this instruction
7721 
7722   format %{ "subq    $dst, $src\t# subExact long" %}
7723   ins_encode %{
7724     __ subq($dst$$Register, $src$$Register);
7725   %}
7726   ins_pipe(ialu_reg_reg);
7727 %}
7728 
7729 instruct subExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
7730 %{ // SubExactL, register - immL32 immediate; dst uses the rax_RegL operand class
7731   match(SubExactL dst src); // src is an immediate operand, so it is matched directly (not through a LoadL), as in addExactL_rReg_imm
7732   effect(DEF cr); // the flags register is declared as defined by this instruction
7733 
7734   format %{ "subq    $dst, $src\t# subExact long" %}
7735   ins_encode %{
7736     __ subq($dst$$Register, $src$$constant);
7737   %}
7738   ins_pipe(ialu_reg_reg);
7739 %}
7740 
7741 instruct subExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
7742 %{ // SubExactL, register - memory: the LoadL is folded into the subq (long counterpart of subExactI_rReg_mem)
7743   match(SubExactL dst (LoadL src)); // long node and long load, mirroring addExactL_rReg_mem; no "Set" as in the other exact rules
7744   effect(DEF cr); // the flags register is declared as defined by this instruction
7745 
7746   ins_cost(125);
7747   format %{ "subq    $dst, $src\t# subExact long" %}
7748   ins_encode %{
7749     __ subq($dst$$Register, $src$$Address);
7750   %}
7751   ins_pipe(ialu_reg_mem);
7752 %}
7753 
7754 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7755 %{ // Long subtract, register - register, two-address form (dst is also the left input)
7756   match(Set dst (SubL dst src));
7757   effect(KILL cr); // the flags register is declared as clobbered
7758 
7759   format %{ "subq    $dst, $src\t# long" %}
7760   opcode(0x2B); // same SUB opcode as subI_rReg; the _wide REX helper makes it the 64-bit form
7761   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7762   ins_pipe(ialu_reg_reg);
7763 %}
7764 
7765 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7766 %{ // Long subtract, register - immL32 immediate
7767   match(Set dst (SubL dst src)); // dst holds a long, so it is rRegL (as in subL_rReg and addL_rReg_imm), not rRegI
7768   effect(KILL cr); // the flags register is declared as clobbered
7769 
7770   format %{ "subq    $dst, $src\t# long" %}
7771   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7772   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // _wide helper: 64-bit form, matching subq in the format
7773   ins_pipe(ialu_reg);
7774 %}
7775 
7776 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7777 %{ // Long subtract, register - memory: the LoadL is folded into the subq
7778   match(Set dst (SubL dst (LoadL src)));
7779   effect(KILL cr); // the flags register is declared as clobbered
7780 
7781   ins_cost(125);
7782   format %{ "subq    $dst, $src\t# long" %}
7783   opcode(0x2B); // SUB r64, r/m64 together with the _wide REX helper
7784   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7785   ins_pipe(ialu_reg_mem);
7786 %}
7787 
7788 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7789 %{ // Long read-modify-write subtract: load, sub and store on the same address become one subq to memory
7790   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7791   effect(KILL cr); // the flags register is declared as clobbered
7792 
7793   ins_cost(150);
7794   format %{ "subq    $dst, $src\t# long" %}
7795   opcode(0x29); /* Opcode 29 /r */
7796   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand goes first in the helpers, hence (src, dst)
7797   ins_pipe(ialu_mem_reg);
7798 %}
7799 
// Long read-modify-write subtract of a 32-bit immediate: matches a StoreL to
// dst of SubL(LoadL(dst), src). Encoded as opcode 81 with /5 supplied
// explicitly to RM_opc_mem; cr is declared killed.
7800 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7801 %{
7802   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7803   effect(KILL cr);
7804 
7805   ins_cost(125); // XXX
7806   format %{ "subq    $dst, $src\t# long" %}
7807   opcode(0x81); /* Opcode 81 /5 id */
7808   ins_encode(REX_mem_wide(dst),
7809              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7810   ins_pipe(ialu_mem_imm);
7811 %}
7812 
7813 // Subtract from a pointer: matches dst = AddP(dst, SubI(zero, src)), i.e.
// adding the negation of src, and emits a single wide subtract instead.
// NOTE(review): src is typed rRegI while the emitted 0x2B is REX-wide (64-bit);
// confirm this pattern can actually match and that src's upper bits are defined.
7814 // XXX hmpf???
7815 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7816 %{
7817   match(Set dst (AddP dst (SubI zero src)));
7818   effect(KILL cr);
7819 
7820   format %{ "subq    $dst, $src\t# ptr - int" %}
7821   opcode(0x2B);
7822   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7823   ins_pipe(ialu_reg_reg);
7824 %}
7825 
// Int negate in a register: matches dst = SubI(zero, dst) where zero is the
// immI0 operand, and emits F7 /3 (negl). cr is declared killed.
7826 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7827 %{
7828   match(Set dst (SubI zero dst));
7829   effect(KILL cr);
7830 
7831   format %{ "negl    $dst\t# int" %}
7832   opcode(0xF7, 0x03);  // Opcode F7 /3
7833   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7834   ins_pipe(ialu_reg);
7835 %}
7836 
// Int negate in memory: matches a StoreI to dst of SubI(zero, LoadI(dst)) and
// emits F7 /3 on the memory operand (the /3 comes from the secondary opcode).
7837 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7838 %{
7839   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7840   effect(KILL cr);
7841 
7842   format %{ "negl    $dst\t# int" %}
7843   opcode(0xF7, 0x03);  // Opcode F7 /3
7844   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
7845   ins_pipe(ialu_reg);
7846 %}
7847 
// Long negate in a register: matches dst = SubL(zero, dst) where zero is the
// immL0 operand, and emits F7 /3 with the wide REX helper (negq).
7848 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7849 %{
7850   match(Set dst (SubL zero dst));
7851   effect(KILL cr);
7852 
7853   format %{ "negq    $dst\t# long" %}
7854   opcode(0xF7, 0x03);  // Opcode F7 /3
7855   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7856   ins_pipe(ialu_reg);
7857 %}
7858 
// Long negate in memory: matches a StoreL to dst of SubL(zero, LoadL(dst)) and
// emits F7 /3 on the memory operand with the wide REX helper.
7859 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7860 %{
7861   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7862   effect(KILL cr);
7863 
7864   format %{ "negq    $dst\t# long" %}
7865   opcode(0xF7, 0x03);  // Opcode F7 /3
7866   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
7867   ins_pipe(ialu_reg);
7868 %}
7869 
// Exact int negate (NegExactI) of dst, which is pinned to the rax_RegI class;
// emitted through the macro assembler as negl.
// NOTE(review): the add/sub/mul *Exact* rules in this file declare
// effect(DEF cr) while this one declares KILL cr -- confirm which is intended.
7870 instruct negExactI_rReg(rax_RegI dst, rFlagsReg cr)
7871 %{
7872   match(NegExactI dst);
7873   effect(KILL cr);
7874 
7875   format %{ "negl    $dst\t# negExact int" %}
7876   ins_encode %{
7877     __ negl($dst$$Register);
7878   %}
7879   ins_pipe(ialu_reg);
7880 %}
7881 
// Exact long negate (NegExactL) of dst, which is pinned to the rax_RegL class;
// emitted through the macro assembler as negq.
// NOTE(review): sibling *Exact* rules declare effect(DEF cr) while this one
// declares KILL cr -- confirm which is intended.
7882 instruct negExactL_rReg(rax_RegL dst, rFlagsReg cr)
7883 %{
7884   match(NegExactL dst);
7885   effect(KILL cr);
7886 
7887   format %{ "negq    $dst\t# negExact long" %}
7888   ins_encode %{
7889     __ negq($dst$$Register);
7890   %}
7891   ins_pipe(ialu_reg);
7892 %}
7893 
7894 
7895 //----------Multiplication/Division Instructions-------------------------------
7896 // Integer Multiplication Instructions
7897 // Multiply Register
7898 
// Int multiply, two-operand register form: matches dst = MulI(dst, src) and
// emits the two-byte opcode 0F AF. Costed at 300; cr is declared killed.
7899 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7900 %{
7901   match(Set dst (MulI dst src));
7902   effect(KILL cr);
7903 
7904   ins_cost(300);
7905   format %{ "imull   $dst, $src\t# int" %}
7906   opcode(0x0F, 0xAF);
7907   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7908   ins_pipe(ialu_reg_reg_alu0);
7909 %}
7910 
// Int multiply by an immediate, three-operand form: matches
// dst = MulI(src, imm), so dst need not be one of the inputs. Opcode 69 /r id.
7911 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7912 %{
7913   match(Set dst (MulI src imm));
7914   effect(KILL cr);
7915 
7916   ins_cost(300);
7917   format %{ "imull   $dst, $src, $imm\t# int" %}
7918   opcode(0x69); /* 69 /r id */
7919   ins_encode(REX_reg_reg(dst, src),
7920              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7921   ins_pipe(ialu_reg_reg_alu0);
7922 %}
7923 
// Int multiply with the second factor folded in from memory: matches
// dst = MulI(dst, LoadI(src)). Costed at 350 (vs. 300 for the register form).
7924 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7925 %{
7926   match(Set dst (MulI dst (LoadI src)));
7927   effect(KILL cr);
7928 
7929   ins_cost(350);
7930   format %{ "imull   $dst, $src\t# int" %}
7931   opcode(0x0F, 0xAF);
7932   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7933   ins_pipe(ialu_reg_mem_alu0);
7934 %}
7935 
// Int multiply of a memory operand by an immediate, three-operand form:
// matches dst = MulI(LoadI(src), imm). Opcode 69 /r id with a memory r/m.
7936 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7937 %{
7938   match(Set dst (MulI (LoadI src) imm));
7939   effect(KILL cr);
7940 
7941   ins_cost(300);
7942   format %{ "imull   $dst, $src, $imm\t# int" %}
7943   opcode(0x69); /* 69 /r id */
7944   ins_encode(REX_reg_mem(dst, src),
7945              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7946   ins_pipe(ialu_reg_mem_alu0);
7947 %}
7948 
// Long multiply, two-operand register form: matches dst = MulL(dst, src) and
// emits 0F AF with the wide REX helper. Costed at 300; cr is declared killed.
7949 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7950 %{
7951   match(Set dst (MulL dst src));
7952   effect(KILL cr);
7953 
7954   ins_cost(300);
7955   format %{ "imulq   $dst, $src\t# long" %}
7956   opcode(0x0F, 0xAF);
7957   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7958   ins_pipe(ialu_reg_reg_alu0);
7959 %}
7960 
// Long multiply by a 32-bit immediate, three-operand form: matches
// dst = MulL(src, imm). Opcode 69 /r id with the wide REX helper.
7961 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7962 %{
7963   match(Set dst (MulL src imm));
7964   effect(KILL cr);
7965 
7966   ins_cost(300);
7967   format %{ "imulq   $dst, $src, $imm\t# long" %}
7968   opcode(0x69); /* 69 /r id */
7969   ins_encode(REX_reg_reg_wide(dst, src),
7970              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7971   ins_pipe(ialu_reg_reg_alu0);
7972 %}
7973 
// Long multiply with the second factor folded in from memory: matches
// dst = MulL(dst, LoadL(src)). Costed at 350.
7974 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7975 %{
7976   match(Set dst (MulL dst (LoadL src)));
7977   effect(KILL cr);
7978 
7979   ins_cost(350);
7980   format %{ "imulq   $dst, $src\t# long" %}
7981   opcode(0x0F, 0xAF);
7982   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7983   ins_pipe(ialu_reg_mem_alu0);
7984 %}
7985 
// Long multiply of a memory operand by a 32-bit immediate, three-operand
// form: matches dst = MulL(LoadL(src), imm). Opcode 69 /r id, wide REX.
7986 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7987 %{
7988   match(Set dst (MulL (LoadL src) imm));
7989   effect(KILL cr);
7990 
7991   ins_cost(300);
7992   format %{ "imulq   $dst, $src, $imm\t# long" %}
7993   opcode(0x69); /* 69 /r id */
7994   ins_encode(REX_reg_mem_wide(dst, src),
7995              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7996   ins_pipe(ialu_reg_mem_alu0);
7997 %}
7998 
// High half of a long multiply: matches dst = MulHiL(src, rax) with the result
// pinned to the rdx_RegL class and one input pinned to rax_RegL. Emits the
// one-operand F7 /5 form on src; rax is both used and killed, cr is killed.
7999 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8000 %{
8001   match(Set dst (MulHiL src rax));
8002   effect(USE_KILL rax, KILL cr);
8003 
8004   ins_cost(300);
8005   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8006   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8007   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8008   ins_pipe(ialu_reg_reg_alu0);
8009 %}
8010 
8011 
// Exact int multiply (MulExactI), register-register: dst is pinned to the
// rax_RegI class and cr is declared as a DEF (flags are an output).
8012 instruct mulExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
8013 %{
8014   match(MulExactI dst src);
8015   effect(DEF cr);
8016 
8017   ins_cost(300);
8018   format %{ "imull   $dst, $src\t# mulExact int" %}
8019   ins_encode %{
8020     __ imull($dst$$Register, $src$$Register);
8021   %}
8022   ins_pipe(ialu_reg_reg_alu0);
8023 %}
8024 
8025 
// Exact int multiply (MulExactI) of src by an immediate, three-operand form:
// the match rule names only src and imm; the product is written to dst
// (rax_RegI class) by the imull below. cr is declared as a DEF.
8026 instruct mulExactI_rReg_imm(rax_RegI dst, rRegI src, immI imm, rFlagsReg cr)
8027 %{
8028   match(MulExactI src imm);
8029   effect(DEF cr);
8030 
8031   ins_cost(300);
8032   format %{ "imull   $dst, $src, $imm\t# mulExact int" %}
8033   ins_encode %{
8034     __ imull($dst$$Register, $src$$Register, $imm$$constant);
8035   %}
8036   ins_pipe(ialu_reg_reg_alu0);
8037 %}
8038 
// Exact int multiply (MulExactI) with the second factor folded in from
// memory via LoadI. Costed at 350; cr is declared as a DEF.
8039 instruct mulExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
8040 %{
8041   match(MulExactI dst (LoadI src));
8042   effect(DEF cr);
8043 
8044   ins_cost(350);
8045   format %{ "imull   $dst, $src\t# mulExact int" %}
8046   ins_encode %{
8047     __ imull($dst$$Register, $src$$Address);
8048   %}
8049   ins_pipe(ialu_reg_mem_alu0);
8050 %}
8051 
// Exact long multiply (MulExactL), register-register: dst is pinned to the
// rax_RegL class and cr is declared as a DEF (flags are an output).
8052 instruct mulExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
8053 %{
8054   match(MulExactL dst src);
8055   effect(DEF cr);
8056 
8057   ins_cost(300);
8058   format %{ "imulq   $dst, $src\t# mulExact long" %}
8059   ins_encode %{
8060     __ imulq($dst$$Register, $src$$Register);
8061   %}
8062   ins_pipe(ialu_reg_reg_alu0);
8063 %}
8064 
// Exact long multiply (MulExactL) of src by a 32-bit immediate, three-operand
// form: the match rule names only src and imm; the product is written to dst
// (rax_RegL class) by the imulq below. cr is declared as a DEF.
8065 instruct mulExactL_rReg_imm(rax_RegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8066 %{
8067   match(MulExactL src imm);
8068   effect(DEF cr);
8069 
8070   ins_cost(300);
8071   format %{ "imulq   $dst, $src, $imm\t# mulExact long" %}
8072   ins_encode %{
8073     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
8074   %}
8075   ins_pipe(ialu_reg_reg_alu0);
8076 %}
8077 
// Exact long multiply (MulExactL) with the second factor folded in from
// memory via LoadL. Costed at 350; cr is declared as a DEF.
8078 instruct mulExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
8079 %{
8080   match(MulExactL dst (LoadL src));
8081   effect(DEF cr);
8082 
8083   ins_cost(350);
8084   format %{ "imulq   $dst, $src\t# mulExact long" %}
8085   ins_encode %{
8086     __ imulq($dst$$Register, $src$$Address);
8087   %}
8088   ins_pipe(ialu_reg_mem_alu0);
8089 %}
8090 
// Int divide: matches rax = DivI(rax, div) with the divisor kept out of
// rax/rdx; rdx and cr are declared killed.
// The format text documents the emitted sequence: when rax is 0x80000000 and
// the divisor is -1, idivl is skipped (rdx is zeroed, rax left as is);
// otherwise cdql followed by idivl. The guard is presumably produced by
// cdql_enc (defined elsewhere) -- confirm; F7 /7 below is the idivl itself.
8091 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8092                    rFlagsReg cr)
8093 %{
8094   match(Set rax (DivI rax div));
8095   effect(KILL rdx, KILL cr);
8096 
8097   ins_cost(30*100+10*100); // XXX
8098   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8099             "jne,s   normal\n\t"
8100             "xorl    rdx, rdx\n\t"
8101             "cmpl    $div, -1\n\t"
8102             "je,s    done\n"
8103     "normal: cdql\n\t"
8104             "idivl   $div\n"
8105     "done:"        %}
8106   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8107   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8108   ins_pipe(ialu_reg_reg_alu0);
8109 %}
8110 
// Long divide: matches rax = DivL(rax, div) with the divisor kept out of
// rax/rdx; rdx and cr are declared killed.
// The format text documents the emitted sequence: rdx is loaded with
// 0x8000000000000000 and compared with rax; if equal and the divisor is -1,
// idivq is skipped, otherwise cdqq followed by idivq. The guard is presumably
// produced by cdqq_enc (defined elsewhere) -- confirm.
8111 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8112                    rFlagsReg cr)
8113 %{
8114   match(Set rax (DivL rax div));
8115   effect(KILL rdx, KILL cr);
8116 
8117   ins_cost(30*100+10*100); // XXX
8118   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8119             "cmpq    rax, rdx\n\t"
8120             "jne,s   normal\n\t"
8121             "xorl    rdx, rdx\n\t"
8122             "cmpq    $div, -1\n\t"
8123             "je,s    done\n"
8124     "normal: cdqq\n\t"
8125             "idivq   $div\n"
8126     "done:"        %}
8127   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8128   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8129   ins_pipe(ialu_reg_reg_alu0);
8130 %}
8131 
8132 // Integer DIVMOD with Register, both quotient and mod results
// Matches the DivModI node (no Set): rdx is listed as an operand but, unlike
// in divI_rReg, is not declared killed -- presumably because it carries the
// remainder result (confirm). Same format text and encoding as divI_rReg,
// but scheduled with pipe_slow.
8133 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8134                              rFlagsReg cr)
8135 %{
8136   match(DivModI rax div);
8137   effect(KILL cr);
8138 
8139   ins_cost(30*100+10*100); // XXX
8140   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8141             "jne,s   normal\n\t"
8142             "xorl    rdx, rdx\n\t"
8143             "cmpl    $div, -1\n\t"
8144             "je,s    done\n"
8145     "normal: cdql\n\t"
8146             "idivl   $div\n"
8147     "done:"        %}
8148   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8149   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8150   ins_pipe(pipe_slow);
8151 %}
8152 
8153 // Long DIVMOD with Register, both quotient and mod results
// Matches the DivModL node (no Set): rdx is listed as an operand but, unlike
// in divL_rReg, is not declared killed -- presumably because it carries the
// remainder result (confirm). Same format text and encoding as divL_rReg,
// but scheduled with pipe_slow.
8154 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8155                              rFlagsReg cr)
8156 %{
8157   match(DivModL rax div);
8158   effect(KILL cr);
8159 
8160   ins_cost(30*100+10*100); // XXX
8161   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8162             "cmpq    rax, rdx\n\t"
8163             "jne,s   normal\n\t"
8164             "xorl    rdx, rdx\n\t"
8165             "cmpq    $div, -1\n\t"
8166             "je,s    done\n"
8167     "normal: cdqq\n\t"
8168             "idivq   $div\n"
8169     "done:"        %}
8170   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8171   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8172   ins_pipe(pipe_slow);
8173 %}
8174 
8175 //----------- DivL-By-Constant-Expansions--------------------------------------
8176 // DivI cases are handled by the compiler
8177 
8178 // Magic constant, reciprocal of 10
// Expand-only helper (no match rule): defines dst with the 64-bit constant
// 0x6666666666666667 for the divL_10 expansion. The format string spells out
// all 16 hex digits so the printed disassembly agrees with the value that
// load_immL actually encodes.
8179 instruct loadConL_0x6666666666666667(rRegL dst)
8180 %{
8181   effect(DEF dst);
8182 
8183   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8184   ins_encode(load_immL(dst, 0x6666666666666667));
8185   ins_pipe(ialu_reg);
8186 %}
8187 
// Expand-only helper (no match rule) for divL_10: one-operand F7 /5 on src.
// Declares dst (rdx_RegL class) as defined, src as used, rax as used and
// killed, and cr as killed.
8188 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8189 %{
8190   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8191 
8192   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8193   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8194   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8195   ins_pipe(ialu_reg_reg_alu0);
8196 %}
8197 
// Expand-only helper (no match rule) for divL_10: arithmetic right shift of
// dst by the fixed count 63 (0x3F), in place (USE_DEF dst). cr is killed.
8198 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8199 %{
8200   effect(USE_DEF dst, KILL cr);
8201 
8202   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8203   opcode(0xC1, 0x7); /* C1 /7 ib */
8204   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8205   ins_pipe(ialu_reg);
8206 %}
8207 
// Expand-only helper (no match rule) for divL_10: arithmetic right shift of
// dst by the fixed count 2, in place (USE_DEF dst). cr is killed.
8208 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8209 %{
8210   effect(USE_DEF dst, KILL cr);
8211 
8212   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8213   opcode(0xC1, 0x7); /* C1 /7 ib */
8214   ins_encode(reg_opc_imm_wide(dst, 0x2));
8215   ins_pipe(ialu_reg);
8216 %}
8217 
// Long divide by the constant 10 (immL10 operand) without idiv: matches
// dst = DivL(src, div) and expands into a multiply-high by the magic constant
// followed by shifts and a subtract. Costed at (5+8)*100, below the generic
// divL_rReg cost.
// NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so the third step
// overwrites src in place -- confirm how the expansion preserves the input.
8218 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8219 %{
8220   match(Set dst (DivL src div));
8221 
8222   ins_cost((5+8)*100);
8223   expand %{
8224     rax_RegL rax;                     // Killed temp
8225     rFlagsReg cr;                     // Killed
8226     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8227     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8228     sarL_rReg_63(src, cr);            // sarq  src, 63
8229     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8230     subL_rReg(dst, src, cr);          // subq  rdx, src
8231   %}
8232 %}
8233 
8234 //-----------------------------------------------------------------------------
8235 
// Int remainder: matches rdx = ModI(rax, div) with the divisor kept out of
// rax/rdx; rax and cr are declared killed. Format text and encoding are the
// same guarded cdql/idivl sequence as divI_rReg; only the result register
// (rdx instead of rax) and the cost differ.
8236 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8237                    rFlagsReg cr)
8238 %{
8239   match(Set rdx (ModI rax div));
8240   effect(KILL rax, KILL cr);
8241 
8242   ins_cost(300); // XXX
8243   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8244             "jne,s   normal\n\t"
8245             "xorl    rdx, rdx\n\t"
8246             "cmpl    $div, -1\n\t"
8247             "je,s    done\n"
8248     "normal: cdql\n\t"
8249             "idivl   $div\n"
8250     "done:"        %}
8251   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8252   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8253   ins_pipe(ialu_reg_reg_alu0);
8254 %}
8255 
// Long remainder: matches rdx = ModL(rax, div) with the divisor kept out of
// rax/rdx; rax and cr are declared killed. Format text and encoding are the
// same guarded cdqq/idivq sequence as divL_rReg; only the result register
// (rdx instead of rax) and the cost differ.
8256 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8257                    rFlagsReg cr)
8258 %{
8259   match(Set rdx (ModL rax div));
8260   effect(KILL rax, KILL cr);
8261 
8262   ins_cost(300); // XXX
8263   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8264             "cmpq    rax, rdx\n\t"
8265             "jne,s   normal\n\t"
8266             "xorl    rdx, rdx\n\t"
8267             "cmpq    $div, -1\n\t"
8268             "je,s    done\n"
8269     "normal: cdqq\n\t"
8270             "idivq   $div\n"
8271     "done:"        %}
8272   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8273   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8274   ins_pipe(ialu_reg_reg_alu0);
8275 %}
8276 
8277 // Integer Shift Instructions
8278 // Shift Left by one
// Int register form: matches dst = LShiftI(dst, shift) with shift an immI1
// operand. D1 /4 carries no immediate byte; cr is declared killed.
8279 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8280 %{
8281   match(Set dst (LShiftI dst shift));
8282   effect(KILL cr);
8283 
8284   format %{ "sall    $dst, $shift" %}
8285   opcode(0xD1, 0x4); /* D1 /4 */
8286   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8287   ins_pipe(ialu_reg);
8288 %}
8289 
8290 // Shift Left by one
// Int memory form: matches a StoreI to dst of LShiftI(LoadI(dst), shift) with
// shift an immI1 operand, i.e. a read-modify-write of one memory location.
8291 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8292 %{
8293   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8294   effect(KILL cr);
8295 
8296   format %{ "sall    $dst, $shift\t" %}
8297   opcode(0xD1, 0x4); /* D1 /4 */
8298   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8299   ins_pipe(ialu_mem_imm);
8300 %}
8301 
8302 // Shift Left by 8-bit immediate
// Int register form: matches dst = LShiftI(dst, shift) with shift an immI8
// operand; encoded as C1 /4 ib through reg_opc_imm. cr is declared killed.
8303 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8304 %{
8305   match(Set dst (LShiftI dst shift));
8306   effect(KILL cr);
8307 
8308   format %{ "sall    $dst, $shift" %}
8309   opcode(0xC1, 0x4); /* C1 /4 ib */
8310   ins_encode(reg_opc_imm(dst, shift));
8311   ins_pipe(ialu_reg);
8312 %}
8313 
8314 // Shift Left by 8-bit immediate
// Int memory form: matches a StoreI to dst of LShiftI(LoadI(dst), shift) with
// shift an immI8 operand; the count is appended after the C1 /4 r/m bytes.
8315 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8316 %{
8317   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8318   effect(KILL cr);
8319 
8320   format %{ "sall    $dst, $shift" %}
8321   opcode(0xC1, 0x4); /* C1 /4 ib */
8322   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8323   ins_pipe(ialu_mem_imm);
8324 %}
8325 
8326 // Shift Left by variable
// Int register form: matches dst = LShiftI(dst, shift) with the count pinned
// to the rcx_RegI class; D3 /4 encodes only dst, the count is implicit.
8327 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8328 %{
8329   match(Set dst (LShiftI dst shift));
8330   effect(KILL cr);
8331 
8332   format %{ "sall    $dst, $shift" %}
8333   opcode(0xD3, 0x4); /* D3 /4 */
8334   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8335   ins_pipe(ialu_reg_reg);
8336 %}
8337 
8338 // Shift Left by variable
// Int memory form: matches a StoreI to dst of LShiftI(LoadI(dst), shift) with
// the count pinned to the rcx_RegI class (implicit in D3 /4).
8339 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8340 %{
8341   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8342   effect(KILL cr);
8343 
8344   format %{ "sall    $dst, $shift" %}
8345   opcode(0xD3, 0x4); /* D3 /4 */
8346   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8347   ins_pipe(ialu_mem_reg);
8348 %}
8349 
8350 // Arithmetic shift right by one
// Int register form: matches dst = RShiftI(dst, shift) with shift an immI1
// operand. D1 /7 carries no immediate byte; cr is declared killed.
8351 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8352 %{
8353   match(Set dst (RShiftI dst shift));
8354   effect(KILL cr);
8355 
8356   format %{ "sarl    $dst, $shift" %}
8357   opcode(0xD1, 0x7); /* D1 /7 */
8358   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8359   ins_pipe(ialu_reg);
8360 %}
8361 
8362 // Arithmetic shift right by one
// Int memory form: matches a StoreI to dst of RShiftI(LoadI(dst), shift) with
// shift an immI1 operand, i.e. a read-modify-write of one memory location.
8363 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8364 %{
8365   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8366   effect(KILL cr);
8367 
8368   format %{ "sarl    $dst, $shift" %}
8369   opcode(0xD1, 0x7); /* D1 /7 */
8370   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8371   ins_pipe(ialu_mem_imm);
8372 %}
8373 
8374 // Arithmetic Shift Right by 8-bit immediate
// Int register form: matches dst = RShiftI(dst, shift) with shift an immI8
// operand; encoded as C1 /7 ib through reg_opc_imm. cr is declared killed.
// No memory operand is involved, so this uses the ialu_reg pipeline class,
// the same class as the salI_rReg_imm and shrI_rReg_imm rules.
8375 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8376 %{
8377   match(Set dst (RShiftI dst shift));
8378   effect(KILL cr);
8379 
8380   format %{ "sarl    $dst, $shift" %}
8381   opcode(0xC1, 0x7); /* C1 /7 ib */
8382   ins_encode(reg_opc_imm(dst, shift));
8383   ins_pipe(ialu_reg);
8384 %}
8385 
8386 // Arithmetic Shift Right by 8-bit immediate
// Int memory form: matches a StoreI to dst of RShiftI(LoadI(dst), shift) with
// shift an immI8 operand; the count is appended after the C1 /7 r/m bytes.
8387 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8388 %{
8389   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8390   effect(KILL cr);
8391 
8392   format %{ "sarl    $dst, $shift" %}
8393   opcode(0xC1, 0x7); /* C1 /7 ib */
8394   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8395   ins_pipe(ialu_mem_imm);
8396 %}
8397 
8398 // Arithmetic Shift Right by variable
// Int register form: matches dst = RShiftI(dst, shift) with the count pinned
// to the rcx_RegI class; D3 /7 encodes only dst, the count is implicit.
8399 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8400 %{
8401   match(Set dst (RShiftI dst shift));
8402   effect(KILL cr);
8403 
8404   format %{ "sarl    $dst, $shift" %}
8405   opcode(0xD3, 0x7); /* D3 /7 */
8406   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8407   ins_pipe(ialu_reg_reg);
8408 %}
8409 
8410 // Arithmetic Shift Right by variable
// Int memory form: matches a StoreI to dst of RShiftI(LoadI(dst), shift) with
// the count pinned to the rcx_RegI class (implicit in D3 /7).
8411 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8412 %{
8413   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8414   effect(KILL cr);
8415 
8416   format %{ "sarl    $dst, $shift" %}
8417   opcode(0xD3, 0x7); /* D3 /7 */
8418   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8419   ins_pipe(ialu_mem_reg);
8420 %}
8421 
8422 // Logical shift right by one
// Int register form: matches dst = URShiftI(dst, shift) with shift an immI1
// operand. D1 /5 carries no immediate byte; cr is declared killed.
8423 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8424 %{
8425   match(Set dst (URShiftI dst shift));
8426   effect(KILL cr);
8427 
8428   format %{ "shrl    $dst, $shift" %}
8429   opcode(0xD1, 0x5); /* D1 /5 */
8430   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8431   ins_pipe(ialu_reg);
8432 %}
8433 
8434 // Logical shift right by one
// Int memory form: matches a StoreI to dst of URShiftI(LoadI(dst), shift) with
// shift an immI1 operand, i.e. a read-modify-write of one memory location.
8435 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8436 %{
8437   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8438   effect(KILL cr);
8439 
8440   format %{ "shrl    $dst, $shift" %}
8441   opcode(0xD1, 0x5); /* D1 /5 */
8442   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8443   ins_pipe(ialu_mem_imm);
8444 %}
8445 
8446 // Logical Shift Right by 8-bit immediate
// Int register form: matches dst = URShiftI(dst, shift) with shift an immI8
// operand; encoded as C1 /5 ib through reg_opc_imm. cr is declared killed.
8447 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8448 %{
8449   match(Set dst (URShiftI dst shift));
8450   effect(KILL cr);
8451 
8452   format %{ "shrl    $dst, $shift" %}
8453   opcode(0xC1, 0x5); /* C1 /5 ib */
8454   ins_encode(reg_opc_imm(dst, shift));
8455   ins_pipe(ialu_reg);
8456 %}
8457 
8458 // Logical Shift Right by 8-bit immediate
// Int memory form: matches a StoreI to dst of URShiftI(LoadI(dst), shift) with
// shift an immI8 operand; the count is appended after the C1 /5 r/m bytes.
8459 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8460 %{
8461   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8462   effect(KILL cr);
8463 
8464   format %{ "shrl    $dst, $shift" %}
8465   opcode(0xC1, 0x5); /* C1 /5 ib */
8466   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8467   ins_pipe(ialu_mem_imm);
8468 %}
8469 
8470 // Logical Shift Right by variable
// Int register form: matches dst = URShiftI(dst, shift) with the count pinned
// to the rcx_RegI class; D3 /5 encodes only dst, the count is implicit.
8471 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8472 %{
8473   match(Set dst (URShiftI dst shift));
8474   effect(KILL cr);
8475 
8476   format %{ "shrl    $dst, $shift" %}
8477   opcode(0xD3, 0x5); /* D3 /5 */
8478   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8479   ins_pipe(ialu_reg_reg);
8480 %}
8481 
8482 // Logical Shift Right by variable
// Int memory form: matches a StoreI to dst of URShiftI(LoadI(dst), shift) with
// the count pinned to the rcx_RegI class (implicit in D3 /5).
8483 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8484 %{
8485   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8486   effect(KILL cr);
8487 
8488   format %{ "shrl    $dst, $shift" %}
8489   opcode(0xD3, 0x5); /* D3 /5 */
8490   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8491   ins_pipe(ialu_mem_reg);
8492 %}
8493 
8494 // Long Shift Instructions
8495 // Shift Left by one
// Long register form: matches dst = LShiftL(dst, shift) with shift an immI1
// operand (the count is an int even for long shifts). D1 /4 with wide REX.
8496 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8497 %{
8498   match(Set dst (LShiftL dst shift));
8499   effect(KILL cr);
8500 
8501   format %{ "salq    $dst, $shift" %}
8502   opcode(0xD1, 0x4); /* D1 /4 */
8503   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8504   ins_pipe(ialu_reg);
8505 %}
8506 
8507 // Shift Left by one
// Long memory form: matches a StoreL to dst of LShiftL(LoadL(dst), shift) with
// shift an immI1 operand, i.e. a read-modify-write of one memory location.
8508 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8509 %{
8510   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8511   effect(KILL cr);
8512 
8513   format %{ "salq    $dst, $shift" %}
8514   opcode(0xD1, 0x4); /* D1 /4 */
8515   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8516   ins_pipe(ialu_mem_imm);
8517 %}
8518 
8519 // Shift Left by 8-bit immediate
// Long register form: matches dst = LShiftL(dst, shift) with shift an immI8
// operand; encoded as C1 /4 ib through reg_opc_imm_wide. cr is killed.
8520 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8521 %{
8522   match(Set dst (LShiftL dst shift));
8523   effect(KILL cr);
8524 
8525   format %{ "salq    $dst, $shift" %}
8526   opcode(0xC1, 0x4); /* C1 /4 ib */
8527   ins_encode(reg_opc_imm_wide(dst, shift));
8528   ins_pipe(ialu_reg);
8529 %}
8530 
8531 // Shift Left by 8-bit immediate
// Long memory form: matches a StoreL to dst of LShiftL(LoadL(dst), shift) with
// shift an immI8 operand; the count is appended after the C1 /4 r/m bytes.
8532 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8533 %{
8534   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8535   effect(KILL cr);
8536 
8537   format %{ "salq    $dst, $shift" %}
8538   opcode(0xC1, 0x4); /* C1 /4 ib */
8539   ins_encode(REX_mem_wide(dst), OpcP,
8540              RM_opc_mem(secondary, dst), Con8or32(shift));
8541   ins_pipe(ialu_mem_imm);
8542 %}
8543 
8544 // Shift Left by variable
// Long register form: matches dst = LShiftL(dst, shift) with the count pinned
// to the rcx_RegI class; D3 /4 with wide REX encodes only dst.
8545 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8546 %{
8547   match(Set dst (LShiftL dst shift));
8548   effect(KILL cr);
8549 
8550   format %{ "salq    $dst, $shift" %}
8551   opcode(0xD3, 0x4); /* D3 /4 */
8552   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8553   ins_pipe(ialu_reg_reg);
8554 %}
8555 
8556 // Shift Left by variable
// Long memory form: matches a StoreL to dst of LShiftL(LoadL(dst), shift) with
// the count pinned to the rcx_RegI class (implicit in D3 /4).
8557 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8558 %{
8559   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8560   effect(KILL cr);
8561 
8562   format %{ "salq    $dst, $shift" %}
8563   opcode(0xD3, 0x4); /* D3 /4 */
8564   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8565   ins_pipe(ialu_mem_reg);
8566 %}
8567 
8568 // Arithmetic shift right by one
// Long register form: matches dst = RShiftL(dst, shift) with shift an immI1
// operand. D1 /7 with wide REX; cr is declared killed.
8569 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8570 %{
8571   match(Set dst (RShiftL dst shift));
8572   effect(KILL cr);
8573 
8574   format %{ "sarq    $dst, $shift" %}
8575   opcode(0xD1, 0x7); /* D1 /7 */
8576   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8577   ins_pipe(ialu_reg);
8578 %}
8579 
8580 // Arithmetic shift right by one
// Long memory form: matches a StoreL to dst of RShiftL(LoadL(dst), shift) with
// shift an immI1 operand, i.e. a read-modify-write of one memory location.
8581 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8582 %{
8583   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8584   effect(KILL cr);
8585 
8586   format %{ "sarq    $dst, $shift" %}
8587   opcode(0xD1, 0x7); /* D1 /7 */
8588   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8589   ins_pipe(ialu_mem_imm);
8590 %}
8591 
8592 // Arithmetic Shift Right by 8-bit immediate
// Long register form: matches dst = RShiftL(dst, shift) with shift an immI8
// operand; encoded as C1 /7 ib through reg_opc_imm_wide. cr is killed.
// No memory operand is involved, so this uses the ialu_reg pipeline class,
// the same class as the salL_rReg_imm and shrL_rReg_imm rules.
8593 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8594 %{
8595   match(Set dst (RShiftL dst shift));
8596   effect(KILL cr);
8597 
8598   format %{ "sarq    $dst, $shift" %}
8599   opcode(0xC1, 0x7); /* C1 /7 ib */
8600   ins_encode(reg_opc_imm_wide(dst, shift));
8601   ins_pipe(ialu_reg);
8602 %}
8603 
8604 // Arithmetic Shift Right by 8-bit immediate
// Long memory form: matches a StoreL to dst of RShiftL(LoadL(dst), shift) with
// shift an immI8 operand; the count is appended after the C1 /7 r/m bytes.
8605 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8606 %{
8607   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8608   effect(KILL cr);
8609 
8610   format %{ "sarq    $dst, $shift" %}
8611   opcode(0xC1, 0x7); /* C1 /7 ib */
8612   ins_encode(REX_mem_wide(dst), OpcP,
8613              RM_opc_mem(secondary, dst), Con8or32(shift));
8614   ins_pipe(ialu_mem_imm);
8615 %}
8616 
8617 // Arithmetic Shift Right by variable
// Long register form: matches dst = RShiftL(dst, shift) with the count pinned
// to the rcx_RegI class; D3 /7 with wide REX encodes only dst.
8618 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8619 %{
8620   match(Set dst (RShiftL dst shift));
8621   effect(KILL cr);
8622 
8623   format %{ "sarq    $dst, $shift" %}
8624   opcode(0xD3, 0x7); /* D3 /7 */
8625   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8626   ins_pipe(ialu_reg_reg);
8627 %}
8628 
8629 // Arithmetic Shift Right by variable
// Long memory form: matches a StoreL to dst of RShiftL(LoadL(dst), shift) with
// the count pinned to the rcx_RegI class (implicit in D3 /7).
8630 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8631 %{
8632   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8633   effect(KILL cr);
8634 
8635   format %{ "sarq    $dst, $shift" %}
8636   opcode(0xD3, 0x7); /* D3 /7 */
8637   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8638   ins_pipe(ialu_mem_reg);
8639 %}
8640 
8641 // Logical shift right by one
// Long register form: matches dst = URShiftL(dst, shift) with shift an immI1
// operand. D1 /5 with wide REX; cr is declared killed.
8642 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8643 %{
8644   match(Set dst (URShiftL dst shift));
8645   effect(KILL cr);
8646 
8647   format %{ "shrq    $dst, $shift" %}
8648   opcode(0xD1, 0x5); /* D1 /5 */
8649   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8650   ins_pipe(ialu_reg);
8651 %}
8652 
8653 // Logical shift right by one
// Long memory form: matches a StoreL to dst of URShiftL(LoadL(dst), shift)
// with shift an immI1 operand, i.e. a read-modify-write of one location.
8654 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8655 %{
8656   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8657   effect(KILL cr);
8658 
8659   format %{ "shrq    $dst, $shift" %}
8660   opcode(0xD1, 0x5); /* D1 /5 */
8661   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8662   ins_pipe(ialu_mem_imm);
8663 %}
8664 
8665 // Logical Shift Right by 8-bit immediate
// Long register form: matches dst = URShiftL(dst, shift) with shift an immI8
// operand; encoded as C1 /5 ib through reg_opc_imm_wide. cr is killed.
8666 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8667 %{
8668   match(Set dst (URShiftL dst shift));
8669   effect(KILL cr);
8670 
8671   format %{ "shrq    $dst, $shift" %}
8672   opcode(0xC1, 0x5); /* C1 /5 ib */
8673   ins_encode(reg_opc_imm_wide(dst, shift));
8674   ins_pipe(ialu_reg);
8675 %}
8676 
8677 
8678 // Logical Shift Right by 8-bit immediate
// Long memory form: matches a StoreL to dst of URShiftL(LoadL(dst), shift)
// with shift an immI8 operand; the count follows the C1 /5 r/m bytes.
8679 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8680 %{
8681   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8682   effect(KILL cr);
8683 
8684   format %{ "shrq    $dst, $shift" %}
8685   opcode(0xC1, 0x5); /* C1 /5 ib */
8686   ins_encode(REX_mem_wide(dst), OpcP,
8687              RM_opc_mem(secondary, dst), Con8or32(shift));
8688   ins_pipe(ialu_mem_imm);
8689 %}
8690 
8691 // Logical Shift Right by variable
// Long register form: matches dst = URShiftL(dst, shift) with the count
// pinned to the rcx_RegI class; D3 /5 with wide REX encodes only dst.
8692 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8693 %{
8694   match(Set dst (URShiftL dst shift));
8695   effect(KILL cr);
8696 
8697   format %{ "shrq    $dst, $shift" %}
8698   opcode(0xD3, 0x5); /* D3 /5 */
8699   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8700   ins_pipe(ialu_reg_reg);
8701 %}
8702 
8703 // Logical Shift Right by variable
// Long memory form: matches a StoreL to dst of URShiftL(LoadL(dst), shift)
// with the count pinned to the rcx_RegI class (implicit in D3 /5).
8704 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8705 %{
8706   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8707   effect(KILL cr);
8708 
8709   format %{ "shrq    $dst, $shift" %}
8710   opcode(0xD3, 0x5); /* D3 /5 */
8711   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8712   ins_pipe(ialu_mem_reg);
8713 %}
8714 
8715 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8716 // This idiom is used by the compiler for the i2b bytecode.
// Matches dst = RShiftI(LShiftI(src, 24), 24) (both counts are the same
// immI_24 operand) and replaces the pair with one 0F BE move (movsbl). No
// flags operand is declared. The REX helper used is the byte-register variant.
8717 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8718 %{
8719   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8720 
8721   format %{ "movsbl  $dst, $src\t# i2b" %}
8722   opcode(0x0F, 0xBE);
8723   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8724   ins_pipe(ialu_reg_reg);
8725 %}
8726 
8727 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8728 // This idiom is used by the compiler for the i2s bytecode.
// Matches dst = RShiftI(LShiftI(src, 16), 16) (both counts are the same
// immI_16 operand) and replaces the pair with one 0F BF move (movswl). No
// flags operand is declared.
8729 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8730 %{
8731   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8732 
8733   format %{ "movswl  $dst, $src\t# i2s" %}
8734   opcode(0x0F, 0xBF);
8735   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8736   ins_pipe(ialu_reg_reg);
8737 %}
8738 
8739 // ROL/ROR instructions
8740 
8741 // ROL expand
// Expand-only helper (no match rule): rotate dst left by one in place using
// D1 /0. Invoked from rolI_rReg_i1; dst is USE_DEF and cr is killed.
8742 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
8743   effect(KILL cr, USE_DEF dst);
8744 
8745   format %{ "roll    $dst" %}
8746   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8747   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8748   ins_pipe(ialu_reg);
8749 %}
8750 
// Expand-only helper (no match rule): rotate dst left in place by the immI8
// count using C1 /0 ib. Invoked from rolI_rReg_i8; cr is killed.
8751 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
8752   effect(USE_DEF dst, USE shift, KILL cr);
8753 
8754   format %{ "roll    $dst, $shift" %}
8755   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8756   ins_encode( reg_opc_imm(dst, shift) );
8757   ins_pipe(ialu_reg);
8758 %}
8759 
// Expand-only helper (no match rule): rotate dst left in place by a variable
// count pinned to the rcx_RegI class, using D3 /0. dst is restricted to the
// no_rcx_RegI class so it cannot share the count's register. Invoked from the
// rolI_rReg_Var_C0 and rolI_rReg_Var_C32 rules.
8760 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8761 %{
8762   effect(USE_DEF dst, USE shift, KILL cr);
8763 
8764   format %{ "roll    $dst, $shift" %}
8765   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8766   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8767   ins_pipe(ialu_reg_reg);
8768 %}
8769 // end of ROL expand
8770 
8771 // Rotate Left by one
// Recognizes the shift/or idiom dst = (dst << lshift) | (dst >>> rshift) with
// lshift an immI1 operand and rshift an immI_M1 operand (presumably the
// constant -1 -- confirm against the operand definition), and expands to the
// single rolI_rReg_imm1 instruction.
8772 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8773 %{
8774   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8775 
8776   expand %{
8777     rolI_rReg_imm1(dst, cr);
8778   %}
8779 %}
8780 
8781 // Rotate Left by 8-bit immediate
// Recognizes dst = (dst << lshift) | (dst >>> rshift) for two immI8 counts.
// The predicate only accepts the pattern when the two constants sum to a
// multiple of 32 (low five bits of the sum are zero), which is what makes the
// pair of shifts a rotate. Expands to rolI_rReg_imm8 using the left count.
8782 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8783 %{
8784   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8785   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8786 
8787   expand %{
8788     rolI_rReg_imm8(dst, lshift, cr);
8789   %}
8790 %}
8791 
8792 // Rotate Left by variable: matches (dst << shift) | (dst >>> (0 - shift))
8793 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8794 %{
8795   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8796 
8797   expand %{
8798     rolI_rReg_CL(dst, shift, cr);
8799   %}
8800 %}
8801 
8802 // Rotate Left by variable: matches (dst << shift) | (dst >>> (32 - shift))
8803 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8804 %{
8805   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8806 
8807   expand %{
8808     rolI_rReg_CL(dst, shift, cr);
8809   %}
8810 %}
8811 
8812 // ROR expand: helper instructs without a match rule, referenced from the expand blocks further down
8813 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8814 %{ // 32-bit rotate right by one; used by rorI_rReg_i1
8815   effect(USE_DEF dst, KILL cr); // dst is both read and written; the flags register is killed
8816 
8817   format %{ "rorl    $dst" %}
8818   opcode(0xD1, 0x1); /* D1 /1 */
8819   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8820   ins_pipe(ialu_reg);
8821 %}
8822 
8823 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8824 %{ // no match rule; used by the expand of rorI_rReg_i8
8825   effect(USE_DEF dst, USE shift, KILL cr); // dst rotated in place by the immediate; flags killed
8826 
8827   format %{ "rorl    $dst, $shift" %}
8828   opcode(0xC1, 0x1); /* C1 /1 ib */
8829   ins_encode(reg_opc_imm(dst, shift));
8830   ins_pipe(ialu_reg);
8831 %}
8832 
8833 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8834 %{ // no match rule; used by the expands of rorI_rReg_Var_C0 and rorI_rReg_Var_C32
8835   effect(USE_DEF dst, USE shift, KILL cr); // shift operand is of class rcx_RegI, dst of class no_rcx_RegI
8836 
8837   format %{ "rorl    $dst, $shift" %}
8838   opcode(0xD3, 0x1); /* D3 /1 */
8839   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // only dst is encoded; shift does not appear in the encoding
8840   ins_pipe(ialu_reg_reg);
8841 %}
8842 // end of ROR expand
8843 
8844 // Rotate Right by one: matches (dst >>> rshift) | (dst << lshift) with immI1 / immI_M1 constants
8845 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8846 %{
8847   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8848 
8849   expand %{
8850     rorI_rReg_imm1(dst, cr);
8851   %}
8852 %}
8853 
8854 // Rotate Right by 8-bit immediate: the predicate requires (rshift + lshift) & 0x1f == 0; only rshift is passed on
8855 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8856 %{
8857   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8858   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8859 
8860   expand %{
8861     rorI_rReg_imm8(dst, rshift, cr);
8862   %}
8863 %}
8864 
8865 // Rotate Right by variable: matches (dst >>> shift) | (dst << (0 - shift))
8866 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8867 %{
8868   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8869 
8870   expand %{
8871     rorI_rReg_CL(dst, shift, cr);
8872   %}
8873 %}
8874 
8875 // Rotate Right by variable: matches (dst >>> shift) | (dst << (32 - shift))
8876 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8877 %{
8878   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8879 
8880   expand %{
8881     rorI_rReg_CL(dst, shift, cr);
8882   %}
8883 %}
8884 
8885 // Long (64-bit) rotates: same structure as the int rotates above, using rRegL operands
8886 // ROL expand: helper instructs without a match rule, referenced from the expand blocks further down
8887 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{ // 64-bit rotate left by one; used by rolL_rReg_i1
8888   effect(USE_DEF dst, KILL cr); // dst is both read and written; the flags register is killed
8889 
8890   format %{ "rolq    $dst" %}
8891   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8892   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8893   ins_pipe(ialu_reg);
8894 %}
8895 
8896 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{ // no match rule; used by the expand of rolL_rReg_i8
8897   effect(USE_DEF dst, USE shift, KILL cr); // dst rotated in place by the immediate; flags killed
8898 
8899   format %{ "rolq    $dst, $shift" %}
8900   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8901   ins_encode( reg_opc_imm_wide(dst, shift) );
8902   ins_pipe(ialu_reg);
8903 %}
8904 
8905 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8906 %{ // no match rule; used by the expands of rolL_rReg_Var_C0 and rolL_rReg_Var_C64
8907   effect(USE_DEF dst, USE shift, KILL cr); // shift operand is of class rcx_RegI, dst of class no_rcx_RegL
8908 
8909   format %{ "rolq    $dst, $shift" %}
8910   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8911   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // only dst is encoded; shift does not appear in the encoding
8912   ins_pipe(ialu_reg_reg);
8913 %}
8914 // end of ROL expand
8915 
8916 // Rotate Left by one (long): matches (dst << lshift) | (dst >>> rshift) with immI1 / immI_M1 constants
8917 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8918 %{
8919   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8920 
8921   expand %{
8922     rolL_rReg_imm1(dst, cr);
8923   %}
8924 %}
8925 
8926 // Rotate Left by 8-bit immediate (long): the predicate requires (lshift + rshift) & 0x3f == 0; only lshift is passed on
8927 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8928 %{
8929   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8930   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8931 
8932   expand %{
8933     rolL_rReg_imm8(dst, lshift, cr);
8934   %}
8935 %}
8936 
8937 // Rotate Left by variable (long): matches (dst << shift) | (dst >>> (0 - shift))
8938 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8939 %{
8940   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8941 
8942   expand %{
8943     rolL_rReg_CL(dst, shift, cr);
8944   %}
8945 %}
8946 
8947 // Rotate Left by variable (long): matches (dst << shift) | (dst >>> (64 - shift))
8948 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8949 %{
8950   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8951 
8952   expand %{
8953     rolL_rReg_CL(dst, shift, cr);
8954   %}
8955 %}
8956 
8957 // ROR expand: helper instructs without a match rule, referenced from the expand blocks further down
8958 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8959 %{ // 64-bit rotate right by one; used by rorL_rReg_i1
8960   effect(USE_DEF dst, KILL cr); // dst is both read and written; the flags register is killed
8961 
8962   format %{ "rorq    $dst" %}
8963   opcode(0xD1, 0x1); /* D1 /1 */
8964   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8965   ins_pipe(ialu_reg);
8966 %}
8967 
8968 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8969 %{ // no match rule; used by the expand of rorL_rReg_i8
8970   effect(USE_DEF dst, USE shift, KILL cr); // dst rotated in place by the immediate; flags killed
8971 
8972   format %{ "rorq    $dst, $shift" %}
8973   opcode(0xC1, 0x1); /* C1 /1 ib */
8974   ins_encode(reg_opc_imm_wide(dst, shift));
8975   ins_pipe(ialu_reg);
8976 %}
8977 
8978 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8979 %{ // no match rule; used by the expands of rorL_rReg_Var_C0 and rorL_rReg_Var_C64
8980   effect(USE_DEF dst, USE shift, KILL cr); // shift operand is of class rcx_RegI, dst of class no_rcx_RegL
8981 
8982   format %{ "rorq    $dst, $shift" %}
8983   opcode(0xD3, 0x1); /* D3 /1 */
8984   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // only dst is encoded; shift does not appear in the encoding
8985   ins_pipe(ialu_reg_reg);
8986 %}
8987 // end of ROR expand
8988 
8989 // Rotate Right by one (long): matches (dst >>> rshift) | (dst << lshift) with immI1 / immI_M1 constants
8990 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8991 %{
8992   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8993 
8994   expand %{
8995     rorL_rReg_imm1(dst, cr);
8996   %}
8997 %}
8998 
8999 // Rotate Right by 8-bit immediate (long): the predicate requires (rshift + lshift) & 0x3f == 0; only rshift is passed on
9000 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9001 %{
9002   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9003   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9004 
9005   expand %{
9006     rorL_rReg_imm8(dst, rshift, cr);
9007   %}
9008 %}
9009 
9010 // Rotate Right by variable (long): matches (dst >>> shift) | (dst << (0 - shift))
9011 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9012 %{
9013   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9014 
9015   expand %{
9016     rorL_rReg_CL(dst, shift, cr);
9017   %}
9018 %}
9019 
9020 // Rotate Right by variable (long): matches (dst >>> shift) | (dst << (64 - shift))
9021 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9022 %{
9023   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9024 
9025   expand %{
9026     rorL_rReg_CL(dst, shift, cr);
9027   %}
9028 %}
9029 
9030 // Logical Instructions
9031 
9032 // Integer Logical Instructions
9033 
9034 // And Instructions
9035 // And Register with Register (int): dst = dst & src
9036 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9037 %{
9038   match(Set dst (AndI dst src));
9039   effect(KILL cr); // the flags register is declared killed
9040 
9041   format %{ "andl    $dst, $src\t# int" %}
9042   opcode(0x23);
9043   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9044   ins_pipe(ialu_reg_reg);
9045 %}
9046 
9047 // And Register with Immediate 255: emitted as a movzbl of dst onto itself (see format); no flags operand is declared
9048 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9049 %{
9050   match(Set dst (AndI dst src));
9051 
9052   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9053   opcode(0x0F, 0xB6);
9054   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9055   ins_pipe(ialu_reg);
9056 %}
9057 
9058 // And Register with Immediate 255 and promote to long: one movzbl covers both the mask and the ConvI2L
9059 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9060 %{
9061   match(Set dst (ConvI2L (AndI src mask)));
9062 
9063   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9064   opcode(0x0F, 0xB6);
9065   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the immediate itself is never encoded
9066   ins_pipe(ialu_reg);
9067 %}
9068 
9069 // And Register with Immediate 65535: emitted as a movzwl of dst onto itself (see format); no flags operand is declared
9070 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9071 %{
9072   match(Set dst (AndI dst src));
9073 
9074   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9075   opcode(0x0F, 0xB7);
9076   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9077   ins_pipe(ialu_reg);
9078 %}
9079 
9080 // And Register with Immediate 65535 and promote to long: one movzwl covers both the mask and the ConvI2L
9081 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9082 %{
9083   match(Set dst (ConvI2L (AndI src mask)));
9084 
9085   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9086   opcode(0x0F, 0xB7);
9087   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the immediate itself is never encoded
9088   ins_pipe(ialu_reg);
9089 %}
9090 
9091 // And Register with Immediate (int, general immI case): dst = dst & src
9092 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9093 %{
9094   match(Set dst (AndI dst src));
9095   effect(KILL cr); // the flags register is declared killed
9096 
9097   format %{ "andl    $dst, $src\t# int" %}
9098   opcode(0x81, 0x04); /* Opcode 81 /4 */
9099   ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): Con8or32 presumably picks an 8- or 32-bit immediate - confirm in the enc_class
9100   ins_pipe(ialu_reg);
9101 %}
9102 
9103 // And Register with Memory (int): dst = dst & LoadI(src), with the load folded into the instruction
9104 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9105 %{
9106   match(Set dst (AndI dst (LoadI src)));
9107   effect(KILL cr); // the flags register is declared killed
9108 
9109   ins_cost(125);
9110   format %{ "andl    $dst, $src\t# int" %}
9111   opcode(0x23);
9112   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9113   ins_pipe(ialu_reg_mem);
9114 %}
9115 
9116 // And Memory with Register (int): load, and, store back to the same memory operand in one instruction
9117 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9118 %{
9119   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9120   effect(KILL cr); // the flags register is declared killed
9121 
9122   ins_cost(150);
9123   format %{ "andl    $dst, $src\t# int" %}
9124   opcode(0x21); /* Opcode 21 /r */
9125   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9126   ins_pipe(ialu_mem_reg);
9127 %}
9128 
9129 // And Memory with Immediate (int): load, and with constant, store back to the same memory operand
9130 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9131 %{
9132   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9133   effect(KILL cr); // the flags register is declared killed
9134 
9135   ins_cost(125);
9136   format %{ "andl    $dst, $src\t# int" %}
9137   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9138   ins_encode(REX_mem(dst), OpcSE(src),
9139              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x4 from opcode() above (the /4 in the opcode comment)
9140   ins_pipe(ialu_mem_imm);
9141 %}
9142 
9143 // BMI1 instructions
9144 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{ // andnl, memory form
9145   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); // dst = (src1 ^ minus_1) & LoadI(src2)
9146   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9147   effect(KILL cr);
9148 
9149   ins_cost(125);
9150   format %{ "andnl  $dst, $src1, $src2" %}
9151 
9152   ins_encode %{
9153     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
9154   %}
9155   ins_pipe(ialu_reg_mem);
9156 %}
9157 
9158 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{ // andnl, register form
9159   match(Set dst (AndI (XorI src1 minus_1) src2)); // dst = (src1 ^ minus_1) & src2
9160   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9161   effect(KILL cr);
9162 
9163   format %{ "andnl  $dst, $src1, $src2" %}
9164 
9165   ins_encode %{
9166     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
9167   %}
9168   ins_pipe(ialu_reg);
9169 %}
9170 
9171 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{ // blsil, register form
9172   match(Set dst (AndI (SubI imm_zero src) src)); // dst = (0 - src) & src
9173   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9174   effect(KILL cr);
9175 
9176   format %{ "blsil  $dst, $src" %}
9177 
9178   ins_encode %{
9179     __ blsil($dst$$Register, $src$$Register);
9180   %}
9181   ins_pipe(ialu_reg);
9182 %}
9183 
9184 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{ // blsil, memory form
9185   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); // dst = (0 - LoadI(src)) & LoadI(src)
9186   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9187   effect(KILL cr);
9188 
9189   ins_cost(125);
9190   format %{ "blsil  $dst, $src" %}
9191 
9192   ins_encode %{
9193     __ blsil($dst$$Register, $src$$Address);
9194   %}
9195   ins_pipe(ialu_reg_mem);
9196 %}
9197 
9198 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9199 %{ // blsmskl, memory form
9200   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); // dst = (LoadI(src) + minus_1) ^ LoadI(src)
9201   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9202   effect(KILL cr);
9203 
9204   ins_cost(125);
9205   format %{ "blsmskl $dst, $src" %}
9206 
9207   ins_encode %{
9208     __ blsmskl($dst$$Register, $src$$Address);
9209   %}
9210   ins_pipe(ialu_reg_mem);
9211 %}
9212 
9213 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9214 %{ // blsmskl, register form
9215   match(Set dst (XorI (AddI src minus_1) src)); // dst = (src + minus_1) ^ src
9216   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9217   effect(KILL cr);
9218 
9219   format %{ "blsmskl $dst, $src" %}
9220 
9221   ins_encode %{
9222     __ blsmskl($dst$$Register, $src$$Register);
9223   %}
9224 
9225   ins_pipe(ialu_reg);
9226 %}
9227 
9228 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9229 %{ // blsrl, register form
9230   match(Set dst (AndI (AddI src minus_1) src) ); // dst = (src + minus_1) & src
9231   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9232   effect(KILL cr);
9233 
9234   format %{ "blsrl  $dst, $src" %}
9235 
9236   ins_encode %{
9237     __ blsrl($dst$$Register, $src$$Register);
9238   %}
9239 
9240   ins_pipe(ialu_reg); // register-only form: same pipe class as blsiI_rReg_rReg and blsmskI_rReg_rReg
9241 %}
9242 
9243 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9244 %{ // blsrl, memory form
9245   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); // dst = (LoadI(src) + minus_1) & LoadI(src)
9246   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9247   effect(KILL cr);
9248 
9249   ins_cost(125);
9250   format %{ "blsrl  $dst, $src" %}
9251 
9252   ins_encode %{
9253     __ blsrl($dst$$Register, $src$$Address);
9254   %}
9255 
9256   ins_pipe(ialu_reg_mem); // memory-source form: same pipe class as blsiI_rReg_mem and blsmskI_rReg_mem
9257 %}
9258 
9259 // Or Instructions
9260 // Or Register with Register (int): dst = dst | src
9261 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9262 %{
9263   match(Set dst (OrI dst src));
9264   effect(KILL cr); // the flags register is declared killed
9265 
9266   format %{ "orl     $dst, $src\t# int" %}
9267   opcode(0x0B);
9268   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9269   ins_pipe(ialu_reg_reg);
9270 %}
9271 
9272 // Or Register with Immediate (int): dst = dst | src
9273 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9274 %{
9275   match(Set dst (OrI dst src));
9276   effect(KILL cr); // the flags register is declared killed
9277 
9278   format %{ "orl     $dst, $src\t# int" %}
9279   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9280   ins_encode(OpcSErm(dst, src), Con8or32(src));
9281   ins_pipe(ialu_reg);
9282 %}
9283 
9284 // Or Register with Memory (int): dst = dst | LoadI(src), with the load folded into the instruction
9285 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9286 %{
9287   match(Set dst (OrI dst (LoadI src)));
9288   effect(KILL cr); // the flags register is declared killed
9289 
9290   ins_cost(125);
9291   format %{ "orl     $dst, $src\t# int" %}
9292   opcode(0x0B);
9293   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9294   ins_pipe(ialu_reg_mem);
9295 %}
9296 
9297 // Or Memory with Register (int): load, or, store back to the same memory operand in one instruction
9298 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9299 %{
9300   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9301   effect(KILL cr); // the flags register is declared killed
9302 
9303   ins_cost(150);
9304   format %{ "orl     $dst, $src\t# int" %}
9305   opcode(0x09); /* Opcode 09 /r */
9306   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9307   ins_pipe(ialu_mem_reg);
9308 %}
9309 
9310 // Or Memory with Immediate (int): load, or with constant, store back to the same memory operand
9311 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9312 %{
9313   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9314   effect(KILL cr); // the flags register is declared killed
9315 
9316   ins_cost(125);
9317   format %{ "orl     $dst, $src\t# int" %}
9318   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9319   ins_encode(REX_mem(dst), OpcSE(src),
9320              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x1 from opcode() above (the /1 in the opcode comment)
9321   ins_pipe(ialu_mem_imm);
9322 %}
9323 
9324 // Xor Instructions
9325 // Xor Register with Register (int): dst = dst ^ src
9326 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9327 %{
9328   match(Set dst (XorI dst src));
9329   effect(KILL cr); // the flags register is declared killed
9330 
9331   format %{ "xorl    $dst, $src\t# int" %}
9332   opcode(0x33);
9333   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9334   ins_pipe(ialu_reg_reg);
9335 %}
9336 
9337 // Xor Register with Immediate -1: emitted as notl; no flags operand is declared for this instruct
9338 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9339   match(Set dst (XorI dst imm));
9340 
9341   format %{ "notl   $dst" %}
9342   ins_encode %{
9343      __ notl($dst$$Register); // the format string above now names the same mnemonic as this call
9344   %}
9345   ins_pipe(ialu_reg);
9346 %}
9347 
9348 // Xor Register with Immediate (int, general immI case): dst = dst ^ src
9349 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9350 %{
9351   match(Set dst (XorI dst src));
9352   effect(KILL cr); // the flags register is declared killed
9353 
9354   format %{ "xorl    $dst, $src\t# int" %}
9355   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9356   ins_encode(OpcSErm(dst, src), Con8or32(src));
9357   ins_pipe(ialu_reg);
9358 %}
9359 
9360 // Xor Register with Memory (int): dst = dst ^ LoadI(src), with the load folded into the instruction
9361 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9362 %{
9363   match(Set dst (XorI dst (LoadI src)));
9364   effect(KILL cr); // the flags register is declared killed
9365 
9366   ins_cost(125);
9367   format %{ "xorl    $dst, $src\t# int" %}
9368   opcode(0x33);
9369   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9370   ins_pipe(ialu_reg_mem);
9371 %}
9372 
9373 // Xor Memory with Register (int): load, xor, store back to the same memory operand in one instruction
9374 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9375 %{
9376   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9377   effect(KILL cr); // the flags register is declared killed
9378 
9379   ins_cost(150);
9380   format %{ "xorl    $dst, $src\t# int" %}
9381   opcode(0x31); /* Opcode 31 /r */
9382   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9383   ins_pipe(ialu_mem_reg);
9384 %}
9385 
9386 // Xor Memory with Immediate (int): load, xor with constant, store back to the same memory operand
9387 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9388 %{
9389   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9390   effect(KILL cr); // the flags register is declared killed
9391 
9392   ins_cost(125);
9393   format %{ "xorl    $dst, $src\t# int" %}
9394   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9395   ins_encode(REX_mem(dst), OpcSE(src),
9396              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x6 from opcode() above (the /6 in the opcode comment)
9397   ins_pipe(ialu_mem_imm);
9398 %}
9399 
9400 
9401 // Long Logical Instructions
9402 
9403 // And Instructions
9404 // And Register with Register (long): dst = dst & src
9405 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9406 %{
9407   match(Set dst (AndL dst src));
9408   effect(KILL cr); // the flags register is declared killed
9409 
9410   format %{ "andq    $dst, $src\t# long" %}
9411   opcode(0x23);
9412   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as andI_rReg, with the _wide REX encoder
9413   ins_pipe(ialu_reg_reg);
9414 %}
9415 
9416 // And Register with Immediate 255 (long): emitted as a movzbq of dst onto itself (see format); no flags operand is declared
9417 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9418 %{
9419   match(Set dst (AndL dst src));
9420 
9421   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9422   opcode(0x0F, 0xB6);
9423   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9424   ins_pipe(ialu_reg);
9425 %}
9426 
9427 // And Register with Immediate 65535 (long): emitted as a movzwq of dst onto itself (see format); no flags operand is declared
9428 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9429 %{
9430   match(Set dst (AndL dst src));
9431 
9432   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9433   opcode(0x0F, 0xB7);
9434   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9435   ins_pipe(ialu_reg);
9436 %}
9437 
9438 // And Register with Immediate (long): dst = dst & src, where src is an immL32 operand
9439 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9440 %{
9441   match(Set dst (AndL dst src));
9442   effect(KILL cr); // the flags register is declared killed
9443 
9444   format %{ "andq    $dst, $src\t# long" %}
9445   opcode(0x81, 0x04); /* Opcode 81 /4 */
9446   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9447   ins_pipe(ialu_reg);
9448 %}
9449 
9450 // And Register with Memory (long): dst = dst & LoadL(src), with the load folded into the instruction
9451 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9452 %{
9453   match(Set dst (AndL dst (LoadL src)));
9454   effect(KILL cr); // the flags register is declared killed
9455 
9456   ins_cost(125);
9457   format %{ "andq    $dst, $src\t# long" %}
9458   opcode(0x23);
9459   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9460   ins_pipe(ialu_reg_mem);
9461 %}
9462 
9463 // And Memory with Register (long): load, and, store back to the same memory operand in one instruction
9464 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9465 %{
9466   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9467   effect(KILL cr); // the flags register is declared killed
9468 
9469   ins_cost(150);
9470   format %{ "andq    $dst, $src\t# long" %}
9471   opcode(0x21); /* Opcode 21 /r */
9472   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9473   ins_pipe(ialu_mem_reg);
9474 %}
9475 
9476 // And Memory with Immediate (long): load, and with an immL32 constant, store back to the same memory operand
9477 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9478 %{
9479   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9480   effect(KILL cr); // the flags register is declared killed
9481 
9482   ins_cost(125);
9483   format %{ "andq    $dst, $src\t# long" %}
9484   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9485   ins_encode(REX_mem_wide(dst), OpcSE(src),
9486              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x4 from opcode() above (the /4 in the opcode comment)
9487   ins_pipe(ialu_mem_imm);
9488 %}
9489 
9490 // BMI1 instructions
9491 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{ // andnq, memory form
9492   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); // dst = (src1 ^ minus_1) & LoadL(src2)
9493   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9494   effect(KILL cr);
9495 
9496   ins_cost(125);
9497   format %{ "andnq  $dst, $src1, $src2" %}
9498 
9499   ins_encode %{
9500     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
9501   %}
9502   ins_pipe(ialu_reg_mem);
9503 %}
9504 
9505 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{ // andnq, register form
9506   match(Set dst (AndL (XorL src1 minus_1) src2)); // dst = (src1 ^ minus_1) & src2
9507   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9508   effect(KILL cr);
9509 
9510   format %{ "andnq  $dst, $src1, $src2" %}
9511 
9512   ins_encode %{
9513     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
9514   %}
9515   ins_pipe(ialu_reg); // register-only form: same pipe class as andnI_rReg_rReg_rReg
9516 %}
9517 
9518 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{ // blsiq, register form
9519   match(Set dst (AndL (SubL imm_zero src) src)); // dst = (0 - src) & src
9520   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9521   effect(KILL cr);
9522 
9523   format %{ "blsiq  $dst, $src" %}
9524 
9525   ins_encode %{
9526     __ blsiq($dst$$Register, $src$$Register);
9527   %}
9528   ins_pipe(ialu_reg);
9529 %}
9530 
9531 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{ // blsiq, memory form
9532   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); // dst = (0 - LoadL(src)) & LoadL(src)
9533   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9534   effect(KILL cr);
9535 
9536   ins_cost(125);
9537   format %{ "blsiq  $dst, $src" %}
9538 
9539   ins_encode %{
9540     __ blsiq($dst$$Register, $src$$Address);
9541   %}
9542   ins_pipe(ialu_reg_mem);
9543 %}
9544 
9545 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9546 %{ // blsmskq, memory form
9547   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); // dst = (LoadL(src) + minus_1) ^ LoadL(src)
9548   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9549   effect(KILL cr);
9550 
9551   ins_cost(125);
9552   format %{ "blsmskq $dst, $src" %}
9553 
9554   ins_encode %{
9555     __ blsmskq($dst$$Register, $src$$Address);
9556   %}
9557   ins_pipe(ialu_reg_mem);
9558 %}
9559 
9560 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9561 %{ // blsmskq, register form
9562   match(Set dst (XorL (AddL src minus_1) src)); // dst = (src + minus_1) ^ src
9563   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9564   effect(KILL cr);
9565 
9566   format %{ "blsmskq $dst, $src" %}
9567 
9568   ins_encode %{
9569     __ blsmskq($dst$$Register, $src$$Register);
9570   %}
9571 
9572   ins_pipe(ialu_reg);
9573 %}
9574 
9575 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9576 %{ // blsrq, register form
9577   match(Set dst (AndL (AddL src minus_1) src) ); // dst = (src + minus_1) & src
9578   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9579   effect(KILL cr);
9580 
9581   format %{ "blsrq  $dst, $src" %}
9582 
9583   ins_encode %{
9584     __ blsrq($dst$$Register, $src$$Register);
9585   %}
9586 
9587   ins_pipe(ialu_reg);
9588 %}
9589 
9590 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9591 %{ // blsrq, memory form
9592   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); // dst = (LoadL(src) + minus_1) & LoadL(src)
9593   predicate(UseBMI1Instructions); // only selected when UseBMI1Instructions is set
9594   effect(KILL cr);
9595 
9596   ins_cost(125);
9597   format %{ "blsrq  $dst, $src" %}
9598 
9599   ins_encode %{
9600     __ blsrq($dst$$Register, $src$$Address);
9601   %}
9602 
9603   ins_pipe(ialu_reg_mem); // memory-source form: same pipe class as blsiL_rReg_mem and blsmskL_rReg_mem
9604 %}
9605 
9606 // Or Instructions
9607 // Or Register with Register (long): dst = dst | src
9608 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9609 %{
9610   match(Set dst (OrL dst src));
9611   effect(KILL cr); // the flags register is declared killed
9612 
9613   format %{ "orq     $dst, $src\t# long" %}
9614   opcode(0x0B);
9615   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9616   ins_pipe(ialu_reg_reg);
9617 %}
9618 
9619 // Or a long with a pointer reinterpreted via CastP2X. Use any_RegP to match R15 (TLS register) without spilling.
9620 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9621   match(Set dst (OrL dst (CastP2X src)));
9622   effect(KILL cr); // the flags register is declared killed
9623 
9624   format %{ "orq     $dst, $src\t# long" %}
9625   opcode(0x0B);
9626   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same encoding as orL_rReg; the cast emits nothing of its own
9627   ins_pipe(ialu_reg_reg);
9628 %}
9629 
9630 
9631 // Or Register with Immediate (long): dst = dst | src, where src is an immL32 operand
9632 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9633 %{
9634   match(Set dst (OrL dst src));
9635   effect(KILL cr); // the flags register is declared killed
9636 
9637   format %{ "orq     $dst, $src\t# long" %}
9638   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9639   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9640   ins_pipe(ialu_reg);
9641 %}
9642 
9643 // Or Register with Memory (long): dst = dst | LoadL(src), with the load folded into the instruction
9644 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9645 %{
9646   match(Set dst (OrL dst (LoadL src)));
9647   effect(KILL cr); // the flags register is declared killed
9648 
9649   ins_cost(125);
9650   format %{ "orq     $dst, $src\t# long" %}
9651   opcode(0x0B);
9652   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9653   ins_pipe(ialu_reg_mem);
9654 %}
9655 
9656 // Or Memory with Register (long): load, or, store back to the same memory operand in one instruction
9657 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9658 %{
9659   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9660   effect(KILL cr); // the flags register is declared killed
9661 
9662   ins_cost(150);
9663   format %{ "orq     $dst, $src\t# long" %}
9664   opcode(0x09); /* Opcode 09 /r */
9665   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9666   ins_pipe(ialu_mem_reg);
9667 %}
9668 
9669 // Or Memory with Immediate (long): load, or with an immL32 constant, store back to the same memory operand
9670 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9671 %{
9672   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9673   effect(KILL cr); // the flags register is declared killed
9674 
9675   ins_cost(125);
9676   format %{ "orq     $dst, $src\t# long" %}
9677   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9678   ins_encode(REX_mem_wide(dst), OpcSE(src),
9679              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x1 from opcode() above (the /1 in the opcode comment)
9680   ins_pipe(ialu_mem_imm);
9681 %}
9682 
9683 // Xor Instructions
9684 // Xor Register with Register (long): dst = dst ^ src
9685 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9686 %{
9687   match(Set dst (XorL dst src));
9688   effect(KILL cr); // the flags register is declared killed
9689 
9690   format %{ "xorq    $dst, $src\t# long" %}
9691   opcode(0x33);
9692   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9693   ins_pipe(ialu_reg_reg);
9694 %}
9695 
9696 // Xor Register with Immediate -1 (long): emitted as notq; no flags operand is declared for this instruct
9697 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9698   match(Set dst (XorL dst imm));
9699 
9700   format %{ "notq   $dst" %}
9701   ins_encode %{
9702      __ notq($dst$$Register);
9703   %}
9704   ins_pipe(ialu_reg);
9705 %}
9706 
9707 // Xor Register with Immediate (long): dst = dst ^ src, where src is an immL32 operand
9708 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9709 %{
9710   match(Set dst (XorL dst src));
9711   effect(KILL cr); // the flags register is declared killed
9712 
9713   format %{ "xorq    $dst, $src\t# long" %}
9714   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9715   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9716   ins_pipe(ialu_reg);
9717 %}
9718 
9719 // Xor Register with Memory (long): dst = dst ^ LoadL(src), with the load folded into the instruction
9720 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9721 %{
9722   match(Set dst (XorL dst (LoadL src)));
9723   effect(KILL cr); // the flags register is declared killed
9724 
9725   ins_cost(125);
9726   format %{ "xorq    $dst, $src\t# long" %}
9727   opcode(0x33);
9728   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9729   ins_pipe(ialu_reg_mem);
9730 %}
9731 
9732 // Xor Memory with Register (long): load, xor, store back to the same memory operand in one instruction
9733 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9734 %{
9735   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9736   effect(KILL cr); // the flags register is declared killed
9737 
9738   ins_cost(150);
9739   format %{ "xorq    $dst, $src\t# long" %}
9740   opcode(0x31); /* Opcode 31 /r */
9741   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
9742   ins_pipe(ialu_mem_reg);
9743 %}
9744 
9745 // Xor Memory with Immediate (long): load, xor with an immL32 constant, store back to the same memory operand
9746 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9747 %{
9748   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9749   effect(KILL cr); // the flags register is declared killed
9750 
9751   ins_cost(125);
9752   format %{ "xorq    $dst, $src\t# long" %}
9753   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9754   ins_encode(REX_mem_wide(dst), OpcSE(src),
9755              RM_opc_mem(secondary, dst), Con8or32(src)); // "secondary" is the 0x6 from opcode() above (the /6 in the opcode comment)
9756   ins_pipe(ialu_mem_imm);
9757 %}
9758 
9759 // Convert Int to Boolean: test src against itself, set the low byte of dst on "not zero", then zero-extend dst
9760 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9761 %{
9762   match(Set dst (Conv2B src));
9763   effect(KILL cr); // the testl below writes the flags
9764 
9765   format %{ "testl   $src, $src\t# ci2b\n\t"
9766             "setnz   $dst\n\t"
9767             "movzbl  $dst, $dst" %}
9768   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9769              setNZ_reg(dst),
9770              REX_reg_breg(dst, dst), // movzbl
9771              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9772   ins_pipe(pipe_slow); // XXX
9773 %}
9774 
9775 // Convert Pointer to Boolean: same sequence as convI2B, but the test uses the _wide REX encoder (testq)
9776 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9777 %{
9778   match(Set dst (Conv2B src));
9779   effect(KILL cr); // the testq below writes the flags
9780 
9781   format %{ "testq   $src, $src\t# cp2b\n\t"
9782             "setnz   $dst\n\t"
9783             "movzbl  $dst, $dst" %}
9784   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9785              setNZ_reg(dst),
9786              REX_reg_breg(dst, dst), // movzbl
9787              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9788   ins_pipe(pipe_slow); // XXX
9789 %}
9790 
9791 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9792 %{ // CmpLTMask of two registers: compare, set byte on "less than", zero-extend, then negate (see format)
9793   match(Set dst (CmpLTMask p q));
9794   effect(KILL cr); // the cmpl below writes the flags
9795 
9796   ins_cost(400);
9797   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9798             "setlt   $dst\n\t"
9799             "movzbl  $dst, $dst\n\t"
9800             "negl    $dst" %}
9801   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9802              setLT_reg(dst),
9803              REX_reg_breg(dst, dst), // movzbl
9804              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9805              neg_reg(dst)); // negl: turns the 0/1 from setlt+movzbl into 0/-1
9806   ins_pipe(pipe_slow);
9807 %}
9808 
9809 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9810 %{ // CmpLTMask against the constant zero operand: a single arithmetic right shift of dst by 31
9811   match(Set dst (CmpLTMask dst zero));
9812   effect(KILL cr);
9813 
9814   ins_cost(100); // cheaper than the 400 of the general cmpLTMask above
9815   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9816   ins_encode %{
9817   __ sarl($dst$$Register, 31);
9818   %}
9819   ins_pipe(ialu_reg);
9820 %}
9821 
9822 /* Better to save a register than avoid a branch: p = (CmpLTMask(p, q) & y) + (p - q) without materializing the mask */
9823 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9824 %{
9825   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9826   effect(KILL cr); // the subl below writes the flags
9827   ins_cost(300);
9828   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
9829             "jge    done\n\t"
9830             "addl   $p,$y\n"
9831             "done:  " %}
9832   ins_encode %{
9833     Register Rp = $p$$Register;
9834     Register Rq = $q$$Register;
9835     Register Ry = $y$$Register;
9836     Label done;
9837     __ subl(Rp, Rq); // p -= q; the flags from this subtraction drive the branch
9838     __ jccb(Assembler::greaterEqual, done); // greater-or-equal: skip the add
9839     __ addl(Rp, Ry); // less-than case only: p += y
9840     __ bind(done);
9841   %}
9842   ins_pipe(pipe_cmplt);
9843 %}
9844 
9845 /* Better to save a register than avoid a branch: no mask register is materialized, a short branch is used instead */
9846 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9847 %{
9848   match(Set y (AndI (CmpLTMask p q) y));  // y = (p < q) ? y : 0
9849   effect(KILL cr);
9850 
9851   ins_cost(300);
9852 
9853   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
9854             "jlt      done\n\t"
9855             "xorl     $y, $y\n"
9856             "done:  " %}
9857   ins_encode %{
9858     Register Rp = $p$$Register;
9859     Register Rq = $q$$Register;
9860     Register Ry = $y$$Register;
9861     Label done;
9862     __ cmpl(Rp, Rq);
9863     __ jccb(Assembler::less, done);  // p < q: y is kept unchanged
9864     __ xorl(Ry, Ry);                 // otherwise y is cleared to 0
9865     __ bind(done);
9866   %}
9867   ins_pipe(pipe_cmplt);
9868 %}
9869 
9870 
9871 //---------- FP Instructions------------------------------------------------
9872 
9873 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9874 %{
9875   match(Set cr (CmpF src1 src2));  // float compare into flags (rFlagsRegU), register-register form
9876 
9877   ins_cost(145);
9878   format %{ "ucomiss $src1, $src2\n\t"
9879             "jnp,s   exit\n\t"
9880             "pushfq\t# saw NaN, set CF\n\t"
9881             "andq    [rsp], #0xffffff2b\n\t"
9882             "popfq\n"
9883     "exit:" %}
9884   ins_encode %{
9885     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9886     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
9887   %}
9888   ins_pipe(pipe_slow);
9889 %}
9890 
9891 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9892   match(Set cr (CmpF src1 src2));  // same compare as cmpF_cc_reg but producing rFlagsRegUCF
9893 
9894   ins_cost(100);
9895   format %{ "ucomiss $src1, $src2" %}
9896   ins_encode %{
9897     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);  // bare compare: no NaN fixup is emitted here
9898   %}
9899   ins_pipe(pipe_slow);
9900 %}
9901 
9902 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9903 %{
9904   match(Set cr (CmpF src1 (LoadF src2)));  // float compare into flags with the load of src2 folded in
9905 
9906   ins_cost(145);
9907   format %{ "ucomiss $src1, $src2\n\t"
9908             "jnp,s   exit\n\t"
9909             "pushfq\t# saw NaN, set CF\n\t"
9910             "andq    [rsp], #0xffffff2b\n\t"
9911             "popfq\n"
9912     "exit:" %}
9913   ins_encode %{
9914     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9915     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
9916   %}
9917   ins_pipe(pipe_slow);
9918 %}
9919 
9920 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9921   match(Set cr (CmpF src1 (LoadF src2)));  // memory-operand compare producing rFlagsRegUCF
9922 
9923   ins_cost(100);
9924   format %{ "ucomiss $src1, $src2" %}
9925   ins_encode %{
9926     __ ucomiss($src1$$XMMRegister, $src2$$Address);  // bare compare: no NaN fixup is emitted here
9927   %}
9928   ins_pipe(pipe_slow);
9929 %}
9930 
9931 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
9932   match(Set cr (CmpF src con));  // float compare into flags against a constant-table entry
9933 
9934   ins_cost(145);
9935   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9936             "jnp,s   exit\n\t"
9937             "pushfq\t# saw NaN, set CF\n\t"
9938             "andq    [rsp], #0xffffff2b\n\t"
9939             "popfq\n"
9940     "exit:" %}
9941   ins_encode %{
9942     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9943     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
9944   %}
9945   ins_pipe(pipe_slow);
9946 %}
9947 
9948 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
9949   match(Set cr (CmpF src con));  // constant-table compare producing rFlagsRegUCF
9950   ins_cost(100);
9951   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9952   ins_encode %{
9953     __ ucomiss($src$$XMMRegister, $constantaddress($con));  // bare compare: no NaN fixup is emitted here
9954   %}
9955   ins_pipe(pipe_slow);
9956 %}
9957 
9958 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9959 %{
9960   match(Set cr (CmpD src1 src2));  // double compare into flags (rFlagsRegU), register-register form
9961 
9962   ins_cost(145);
9963   format %{ "ucomisd $src1, $src2\n\t"
9964             "jnp,s   exit\n\t"
9965             "pushfq\t# saw NaN, set CF\n\t"
9966             "andq    [rsp], #0xffffff2b\n\t"
9967             "popfq\n"
9968     "exit:" %}
9969   ins_encode %{
9970     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9971     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
9972   %}
9973   ins_pipe(pipe_slow);
9974 %}
9975 
9976 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
9977   match(Set cr (CmpD src1 src2));  // same compare as cmpD_cc_reg but producing rFlagsRegUCF
9978 
9979   ins_cost(100);
9980   format %{ "ucomisd $src1, $src2" %}
9981   ins_encode %{
9982     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);  // bare compare: no NaN fixup is emitted here
9983   %}
9984   ins_pipe(pipe_slow);
9985 %}
9986 
9987 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9988 %{
9989   match(Set cr (CmpD src1 (LoadD src2)));  // double compare into flags with the load of src2 folded in
9990 
9991   ins_cost(145);
9992   format %{ "ucomisd $src1, $src2\n\t"
9993             "jnp,s   exit\n\t"
9994             "pushfq\t# saw NaN, set CF\n\t"
9995             "andq    [rsp], #0xffffff2b\n\t"
9996             "popfq\n"
9997     "exit:" %}
9998   ins_encode %{
9999     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10000     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
10001   %}
10002   ins_pipe(pipe_slow);
10003 %}
10004 
10005 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10006   match(Set cr (CmpD src1 (LoadD src2)));  // memory-operand compare producing rFlagsRegUCF
10007 
10008   ins_cost(100);
10009   format %{ "ucomisd $src1, $src2" %}
10010   ins_encode %{
10011     __ ucomisd($src1$$XMMRegister, $src2$$Address);  // bare compare: no NaN fixup is emitted here
10012   %}
10013   ins_pipe(pipe_slow);
10014 %}
10015 
10016 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10017   match(Set cr (CmpD src con));  // double compare into flags against a constant-table entry
10018 
10019   ins_cost(145);
10020   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10021             "jnp,s   exit\n\t"
10022             "pushfq\t# saw NaN, set CF\n\t"
10023             "andq    [rsp], #0xffffff2b\n\t"
10024             "popfq\n"
10025     "exit:" %}
10026   ins_encode %{
10027     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10028     emit_cmpfp_fixup(_masm);  // per the format text: rewrites the flags when the parity flag signals a NaN operand
10029   %}
10030   ins_pipe(pipe_slow);
10031 %}
10032 
10033 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10034   match(Set cr (CmpD src con));  // constant-table compare producing rFlagsRegUCF
10035   ins_cost(100);
10036   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10037   ins_encode %{
10038     __ ucomisd($src$$XMMRegister, $constantaddress($con));  // bare compare: no NaN fixup is emitted here
10039   %}
10040   ins_pipe(pipe_slow);
10041 %}
10042 
10043 // Compare into -1,0,1 (float, register-register); per the format text, unordered (jp) and below (jb) both yield -1
10044 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10045 %{
10046   match(Set dst (CmpF3 src1 src2));
10047   effect(KILL cr);
10048 
10049   ins_cost(275);
10050   format %{ "ucomiss $src1, $src2\n\t"
10051             "movl    $dst, #-1\n\t"
10052             "jp,s    done\n\t"
10053             "jb,s    done\n\t"
10054             "setne   $dst\n\t"
10055             "movzbl  $dst, $dst\n"
10056     "done:" %}
10057   ins_encode %{
10058     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10059     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10060   %}
10061   ins_pipe(pipe_slow);
10062 %}
10063 
10064 // Compare into -1,0,1 (float, second operand loaded from memory); per the format text, unordered and below both yield -1
10065 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10066 %{
10067   match(Set dst (CmpF3 src1 (LoadF src2)));
10068   effect(KILL cr);
10069 
10070   ins_cost(275);
10071   format %{ "ucomiss $src1, $src2\n\t"
10072             "movl    $dst, #-1\n\t"
10073             "jp,s    done\n\t"
10074             "jb,s    done\n\t"
10075             "setne   $dst\n\t"
10076             "movzbl  $dst, $dst\n"
10077     "done:" %}
10078   ins_encode %{
10079     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10080     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10081   %}
10082   ins_pipe(pipe_slow);
10083 %}
10084 
10085 // Compare into -1,0,1 (float against a constant-table entry); per the format text, unordered and below both yield -1
10086 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10087   match(Set dst (CmpF3 src con));
10088   effect(KILL cr);
10089 
10090   ins_cost(275);
10091   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10092             "movl    $dst, #-1\n\t"
10093             "jp,s    done\n\t"
10094             "jb,s    done\n\t"
10095             "setne   $dst\n\t"
10096             "movzbl  $dst, $dst\n"
10097     "done:" %}
10098   ins_encode %{
10099     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10100     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10101   %}
10102   ins_pipe(pipe_slow);
10103 %}
10104 
10105 // Compare into -1,0,1 (double, register-register); per the format text, unordered (jp) and below (jb) both yield -1
10106 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10107 %{
10108   match(Set dst (CmpD3 src1 src2));
10109   effect(KILL cr);
10110 
10111   ins_cost(275);
10112   format %{ "ucomisd $src1, $src2\n\t"
10113             "movl    $dst, #-1\n\t"
10114             "jp,s    done\n\t"
10115             "jb,s    done\n\t"
10116             "setne   $dst\n\t"
10117             "movzbl  $dst, $dst\n"
10118     "done:" %}
10119   ins_encode %{
10120     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10121     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10122   %}
10123   ins_pipe(pipe_slow);
10124 %}
10125 
10126 // Compare into -1,0,1 (double, second operand loaded from memory); per the format text, unordered and below both yield -1
10127 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10128 %{
10129   match(Set dst (CmpD3 src1 (LoadD src2)));
10130   effect(KILL cr);
10131 
10132   ins_cost(275);
10133   format %{ "ucomisd $src1, $src2\n\t"
10134             "movl    $dst, #-1\n\t"
10135             "jp,s    done\n\t"
10136             "jb,s    done\n\t"
10137             "setne   $dst\n\t"
10138             "movzbl  $dst, $dst\n"
10139     "done:" %}
10140   ins_encode %{
10141     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10142     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10143   %}
10144   ins_pipe(pipe_slow);
10145 %}
10146 
10147 // Compare into -1,0,1 (double against a constant-table entry); per the format text, unordered and below both yield -1
10148 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10149   match(Set dst (CmpD3 src con));
10150   effect(KILL cr);
10151 
10152   ins_cost(275);
10153   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10154             "movl    $dst, #-1\n\t"
10155             "jp,s    done\n\t"
10156             "jb,s    done\n\t"
10157             "setne   $dst\n\t"
10158             "movzbl  $dst, $dst\n"
10159     "done:" %}
10160   ins_encode %{
10161     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10162     emit_cmpfp3(_masm, $dst$$Register);  // turns the flags into the three-way result in dst
10163   %}
10164   ins_pipe(pipe_slow);
10165 %}
10166 
10167 // -----------Trig and Transcendental Instructions-----------------------------
10168 instruct cosD_reg(regD dst) %{
10169   match(Set dst (CosD dst));  // in-place: dst = cos(dst)
10170 
10171   format %{ "dcos   $dst\n\t" %}
10172   opcode(0xD9, 0xFF);  // NOTE(review): expected to be the x87 fcos encoding -- confirm against the ISA manual
10173   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );  // opcode bytes sit between Push_SrcXD and Push_ResultXD
10174   ins_pipe( pipe_slow );
10175 %}
10176 
10177 instruct sinD_reg(regD dst) %{
10178   match(Set dst (SinD dst));  // in-place: dst = sin(dst)
10179 
10180   format %{ "dsin   $dst\n\t" %}
10181   opcode(0xD9, 0xFE);  // NOTE(review): expected to be the x87 fsin encoding -- confirm against the ISA manual
10182   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );  // opcode bytes sit between Push_SrcXD and Push_ResultXD
10183   ins_pipe( pipe_slow );
10184 %}
10185 
10186 instruct tanD_reg(regD dst) %{
10187   match(Set dst (TanD dst));  // in-place: dst = tan(dst)
10188 
10189   format %{ "dtan   $dst\n\t" %}
10190   ins_encode( Push_SrcXD(dst),
10191               Opcode(0xD9), Opcode(0xF2),   // fptan
10192               Opcode(0xDD), Opcode(0xD8),   // fstp st -- presumably drops the extra value fptan leaves on the stack (TODO confirm)
10193               Push_ResultXD(dst) );
10194   ins_pipe( pipe_slow );
10195 %}
10196 
10197 instruct log10D_reg(regD dst) %{
10198   // The source and the result are the same double operand, held in an XMM register
10199   match(Set dst (Log10D dst));
10200   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10201   // fyl2x        ; compute log_10(2) * log_2(x)
10202   format %{ "fldlg2\t\t\t#Log10\n\t"
10203             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10204          %}
10205    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10206               Push_SrcXD(dst),              // the constant is emitted first, then the source operand
10207               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10208               Push_ResultXD(dst));
10209 
10210   ins_pipe( pipe_slow );
10211 %}
10212 
10213 instruct logD_reg(regD dst) %{
10214   // The source and the result are the same double operand, held in an XMM register
10215   match(Set dst (LogD dst));
10216   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10217   // fyl2x        ; compute log_e(2) * log_2(x)
10218   format %{ "fldln2\t\t\t#Log_e\n\t"
10219             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10220          %}
10221   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10222               Push_SrcXD(dst),              // the constant is emitted first, then the source operand
10223               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10224               Push_ResultXD(dst));
10225   ins_pipe( pipe_slow );
10226 %}
10227 
10228 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
10229   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
10230   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
10231   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
10232   ins_encode %{
10233     __ subptr(rsp, 8);                                // reserve an 8-byte stack slot used to shuttle values
10234     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
10235     __ fld_d(Address(rsp, 0));                        // the exponent (src1) is loaded first
10236     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
10237     __ fld_d(Address(rsp, 0));                        // then the base (src0)
10238     __ fast_pow();
10239     __ fstp_d(Address(rsp, 0));                       // store the result through the same slot
10240     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
10241     __ addptr(rsp, 8);                                // release the slot
10242   %}
10243   ins_pipe( pipe_slow );
10244 %}
10245 
10246 instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
10247   match(Set dst (ExpD src));  // dst = exp(src)
10248   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
10249   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
10250   ins_encode %{
10251     __ subptr(rsp, 8);                               // reserve an 8-byte stack slot used to shuttle values
10252     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10253     __ fld_d(Address(rsp, 0));                       // load the argument from the slot
10254     __ fast_exp();
10255     __ fstp_d(Address(rsp, 0));                      // store the result through the same slot
10256     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
10257     __ addptr(rsp, 8);                               // release the slot
10258   %}
10259   ins_pipe( pipe_slow );
10260 %}
10261 
10262 //----------Arithmetic Conversion Instructions---------------------------------
10263 
10264 instruct roundFloat_nop(regF dst)
10265 %{
10266   match(Set dst (RoundFloat dst));  // matched in place and emits nothing
10267 
10268   ins_cost(0);
10269   ins_encode();  // empty encoding: no bytes are generated
10270   ins_pipe(empty);
10271 %}
10272 
10273 instruct roundDouble_nop(regD dst)
10274 %{
10275   match(Set dst (RoundDouble dst));  // matched in place and emits nothing
10276 
10277   ins_cost(0);
10278   ins_encode();  // empty encoding: no bytes are generated
10279   ins_pipe(empty);
10280 %}
10281 
10282 instruct convF2D_reg_reg(regD dst, regF src)
10283 %{
10284   match(Set dst (ConvF2D src));  // float -> double, register source
10285 
10286   format %{ "cvtss2sd $dst, $src" %}
10287   ins_encode %{
10288     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10289   %}
10290   ins_pipe(pipe_slow); // XXX
10291 %}
10292 
10293 instruct convF2D_reg_mem(regD dst, memory src)
10294 %{
10295   match(Set dst (ConvF2D (LoadF src)));  // float -> double with the float load folded into the conversion
10296 
10297   format %{ "cvtss2sd $dst, $src" %}
10298   ins_encode %{
10299     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
10300   %}
10301   ins_pipe(pipe_slow); // XXX
10302 %}
10303 
10304 instruct convD2F_reg_reg(regF dst, regD src)
10305 %{
10306   match(Set dst (ConvD2F src));  // double -> float, register source
10307 
10308   format %{ "cvtsd2ss $dst, $src" %}
10309   ins_encode %{
10310     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10311   %}
10312   ins_pipe(pipe_slow); // XXX
10313 %}
10314 
10315 instruct convD2F_reg_mem(regF dst, memory src)
10316 %{
10317   match(Set dst (ConvD2F (LoadD src)));  // double -> float with the double load folded into the conversion
10318 
10319   format %{ "cvtsd2ss $dst, $src" %}
10320   ins_encode %{
10321     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
10322   %}
10323   ins_pipe(pipe_slow); // XXX
10324 %}
10325 
10326 // XXX do mem variants (only register-source forms of the float/double -> int/long conversions exist below)
10327 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10328 %{
10329   match(Set dst (ConvF2I src));  // float -> int: fast truncating convert, stub call on the sentinel result
10330   effect(KILL cr);
10331 
10332   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10333             "cmpl    $dst, #0x80000000\n\t"
10334             "jne,s   done\n\t"
10335             "subq    rsp, #8\n\t"
10336             "movss   [rsp], $src\n\t"
10337             "call    f2i_fixup\n\t"
10338             "popq    $dst\n"
10339     "done:   "%}
10340   ins_encode %{
10341     Label done;
10342     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10343     __ cmpl($dst$$Register, 0x80000000);  // presumably the value cvtt* returns for NaN/out-of-range input -- TODO confirm
10344     __ jccb(Assembler::notEqual, done);   // any other result is final
10345     __ subptr(rsp, 8);                    // slow path: pass src to the stub in a stack slot
10346     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10347     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
10348     __ pop($dst$$Register);               // read the slot back into dst, releasing it
10349     __ bind(done);
10350   %}
10351   ins_pipe(pipe_slow);
10352 %}
10353 
10354 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10355 %{
10356   match(Set dst (ConvF2L src));  // float -> long: fast truncating convert, stub call on the sentinel result
10357   effect(KILL cr);
10358 
10359   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10360             "cmpq    $dst, [0x8000000000000000]\n\t"
10361             "jne,s   done\n\t"
10362             "subq    rsp, #8\n\t"
10363             "movss   [rsp], $src\n\t"
10364             "call    f2l_fixup\n\t"
10365             "popq    $dst\n"
10366     "done:   "%}
10367   ins_encode %{
10368     Label done;
10369     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
10370     __ cmp64($dst$$Register,  // compared against the in-memory double_sign_flip constant (0x8000000000000000 per the format text)
10371              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10372     __ jccb(Assembler::notEqual, done);  // any other result is final
10373     __ subptr(rsp, 8);                   // slow path: pass src to the stub in a stack slot
10374     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10375     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
10376     __ pop($dst$$Register);              // read the slot back into dst, releasing it
10377     __ bind(done);
10378   %}
10379   ins_pipe(pipe_slow);
10380 %}
10381 
10382 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10383 %{
10384   match(Set dst (ConvD2I src));  // double -> int: fast truncating convert, stub call on the sentinel result
10385   effect(KILL cr);
10386 
10387   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10388             "cmpl    $dst, #0x80000000\n\t"
10389             "jne,s   done\n\t"
10390             "subq    rsp, #8\n\t"
10391             "movsd   [rsp], $src\n\t"
10392             "call    d2i_fixup\n\t"
10393             "popq    $dst\n"
10394     "done:   "%}
10395   ins_encode %{
10396     Label done;
10397     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10398     __ cmpl($dst$$Register, 0x80000000);  // presumably the value cvtt* returns for NaN/out-of-range input -- TODO confirm
10399     __ jccb(Assembler::notEqual, done);   // any other result is final
10400     __ subptr(rsp, 8);                    // slow path: pass src to the stub in a stack slot
10401     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10402     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
10403     __ pop($dst$$Register);               // read the slot back into dst, releasing it
10404     __ bind(done);
10405   %}
10406   ins_pipe(pipe_slow);
10407 %}
10408 
10409 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10410 %{
10411   match(Set dst (ConvD2L src));  // double -> long: fast truncating convert, stub call on the sentinel result
10412   effect(KILL cr);
10413 
10414   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10415             "cmpq    $dst, [0x8000000000000000]\n\t"
10416             "jne,s   done\n\t"
10417             "subq    rsp, #8\n\t"
10418             "movsd   [rsp], $src\n\t"
10419             "call    d2l_fixup\n\t"
10420             "popq    $dst\n"
10421     "done:   "%}
10422   ins_encode %{
10423     Label done;
10424     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10425     __ cmp64($dst$$Register,  // compared against the in-memory double_sign_flip constant (0x8000000000000000 per the format text)
10426              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10427     __ jccb(Assembler::notEqual, done);  // any other result is final
10428     __ subptr(rsp, 8);                   // slow path: pass src to the stub in a stack slot
10429     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10430     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10431     __ pop($dst$$Register);              // read the slot back into dst, releasing it
10432     __ bind(done);
10433   %}
10434   ins_pipe(pipe_slow);
10435 %}
10436 
10437 instruct convI2F_reg_reg(regF dst, rRegI src)
10438 %{
10439   predicate(!UseXmmI2F);  // convXI2F_reg matches the same node when UseXmmI2F is set
10440   match(Set dst (ConvI2F src));  // int -> float, register source
10441 
10442   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10443   ins_encode %{
10444     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10445   %}
10446   ins_pipe(pipe_slow); // XXX
10447 %}
10448 
10449 instruct convI2F_reg_mem(regF dst, memory src)
10450 %{
10451   match(Set dst (ConvI2F (LoadI src)));  // int -> float with the int load folded in; no UseXmmI2F predicate here
10452 
10453   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10454   ins_encode %{
10455     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10456   %}
10457   ins_pipe(pipe_slow); // XXX
10458 %}
10459 
10460 instruct convI2D_reg_reg(regD dst, rRegI src)
10461 %{
10462   predicate(!UseXmmI2D);  // convXI2D_reg matches the same node when UseXmmI2D is set
10463   match(Set dst (ConvI2D src));  // int -> double, register source
10464 
10465   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10466   ins_encode %{
10467     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10468   %}
10469   ins_pipe(pipe_slow); // XXX
10470 %}
10471 
10472 instruct convI2D_reg_mem(regD dst, memory src)
10473 %{
10474   match(Set dst (ConvI2D (LoadI src)));  // int -> double with the int load folded in; no UseXmmI2D predicate here
10475 
10476   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10477   ins_encode %{
10478     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10479   %}
10480   ins_pipe(pipe_slow); // XXX
10481 %}
10482 
10483 instruct convXI2F_reg(regF dst, rRegI src)
10484 %{
10485   predicate(UseXmmI2F);  // alternative to convI2F_reg_reg, selected by the UseXmmI2F flag
10486   match(Set dst (ConvI2F src));  // int -> float done inside the XMM register
10487 
10488   format %{ "movdl $dst, $src\n\t"
10489             "cvtdq2psl $dst, $dst\t# i2f" %}
10490   ins_encode %{
10491     __ movdl($dst$$XMMRegister, $src$$Register);        // move the int into the XMM register first
10492     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);  // then convert in place
10493   %}
10494   ins_pipe(pipe_slow); // XXX
10495 %}
10496 
10497 instruct convXI2D_reg(regD dst, rRegI src)
10498 %{
10499   predicate(UseXmmI2D);  // alternative to convI2D_reg_reg, selected by the UseXmmI2D flag
10500   match(Set dst (ConvI2D src));  // int -> double done inside the XMM register
10501 
10502   format %{ "movdl $dst, $src\n\t"
10503             "cvtdq2pdl $dst, $dst\t# i2d" %}
10504   ins_encode %{
10505     __ movdl($dst$$XMMRegister, $src$$Register);        // move the int into the XMM register first
10506     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);  // then convert in place
10507   %}
10508   ins_pipe(pipe_slow); // XXX
10509 %}
10510 
10511 instruct convL2F_reg_reg(regF dst, rRegL src)
10512 %{
10513   match(Set dst (ConvL2F src));  // long -> float, register source
10514 
10515   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10516   ins_encode %{
10517     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10518   %}
10519   ins_pipe(pipe_slow); // XXX
10520 %}
10521 
10522 instruct convL2F_reg_mem(regF dst, memory src)
10523 %{
10524   match(Set dst (ConvL2F (LoadL src)));  // long -> float with the long load folded into the conversion
10525 
10526   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10527   ins_encode %{
10528     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10529   %}
10530   ins_pipe(pipe_slow); // XXX
10531 %}
10532 
10533 instruct convL2D_reg_reg(regD dst, rRegL src)
10534 %{
10535   match(Set dst (ConvL2D src));  // long -> double, register source
10536 
10537   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10538   ins_encode %{
10539     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10540   %}
10541   ins_pipe(pipe_slow); // XXX
10542 %}
10543 
10544 instruct convL2D_reg_mem(regD dst, memory src)
10545 %{
10546   match(Set dst (ConvL2D (LoadL src)));  // long -> double with the long load folded into the conversion
10547 
10548   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10549   ins_encode %{
10550     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10551   %}
10552   ins_pipe(pipe_slow); // XXX
10553 %}
10554 
10555 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10556 %{
10557   match(Set dst (ConvI2L src));  // int -> long via a sign-extending move
10558 
10559   ins_cost(125);
10560   format %{ "movslq  $dst, $src\t# i2l" %}
10561   ins_encode %{
10562     __ movslq($dst$$Register, $src$$Register);
10563   %}
10564   ins_pipe(ialu_reg_reg);
10565 %}
10566 
10567 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10568 // %{
10569 //   match(Set dst (ConvI2L src));
10570 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10571 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10572 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10573 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10574 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10575 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10576 
10577 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10578 //   ins_encode(enc_copy(dst, src));
10579 // //   opcode(0x63); // needs REX.W
10580 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10581 //   ins_pipe(ialu_reg_reg);
10582 // %}
10583 
10584 // Zero-extend convert int to long: matches ConvI2L followed by AndL with an immL_32bits mask
10585 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10586 %{
10587   match(Set dst (AndL (ConvI2L src) mask));
10588 
10589   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10590   ins_encode %{
10591     if ($dst$$reg != $src$$reg) {  // nothing is emitted when dst and src were assigned the same register
10592       __ movl($dst$$Register, $src$$Register);
10593     }
10594   %}
10595   ins_pipe(ialu_reg_reg);
10596 %}
10597 
10598 // Zero-extend convert int to long, with the int load folded in (memory source)
10599 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10600 %{
10601   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10602 
10603   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10604   ins_encode %{
10605     __ movl($dst$$Register, $src$$Address);  // a single 32-bit load implements load + convert + mask
10606   %}
10607   ins_pipe(ialu_reg_mem);
10608 %}
10609 
10610 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10611 %{
10612   match(Set dst (AndL src mask));  // AndL with an immL_32bits mask, implemented as a 32-bit move
10613 
10614   format %{ "movl    $dst, $src\t# zero-extend long" %}
10615   ins_encode %{
10616     __ movl($dst$$Register, $src$$Register);  // always emitted, unlike convI2L_reg_reg_zex which skips dst == src
10617   %}
10618   ins_pipe(ialu_reg_reg);
10619 %}
10620 
10621 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10622 %{
10623   match(Set dst (ConvL2I src));  // long -> int via a 32-bit register move
10624 
10625   format %{ "movl    $dst, $src\t# l2i" %}
10626   ins_encode %{
10627     __ movl($dst$$Register, $src$$Register);
10628   %}
10629   ins_pipe(ialu_reg_reg);
10630 %}
10631 
10632 
10633 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10634   match(Set dst (MoveF2I src));  // float stack slot -> int register (plain 32-bit load, no conversion instruction)
10635   effect(DEF dst, USE src);
10636 
10637   ins_cost(125);
10638   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10639   ins_encode %{
10640     __ movl($dst$$Register, Address(rsp, $src$$disp));  // the slot is addressed as rsp + displacement
10641   %}
10642   ins_pipe(ialu_reg_mem);
10643 %}
10644 
10645 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10646   match(Set dst (MoveI2F src));  // int stack slot -> float register (plain load, no conversion instruction)
10647   effect(DEF dst, USE src);
10648 
10649   ins_cost(125);
10650   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10651   ins_encode %{
10652     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));  // the slot is addressed as rsp + displacement
10653   %}
10654   ins_pipe(pipe_slow);
10655 %}
10656 
10657 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10658   match(Set dst (MoveD2L src));  // double stack slot -> long register (plain 64-bit load, no conversion instruction)
10659   effect(DEF dst, USE src);
10660 
10661   ins_cost(125);
10662   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10663   ins_encode %{
10664     __ movq($dst$$Register, Address(rsp, $src$$disp));  // the slot is addressed as rsp + displacement
10665   %}
10666   ins_pipe(ialu_reg_mem);
10667 %}
10668 
10669 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10670   predicate(!UseXmmLoadAndClearUpper);  // counterpart of MoveL2D_stack_reg for when the flag is off
10671   match(Set dst (MoveL2D src));  // long stack slot -> double register
10672   effect(DEF dst, USE src);
10673 
10674   ins_cost(125);
10675   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10676   ins_encode %{
10677     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // same movdbl call as MoveL2D_stack_reg; only predicate and format text differ
10678   %}
10679   ins_pipe(pipe_slow);
10680 %}
10681 
10682 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10683   predicate(UseXmmLoadAndClearUpper);  // counterpart of MoveL2D_stack_reg_partial for when the flag is on
10684   match(Set dst (MoveL2D src));  // long stack slot -> double register
10685   effect(DEF dst, USE src);
10686 
10687   ins_cost(125);
10688   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10689   ins_encode %{
10690     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // the slot is addressed as rsp + displacement
10691   %}
10692   ins_pipe(pipe_slow);
10693 %}
10694 
10695 
10696 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10697   match(Set dst (MoveF2I src));  // float register -> int stack slot (plain store, no conversion instruction)
10698   effect(DEF dst, USE src);
10699 
10700   ins_cost(95); // XXX
10701   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10702   ins_encode %{
10703     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);  // the slot is addressed as rsp + displacement
10704   %}
10705   ins_pipe(pipe_slow);
10706 %}
10707 
10708 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10709   match(Set dst (MoveI2F src));  // int register -> float stack slot (plain 32-bit store, no conversion instruction)
10710   effect(DEF dst, USE src);
10711 
10712   ins_cost(100);
10713   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10714   ins_encode %{
10715     __ movl(Address(rsp, $dst$$disp), $src$$Register);  // the slot is addressed as rsp + displacement
10716   %}
10717   ins_pipe( ialu_mem_reg );
10718 %}
10719 
10720 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10721   match(Set dst (MoveD2L src));  // double register -> long stack slot (plain store, no conversion instruction)
10722   effect(DEF dst, USE src);
10723 
10724   ins_cost(95); // XXX
10725   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10726   ins_encode %{
10727     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);  // the slot is addressed as rsp + displacement
10728   %}
10729   ins_pipe(pipe_slow);
10730 %}
10731 
10732 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10733   match(Set dst (MoveL2D src));  // long register -> double stack slot (plain 64-bit store, no conversion instruction)
10734   effect(DEF dst, USE src);
10735 
10736   ins_cost(100);
10737   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10738   ins_encode %{
10739     __ movq(Address(rsp, $dst$$disp), $src$$Register);  // the slot is addressed as rsp + displacement
10740   %}
10741   ins_pipe(ialu_mem_reg);
10742 %}
10743 
10744 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10745   match(Set dst (MoveF2I src));  // float register -> int register, directly, without going through a stack slot
10746   effect(DEF dst, USE src);
10747   ins_cost(85);  // lower than the cost of either stack-based MoveF2I form above
10748   format %{ "movd    $dst,$src\t# MoveF2I" %}
10749   ins_encode %{
10750     __ movdl($dst$$Register, $src$$XMMRegister);
10751   %}
10752   ins_pipe( pipe_slow );
10753 %}
10754 
10755 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10756   match(Set dst (MoveD2L src));  // double register -> long register, directly, without going through a stack slot
10757   effect(DEF dst, USE src);
10758   ins_cost(85);  // lower than the cost of either stack-based MoveD2L form above
10759   format %{ "movd    $dst,$src\t# MoveD2L" %}
10760   ins_encode %{
10761     __ movdq($dst$$Register, $src$$XMMRegister);  // the format text prints "movd"; the assembler routine used is movdq
10762   %}
10763   ins_pipe( pipe_slow );
10764 %}
10765 
10766 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10767   match(Set dst (MoveI2F src));  // int register -> float register, directly, without going through a stack slot
10768   effect(DEF dst, USE src);
10769   ins_cost(100);
10770   format %{ "movd    $dst,$src\t# MoveI2F" %}
10771   ins_encode %{
10772     __ movdl($dst$$XMMRegister, $src$$Register);
10773   %}
10774   ins_pipe( pipe_slow );
10775 %}
10776 
10777 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10778   match(Set dst (MoveL2D src));  // long register -> double register, directly, without going through a stack slot
10779   effect(DEF dst, USE src);
10780   ins_cost(100);
10781   format %{ "movd    $dst,$src\t# MoveL2D" %}
10782   ins_encode %{
10783      __ movdq($dst$$XMMRegister, $src$$Register);  // the format text prints "movd"; the assembler routine used is movdq
10784   %}
10785   ins_pipe( pipe_slow );
10786 %}
10787 
10788 
10789 // =======================================================================
10790 // fast clearing of an array
10791 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10792                   rFlagsReg cr)
10793 %{
10794   predicate(!UseFastStosb);  // rep_fast_stosb matches the same node when UseFastStosb is set
10795   match(Set dummy (ClearArray cnt base));
10796   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);  // cnt and base are consumed and clobbered; zero (rax) is scratch
10797 
10798   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10799             "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
10800   ins_encode %{ 
10801     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);  // the emitted sequence comes from clear_mem; the format text above is descriptive
10802   %}
10803   ins_pipe(pipe_slow);
10804 %}
10805 
10806 instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10807                         rFlagsReg cr)
10808 %{
10809   predicate(UseFastStosb);  // rep_stos matches the same node when UseFastStosb is not set
10810   match(Set dummy (ClearArray cnt base));
10811   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);  // cnt and base are consumed and clobbered; zero (rax) is scratch
10812   format %{ "xorq    rax, rax\t# ClearArray:\n\t"
10813             "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10814             "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
10815   ins_encode %{ 
10816     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);  // identical call to rep_stos; only predicate and format text differ here
10817   %}
10818   ins_pipe( pipe_slow );
10819 %}
10820 
10821 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10822                         rax_RegI result, regD tmp1, rFlagsReg cr)
10823 %{
10824   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));  // all operands are pinned to fixed registers by their operand classes
10825   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);  // every input is clobbered; tmp1 is XMM scratch
10826 
10827   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10828   ins_encode %{
10829     __ string_compare($str1$$Register, $str2$$Register,  // note the argument order: both strings first, then both counts
10830                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10831                       $tmp1$$XMMRegister);
10832   %}
10833   ins_pipe( pipe_slow );
10834 %}
10835 
10836 // fast search of substring with known size.
// Matches StrIndexOf when the substring length is an immediate (immI
// int_cnt2); only available under UseSSE42Intrinsics.
// Operand classes pin str1=rdi, cnt1=rdx, str2=rsi, result=rbx, cnt2=rax and
// tmp=rcx.  cnt2 is not a match input here: it is only KILLed, i.e. handed to
// the helper as a scratch register.  vec is an XMM TEMP.
// The encoding reads the constant and dispatches on it: string_indexofC8()
// for constants >= 8, string_indexof() otherwise; both receive the constant.
10837 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
10838                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
10839 %{
10840   predicate(UseSSE42Intrinsics);
10841   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
10842   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
10843 
10844   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
10845   ins_encode %{
10846     int icnt2 = (int)$int_cnt2$$constant;
10847     if (icnt2 >= 8) {
10848       // IndexOf for constant substrings with size >= 8 elements
10849       // which don't need to be loaded through stack.
10850       __ string_indexofC8($str1$$Register, $str2$$Register,
10851                           $cnt1$$Register, $cnt2$$Register,
10852                           icnt2, $result$$Register,
10853                           $vec$$XMMRegister, $tmp$$Register);
10854     } else {
10855       // Small strings are loaded through stack if they cross page boundary.
10856       __ string_indexof($str1$$Register, $str2$$Register,
10857                         $cnt1$$Register, $cnt2$$Register,
10858                         icnt2, $result$$Register,
10859                         $vec$$XMMRegister, $tmp$$Register);
10860     }
10861   %}
10862   ins_pipe( pipe_slow );
10863 %}
10864 
// Matches StrIndexOf when the substring length is in a register (cnt2=rax);
// only available under UseSSE42Intrinsics.
// Operand classes pin str1=rdi, cnt1=rdx, str2=rsi, result=rbx, tmp=rcx.
// All four inputs are USE_KILL; vec is an XMM TEMP; tmp and flags are KILLed.
// The encoding calls string_indexof() with (-1) in the constant-count slot.
// NOTE(review): presumably -1 tells the helper the count is not a
// compile-time constant -- confirm against the helper's definition.
10865 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
10866                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
10867 %{
10868   predicate(UseSSE42Intrinsics);
10869   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
10870   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
10871 
10872   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
10873   ins_encode %{
10874     __ string_indexof($str1$$Register, $str2$$Register,
10875                       $cnt1$$Register, $cnt2$$Register,
10876                       (-1), $result$$Register,
10877                       $vec$$XMMRegister, $tmp$$Register);
10878   %}
10879   ins_pipe( pipe_slow );
10880 %}
10881 
10882 // fast string equals
// Matches StrEquals on two strings and a shared count.
// Operand classes pin str1=rdi, str2=rsi, cnt=rcx, result=rax, tmp3=rbx.
// str1, str2 and cnt are USE_KILL; tmp1/tmp2 are XMM TEMPs; tmp3 and the
// flags are KILLed.
// The encoding calls char_arrays_equals() with `false` as its first argument
// (array_equals passes `true` to the same helper).
// NOTE(review): presumably that flag selects string vs. array mode -- confirm.
10883 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
10884                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
10885 %{
10886   match(Set result (StrEquals (Binary str1 str2) cnt));
10887   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
10888 
10889   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
10890   ins_encode %{
10891     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
10892                           $cnt$$Register, $result$$Register, $tmp3$$Register,
10893                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10894   %}
10895   ins_pipe( pipe_slow );
10896 %}
10897 
10898 // fast array equals
// Matches AryEq on two array pointers and produces an int result in rax.
// Operand classes pin ary1=rdi, ary2=rsi, tmp3=rcx, tmp4=rbx.
// ary1/ary2 are USE_KILL; tmp1/tmp2 are XMM TEMPs; tmp3, tmp4 and the flags
// are KILLed.
// The encoding calls char_arrays_equals() with `true` as its first argument
// and passes tmp3 (rcx) in the position where string_equals passes its count.
// NOTE(review): presumably the helper loads the length itself in this mode --
// confirm against the helper's definition.
10899 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
10900                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
10901 %{
10902   match(Set result (AryEq ary1 ary2));
10903   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
10904   //ins_cost(300);
10905 
10906   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
10907   ins_encode %{
10908     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
10909                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
10910                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10911   %}
10912   ins_pipe( pipe_slow );
10913 %}
10914 
10915 // encode char[] to byte[] in ISO_8859_1
// Matches EncodeISOArray (src, dst, len) and produces an int result in rax.
// Operand classes pin src=rsi, dst=rdi, len=rdx, tmp5=rcx.
// src, dst and len are USE_KILL; tmp1..tmp4 are XMM TEMPs; tmp5 and the flags
// are KILLed.  The encoding delegates to the assembler helper
// encode_iso_array() with all nine operands.
10916 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
10917                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
10918                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
10919   match(Set result (EncodeISOArray src (Binary dst len)));
10920   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
10921 
10922   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
10923   ins_encode %{
10924     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
10925                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
10926                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
10927   %}
10928   ins_pipe( pipe_slow );
10929 %}
10930 
10931 
10932 //----------Control Flow Instructions------------------------------------------
10933 // Signed compare Instructions
10934 
10935 // XXX more variants!!
// Signed 32-bit compare of two int registers (CmpI op1 op2) into rFlagsReg.
// Encoded as opcode 0x3B /r with a REX prefix computed from both registers;
// the format prints it as "cmpl".  This rule is also instantiated by the
// expand blocks of minI_rReg and maxI_rReg.
10936 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10937 %{
10938   match(Set cr (CmpI op1 op2));
10939   effect(DEF cr, USE op1, USE op2);
10940 
10941   format %{ "cmpl    $op1, $op2" %}
10942   opcode(0x3B);  /* Opcode 3B /r */
10943   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10944   ins_pipe(ialu_cr_reg_reg);
10945 %}
10946 
// Signed 32-bit compare of an int register against an int immediate
// (CmpI op1 op2) into rFlagsReg.  Uses opcode 0x81 /7; the immediate is
// emitted through Con8or32, i.e. the encoding class chooses between an 8-bit
// and a 32-bit constant form.
10947 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10948 %{
10949   match(Set cr (CmpI op1 op2));
10950 
10951   format %{ "cmpl    $op1, $op2" %}
10952   opcode(0x81, 0x07); /* Opcode 81 /7 */
10953   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10954   ins_pipe(ialu_cr_reg_imm);
10955 %}
10956 
// Signed 32-bit compare of an int register against an int loaded from memory
// (CmpI op1 (LoadI op2)), folding the load into the compare.
// Opcode 0x3B /r with a register/memory REX prefix; cost is set to 500
// (marked XXX in the source, i.e. the value is a placeholder).
10957 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10958 %{
10959   match(Set cr (CmpI op1 (LoadI op2)));
10960 
10961   ins_cost(500); // XXX
10962   format %{ "cmpl    $op1, $op2" %}
10963   opcode(0x3B); /* Opcode 3B /r */
10964   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10965   ins_pipe(ialu_cr_reg_mem);
10966 %}
10967 
// Compare of an int register against the constant zero (CmpI src zero).
// Instead of a compare with an immediate, the register is tested against
// itself: opcode 0x85 with src used as both operands ("testl src, src").
// The zero operand only participates in matching; it is not encoded.
10968 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10969 %{
10970   match(Set cr (CmpI src zero));
10971 
10972   format %{ "testl   $src, $src" %}
10973   opcode(0x85);
10974   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10975   ins_pipe(ialu_cr_reg_imm);
10976 %}
10977 
// Folds (AndI src con) compared against zero into a single test of the
// register with the immediate mask; no AND result is materialized.
// Opcode 0xF7 /0 with the mask always emitted as a 32-bit constant (Con32).
10978 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10979 %{
10980   match(Set cr (CmpI (AndI src con) zero));
10981 
10982   format %{ "testl   $src, $con" %}
10983   opcode(0xF7, 0x00);
10984   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10985   ins_pipe(ialu_cr_reg_imm);
10986 %}
10987 
// Folds (AndI src (LoadI mem)) compared against zero into a single test of
// the register with the memory operand.  Opcode 0x85 in register/memory form.
10988 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10989 %{
10990   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10991 
10992   format %{ "testl   $src, $mem" %}
10993   opcode(0x85);
10994   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10995   ins_pipe(ialu_cr_reg_mem);
10996 %}
10997 
10998 // Unsigned compare Instructions; really, same as signed except they
10999 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compare of two int registers (CmpU op1 op2).
// The encoding is identical to compI_rReg (opcode 0x3B /r); the only
// difference is that the result is typed rFlagsRegU rather than rFlagsReg.
11000 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11001 %{
11002   match(Set cr (CmpU op1 op2));
11003 
11004   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11005   opcode(0x3B); /* Opcode 3B /r */
11006   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11007   ins_pipe(ialu_cr_reg_reg);
11008 %}
11009 
// Unsigned 32-bit compare of an int register against an int immediate
// (CmpU op1 op2) into rFlagsRegU.  Opcode 0x81 /7 with the immediate emitted
// through Con8or32, the same encoding as compI_rReg_imm.
11010 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11011 %{
11012   match(Set cr (CmpU op1 op2));
11013 
11014   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11015   opcode(0x81,0x07); /* Opcode 81 /7 */
11016   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11017   ins_pipe(ialu_cr_reg_imm);
11018 %}
11019 
// Unsigned 32-bit compare of an int register against an int loaded from
// memory (CmpU op1 (LoadI op2)) into rFlagsRegU, folding the load.
// Opcode 0x3B /r in register/memory form; cost 500 is marked XXX
// (placeholder) in the source.
11020 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11021 %{
11022   match(Set cr (CmpU op1 (LoadI op2)));
11023 
11024   ins_cost(500); // XXX
11025   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11026   opcode(0x3B); /* Opcode 3B /r */
11027   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11028   ins_pipe(ialu_cr_reg_mem);
11029 %}
11030 
11031 // // // Cisc-spilled version of cmpU_rReg
11032 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11033 // //%{
11034 // //  match(Set cr (CmpU (LoadI op1) op2));
11035 // //
11036 // //  format %{ "CMPu   $op1,$op2" %}
11037 // //  ins_cost(500);
11038 // //  opcode(0x39);  /* Opcode 39 /r */
11039 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11040 // //%}
11041 
// Unsigned compare of an int register against zero (CmpU src zero) into
// rFlagsRegU.  Encoded as a test of the register with itself (opcode 0x85,
// src used for both operands); the zero operand is matched but not encoded.
11042 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11043 %{
11044   match(Set cr (CmpU src zero));
11045 
11046   format %{ "testl  $src, $src\t# unsigned" %}
11047   opcode(0x85);
11048   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11049   ins_pipe(ialu_cr_reg_imm);
11050 %}
11051 
// Pointer compare of two pointer registers (CmpP op1 op2).  The result is
// typed rFlagsRegU, i.e. pointer compares are treated as unsigned.
// Opcode 0x3B /r with the wide REX prefix variant (format prints "cmpq").
11052 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11053 %{
11054   match(Set cr (CmpP op1 op2));
11055 
11056   format %{ "cmpq    $op1, $op2\t# ptr" %}
11057   opcode(0x3B); /* Opcode 3B /r */
11058   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11059   ins_pipe(ialu_cr_reg_reg);
11060 %}
11061 
// Pointer compare of a pointer register against a pointer loaded from memory
// (CmpP op1 (LoadP op2)) into rFlagsRegU, folding the load.
// Opcode 0x3B /r with the wide register/memory REX prefix; cost 500 is marked
// XXX (placeholder).  compP_mem_rReg matches the same tree with a predicate
// and no explicit cost.
11062 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11063 %{
11064   match(Set cr (CmpP op1 (LoadP op2)));
11065 
11066   ins_cost(500); // XXX
11067   format %{ "cmpq    $op1, $op2\t# ptr" %}
11068   opcode(0x3B); /* Opcode 3B /r */
11069   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11070   ins_pipe(ialu_cr_reg_mem);
11071 %}
11072 
11073 // // // Cisc-spilled version of cmpP_rReg
11074 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11075 // //%{
11076 // //  match(Set cr (CmpP (LoadP op1) op2));
11077 // //
11078 // //  format %{ "CMPu   $op1,$op2" %}
11079 // //  ins_cost(500);
11080 // //  opcode(0x39);  /* Opcode 39 /r */
11081 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11082 // //%}
11083 
11084 // XXX this is generalized by compP_rReg_mem???
11085 // Compare raw pointer (used in out-of-heap check).
11086 // Only works because non-oop pointers must be raw pointers
11087 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but restricted by a
// predicate: the type of the load's address input (n->in(2)->in(2)) must
// carry no relocation (relocInfo::none).  No ins_cost is given, so the
// default cost applies rather than compP_rReg_mem's 500.
11088 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11089 %{
11090   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
11091   match(Set cr (CmpP op1 (LoadP op2)));
11092 
11093   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11094   opcode(0x3B); /* Opcode 3B /r */
11095   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11096   ins_pipe(ialu_cr_reg_mem);
11097 %}
11098 
11099 // This will generate a signed flags result. This should be OK since
11100 // any compare to a zero should be eq/neq.
// Null check of a pointer register (CmpP src zero).  Encoded as a wide test
// of the register with itself (opcode 0x85, REX wide prefix).  Unlike the
// other CmpP rules, the result is typed rFlagsReg (signed) rather than
// rFlagsRegU.
11101 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11102 %{
11103   match(Set cr (CmpP src zero));
11104 
11105   format %{ "testq   $src, $src\t# ptr" %}
11106   opcode(0x85);
11107   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11108   ins_pipe(ialu_cr_reg_imm);
11109 %}
11110 
11111 // This will generate a signed flags result. This should be OK since
11112 // any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory (CmpP (LoadP op) zero), used
// when compressed oops are off or the narrow-oop base is non-NULL.
// Encoded as opcode 0xF7 /0 on the memory operand with a wide REX prefix and
// a 32-bit constant 0xFFFFFFFF.
// NOTE(review): the format string shows a 64-bit all-ones mask while the
// encoding supplies a 32-bit constant -- presumably sign-extended by the
// 64-bit instruction form; confirm.
11113 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11114 %{
11115   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11116   match(Set cr (CmpP (LoadP op) zero));
11117 
11118   ins_cost(500); // XXX
11119   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11120   opcode(0xF7); /* Opcode F7 /0 */
11121   ins_encode(REX_mem_wide(op),
11122              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11123   ins_pipe(ialu_cr_reg_imm);
11124 %}
11125 
// Null check of a pointer loaded from memory (CmpP (LoadP mem) zero) for the
// configuration where compressed oops are on and both the narrow-oop base and
// the narrow-klass base are NULL.  The encoding compares r12 against the
// memory operand with cmpq.
// NOTE(review): per the format string this relies on R12 (heap base) holding
// zero under that predicate -- confirm where r12 is initialised.
11126 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11127 %{
11128   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
11129   match(Set cr (CmpP (LoadP mem) zero));
11130 
11131   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11132   ins_encode %{
11133     __ cmpq(r12, $mem$$Address);
11134   %}
11135   ins_pipe(ialu_cr_reg_mem);
11136 %}
11137 
// Compare of two compressed-pointer registers (CmpN op1 op2) into
// rFlagsRegU.  Emitted as a 32-bit cmpl of the two registers.
11138 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11139 %{
11140   match(Set cr (CmpN op1 op2));
11141 
11142   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11143   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11144   ins_pipe(ialu_cr_reg_reg);
11145 %}
11146 
// Compare of a compressed-pointer register against a compressed pointer
// loaded from memory (CmpN src (LoadN mem)) into rFlagsRegU, folding the
// load into a 32-bit cmpl with a memory operand.
11147 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11148 %{
11149   match(Set cr (CmpN src (LoadN mem)));
11150 
11151   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11152   ins_encode %{
11153     __ cmpl($src$$Register, $mem$$Address);
11154   %}
11155   ins_pipe(ialu_cr_reg_mem);
11156 %}
11157 
// Compare of a compressed-pointer register against a narrow-oop immediate
// (CmpN op1 op2) into rFlagsRegU.  The constant is cast to jobject and handed
// to the assembler helper cmp_narrow_oop().
11158 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11159   match(Set cr (CmpN op1 op2));
11160 
11161   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11162   ins_encode %{
11163     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11164   %}
11165   ins_pipe(ialu_cr_reg_imm);
11166 %}
11167 
// Compare of a narrow-oop immediate against a compressed pointer loaded from
// memory (CmpN src (LoadN mem)) into rFlagsRegU.  Note the operand order: the
// match tree has the immediate first, while the emitted helper call
// cmp_narrow_oop(mem, constant) -- and the format -- put the memory operand
// first.
11168 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11169 %{
11170   match(Set cr (CmpN src (LoadN mem)));
11171 
11172   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11173   ins_encode %{
11174     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11175   %}
11176   ins_pipe(ialu_cr_reg_mem);
11177 %}
11178 
// Compare of a compressed-pointer register against a narrow-klass immediate
// (CmpN op1 op2, with op2 of class immNKlass) into rFlagsRegU.  The constant
// is cast to Klass* and handed to the assembler helper cmp_narrow_klass().
11179 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
11180   match(Set cr (CmpN op1 op2));
11181 
11182   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
11183   ins_encode %{
11184     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
11185   %}
11186   ins_pipe(ialu_cr_reg_imm);
11187 %}
11188 
// Compare of a narrow-klass immediate against a compressed klass pointer
// loaded from memory (CmpN src (LoadNKlass mem)) into rFlagsRegU.  As in
// compN_mem_imm, the match tree lists the immediate first while the emitted
// cmp_narrow_klass(mem, constant) call puts the memory operand first.
11189 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
11190 %{
11191   match(Set cr (CmpN src (LoadNKlass mem)));
11192 
11193   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
11194   ins_encode %{
11195     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
11196   %}
11197   ins_pipe(ialu_cr_reg_mem);
11198 %}
11199 
// Null check of a compressed-pointer register (CmpN src zero).  Emitted as a
// 32-bit testl of the register with itself; the result is typed rFlagsReg.
11200 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11201   match(Set cr (CmpN src zero));
11202 
11203   format %{ "testl   $src, $src\t# compressed ptr" %}
11204   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11205   ins_pipe(ialu_cr_reg_imm);
11206 %}
11207 
// Null check of a compressed pointer loaded from memory
// (CmpN (LoadN mem) zero), used when the narrow-oop base is non-NULL.
// Cost 500 is marked XXX (placeholder).
// NOTE(review): the format string advertises "testl $mem, 0xffffffff" but the
// encoding emits cmpl against (int)0xFFFFFFFF.  A compare with -1 sets the
// zero flag when the value equals -1, not when it equals 0, so the two are
// not equivalent -- verify whether this rule is ever selected and whether
// testl was intended.
11208 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11209 %{
11210   predicate(Universe::narrow_oop_base() != NULL);
11211   match(Set cr (CmpN (LoadN mem) zero));
11212 
11213   ins_cost(500); // XXX
11214   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11215   ins_encode %{
11216     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
11217   %}
11218   ins_pipe(ialu_cr_reg_mem);
11219 %}
11220 
// Null check of a compressed pointer loaded from memory
// (CmpN (LoadN mem) zero) for the configuration where both the narrow-oop
// base and the narrow-klass base are NULL.  The encoding compares r12 against
// the memory operand with a 32-bit cmpl.
// NOTE(review): per the format string this relies on R12 (heap base) holding
// zero under that predicate -- confirm where r12 is initialised.
11221 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11222 %{
11223   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
11224   match(Set cr (CmpN (LoadN mem) zero));
11225 
11226   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11227   ins_encode %{
11228     __ cmpl(r12, $mem$$Address);
11229   %}
11230   ins_pipe(ialu_cr_reg_mem);
11231 %}
11232 
11233 // Yanked all unsigned pointer compare operations.
11234 // Pointer compares are done with CmpP which is already unsigned.
11235 
// Signed 64-bit compare of two long registers (CmpL op1 op2) into rFlagsReg.
// Opcode 0x3B /r with the wide REX prefix variant (format prints "cmpq").
11236 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11237 %{
11238   match(Set cr (CmpL op1 op2));
11239 
11240   format %{ "cmpq    $op1, $op2" %}
11241   opcode(0x3B);  /* Opcode 3B /r */
11242   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11243   ins_pipe(ialu_cr_reg_reg);
11244 %}
11245 
// Signed 64-bit compare of a long register against an immediate of class
// immL32 (CmpL op1 op2) into rFlagsReg.  Opcode 0x81 /7 in its wide form; the
// constant is emitted through Con8or32, which chooses an 8- or 32-bit form.
11246 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11247 %{
11248   match(Set cr (CmpL op1 op2));
11249 
11250   format %{ "cmpq    $op1, $op2" %}
11251   opcode(0x81, 0x07); /* Opcode 81 /7 */
11252   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11253   ins_pipe(ialu_cr_reg_imm);
11254 %}
11255 
// Signed 64-bit compare of a long register against a long loaded from memory
// (CmpL op1 (LoadL op2)) into rFlagsReg, folding the load.  Opcode 0x3B /r
// with the wide register/memory REX prefix.  Unlike the int and pointer
// register/memory compares, no explicit ins_cost is given here.
11256 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11257 %{
11258   match(Set cr (CmpL op1 (LoadL op2)));
11259 
11260   format %{ "cmpq    $op1, $op2" %}
11261   opcode(0x3B); /* Opcode 3B /r */
11262   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11263   ins_pipe(ialu_cr_reg_mem);
11264 %}
11265 
// Compare of a long register against zero (CmpL src zero).  Encoded as a wide
// test of the register with itself (opcode 0x85, src used for both operands);
// the zero operand is matched but not encoded.
11266 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11267 %{
11268   match(Set cr (CmpL src zero));
11269 
11270   format %{ "testq   $src, $src" %}
11271   opcode(0x85);
11272   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11273   ins_pipe(ialu_cr_reg_imm);
11274 %}
11275 
// Folds (AndL src con) compared against zero into a single wide test of the
// register with an immediate mask of class immL32.  Opcode 0xF7 /0 with a
// wide REX prefix; the mask is emitted as a 32-bit constant (Con32).
11276 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11277 %{
11278   match(Set cr (CmpL (AndL src con) zero));
11279 
11280   format %{ "testq   $src, $con\t# long" %}
11281   opcode(0xF7, 0x00);
11282   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11283   ins_pipe(ialu_cr_reg_imm);
11284 %}
11285 
// Folds (AndL src (LoadL mem)) compared against zero into a single wide test
// of the register with the memory operand.  Opcode 0x85 in register/memory
// form with the wide REX prefix.
11286 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11287 %{
11288   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11289 
11290   format %{ "testq   $src, $mem" %}
11291   opcode(0x85);
11292   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11293   ins_pipe(ialu_cr_reg_mem);
11294 %}
11295 
11296 // Manifest a CmpL result in an integer register.  Very painful.
11297 // This is the test to avoid.
// Matches CmpL3, which produces an int value (not flags) from comparing two
// long registers; the flags register is only KILLed as a side effect.
// The actual bytes come from the cmpl3_flag encoding class.  According to the
// format string the sequence is: compare, preload -1 into dst, skip to the
// end if less, otherwise set dst from the not-equal condition and
// zero-extend it -- i.e. dst ends up -1, 0 or 1.
11298 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11299 %{
11300   match(Set dst (CmpL3 src1 src2));
11301   effect(KILL flags);
11302 
11303   ins_cost(275); // XXX
11304   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11305             "movl    $dst, -1\n\t"
11306             "jl,s    done\n\t"
11307             "setne   $dst\n\t"
11308             "movzbl  $dst, $dst\n\t"
11309     "done:" %}
11310   ins_encode(cmpl3_flag(src1, src2, dst));
11311   ins_pipe(pipe_slow);
11312 %}
11313 
11314 //----------Max and Min--------------------------------------------------------
11315 // Min Instructions
11316 
// Helper rule with no match clause: it is only instantiated from the expand
// block of minI_rReg.  Conditionally moves src into dst using opcode
// 0x0F 0x4F (format: "cmovlgt"), reading the flags operand; dst is both read
// and written (USE_DEF).
11317 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11318 %{
11319   effect(USE_DEF dst, USE src, USE cr);
11320 
11321   format %{ "cmovlgt $dst, $src\t# min" %}
11322   opcode(0x0F, 0x4F);
11323   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11324   ins_pipe(pipe_cmov_reg);
11325 %}
11326 
11327 
// Matches MinI with dst as both an input and the result.  There is no
// encoding of its own: the rule expands into compI_rReg(cr, dst, src)
// followed by cmovI_reg_g(dst, src, cr), so src replaces dst when the compare
// found dst greater than src.  A fresh flags operand is declared locally for
// the pair.
11328 instruct minI_rReg(rRegI dst, rRegI src)
11329 %{
11330   match(Set dst (MinI dst src));
11331 
11332   ins_cost(200);
11333   expand %{
11334     rFlagsReg cr;
11335     compI_rReg(cr, dst, src);
11336     cmovI_reg_g(dst, src, cr);
11337   %}
11338 %}
11339 
// Helper rule with no match clause: it is only instantiated from the expand
// block of maxI_rReg.  Conditionally moves src into dst using opcode
// 0x0F 0x4C (format: "cmovllt"), reading the flags operand; dst is both read
// and written (USE_DEF).
11340 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11341 %{
11342   effect(USE_DEF dst, USE src, USE cr);
11343 
11344   format %{ "cmovllt $dst, $src\t# max" %}
11345   opcode(0x0F, 0x4C);
11346   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11347   ins_pipe(pipe_cmov_reg);
11348 %}
11349 
11350 
// Matches MaxI with dst as both an input and the result.  There is no
// encoding of its own: the rule expands into compI_rReg(cr, dst, src)
// followed by cmovI_reg_l(dst, src, cr), so src replaces dst when the compare
// found dst less than src.  A fresh flags operand is declared locally for the
// pair.
11351 instruct maxI_rReg(rRegI dst, rRegI src)
11352 %{
11353   match(Set dst (MaxI dst src));
11354 
11355   ins_cost(200);
11356   expand %{
11357     rFlagsReg cr;
11358     compI_rReg(cr, dst, src);
11359     cmovI_reg_l(dst, src, cr);
11360   %}
11361 %}
11362 
11363 // ============================================================================
11364 // Branch Instructions
11365 
11366 // Jump Direct - Label defines a relative address from JMP+1
// Long-form unconditional jump for Goto.  The declared size is fixed at 5
// bytes, and the encoding passes `false` as jmp()'s second argument so the
// long encoding is always emitted (see the inline comment); jmpDir_short is
// the 2-byte replacement.
11367 instruct jmpDir(label labl)
11368 %{
11369   match(Goto);
11370   effect(USE labl);
11371 
11372   ins_cost(300);
11373   format %{ "jmp     $labl" %}
11374   size(5);
11375   ins_encode %{
11376     Label* L = $labl$$label;
11377     __ jmp(*L, false); // Always long jump
11378   %}
11379   ins_pipe(pipe_jmp);
11380 %}
11381 
11382 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long-form conditional jump for If on signed flags (cmpOp / rFlagsReg).
// The condition code is taken from the cop operand's cmpcode.  Declared size
// is fixed at 6 bytes and jcc() is called with `false` so the long encoding
// is always emitted; jmpCon_short is the 2-byte replacement.
11383 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11384 %{
11385   match(If cop cr);
11386   effect(USE labl);
11387 
11388   ins_cost(300);
11389   format %{ "j$cop     $labl" %}
11390   size(6);
11391   ins_encode %{
11392     Label* L = $labl$$label;
11393     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11394   %}
11395   ins_pipe(pipe_jcc);
11396 %}
11397 
11398 // Jump Direct Conditional at a counted loop end - Label defines a relative address from Jcc+1
// Long-form conditional jump for CountedLoopEnd on signed flags
// (cmpOp / rFlagsReg).  Same encoding as jmpCon: a 6-byte jcc with the
// condition taken from cop's cmpcode; only the matched ideal node differs.
11399 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
11400 %{
11401   match(CountedLoopEnd cop cr);
11402   effect(USE labl);
11403 
11404   ins_cost(300);
11405   format %{ "j$cop     $labl\t# loop end" %}
11406   size(6);
11407   ins_encode %{
11408     Label* L = $labl$$label;
11409     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11410   %}
11411   ins_pipe(pipe_jcc);
11412 %}
11413 
11414 // Jump Direct Conditional at a counted loop end, unsigned flags - Label defines a relative address from Jcc+1
// Long-form conditional jump for CountedLoopEnd on unsigned flags
// (cmpOpU / rFlagsRegU).  6-byte jcc with the condition taken from cop's
// cmpcode, always in the long encoding.
11415 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11416   match(CountedLoopEnd cop cmp);
11417   effect(USE labl);
11418 
11419   ins_cost(300);
11420   format %{ "j$cop,u   $labl\t# loop end" %}
11421   size(6);
11422   ins_encode %{
11423     Label* L = $labl$$label;
11424     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11425   %}
11426   ins_pipe(pipe_jcc);
11427 %}
11428 
// Long-form conditional jump for CountedLoopEnd on the cmpOpUCF /
// rFlagsRegUCF operand pair.  Same 6-byte jcc encoding as jmpLoopEndU, but
// with a lower cost (200 instead of 300).
11429 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11430   match(CountedLoopEnd cop cmp);
11431   effect(USE labl);
11432 
11433   ins_cost(200);
11434   format %{ "j$cop,u   $labl\t# loop end" %}
11435   size(6);
11436   ins_encode %{
11437     Label* L = $labl$$label;
11438     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11439   %}
11440   ins_pipe(pipe_jcc);
11441 %}
11442 
11443 // Jump Direct Conditional - using unsigned comparison
// Long-form conditional jump for If on unsigned flags (cmpOpU / rFlagsRegU).
// 6-byte jcc with the condition taken from cop's cmpcode, always in the long
// encoding; jmpConU_short is the 2-byte replacement.
11444 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11445   match(If cop cmp);
11446   effect(USE labl);
11447 
11448   ins_cost(300);
11449   format %{ "j$cop,u  $labl" %}
11450   size(6);
11451   ins_encode %{
11452     Label* L = $labl$$label;
11453     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11454   %}
11455   ins_pipe(pipe_jcc);
11456 %}
11457 
// Long-form conditional jump for If on the cmpOpUCF / rFlagsRegUCF operand
// pair.  Same 6-byte jcc encoding as jmpConU, but with a lower cost
// (200 instead of 300).
11458 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11459   match(If cop cmp);
11460   effect(USE labl);
11461 
11462   ins_cost(200);
11463   format %{ "j$cop,u  $labl" %}
11464   size(6);
11465   ins_encode %{
11466     Label* L = $labl$$label;
11467     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11468   %}
11469   ins_pipe(pipe_jcc);
11470 %}
11471 
// Long-form conditional jump for If with a cmpOpUCF2 condition, which the
// encoding only accepts as equal or notEqual (anything else hits
// ShouldNotReachHere).  Two jumps are emitted so the parity flag is consulted
// before the equality test:
//   notEqual: jump to the target on parity, then jump to it on notEqual;
//   equal:    short-jump over the branch on parity, then jump on equal.
// No size() is declared because the two shapes differ in length (the equal
// case mixes a short jccb with a long jcc).
// NOTE(review): presumably parity marks an unordered floating-point compare
// -- confirm against the rules that produce rFlagsRegUCF.
11472 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11473   match(If cop cmp);
11474   effect(USE labl);
11475 
11476   ins_cost(200);
11477   format %{ $$template
11478     if ($cop$$cmpcode == Assembler::notEqual) {
11479       $$emit$$"jp,u   $labl\n\t"
11480       $$emit$$"j$cop,u   $labl"
11481     } else {
11482       $$emit$$"jp,u   done\n\t"
11483       $$emit$$"j$cop,u   $labl\n\t"
11484       $$emit$$"done:"
11485     }
11486   %}
11487   ins_encode %{
11488     Label* l = $labl$$label;
11489     if ($cop$$cmpcode == Assembler::notEqual) {
11490       __ jcc(Assembler::parity, *l, false);
11491       __ jcc(Assembler::notEqual, *l, false);
11492     } else if ($cop$$cmpcode == Assembler::equal) {
11493       Label done;
11494       __ jccb(Assembler::parity, done);
11495       __ jcc(Assembler::equal, *l, false);
11496       __ bind(done);
11497     } else {
11498        ShouldNotReachHere();
11499     }
11500   %}
11501   ins_pipe(pipe_jcc);
11502 %}
11503 
11504 // ============================================================================
11505 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11506 // superklass array for an instance of the superklass.  Set a hidden
11507 // internal cache on a hit (cache is checked with exposed code in
11508 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11509 // encoding ALSO sets flags.
11510 
// Value-producing form of PartialSubtypeCheck: the result is delivered in
// rdi.  Operand classes pin sub=rsi and super=rax; rcx and the flags are
// KILLed.  The bytes come from the shared enc_PartialSubtypeCheck encoding
// class; opcode(0x1) is the switch that makes it zero rdi on a hit (see the
// inline comment).  The format string documents the intended sequence: load
// the secondary-supers array, scan it with repne scasq, and on a hit update
// the secondary-super cache and clear the result.
// Cost 1100 keeps this rule slightly more expensive than
// partialSubtypeCheck_vs_Zero (1000).
11511 instruct partialSubtypeCheck(rdi_RegP result,
11512                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11513                              rFlagsReg cr)
11514 %{
11515   match(Set result (PartialSubtypeCheck sub super));
11516   effect(KILL rcx, KILL cr);
11517 
11518   ins_cost(1100);  // slightly larger than the next version
11519   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11520             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11521             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11522             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11523             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11524             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11525             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
11526     "miss:\t" %}
11527 
11528   opcode(0x1); // Force a XOR of RDI
11529   ins_encode(enc_PartialSubtypeCheck());
11530   ins_pipe(pipe_slow);
11531 %}
11532 
// Flags-producing form: matches a PartialSubtypeCheck whose result is
// immediately compared against null (CmpP (PartialSubtypeCheck ...) zero), so
// only the flags are defined.  rdi is still used as scratch and is therefore
// KILLed together with rcx.  Uses the same enc_PartialSubtypeCheck encoding
// class as partialSubtypeCheck but with opcode(0x0), which skips the final
// clearing of rdi.  Cost 1000 makes it preferred over the value-producing
// form (1100) when both match.
11533 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11534                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11535                                      immP0 zero,
11536                                      rdi_RegP result)
11537 %{
11538   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11539   effect(KILL rcx, KILL result);
11540 
11541   ins_cost(1000);
11542   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11543             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
11544             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
11545             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11546             "jne,s   miss\t\t# Missed: flags nz\n\t"
11547             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11548     "miss:\t" %}
11549 
11550   opcode(0x0); // No need to XOR RDI
11551   ins_encode(enc_PartialSubtypeCheck());
11552   ins_pipe(pipe_slow);
11553 %}
11554 
11555 // ============================================================================
11556 // Branch Instructions -- short offset versions
11557 //
11558 // These instructions are used to replace jumps of a long offset (the default
11559 // match) with jumps of a shorter offset.  These instructions are all tagged
11560 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11561 // match rules in general matching.  Instead, the ADLC generates a conversion
11562 // method in the MachNode which can be used to do in-place replacement of the
11563 // long variant with the shorter variant.  The compiler will determine if a
11564 // branch can be taken by the is_short_branch_offset() predicate in the machine
11565 // specific code section of the file.
11566 
11567 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same Goto match, but a 2-byte jmpb.
// Tagged ins_short_branch(1), so it is not used in general matching and only
// substitutes for the long form when the offset fits.
11568 instruct jmpDir_short(label labl) %{
11569   match(Goto);
11570   effect(USE labl);
11571 
11572   ins_cost(300);
11573   format %{ "jmp,s   $labl" %}
11574   size(2);
11575   ins_encode %{
11576     Label* L = $labl$$label;
11577     __ jmpb(*L);
11578   %}
11579   ins_pipe(pipe_jmp);
11580   ins_short_branch(1);
11581 %}
11582 
11583 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same If match on signed flags, but a
// 2-byte jccb with the condition taken from cop's cmpcode.  Tagged
// ins_short_branch(1).
11584 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
11585   match(If cop cr);
11586   effect(USE labl);
11587 
11588   ins_cost(300);
11589   format %{ "j$cop,s   $labl" %}
11590   size(2);
11591   ins_encode %{
11592     Label* L = $labl$$label;
11593     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11594   %}
11595   ins_pipe(pipe_jcc);
11596   ins_short_branch(1);
11597 %}
11598 
11599 // Jump Direct Conditional at a counted loop end (short) - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same CountedLoopEnd match on
// signed flags, but a 2-byte jccb.  Tagged ins_short_branch(1).
11600 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
11601   match(CountedLoopEnd cop cr);
11602   effect(USE labl);
11603 
11604   ins_cost(300);
11605   format %{ "j$cop,s   $labl\t# loop end" %}
11606   size(2);
11607   ins_encode %{
11608     Label* L = $labl$$label;
11609     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11610   %}
11611   ins_pipe(pipe_jcc);
11612   ins_short_branch(1);
11613 %}
11614 
11615 // Jump Direct Conditional at a counted loop end, unsigned flags (short) - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: same CountedLoopEnd match on
// unsigned flags, but a 2-byte jccb.  Tagged ins_short_branch(1).
11616 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11617   match(CountedLoopEnd cop cmp);
11618   effect(USE labl);
11619 
11620   ins_cost(300);
11621   format %{ "j$cop,us  $labl\t# loop end" %}
11622   size(2);
11623   ins_encode %{
11624     Label* L = $labl$$label;
11625     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11626   %}
11627   ins_pipe(pipe_jcc);
11628   ins_short_branch(1);
11629 %}
11630 
// Short-offset replacement for jmpLoopEndUCF: same CountedLoopEnd match on
// the cmpOpUCF / rFlagsRegUCF pair, but a 2-byte jccb.  Tagged
// ins_short_branch(1).  Note the cost here is 300, whereas the long form
// jmpLoopEndUCF uses 200.
11631 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11632   match(CountedLoopEnd cop cmp);
11633   effect(USE labl);
11634 
11635   ins_cost(300);
11636   format %{ "j$cop,us  $labl\t# loop end" %}
11637   size(2);
11638   ins_encode %{
11639     Label* L = $labl$$label;
11640     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11641   %}
11642   ins_pipe(pipe_jcc);
11643   ins_short_branch(1);
11644 %}
11645 
11646 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: same If match on unsigned flags, but
// a 2-byte jccb.  Tagged ins_short_branch(1).
11647 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
11648   match(If cop cmp);
11649   effect(USE labl);
11650 
11651   ins_cost(300);
11652   format %{ "j$cop,us  $labl" %}
11653   size(2);
11654   ins_encode %{
11655     Label* L = $labl$$label;
11656     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11657   %}
11658   ins_pipe(pipe_jcc);
11659   ins_short_branch(1);
11660 %}
11661 
// Short-offset replacement for jmpConUCF: same If match on the cmpOpUCF /
// rFlagsRegUCF pair, but a 2-byte jccb.  Tagged ins_short_branch(1).  Note
// the cost here is 300, whereas the long form jmpConUCF uses 200.
11662 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
11663   match(If cop cmp);
11664   effect(USE labl);
11665 
11666   ins_cost(300);
11667   format %{ "j$cop,us  $labl" %}
11668   size(2);
11669   ins_encode %{
11670     Label* L = $labl$$label;
11671     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11672   %}
11673   ins_pipe(pipe_jcc);
11674   ins_short_branch(1);
11675 %}
11676 
// Short-offset replacement for jmpConUCF2.  The structure mirrors the long
// form (parity jump first, then the equal/notEqual jump; any other condition
// hits ShouldNotReachHere), but every jump is a 2-byte jccb, so both shapes
// are exactly two short jumps and the size can be declared as 4 bytes.
// Tagged ins_short_branch(1).
11677 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
11678   match(If cop cmp);
11679   effect(USE labl);
11680 
11681   ins_cost(300);
11682   format %{ $$template
11683     if ($cop$$cmpcode == Assembler::notEqual) {
11684       $$emit$$"jp,u,s   $labl\n\t"
11685       $$emit$$"j$cop,u,s   $labl"
11686     } else {
11687       $$emit$$"jp,u,s   done\n\t"
11688       $$emit$$"j$cop,u,s  $labl\n\t"
11689       $$emit$$"done:"
11690     }
11691   %}
11692   size(4);
11693   ins_encode %{
11694     Label* l = $labl$$label;
11695     if ($cop$$cmpcode == Assembler::notEqual) {
11696       __ jccb(Assembler::parity, *l);
11697       __ jccb(Assembler::notEqual, *l);
11698     } else if ($cop$$cmpcode == Assembler::equal) {
11699       Label done;
11700       __ jccb(Assembler::parity, done);
11701       __ jccb(Assembler::equal, *l);
11702       __ bind(done);
11703     } else {
11704        ShouldNotReachHere();
11705     }
11706   %}
11707   ins_pipe(pipe_jcc);
11708   ins_short_branch(1);
11709 %}
11710 
11711 // ============================================================================
11712 // inlined locking and unlocking
11713 
11714 instruct cmpFastLock(rFlagsReg cr,
11715                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11716 %{
        // Inlined lock rule: matches FastLock on (object, box) and produces its
        // result in the flags register `cr`. `box` is constrained to the
        // rbx_RegP class and `tmp` to rax_RegI by the operand list above.
11717   match(Set cr (FastLock object box));
        // tmp and scr are temporaries; box is both used and killed.
11718   effect(TEMP tmp, TEMP scr, USE_KILL box);
11719 
11720   ins_cost(300);
11721   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
        // The code sequence comes from the Fast_Lock encoding class, which is
        // defined outside this block.
11722   ins_encode(Fast_Lock(object, box, tmp, scr));
11723   ins_pipe(pipe_slow);
11724 %}
11725 
11726 instruct cmpFastUnlock(rFlagsReg cr,
11727                        rRegP object, rax_RegP box, rRegP tmp)
11728 %{
        // Inlined unlock rule: matches FastUnlock on (object, box) and produces
        // its result in the flags register `cr`. `box` is constrained to the
        // rax_RegP class by the operand list above.
11729   match(Set cr (FastUnlock object box));
        // tmp is a temporary; box is both used and killed.
11730   effect(TEMP tmp, USE_KILL box);
11731 
11732   ins_cost(300);
11733   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
        // The code sequence comes from the Fast_Unlock encoding class, which is
        // defined outside this block.
11734   ins_encode(Fast_Unlock(object, box, tmp));
11735   ins_pipe(pipe_slow);
11736 %}
11737 
11738 
11739 // ============================================================================
11740 // Safepoint Instructions
11741 instruct safePoint_poll(rFlagsReg cr)
11742 %{
        // Safepoint poll used when Assembler::is_polling_page_far() is false;
        // safePoint_poll_far carries the opposite predicate.
11743   predicate(!Assembler::is_polling_page_far());
11744   match(SafePoint);
        // The testl below writes the flags, so cr is declared killed.
11745   effect(KILL cr);
11746 
11747   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11748             "# Safepoint: poll for GC" %}
11749   ins_cost(125);
11750   ins_encode %{
          // Address the polling page through an AddressLiteral tagged with
          // poll_type relocation, and test rax against it.
11751     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
11752     __ testl(rax, addr);
11753   %}
11754   ins_pipe(ialu_reg_mem);
11755 %}
11756 
11757 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11758 %{
        // Safepoint poll used when Assembler::is_polling_page_far() is true;
        // the poll address is supplied in the register operand `poll`.
11759   predicate(Assembler::is_polling_page_far());
11760   match(SafePoint poll);
        // The testl below writes the flags, so cr is declared killed; poll is
        // only read.
11761   effect(KILL cr, USE poll);
11762 
11763   format %{ "testl  rax, [$poll]\t"
11764             "# Safepoint: poll for GC" %}
11765   ins_cost(125);
11766   ins_encode %{
          // Record a poll_type relocation, then test rax against the memory
          // at [poll + 0].
11767     __ relocate(relocInfo::poll_type);
11768     __ testl(rax, Address($poll$$Register, 0));
11769   %}
11770   ins_pipe(ialu_reg_mem);
11771 %}
11772 
11773 // ============================================================================
11774 // Procedure Call/Return Instructions
11775 // Call Java Static Instruction
11776 // Note: If this code changes, the corresponding ret_addr_offset() and
11777 //       compute_padding() functions will have to be adjusted.
11778 instruct CallStaticJavaDirect(method meth) %{
        // Static Java call for CallStaticJava nodes that are NOT method-handle
        // invokes; CallStaticJavaHandle covers the complementary predicate.
11779   match(CallStaticJava);
11780   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
11781   effect(USE meth);
11782 
11783   ins_cost(300);
11784   format %{ "call,static " %}
11785   opcode(0xE8); /* E8 cd */
        // Encoding classes run in the listed order: clear_avx, the call itself,
        // then call_epilog. All three are defined outside this block.
11786   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
11787   ins_pipe(pipe_slow);
        // Declared alignment for this instruction.
11788   ins_alignment(4);
11789 %}
11790 
11791 // Call Java Static Instruction (method handle version)
11792 // Note: If this code changes, the corresponding ret_addr_offset() and
11793 //       compute_padding() functions will have to be adjusted.
11794 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
        // Static Java call for CallStaticJava nodes that ARE method-handle
        // invokes. The extra operand rbp_mh_SP_save is constrained to rbp_RegP.
11795   match(CallStaticJava);
11796   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
11797   effect(USE meth);
11798   // RBP is saved by all callees (for interpreter stack correction).
11799   // We use it here for a similar purpose, in {preserve,restore}_SP.
11800 
11801   ins_cost(300);
11802   format %{ "call,static/MethodHandle " %}
11803   opcode(0xE8); /* E8 cd */
        // Same sequence as the non-handle static call, with the call bracketed
        // by preserve_SP / restore_SP.
11804   ins_encode(clear_avx, preserve_SP,
11805              Java_Static_Call(meth),
11806              restore_SP,
11807              call_epilog);
11808   ins_pipe(pipe_slow);
        // Declared alignment for this instruction.
11809   ins_alignment(4);
11810 %}
11811 
11812 // Call Java Dynamic Instruction
11813 // Note: If this code changes, the corresponding ret_addr_offset() and
11814 //       compute_padding() functions will have to be adjusted.
11815 instruct CallDynamicJavaDirect(method meth)
11816 %{
        // Dynamic Java call: matches CallDynamicJava nodes.
11817   match(CallDynamicJava);
11818   effect(USE meth);
11819 
11820   ins_cost(300);
        // The format shows a movq of Universe::non_oop_word() into rax ahead of
        // the call; the bytes themselves come from Java_Dynamic_Call, which is
        // defined outside this block.
11821   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11822             "call,dynamic " %}
        // Encoding classes run in the listed order.
11823   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
11824   ins_pipe(pipe_slow);
        // Declared alignment for this instruction.
11825   ins_alignment(4);
11826 %}
11827 
11828 // Call Runtime Instruction
11829 instruct CallRuntimeDirect(method meth)
11830 %{
        // Runtime call: matches CallRuntime nodes.
11831   match(CallRuntime);
11832   effect(USE meth);
11833 
11834   ins_cost(300);
11835   format %{ "call,runtime " %}
        // clear_avx runs first, then the Java_To_Runtime encoding class; no
        // call_epilog is listed here, unlike the Java call rules.
11836   ins_encode(clear_avx, Java_To_Runtime(meth));
11837   ins_pipe(pipe_slow);
11838 %}
11839 
11840 // Call runtime without safepoint
11841 instruct CallLeafDirect(method meth)
11842 %{
        // Leaf runtime call: matches CallLeaf nodes.
11843   match(CallLeaf);
11844   effect(USE meth);
11845 
11846   ins_cost(300);
11847   format %{ "call_leaf,runtime " %}
        // Same encoding list as CallRuntimeDirect: clear_avx followed by
        // Java_To_Runtime.
11848   ins_encode(clear_avx, Java_To_Runtime(meth));
11849   ins_pipe(pipe_slow);
11850 %}
11851 
11852 // Call runtime without safepoint
11853 instruct CallLeafNoFPDirect(method meth)
11854 %{
        // Leaf runtime call: matches CallLeafNoFP nodes.
11855   match(CallLeafNoFP);
11856   effect(USE meth);
11857 
11858   ins_cost(300);
11859   format %{ "call_leaf_nofp,runtime " %}
        // NOTE(review): unlike CallRuntimeDirect and CallLeafDirect, clear_avx
        // is not part of this encoding list -- presumably intentional for the
        // no-FP variant; confirm before changing.
11860   ins_encode(Java_To_Runtime(meth));
11861   ins_pipe(pipe_slow);
11862 %}
11863 
11864 // Return Instruction
11865 // Remove the return address & jump to it.
11866 // Notice: We always emit a nop after a ret to make sure there is room
11867 // for safepoint patching
11868 instruct Ret()
11869 %{
        // Return rule: matches Return nodes and takes no operands.
11870   match(Return);
11871 
11872   format %{ "ret" %}
        // Primary opcode 0xC3, emitted via the OpcP encoding class.
        // NOTE(review): the comment preceding this instruct mentions a nop
        // after the ret, but only OpcP is listed here; whether a nop is emitted
        // elsewhere cannot be determined from this block.
11873   opcode(0xC3);
11874   ins_encode(OpcP);
11875   ins_pipe(pipe_jmp);
11876 %}
11877 
11878 // Tail Call; Jump from runtime stub to Java code.
11879 // Also known as an 'interprocedural jump'.
11880 // Target of jump will eventually return to caller.
11881 // TailJump below removes the return address.
11882 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11883 %{
        // Indirect jump for TailCall nodes. jump_target is constrained to the
        // no_rbp_RegP class and method_oop to rbx_RegP; method_oop does not
        // appear in the encoding list below.
11884   match(TailCall jump_target method_oop);
11885 
11886   ins_cost(300);
11887   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11888   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Encoding order: REX_reg for the target register, the primary opcode
        // (OpcP), then reg_opc for the target register.
11889   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11890   ins_pipe(pipe_jmp);
11891 %}
11892 
11893 // Tail Jump; remove the return address; jump to target.
11894 // TailCall above leaves the return address around.
11895 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11896 %{
        // Indirect jump for TailJump nodes. jump_target is constrained to the
        // no_rbp_RegP class and ex_oop to rax_RegP; ex_oop does not appear in
        // the encoding list below.
11897   match(TailJump jump_target ex_oop);
11898 
11899   ins_cost(300);
11900   format %{ "popq    rdx\t# pop return address\n\t"
11901             "jmp     $jump_target" %}
11902   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Byte 0x5a is emitted first (the popq rdx shown in the format), then
        // the same REX_reg / OpcP / reg_opc sequence used by TailCalljmpInd.
11903   ins_encode(Opcode(0x5a), // popq rdx
11904              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11905   ins_pipe(pipe_jmp);
11906 %}
11907 
11908 // Create exception oop: created by stack-crawling runtime code.
11909 // Created exception is now available to this handler, and is setup
11910 // just prior to jumping to this handler.  No code emitted.
11911 instruct CreateException(rax_RegP ex_oop)
11912 %{
        // Matches CreateEx and defines ex_oop, which is constrained to the
        // rax_RegP class.
11913   match(Set ex_oop (CreateEx));
11914 
        // Declared size 0 with an empty encoding: this rule emits no bytes.
11915   size(0);
11916   // use the following format syntax
11917   format %{ "# exception oop is in rax; no code emitted" %}
11918   ins_encode();
11919   ins_pipe(empty);
11920 %}
11921 
11922 // Rethrow exception:
11923 // The exception oop will come in the first argument position.
11924 // Then JUMP (not call) to the rethrow stub code.
11925 instruct RethrowException()
11926 %{
        // Matches Rethrow nodes and takes no operands.
11927   match(Rethrow);
11928 
11929   // use the following format syntax
11930   format %{ "jmp     rethrow_stub" %}
        // The jump itself is produced by the enc_rethrow encoding class, which
        // is defined outside this block.
11931   ins_encode(enc_rethrow);
11932   ins_pipe(pipe_jmp);
11933 %}
11934 
11935 
11936 // ============================================================================
11937 // This name is KNOWN by the ADLC and cannot be changed.
11938 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
11939 // for this guy.
11940 instruct tlsLoadP(r15_RegP dst) %{
        // Matches ThreadLocal into dst, which is constrained to the r15_RegP
        // class.
11941   match(Set dst (ThreadLocal));
11942   effect(DEF dst);
11943 
        // Declared size 0 with an empty encoding: this rule emits no bytes.
11944   size(0);
11945   format %{ "# TLS is in R15" %}
11946   ins_encode( /*empty encoding*/ );
11947   ins_pipe(ialu_reg_reg);
11948 %}
11949 
11950 
11951 //----------PEEPHOLE RULES-----------------------------------------------------
11952 // These must follow all instruction definitions as they use the names
11953 // defined in the instruction definitions.
11954 //
11955 // peepmatch ( root_instr_name [preceding_instruction]* );
11956 //
11957 // peepconstraint %{
11958 // (instruction_number.operand_name relational_op instruction_number.operand_name
11959 //  [, ...] );
11960 // // instruction numbers are zero-based using left to right order in peepmatch
11961 //
11962 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11963 // // provide an instruction_number.operand_name for each operand that appears
11964 // // in the replacement instruction's match rule
11965 //
11966 // ---------VM FLAGS---------------------------------------------------------
11967 //
11968 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11969 //
11970 // Each peephole rule is given an identifying number starting with zero and
11971 // increasing by one in the order seen by the parser.  An individual peephole
11972 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11973 // on the command-line.
11974 //
11975 // ---------CURRENT LIMITATIONS----------------------------------------------
11976 //
11977 // Only match adjacent instructions in same basic block
11978 // Only equality constraints
11979 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11980 // Only one replacement instruction
11981 //
11982 // ---------EXAMPLE----------------------------------------------------------
11983 //
11984 // // pertinent parts of existing instructions in architecture description
11985 // instruct movI(rRegI dst, rRegI src)
11986 // %{
11987 //   match(Set dst (CopyI src));
11988 // %}
11989 //
11990 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11991 // %{
11992 //   match(Set dst (AddI dst src));
11993 //   effect(KILL cr);
11994 // %}
11995 //
11996 // // Change (inc mov) to lea
11997 // peephole %{
11998 //   // increment preceded by register-register move
11999 //   peepmatch ( incI_rReg movI );
12000 //   // require that the destination register of the increment
12001 //   // match the destination register of the move
12002 //   peepconstraint ( 0.dst == 1.dst );
12003 //   // construct a replacement instruction that sets
12004 //   // the destination to ( move's source register + one )
12005 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12006 // %}
12007 //
12008 
12009 // Implementation no longer uses movX instructions since
12010 // machine-independent system no longer uses CopyX nodes.
12011 //
12012 // peephole
12013 // %{
12014 //   peepmatch (incI_rReg movI);
12015 //   peepconstraint (0.dst == 1.dst);
12016 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12017 // %}
12018 
12019 // peephole
12020 // %{
12021 //   peepmatch (decI_rReg movI);
12022 //   peepconstraint (0.dst == 1.dst);
12023 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12024 // %}
12025 
12026 // peephole
12027 // %{
12028 //   peepmatch (addI_rReg_imm movI);
12029 //   peepconstraint (0.dst == 1.dst);
12030 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12031 // %}
12032 
12033 // peephole
12034 // %{
12035 //   peepmatch (incL_rReg movL);
12036 //   peepconstraint (0.dst == 1.dst);
12037 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12038 // %}
12039 
12040 // peephole
12041 // %{
12042 //   peepmatch (decL_rReg movL);
12043 //   peepconstraint (0.dst == 1.dst);
12044 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12045 // %}
12046 
12047 // peephole
12048 // %{
12049 //   peepmatch (addL_rReg_imm movL);
12050 //   peepconstraint (0.dst == 1.dst);
12051 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12052 // %}
12053 
12054 // peephole
12055 // %{
12056 //   peepmatch (addP_rReg_imm movP);
12057 //   peepconstraint (0.dst == 1.dst);
12058 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12059 // %}
12060 
12061 // // Change load of spilled value to only a spill
12062 // instruct storeI(memory mem, rRegI src)
12063 // %{
12064 //   match(Set mem (StoreI mem src));
12065 // %}
12066 //
12067 // instruct loadI(rRegI dst, memory mem)
12068 // %{
12069 //   match(Set dst (LoadI mem));
12070 // %}
12071 //
12072 
12073 peephole
12074 %{
        // Instruction 0 is the root (loadI); instruction 1 is the storeI listed
        // after it in peepmatch, i.e. the preceding instruction.
12075   peepmatch (loadI storeI);
        // Require that the load's destination is the store's source register
        // and that both use the same memory operand.
12076   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeI built from the store's operands.
12077   peepreplace (storeI(1.mem 1.mem 1.src));
12078 %}
12079 
12080 peephole
12081 %{
        // Instruction 0 is the root (loadL); instruction 1 is the storeL listed
        // after it in peepmatch, i.e. the preceding instruction.
12082   peepmatch (loadL storeL);
        // Require that the load's destination is the store's source register
        // and that both use the same memory operand.
12083   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeL built from the store's operands.
12084   peepreplace (storeL(1.mem 1.mem 1.src));
12085 %}
12086 
12087 //----------SMARTSPILL RULES---------------------------------------------------
12088 // These must follow all instruction definitions as they use the names
12089 // defined in the instruction definitions.