1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding, VM register );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 // Each NAME_H definition shares NAME's encoding and is bound to the next VMReg slot (->next()).
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 // NOTE(review): below, RSI/RDI are SOC in the first column everywhere and SOE only in the C column under _WIN64; L68 looks stale -- confirm.
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 // _WIN64 builds: RSI and RDI are save-on-entry in the C convention column.
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 // All other builds: RSI and RDI are save-on-call in both columns.
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 // R12-R15: save-on-call in the first column, save-on-entry in the C convention column.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 // _WIN64 builds: XMM6-XMM15 are save-on-entry in the C convention column.
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 // All other builds: XMM6-XMM15 are save-on-call in both columns.
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 // Flags register: ideal register type 0, encoding 16, and no backing VMReg (VMRegImpl::Bad()).
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 // chunk1: every XMM register with its _H slot, listed in numeric order.
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 // chunk2: the flags register sits in a chunk of its own.
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP, R12 and R15); each entry is a NAME, NAME_H pair
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP (R12 and R15 are left out too; R15 is the TLS pointer, see ptr_r15_reg)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP (R12 and R15 are left out too)
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 // Class for all pointer registers except RBP and RSP (R12 and R15 are left out too)
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP (R12 and R15 are left out too)
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP; R12 and R15 are left out too); each entry is a NAME, NAME_H pair
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP, R12, R15)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP, R12, R15)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP, R12, R15)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP; R12 and R15 are left out too); only the base slot of each register is listed
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP, R12, R15)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP, R12, R15)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers (only the base slot of each XMM register is listed)
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers (each entry is an XMMn, XMMn_H pair)
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand     // relocation 'format' value; presumably for 64-bit immediates -- confirm
 550 #define   RELOC_DISP32   Assembler::disp32_operand  // relocation 'format' value passed for 32-bit displacements below
 551 // Shorthand for code that declares a local `MacroAssembler _masm`: `__ insn(...)` expands to `_masm.insn(...)`.
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return 2 LP64_ONLY(+ 1);  // op + rm(reg/reg) bytes, plus one rex byte where LP64_ONLY applies
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   // 5 bytes from start of call to where return address points
 564   const int call_size = 5;
 565   // Method-handle invokes additionally account for preserve_SP_size() bytes.
 566   return _method_handle_invoke ? call_size + preserve_SP_size() : call_size;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 10 + 5; // 15 bytes to the return address: presumably a 10-byte movq plus the 5-byte call -- confirm
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Reports whether a safepoint node takes the polling page address as an
 578 // input.  amd64 has RIP-relative rather than absolute addressing and the
 579 // polling page is within 2G, so no address input is needed.
 580 bool SafePointNode::needs_polling_address_input() {
 581   const bool needs_input = false;
 582   return needs_input;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // Padding so that the byte after the call opcode lands on an alignment_required()
 590 // boundary; this keeps it from spanning a cache line so that the call can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   const int operand_offset = current_offset + 1; // step over the call opcode byte
 594   return round_to(operand_offset, alignment_required()) - operand_offset;
 595 }
 596 
 597 // Padding so that the byte after the call opcode lands on an alignment_required()
 598 // boundary; this keeps it from spanning a cache line so that the call can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   // skip mov rbp, rsp (preserve_SP_size() bytes) and then the call opcode byte
 602   const int operand_offset = current_offset + preserve_SP_size() + 1;
 603   return round_to(operand_offset, alignment_required()) - operand_offset;
 604 }
 605 
 606 // Padding so that the byte after the call opcode lands on an alignment_required()
 607 // boundary; this keeps it from spanning a cache line so that the call can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   const int operand_offset = current_offset + 11; // skip movq instruction + call opcode byte
 611   return round_to(operand_offset, alignment_required()) - operand_offset;
 612 }
 613 
 614 #ifndef PRODUCT
 615 // Non-PRODUCT builds only: print the breakpoint node as "INT3".
 616 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 617   st->print("INT3");
 618 }
 619 #endif
 620 
 621 // emit_rm(): append the single byte (f1 << 6) | (f2 << 3) | f3 to the code buffer
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 623 {
 624   const int packed = (f1 << 6) | (f2 << 3) | f3;
 625   *(cbuf.code_end()) = (unsigned char) packed;
 626   cbuf.set_code_end(cbuf.code_end() + 1);
 627 }
 628 
 629 // emit_cc(): append the single byte f1 | f2 to the code buffer
 630 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 631 {
 632   address pos = cbuf.code_end();
 633   *pos = (unsigned char) (f1 | f2);
 634   cbuf.set_code_end(pos + 1);
 635 }
 636 
 637 // emit_opcode(): append the low byte of 'code' to the code buffer
 638 void emit_opcode(CodeBuffer &cbuf, int code) {
 639   address pos = cbuf.code_end();
 640   *pos = (unsigned char) code;
 641   cbuf.set_code_end(pos + 1);
 642 }
 643 
 644 // emit_opcode() w/ relocation information
 645 void emit_opcode(CodeBuffer &cbuf, int code,
 646                  relocInfo::relocType reloc, int offset, int format) {
 647   // The relocation is recorded 'offset' bytes past the instruction mark, before the byte is emitted.
 648   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 649   emit_opcode(cbuf, code);
 650 }
 651 
 652 // emit_d8(): append the low 8 bits of 'd8' to the code buffer
 653 void emit_d8(CodeBuffer &cbuf, int d8) {
 654   address pos = cbuf.code_end();
 655   *pos = (unsigned char) d8;
 656   cbuf.set_code_end(pos + 1);
 657 }
 658 
 659 // emit_d16(): store 'd16' as a short at the end of the code buffer and advance 2 bytes
 660 void emit_d16(CodeBuffer &cbuf, int d16) {
 661   short* slot = (short *) cbuf.code_end();
 662   *slot = d16;
 663   cbuf.set_code_end(cbuf.code_end() + 2);
 664 }
 665 
 666 // emit_d32(): store 'd32' as an int at the end of the code buffer and advance 4 bytes
 667 void emit_d32(CodeBuffer &cbuf, int d32) {
 668   int* slot = (int *) cbuf.code_end();
 669   *slot = d32;
 670   cbuf.set_code_end(cbuf.code_end() + 4);
 671 }
 672 
 673 // emit_d64(): store 'd64' as an int64_t at the end of the code buffer and advance 8 bytes
 674 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 675   int64_t* slot = (int64_t*) cbuf.code_end();
 676   *slot = d64;
 677   cbuf.set_code_end(cbuf.code_end() + 8);
 678 }
 679 
 680 // Emit a 32 bit value and record a relocation of type 'reloc' for it
 681 void emit_d32_reloc(CodeBuffer& cbuf, int d32,
 682                     relocInfo::relocType reloc, int format)
 683 {
 684   // External words are expected to go through the
 685   // emit_d32_reloc(cbuf, addr) overload instead.
 686   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 687 
 688   // The relocation is recorded at the instruction mark, then the value is appended.
 689   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 690   emit_d32(cbuf, d32);
 691 }
 692 
 693 // Emit a 32 bit value and record the relocation described by 'rspec' for it
 694 void emit_d32_reloc(CodeBuffer& cbuf, int d32,
 695                     RelocationHolder const& rspec, int format)
 696 {
 697 #ifdef ASSERT
 698   // ASSERT builds only: check embedded oops; 0 and the non-oop word are exempt.
 699   if (rspec.reloc()->type() == relocInfo::oop_type) {
 700     if (d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 701       assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 702     }
 703   }
 704 #endif
 705 
 706   // The relocation is recorded at the instruction mark, then the value is appended.
 707   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 708   emit_d32(cbuf, d32);
 709 }
 710 
 711 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 712   // The displacement is taken relative to the end of the 4-byte field being emitted.
 713   const int rel32 = (int) (addr - (cbuf.code_end() + 4));
 714   emit_d32_reloc(cbuf, rel32,
 715                  external_word_Relocation::spec(addr), RELOC_DISP32);
 716 }
 717 
 718 
 719 // Emit a 64 bit value and record a relocation of type 'reloc' for it
 720 void emit_d64_reloc(CodeBuffer& cbuf,
 721                     int64_t d64,
 722                     relocInfo::relocType reloc, int format)
 723 {
 724   // The relocation is recorded at the instruction mark ...
 725   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 726 
 727   // ... and then the 8-byte value itself is appended.
 728   emit_d64(cbuf, d64);
 729 }
 730 
 731 // Emit a 64 bit value and record the relocation described by 'rspec' for it
 732 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64,
 733                     RelocationHolder const& rspec, int format)
 734 {
 735 #ifdef ASSERT
 736   // ASSERT builds only: check embedded oops; 0 and the non-oop word are exempt.
 737   if (rspec.reloc()->type() == relocInfo::oop_type) {
 738     if (d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 739       assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 740              "cannot embed scavengable oops in code");
 741     }
 742   }
 743 #endif
 744 
 745   // The relocation is recorded at the instruction mark, then the value is appended.
 746   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 747   emit_d64(cbuf, d64);
 748 }
 749 
 750 // Access stack slot for load or store: emits 'opcode' followed by an [RSP + disp] operand
 751 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 752 {
 753   // An 8-bit displacement is used whenever disp fits in a signed byte.
 754   const bool short_disp = (-0x80 <= disp && disp < 0x80);
 755   emit_opcode(cbuf, opcode);                                   // (e.g., FILD   [RSP+src])
 756   emit_rm(cbuf, short_disp ? 0x01 : 0x02, rm_field, RSP_enc);  // R/M byte
 757   emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);                       // SIB byte
 758   if (short_disp) {
 759     emit_d8(cbuf, disp);                                       // 8-bit displacement
 760   } else {
 761     emit_d32(cbuf, disp);                                      // 32-bit displacement
 762   }
 763 }
 764 
 765    // emit_reg_mem (rRegI ereg, memory mem): emit the R/M byte, an optional SIB byte and an optional 8/32-bit displacement
 766 void encode_RegMem(CodeBuffer &cbuf,
 767                    int reg,
 768                    int base, int index, int scale, int disp, bool disp_is_oop)
 769 {
 770   assert(!disp_is_oop, "cannot have disp");  // oop displacements are rejected where asserts are enabled
 771   int regenc = reg & 7;      // only the low three bits of each encoding are emitted here
 772   int baseenc = base & 7;
 773   int indexenc = index & 7;
 774   // NOTE(review): any REX prefix for encodings >= 8 is presumably emitted by the caller -- confirm.
 775   // There is no index & no scale, use form without SIB byte
 776   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 777     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 778     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 779       emit_rm(cbuf, 0x0, regenc, baseenc); // * mode 0x0: [base]
 780     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 781       // If 8-bit displacement, mode 0x1
 782       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 783       emit_d8(cbuf, disp);
 784     } else {
 785       // If 32-bit displacement
 786       if (base == -1) { // Special flag for absolute address
 787         emit_rm(cbuf, 0x0, regenc, 0x5); // * mode 0x0 with rm field 0x5, followed by the 32-bit displacement
 788         if (disp_is_oop) {
 789           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 790         } else {
 791           emit_d32(cbuf, disp);
 792         }
 793       } else {
 794         // Normal base + offset
 795         emit_rm(cbuf, 0x2, regenc, baseenc); // * mode 0x2: [base + disp32]
 796         if (disp_is_oop) {
 797           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 798         } else {
 799           emit_d32(cbuf, disp);
 800         }
 801       }
 802     }
 803   } else {
 804     // Else, encode with the SIB byte
 805     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 806     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 807       // If no displacement
 808       emit_rm(cbuf, 0x0, regenc, 0x4); // * rm field 0x4 announces the SIB byte
 809       emit_rm(cbuf, scale, indexenc, baseenc);  // SIB byte: scale, index, base
 810     } else {
 811       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 812         // If 8-bit displacement, mode 0x1
 813         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 814         emit_rm(cbuf, scale, indexenc, baseenc);
 815         emit_d8(cbuf, disp);
 816       } else {
 817         // If 32-bit displacement
 818         if (base == 0x04 ) {  // NOTE(review): baseenc == 0x4 here, so both arms emit identical bytes
 819           emit_rm(cbuf, 0x2, regenc, 0x4);
 820           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 821         } else {
 822           emit_rm(cbuf, 0x2, regenc, 0x4);
 823           emit_rm(cbuf, scale, indexenc, baseenc); // *
 824         }
 825         if (disp_is_oop) {
 826           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 827         } else {
 828           emit_d32(cbuf, disp);
 829         }
 830       }
 831     }
 832   }
 833 }
 834 
 835 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 836 {
 837   if (dstenc != srcenc) {
 838     if (dstenc < 8) {
 839       if (srcenc >= 8) {
 840         emit_opcode(cbuf, Assembler::REX_B);
 841         srcenc -= 8;
 842       }
 843     } else {
 844       if (srcenc < 8) {
 845         emit_opcode(cbuf, Assembler::REX_R);
 846       } else {
 847         emit_opcode(cbuf, Assembler::REX_RB);
 848         srcenc -= 8;
 849       }
 850       dstenc -= 8;
 851     }
 852 
 853     emit_opcode(cbuf, 0x8B);
 854     emit_rm(cbuf, 0x3, dstenc, srcenc);
 855   }
 856 }
 857 
 858 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 859   if( dst_encoding == src_encoding ) {
 860     // reg-reg copy, use an empty encoding
 861   } else {
 862     MacroAssembler _masm(&cbuf);
 863 
 864     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 865   }
 866 }
 867 
 868 
 869 //=============================================================================
 870 #ifndef PRODUCT
 871 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 872 {
 873   Compile* C = ra_->C;
 874 
 875   int framesize = C->frame_slots() << LogBytesPerInt;
 876   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 877   // Remove wordSize for return adr already pushed
 878   // and another for the RBP we are going to save
 879   framesize -= 2*wordSize;
 880   bool need_nop = true;
 881 
 882   // Calls to C2R adapters often do not accept exceptional returns.
 883   // We require that their callers must bang for them.  But be
 884   // careful, because some VM calls (such as call site linkage) can
 885   // use several kilobytes of stack.  But the stack safety zone should
 886   // account for that.  See bugs 4446381, 4468289, 4497237.
 887   if (C->need_stack_bang(framesize)) {
 888     st->print_cr("# stack bang"); st->print("\t");
 889     need_nop = false;
 890   }
 891   st->print_cr("pushq   rbp"); st->print("\t");
 892 
 893   if (VerifyStackAtCalls) {
 894     // Majik cookie to verify stack depth
 895     st->print_cr("pushq   0xffffffffbadb100d"
 896                   "\t# Majik cookie for stack depth check");
 897     st->print("\t");
 898     framesize -= wordSize; // Remove 2 for cookie
 899     need_nop = false;
 900   }
 901 
 902   if (framesize) {
 903     st->print("subq    rsp, #%d\t# Create frame", framesize);
 904     if (framesize < 0x80 && need_nop) {
 905       st->print("\n\tnop\t# nop for patch_verified_entry");
 906     }
 907   }
 908 }
 909 #endif
 910 
 911 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 912 {
 913   Compile* C = ra_->C;
 914 
 915   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 916   // NativeJump::patch_verified_entry will be able to patch out the entry
 917   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 918   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 919   // 6 bytes. So if we don't do the fldcw or the push then we must
 920   // use the 6 byte frame allocation even if we have no frame. :-(
 921   // If method sets FPU control word do it now
 922 
 923   int framesize = C->frame_slots() << LogBytesPerInt;
 924   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 925   // Remove wordSize for return adr already pushed
 926   // and another for the RBP we are going to save
 927   framesize -= 2*wordSize;
 928   bool need_nop = true;
 929 
 930   // Calls to C2R adapters often do not accept exceptional returns.
 931   // We require that their callers must bang for them.  But be
 932   // careful, because some VM calls (such as call site linkage) can
 933   // use several kilobytes of stack.  But the stack safety zone should
 934   // account for that.  See bugs 4446381, 4468289, 4497237.
 935   if (C->need_stack_bang(framesize)) {
 936     MacroAssembler masm(&cbuf);
 937     masm.generate_stack_overflow_check(framesize);
 938     need_nop = false;
 939   }
 940 
 941   // We always push rbp so that on return to interpreter rbp will be
 942   // restored correctly and we can correct the stack.
 943   emit_opcode(cbuf, 0x50 | RBP_enc);
 944 
 945   if (VerifyStackAtCalls) {
 946     // Majik cookie to verify stack depth
 947     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 948     emit_d32(cbuf, 0xbadb100d);
 949     framesize -= wordSize; // Remove 2 for cookie
 950     need_nop = false;
 951   }
 952 
 953   if (framesize) {
 954     emit_opcode(cbuf, Assembler::REX_W);
 955     if (framesize < 0x80) {
 956       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 957       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 958       emit_d8(cbuf, framesize);
 959       if (need_nop) {
 960         emit_opcode(cbuf, 0x90); // nop
 961       }
 962     } else {
 963       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 964       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 965       emit_d32(cbuf, framesize);
 966     }
 967   }
 968 
 969   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 970 
 971 #ifdef ASSERT
 972   if (VerifyStackAtCalls) {
 973     Label L;
 974     MacroAssembler masm(&cbuf);
 975     masm.push(rax);
 976     masm.mov(rax, rsp);
 977     masm.andptr(rax, StackAlignmentInBytes-1);
 978     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 979     masm.pop(rax);
 980     masm.jcc(Assembler::equal, L);
 981     masm.stop("Stack is not properly aligned!");
 982     masm.bind(L);
 983   }
 984 #endif
 985 }
 986 
 987 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 988 {
 989   return MachNode::size(ra_); // too many variables; just compute it
 990                               // the hard way
 991 }
 992 
// Relocation count reported for the prolog; always 0.
// NOTE(review): presumably an upper bound on the relocation entries the
// prolog may need -- confirm against the callers of reloc().
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 997 
 998 //=============================================================================
 999 #ifndef PRODUCT
1000 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1001 {
1002   Compile* C = ra_->C;
1003   int framesize = C->frame_slots() << LogBytesPerInt;
1004   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1005   // Remove word for return adr already pushed
1006   // and RBP
1007   framesize -= 2*wordSize;
1008 
1009   if (framesize) {
1010     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1011     st->print("\t");
1012   }
1013 
1014   st->print_cr("popq\trbp");
1015   if (do_polling() && C->is_method_compilation()) {
1016     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1017                   "# Safepoint: poll for GC");
1018     st->print("\t");
1019   }
1020 }
1021 #endif
1022 
1023 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1024 {
1025   Compile* C = ra_->C;
1026   int framesize = C->frame_slots() << LogBytesPerInt;
1027   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1028   // Remove word for return adr already pushed
1029   // and RBP
1030   framesize -= 2*wordSize;
1031 
1032   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1033 
1034   if (framesize) {
1035     emit_opcode(cbuf, Assembler::REX_W);
1036     if (framesize < 0x80) {
1037       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1038       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1039       emit_d8(cbuf, framesize);
1040     } else {
1041       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1042       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1043       emit_d32(cbuf, framesize);
1044     }
1045   }
1046 
1047   // popq rbp
1048   emit_opcode(cbuf, 0x58 | RBP_enc);
1049 
1050   if (do_polling() && C->is_method_compilation()) {
1051     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1052     // XXX reg_mem doesn't support RIP-relative addressing yet
1053     cbuf.set_inst_mark();
1054     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1055     emit_opcode(cbuf, 0x85); // testl
1056     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1057     // cbuf.inst_mark() is beginning of instruction
1058     emit_d32_reloc(cbuf, os::get_polling_page());
1059 //                    relocInfo::poll_return_type,
1060   }
1061 }
1062 
1063 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1064 {
1065   Compile* C = ra_->C;
1066   int framesize = C->frame_slots() << LogBytesPerInt;
1067   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1068   // Remove word for return adr already pushed
1069   // and RBP
1070   framesize -= 2*wordSize;
1071 
1072   uint size = 0;
1073 
1074   if (do_polling() && C->is_method_compilation()) {
1075     size += 6;
1076   }
1077 
1078   // count popq rbp
1079   size++;
1080 
1081   if (framesize) {
1082     if (framesize < 0x80) {
1083       size += 4;
1084     } else if (framesize) {
1085       size += 7;
1086     }
1087   }
1088 
1089   return size;
1090 }
1091 
// Relocation count reported for the epilog; always 2.
// NOTE(review): presumably an upper bound on the relocation entries the
// epilog may need (its emit() relocates the safepoint poll) -- confirm
// against the callers of reloc().
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1096 
1097 const Pipeline* MachEpilogNode::pipeline() const
1098 {
1099   return MachNode::pipeline_class();
1100 }
1101 
// Safepoint offset reported for the epilog; always 0.
// NOTE(review): the exact meaning of this offset is defined by the callers
// of safepoint_offset(), which are not visible here -- confirm before
// relying on it.
int MachEpilogNode::safepoint_offset() const
{
  return 0;
}
1106 
1107 //=============================================================================
1108 
// Register classes distinguished when emitting spill copies; rc_class()
// maps an OptoReg::Name onto one of these values.
enum RC {
  rc_bad,    // not a valid register
  rc_int,    // general-purpose register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1115 
1116 static enum RC rc_class(OptoReg::Name reg)
1117 {
1118   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1119 
1120   if (OptoReg::is_stack(reg)) return rc_stack;
1121 
1122   VMReg r = OptoReg::as_VMReg(reg);
1123 
1124   if (r->is_Register()) return rc_int;
1125 
1126   assert(r->is_XMMRegister(), "must be");
1127   return rc_float;
1128 }
1129 
1130 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1131                                        PhaseRegAlloc* ra_,
1132                                        bool do_size,
1133                                        outputStream* st) const
1134 {
1135 
1136   // Get registers to move
1137   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1138   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1139   OptoReg::Name dst_second = ra_->get_reg_second(this);
1140   OptoReg::Name dst_first = ra_->get_reg_first(this);
1141 
1142   enum RC src_second_rc = rc_class(src_second);
1143   enum RC src_first_rc = rc_class(src_first);
1144   enum RC dst_second_rc = rc_class(dst_second);
1145   enum RC dst_first_rc = rc_class(dst_first);
1146 
1147   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1148          "must move at least 1 register" );
1149 
1150   if (src_first == dst_first && src_second == dst_second) {
1151     // Self copy, no move
1152     return 0;
1153   } else if (src_first_rc == rc_stack) {
1154     // mem ->
1155     if (dst_first_rc == rc_stack) {
1156       // mem -> mem
1157       assert(src_second != dst_first, "overlap");
1158       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1159           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1160         // 64-bit
1161         int src_offset = ra_->reg2offset(src_first);
1162         int dst_offset = ra_->reg2offset(dst_first);
1163         if (cbuf) {
1164           emit_opcode(*cbuf, 0xFF);
1165           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1166 
1167           emit_opcode(*cbuf, 0x8F);
1168           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1169 
1170 #ifndef PRODUCT
1171         } else if (!do_size) {
1172           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1173                      "popq    [rsp + #%d]",
1174                      src_offset,
1175                      dst_offset);
1176 #endif
1177         }
1178         return
1179           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1180           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1181       } else {
1182         // 32-bit
1183         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1184         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1185         // No pushl/popl, so:
1186         int src_offset = ra_->reg2offset(src_first);
1187         int dst_offset = ra_->reg2offset(dst_first);
1188         if (cbuf) {
1189           emit_opcode(*cbuf, Assembler::REX_W);
1190           emit_opcode(*cbuf, 0x89);
1191           emit_opcode(*cbuf, 0x44);
1192           emit_opcode(*cbuf, 0x24);
1193           emit_opcode(*cbuf, 0xF8);
1194 
1195           emit_opcode(*cbuf, 0x8B);
1196           encode_RegMem(*cbuf,
1197                         RAX_enc,
1198                         RSP_enc, 0x4, 0, src_offset,
1199                         false);
1200 
1201           emit_opcode(*cbuf, 0x89);
1202           encode_RegMem(*cbuf,
1203                         RAX_enc,
1204                         RSP_enc, 0x4, 0, dst_offset,
1205                         false);
1206 
1207           emit_opcode(*cbuf, Assembler::REX_W);
1208           emit_opcode(*cbuf, 0x8B);
1209           emit_opcode(*cbuf, 0x44);
1210           emit_opcode(*cbuf, 0x24);
1211           emit_opcode(*cbuf, 0xF8);
1212 
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1216                      "movl    rax, [rsp + #%d]\n\t"
1217                      "movl    [rsp + #%d], rax\n\t"
1218                      "movq    rax, [rsp - #8]",
1219                      src_offset,
1220                      dst_offset);
1221 #endif
1222         }
1223         return
1224           5 + // movq
1225           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1226           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1227           5; // movq
1228       }
1229     } else if (dst_first_rc == rc_int) {
1230       // mem -> gpr
1231       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1232           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1233         // 64-bit
1234         int offset = ra_->reg2offset(src_first);
1235         if (cbuf) {
1236           if (Matcher::_regEncode[dst_first] < 8) {
1237             emit_opcode(*cbuf, Assembler::REX_W);
1238           } else {
1239             emit_opcode(*cbuf, Assembler::REX_WR);
1240           }
1241           emit_opcode(*cbuf, 0x8B);
1242           encode_RegMem(*cbuf,
1243                         Matcher::_regEncode[dst_first],
1244                         RSP_enc, 0x4, 0, offset,
1245                         false);
1246 #ifndef PRODUCT
1247         } else if (!do_size) {
1248           st->print("movq    %s, [rsp + #%d]\t# spill",
1249                      Matcher::regName[dst_first],
1250                      offset);
1251 #endif
1252         }
1253         return
1254           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1255       } else {
1256         // 32-bit
1257         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1258         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1259         int offset = ra_->reg2offset(src_first);
1260         if (cbuf) {
1261           if (Matcher::_regEncode[dst_first] >= 8) {
1262             emit_opcode(*cbuf, Assembler::REX_R);
1263           }
1264           emit_opcode(*cbuf, 0x8B);
1265           encode_RegMem(*cbuf,
1266                         Matcher::_regEncode[dst_first],
1267                         RSP_enc, 0x4, 0, offset,
1268                         false);
1269 #ifndef PRODUCT
1270         } else if (!do_size) {
1271           st->print("movl    %s, [rsp + #%d]\t# spill",
1272                      Matcher::regName[dst_first],
1273                      offset);
1274 #endif
1275         }
1276         return
1277           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1278           ((Matcher::_regEncode[dst_first] < 8)
1279            ? 3
1280            : 4); // REX
1281       }
1282     } else if (dst_first_rc == rc_float) {
1283       // mem-> xmm
1284       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1285           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1286         // 64-bit
1287         int offset = ra_->reg2offset(src_first);
1288         if (cbuf) {
1289           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1290           if (Matcher::_regEncode[dst_first] >= 8) {
1291             emit_opcode(*cbuf, Assembler::REX_R);
1292           }
1293           emit_opcode(*cbuf, 0x0F);
1294           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1295           encode_RegMem(*cbuf,
1296                         Matcher::_regEncode[dst_first],
1297                         RSP_enc, 0x4, 0, offset,
1298                         false);
1299 #ifndef PRODUCT
1300         } else if (!do_size) {
1301           st->print("%s  %s, [rsp + #%d]\t# spill",
1302                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1303                      Matcher::regName[dst_first],
1304                      offset);
1305 #endif
1306         }
1307         return
1308           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1309           ((Matcher::_regEncode[dst_first] < 8)
1310            ? 5
1311            : 6); // REX
1312       } else {
1313         // 32-bit
1314         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1315         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1316         int offset = ra_->reg2offset(src_first);
1317         if (cbuf) {
1318           emit_opcode(*cbuf, 0xF3);
1319           if (Matcher::_regEncode[dst_first] >= 8) {
1320             emit_opcode(*cbuf, Assembler::REX_R);
1321           }
1322           emit_opcode(*cbuf, 0x0F);
1323           emit_opcode(*cbuf, 0x10);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[dst_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movss   %s, [rsp + #%d]\t# spill",
1331                      Matcher::regName[dst_first],
1332                      offset);
1333 #endif
1334         }
1335         return
1336           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1337           ((Matcher::_regEncode[dst_first] < 8)
1338            ? 5
1339            : 6); // REX
1340       }
1341     }
1342   } else if (src_first_rc == rc_int) {
1343     // gpr ->
1344     if (dst_first_rc == rc_stack) {
1345       // gpr -> mem
1346       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1347           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1348         // 64-bit
1349         int offset = ra_->reg2offset(dst_first);
1350         if (cbuf) {
1351           if (Matcher::_regEncode[src_first] < 8) {
1352             emit_opcode(*cbuf, Assembler::REX_W);
1353           } else {
1354             emit_opcode(*cbuf, Assembler::REX_WR);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movq    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1369       } else {
1370         // 32-bit
1371         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1372         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1373         int offset = ra_->reg2offset(dst_first);
1374         if (cbuf) {
1375           if (Matcher::_regEncode[src_first] >= 8) {
1376             emit_opcode(*cbuf, Assembler::REX_R);
1377           }
1378           emit_opcode(*cbuf, 0x89);
1379           encode_RegMem(*cbuf,
1380                         Matcher::_regEncode[src_first],
1381                         RSP_enc, 0x4, 0, offset,
1382                         false);
1383 #ifndef PRODUCT
1384         } else if (!do_size) {
1385           st->print("movl    [rsp + #%d], %s\t# spill",
1386                      offset,
1387                      Matcher::regName[src_first]);
1388 #endif
1389         }
1390         return
1391           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1392           ((Matcher::_regEncode[src_first] < 8)
1393            ? 3
1394            : 4); // REX
1395       }
1396     } else if (dst_first_rc == rc_int) {
1397       // gpr -> gpr
1398       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1399           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1400         // 64-bit
1401         if (cbuf) {
1402           if (Matcher::_regEncode[dst_first] < 8) {
1403             if (Matcher::_regEncode[src_first] < 8) {
1404               emit_opcode(*cbuf, Assembler::REX_W);
1405             } else {
1406               emit_opcode(*cbuf, Assembler::REX_WB);
1407             }
1408           } else {
1409             if (Matcher::_regEncode[src_first] < 8) {
1410               emit_opcode(*cbuf, Assembler::REX_WR);
1411             } else {
1412               emit_opcode(*cbuf, Assembler::REX_WRB);
1413             }
1414           }
1415           emit_opcode(*cbuf, 0x8B);
1416           emit_rm(*cbuf, 0x3,
1417                   Matcher::_regEncode[dst_first] & 7,
1418                   Matcher::_regEncode[src_first] & 7);
1419 #ifndef PRODUCT
1420         } else if (!do_size) {
1421           st->print("movq    %s, %s\t# spill",
1422                      Matcher::regName[dst_first],
1423                      Matcher::regName[src_first]);
1424 #endif
1425         }
1426         return 3; // REX
1427       } else {
1428         // 32-bit
1429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1431         if (cbuf) {
1432           if (Matcher::_regEncode[dst_first] < 8) {
1433             if (Matcher::_regEncode[src_first] >= 8) {
1434               emit_opcode(*cbuf, Assembler::REX_B);
1435             }
1436           } else {
1437             if (Matcher::_regEncode[src_first] < 8) {
1438               emit_opcode(*cbuf, Assembler::REX_R);
1439             } else {
1440               emit_opcode(*cbuf, Assembler::REX_RB);
1441             }
1442           }
1443           emit_opcode(*cbuf, 0x8B);
1444           emit_rm(*cbuf, 0x3,
1445                   Matcher::_regEncode[dst_first] & 7,
1446                   Matcher::_regEncode[src_first] & 7);
1447 #ifndef PRODUCT
1448         } else if (!do_size) {
1449           st->print("movl    %s, %s\t# spill",
1450                      Matcher::regName[dst_first],
1451                      Matcher::regName[src_first]);
1452 #endif
1453         }
1454         return
1455           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1456           ? 2
1457           : 3; // REX
1458       }
1459     } else if (dst_first_rc == rc_float) {
1460       // gpr -> xmm
1461       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1462           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1463         // 64-bit
1464         if (cbuf) {
1465           emit_opcode(*cbuf, 0x66);
1466           if (Matcher::_regEncode[dst_first] < 8) {
1467             if (Matcher::_regEncode[src_first] < 8) {
1468               emit_opcode(*cbuf, Assembler::REX_W);
1469             } else {
1470               emit_opcode(*cbuf, Assembler::REX_WB);
1471             }
1472           } else {
1473             if (Matcher::_regEncode[src_first] < 8) {
1474               emit_opcode(*cbuf, Assembler::REX_WR);
1475             } else {
1476               emit_opcode(*cbuf, Assembler::REX_WRB);
1477             }
1478           }
1479           emit_opcode(*cbuf, 0x0F);
1480           emit_opcode(*cbuf, 0x6E);
1481           emit_rm(*cbuf, 0x3,
1482                   Matcher::_regEncode[dst_first] & 7,
1483                   Matcher::_regEncode[src_first] & 7);
1484 #ifndef PRODUCT
1485         } else if (!do_size) {
1486           st->print("movdq   %s, %s\t# spill",
1487                      Matcher::regName[dst_first],
1488                      Matcher::regName[src_first]);
1489 #endif
1490         }
1491         return 5; // REX
1492       } else {
1493         // 32-bit
1494         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1495         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1496         if (cbuf) {
1497           emit_opcode(*cbuf, 0x66);
1498           if (Matcher::_regEncode[dst_first] < 8) {
1499             if (Matcher::_regEncode[src_first] >= 8) {
1500               emit_opcode(*cbuf, Assembler::REX_B);
1501             }
1502           } else {
1503             if (Matcher::_regEncode[src_first] < 8) {
1504               emit_opcode(*cbuf, Assembler::REX_R);
1505             } else {
1506               emit_opcode(*cbuf, Assembler::REX_RB);
1507             }
1508           }
1509           emit_opcode(*cbuf, 0x0F);
1510           emit_opcode(*cbuf, 0x6E);
1511           emit_rm(*cbuf, 0x3,
1512                   Matcher::_regEncode[dst_first] & 7,
1513                   Matcher::_regEncode[src_first] & 7);
1514 #ifndef PRODUCT
1515         } else if (!do_size) {
1516           st->print("movdl   %s, %s\t# spill",
1517                      Matcher::regName[dst_first],
1518                      Matcher::regName[src_first]);
1519 #endif
1520         }
1521         return
1522           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1523           ? 4
1524           : 5; // REX
1525       }
1526     }
1527   } else if (src_first_rc == rc_float) {
1528     // xmm ->
1529     if (dst_first_rc == rc_stack) {
1530       // xmm -> mem
1531       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1532           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1533         // 64-bit
1534         int offset = ra_->reg2offset(dst_first);
1535         if (cbuf) {
1536           emit_opcode(*cbuf, 0xF2);
1537           if (Matcher::_regEncode[src_first] >= 8) {
1538               emit_opcode(*cbuf, Assembler::REX_R);
1539           }
1540           emit_opcode(*cbuf, 0x0F);
1541           emit_opcode(*cbuf, 0x11);
1542           encode_RegMem(*cbuf,
1543                         Matcher::_regEncode[src_first],
1544                         RSP_enc, 0x4, 0, offset,
1545                         false);
1546 #ifndef PRODUCT
1547         } else if (!do_size) {
1548           st->print("movsd   [rsp + #%d], %s\t# spill",
1549                      offset,
1550                      Matcher::regName[src_first]);
1551 #endif
1552         }
1553         return
1554           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1555           ((Matcher::_regEncode[src_first] < 8)
1556            ? 5
1557            : 6); // REX
1558       } else {
1559         // 32-bit
1560         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1561         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1562         int offset = ra_->reg2offset(dst_first);
1563         if (cbuf) {
1564           emit_opcode(*cbuf, 0xF3);
1565           if (Matcher::_regEncode[src_first] >= 8) {
1566               emit_opcode(*cbuf, Assembler::REX_R);
1567           }
1568           emit_opcode(*cbuf, 0x0F);
1569           emit_opcode(*cbuf, 0x11);
1570           encode_RegMem(*cbuf,
1571                         Matcher::_regEncode[src_first],
1572                         RSP_enc, 0x4, 0, offset,
1573                         false);
1574 #ifndef PRODUCT
1575         } else if (!do_size) {
1576           st->print("movss   [rsp + #%d], %s\t# spill",
1577                      offset,
1578                      Matcher::regName[src_first]);
1579 #endif
1580         }
1581         return
1582           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1583           ((Matcher::_regEncode[src_first] < 8)
1584            ? 5
1585            : 6); // REX
1586       }
1587     } else if (dst_first_rc == rc_int) {
1588       // xmm -> gpr
1589       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1590           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1591         // 64-bit
1592         if (cbuf) {
1593           emit_opcode(*cbuf, 0x66);
1594           if (Matcher::_regEncode[dst_first] < 8) {
1595             if (Matcher::_regEncode[src_first] < 8) {
1596               emit_opcode(*cbuf, Assembler::REX_W);
1597             } else {
1598               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1599             }
1600           } else {
1601             if (Matcher::_regEncode[src_first] < 8) {
1602               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1603             } else {
1604               emit_opcode(*cbuf, Assembler::REX_WRB);
1605             }
1606           }
1607           emit_opcode(*cbuf, 0x0F);
1608           emit_opcode(*cbuf, 0x7E);
1609           emit_rm(*cbuf, 0x3,
1610                   Matcher::_regEncode[dst_first] & 7,
1611                   Matcher::_regEncode[src_first] & 7);
1612 #ifndef PRODUCT
1613         } else if (!do_size) {
1614           st->print("movdq   %s, %s\t# spill",
1615                      Matcher::regName[dst_first],
1616                      Matcher::regName[src_first]);
1617 #endif
1618         }
1619         return 5; // REX
1620       } else {
1621         // 32-bit
1622         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1623         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1624         if (cbuf) {
1625           emit_opcode(*cbuf, 0x66);
1626           if (Matcher::_regEncode[dst_first] < 8) {
1627             if (Matcher::_regEncode[src_first] >= 8) {
1628               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1629             }
1630           } else {
1631             if (Matcher::_regEncode[src_first] < 8) {
1632               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1633             } else {
1634               emit_opcode(*cbuf, Assembler::REX_RB);
1635             }
1636           }
1637           emit_opcode(*cbuf, 0x0F);
1638           emit_opcode(*cbuf, 0x7E);
1639           emit_rm(*cbuf, 0x3,
1640                   Matcher::_regEncode[dst_first] & 7,
1641                   Matcher::_regEncode[src_first] & 7);
1642 #ifndef PRODUCT
1643         } else if (!do_size) {
1644           st->print("movdl   %s, %s\t# spill",
1645                      Matcher::regName[dst_first],
1646                      Matcher::regName[src_first]);
1647 #endif
1648         }
1649         return
1650           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1651           ? 4
1652           : 5; // REX
1653       }
1654     } else if (dst_first_rc == rc_float) {
1655       // xmm -> xmm
1656       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1657           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1658         // 64-bit
1659         if (cbuf) {
1660           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1661           if (Matcher::_regEncode[dst_first] < 8) {
1662             if (Matcher::_regEncode[src_first] >= 8) {
1663               emit_opcode(*cbuf, Assembler::REX_B);
1664             }
1665           } else {
1666             if (Matcher::_regEncode[src_first] < 8) {
1667               emit_opcode(*cbuf, Assembler::REX_R);
1668             } else {
1669               emit_opcode(*cbuf, Assembler::REX_RB);
1670             }
1671           }
1672           emit_opcode(*cbuf, 0x0F);
1673           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1674           emit_rm(*cbuf, 0x3,
1675                   Matcher::_regEncode[dst_first] & 7,
1676                   Matcher::_regEncode[src_first] & 7);
1677 #ifndef PRODUCT
1678         } else if (!do_size) {
1679           st->print("%s  %s, %s\t# spill",
1680                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1681                      Matcher::regName[dst_first],
1682                      Matcher::regName[src_first]);
1683 #endif
1684         }
1685         return
1686           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1687           ? 4
1688           : 5; // REX
1689       } else {
1690         // 32-bit
1691         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1692         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1693         if (cbuf) {
1694           if (!UseXmmRegToRegMoveAll)
1695             emit_opcode(*cbuf, 0xF3);
1696           if (Matcher::_regEncode[dst_first] < 8) {
1697             if (Matcher::_regEncode[src_first] >= 8) {
1698               emit_opcode(*cbuf, Assembler::REX_B);
1699             }
1700           } else {
1701             if (Matcher::_regEncode[src_first] < 8) {
1702               emit_opcode(*cbuf, Assembler::REX_R);
1703             } else {
1704               emit_opcode(*cbuf, Assembler::REX_RB);
1705             }
1706           }
1707           emit_opcode(*cbuf, 0x0F);
1708           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1709           emit_rm(*cbuf, 0x3,
1710                   Matcher::_regEncode[dst_first] & 7,
1711                   Matcher::_regEncode[src_first] & 7);
1712 #ifndef PRODUCT
1713         } else if (!do_size) {
1714           st->print("%s  %s, %s\t# spill",
1715                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1716                      Matcher::regName[dst_first],
1717                      Matcher::regName[src_first]);
1718 #endif
1719         }
1720         return
1721           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1722           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1723           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1724       }
1725     }
1726   }
1727 
1728   assert(0," foo ");
1729   Unimplemented();
1730 
1731   return 0;
1732 }
1733 
1734 #ifndef PRODUCT
1735 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1736 {
1737   implementation(NULL, ra_, false, st);
1738 }
1739 #endif
1740 
1741 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1742 {
1743   implementation(&cbuf, ra_, false, NULL);
1744 }
1745 
1746 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1747 {
1748   return implementation(NULL, ra_, true, NULL);
1749 }
1750 
1751 //=============================================================================
1752 #ifndef PRODUCT
1753 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1754 {
1755   st->print("nop \t# %d bytes pad for loops and calls", _count);
1756 }
1757 #endif
1758 
// Emits the padding through MacroAssembler::nop, passing _count as the
// argument.  size() below reports _count bytes for this node.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
{
  // NOTE(review): the '__' shorthand presumably expands to "_masm." (its
  // definition is outside this section), so the local must keep this name.
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1764 
1765 uint MachNopNode::size(PhaseRegAlloc*) const
1766 {
1767   return _count;
1768 }
1769 
1770 
1771 //=============================================================================
1772 #ifndef PRODUCT
1773 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1774 {
1775   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1776   int reg = ra_->get_reg_first(this);
1777   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1778             Matcher::regName[reg], offset);
1779 }
1780 #endif
1781 
1782 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1783 {
1784   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1785   int reg = ra_->get_encode(this);
1786   if (offset >= 0x80) {
1787     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1788     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1789     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1790     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1791     emit_d32(cbuf, offset);
1792   } else {
1793     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1794     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1795     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1796     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1797     emit_d8(cbuf, offset);
1798   }
1799 }
1800 
1801 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1802 {
1803   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1804   return (offset < 0x80) ? 5 : 8; // REX
1805 }
1806 
1807 //=============================================================================
1808 
1809 // emit call stub, compiled java to interpreter
1810 void emit_java_to_interp(CodeBuffer& cbuf)
1811 {
1812   // Stub is fixed up when the corresponding call is converted from
1813   // calling compiled code to calling interpreted code.
1814   // movq rbx, 0
1815   // jmp -5 # to self
1816 
1817   address mark = cbuf.inst_mark();  // get mark within main instrs section
1818 
1819   // Note that the code buffer's inst_mark is always relative to insts.
1820   // That's why we must use the macroassembler to generate a stub.
1821   MacroAssembler _masm(&cbuf);
1822 
1823   address base =
1824   __ start_a_stub(Compile::MAX_stubs_size);
1825   if (base == NULL)  return;  // CodeBuffer::expand failed
1826   // static stub relocation stores the instruction address of the call
1827   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1828   // static stub relocation also tags the methodOop in the code-stream.
1829   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1830   // This is recognized as unresolved by relocs/nativeinst/ic code
1831   __ jump(RuntimeAddress(__ pc()));
1832 
1833   // Update current stubs pointer and restore code_end.
1834   __ end_a_stub();
1835 }
1836 
1837 // size of call stub, compiled java to interpretor
1838 uint size_java_to_interp()
1839 {
1840   return 15;  // movq (1+1+8); jmp (1+4)
1841 }
1842 
1843 // relocation entries for call stub, compiled java to interpretor
1844 uint reloc_java_to_interp()
1845 {
1846   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1847 }
1848 
1849 //=============================================================================
1850 #ifndef PRODUCT
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   if (UseCompressedOops) {
1854     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1855     if (Universe::narrow_oop_shift() != 0) {
1856       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1857     }
1858     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1859   } else {
1860     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1861                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1862   }
1863   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1864   st->print_cr("\tnop");
1865   if (!OptoBreakpoint) {
1866     st->print_cr("\tnop");
1867   }
1868 }
1869 #endif
1870 
1871 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1872 {
1873   MacroAssembler masm(&cbuf);
1874 #ifdef ASSERT
1875   uint code_size = cbuf.code_size();
1876 #endif
1877   if (UseCompressedOops) {
1878     masm.load_klass(rscratch1, j_rarg0);
1879     masm.cmpptr(rax, rscratch1);
1880   } else {
1881     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1882   }
1883 
1884   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1885 
1886   /* WARNING these NOPs are critical so that verified entry point is properly
1887      aligned for patching by NativeJump::patch_verified_entry() */
1888   int nops_cnt = 1;
1889   if (!OptoBreakpoint) {
1890     // Leave space for int3
1891      nops_cnt += 1;
1892   }
1893   if (UseCompressedOops) {
1894     // ??? divisible by 4 is aligned?
1895     nops_cnt += 1;
1896   }
1897   masm.nop(nops_cnt);
1898 
1899   assert(cbuf.code_size() - code_size == size(ra_),
1900          "checking code size of inline cache node");
1901 }
1902 
1903 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1904 {
1905   if (UseCompressedOops) {
1906     if (Universe::narrow_oop_shift() == 0) {
1907       return OptoBreakpoint ? 15 : 16;
1908     } else {
1909       return OptoBreakpoint ? 19 : 20;
1910     }
1911   } else {
1912     return OptoBreakpoint ? 11 : 12;
1913   }
1914 }
1915 
1916 
1917 //=============================================================================
1918 uint size_exception_handler()
1919 {
1920   // NativeCall instruction size is the same as NativeJump.
1921   // Note that this value is also credited (in output.cpp) to
1922   // the size of the code section.
1923   return NativeJump::instruction_size;
1924 }
1925 
1926 // Emit exception handler code.
1927 int emit_exception_handler(CodeBuffer& cbuf)
1928 {
1929 
1930   // Note that the code buffer's inst_mark is always relative to insts.
1931   // That's why we must use the macroassembler to generate a handler.
1932   MacroAssembler _masm(&cbuf);
1933   address base =
1934   __ start_a_stub(size_exception_handler());
1935   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1936   int offset = __ offset();
1937   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1938   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1939   __ end_a_stub();
1940   return offset;
1941 }
1942 
1943 uint size_deopt_handler()
1944 {
1945   // three 5 byte instructions
1946   return 15;
1947 }
1948 
1949 // Emit deopt handler code.
1950 int emit_deopt_handler(CodeBuffer& cbuf)
1951 {
1952 
1953   // Note that the code buffer's inst_mark is always relative to insts.
1954   // That's why we must use the macroassembler to generate a handler.
1955   MacroAssembler _masm(&cbuf);
1956   address base =
1957   __ start_a_stub(size_deopt_handler());
1958   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1959   int offset = __ offset();
1960   address the_pc = (address) __ pc();
1961   Label next;
1962   // push a "the_pc" on the stack without destroying any registers
1963   // as they all may be live.
1964 
1965   // push address of "next"
1966   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1967   __ bind(next);
1968   // adjust it so it matches "the_pc"
1969   __ subptr(Address(rsp, 0), __ offset() - offset);
1970   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1971   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1972   __ end_a_stub();
1973   return offset;
1974 }
1975 
1976 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1977   int mark = cbuf.insts()->mark_off();
1978   MacroAssembler _masm(&cbuf);
1979   address double_address = __ double_constant(x);
1980   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1981   emit_d32_reloc(cbuf,
1982                  (int) (double_address - cbuf.code_end() - 4),
1983                  internal_word_Relocation::spec(double_address),
1984                  RELOC_DISP32);
1985 }
1986 
1987 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1988   int mark = cbuf.insts()->mark_off();
1989   MacroAssembler _masm(&cbuf);
1990   address float_address = __ float_constant(x);
1991   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1992   emit_d32_reloc(cbuf,
1993                  (int) (float_address - cbuf.code_end() - 4),
1994                  internal_word_Relocation::spec(float_address),
1995                  RELOC_DISP32);
1996 }
1997 
1998 
1999 const bool Matcher::match_rule_supported(int opcode) {
2000   if (!has_match_rule(opcode))
2001     return false;
2002 
2003   return true;  // Per default match rules are supported.
2004 }
2005 
2006 int Matcher::regnum_to_fpu_offset(int regnum)
2007 {
2008   return regnum - 32; // The FP registers are in the second chunk
2009 }
2010 
2011 // This is UltraSparc specific, true just means we have fast l2f conversion
2012 const bool Matcher::convL2FSupported(void) {
2013   return true;
2014 }
2015 
2016 // Vector width in bytes
2017 const uint Matcher::vector_width_in_bytes(void) {
2018   return 8;
2019 }
2020 
2021 // Vector ideal reg
2022 const uint Matcher::vector_ideal_reg(void) {
2023   return Op_RegD;
2024 }
2025 
2026 // Is this branch offset short enough that a short branch can be used?
2027 //
2028 // NOTE: If the platform does not provide any short branch variants, then
2029 //       this method should return false for offset 0.
2030 bool Matcher::is_short_branch_offset(int rule, int offset) {
2031   // the short version of jmpConUCF2 contains multiple branches,
2032   // making the reach slightly less
2033   if (rule == jmpConUCF2_rule)
2034     return (-126 <= offset && offset <= 125);
2035   return (-128 <= offset && offset <= 127);
2036 }
2037 
2038 const bool Matcher::isSimpleConstant64(jlong value) {
2039   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2040   //return value == (int) value;  // Cf. storeImmL and immL32.
2041 
2042   // Probably always true, even if a temp register is required.
2043   return true;
2044 }
2045 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// not in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, expressed as eight longs' worth of bytes.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2069 
2070 // No-op on amd64
2071 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2072 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
// (true: a float takes the entire double register)
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
// (true: an int takes the entire long register)
const bool Matcher::int_in_long = true;
2081 
2082 // Return whether or not this register is ever used as an argument.
2083 // This function is used on startup to build the trampoline stubs in
2084 // generateOptoStub.  Registers not mentioned will be killed by the VM
2085 // call in the trampoline, and arguments in those registers not be
2086 // available to the callee.
2087 bool Matcher::can_be_java_arg(int reg)
2088 {
2089   return
2090     reg ==  RDI_num || reg ==  RDI_H_num ||
2091     reg ==  RSI_num || reg ==  RSI_H_num ||
2092     reg ==  RDX_num || reg ==  RDX_H_num ||
2093     reg ==  RCX_num || reg ==  RCX_H_num ||
2094     reg ==   R8_num || reg ==   R8_H_num ||
2095     reg ==   R9_num || reg ==   R9_H_num ||
2096     reg ==  R12_num || reg ==  R12_H_num ||
2097     reg == XMM0_num || reg == XMM0_H_num ||
2098     reg == XMM1_num || reg == XMM1_H_num ||
2099     reg == XMM2_num || reg == XMM2_H_num ||
2100     reg == XMM3_num || reg == XMM3_H_num ||
2101     reg == XMM4_num || reg == XMM4_H_num ||
2102     reg == XMM5_num || reg == XMM5_H_num ||
2103     reg == XMM6_num || reg == XMM6_H_num ||
2104     reg == XMM7_num || reg == XMM7_H_num;
2105 }
2106 
2107 bool Matcher::is_spillable_arg(int reg)
2108 {
2109   return can_be_java_arg(reg);
2110 }
2111 
2112 // Register for DIVI projection of divmodI
2113 RegMask Matcher::divI_proj_mask() {
2114   return INT_RAX_REG_mask;
2115 }
2116 
2117 // Register for MODI projection of divmodI
2118 RegMask Matcher::modI_proj_mask() {
2119   return INT_RDX_REG_mask;
2120 }
2121 
2122 // Register for DIVL projection of divmodL
2123 RegMask Matcher::divL_proj_mask() {
2124   return LONG_RAX_REG_mask;
2125 }
2126 
2127 // Register for MODL projection of divmodL
2128 RegMask Matcher::modL_proj_mask() {
2129   return LONG_RDX_REG_mask;
2130 }
2131 
2132 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2133   return PTR_RBP_REG_mask;
2134 }
2135 
2136 static Address build_address(int b, int i, int s, int d) {
2137   Register index = as_Register(i);
2138   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2139   if (index == rsp) {
2140     index = noreg;
2141     scale = Address::no_scale;
2142   }
2143   Address addr(as_Register(b), index, scale, d);
2144   return addr;
2145 }
2146 
2147 %}
2148 
2149 //----------ENCODING BLOCK-----------------------------------------------------
2150 // This block specifies the encoding classes used by the compiler to
2151 // output byte streams.  Encoding classes are parameterized macros
2152 // used by Machine Instruction Nodes in order to generate the bit
2153 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2156 // COND_INTER.  REG_INTER causes an operand to generate a function
2157 // which returns its register number when queried.  CONST_INTER causes
2158 // an operand to generate a function which returns the value of the
2159 // constant when queried.  MEMORY_INTER causes an operand to generate
2160 // four functions which return the Base Register, the Index Register,
2161 // the Scale Value, and the Offset Value of the operand when queried.
2162 // COND_INTER causes an operand to generate six functions which return
2163 // the encoding code (ie - encoding bits for the instruction)
2164 // associated with each basic boolean condition for a conditional
2165 // instruction.
2166 //
2167 // Instructions specify two basic values for encoding.  Again, a
2168 // function is available to check if the constant displacement is an
2169 // oop. They use the ins_encode keyword to specify their encoding
2170 // classes (which must be a sequence of enc_class names, and their
2171 // parameters, specified in the encoding block), and they use the
2172 // opcode keyword to specify, in order, their primary, secondary, and
2173 // tertiary opcode.  Only the opcode sections which a particular
2174 // instruction needs for encoding need to be specified.
2175 encode %{
2176   // Build emit functions for each basic byte or larger field in the
2177   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2178   // from C++ code in the enc_class source block.  Emit functions will
2179   // live in the main source block for now.  In future, we can
2180   // generalize this by adding a syntax that specifies the sizes of
2181   // fields in an order, so that the adlc can build the emit functions
2182   // automagically
2183 
  // Emit primary opcode: the first of the values an instruction lists with
  // its opcode keyword ($primary).
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}
2189 
  // Emit secondary opcode: the second of the values an instruction lists
  // with its opcode keyword ($secondary).
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}
2195 
  // Emit tertiary opcode: the third of the values an instruction lists with
  // its opcode keyword ($tertiary).
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}
2201 
  // Emit opcode directly: the byte comes from the constant operand d8
  // rather than from the instruction's opcode declaration.
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}
2207 
  // Emit size prefix (the single byte 0x66)
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}
2213 
  // Emit an r/m byte with 0x3 in the first field, 0 in the middle field and
  // the low three bits of $reg in the last field.  No prefix is emitted here
  // for encodings >= 8.
  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}
2218 
  // Emit an r/m byte with 0x3 in the first field and the low three bits of
  // $dst and $src in the remaining two fields.  No prefix is emitted here
  // for encodings >= 8.
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2223 
  // Emit the opcode byte given by the constant operand, followed by the same
  // register-to-register r/m byte that reg_reg produces.
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2229 
  // Flag fixup emitted as a fixed byte sequence: when the parity flag is
  // set, the saved flags are masked with 0xffffff2b on the stack and
  // restored; otherwise the whole sequence is skipped.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // (0x0A skips the 10 bytes of pushfq + andq + popfq and lands on the nop)
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2252 
2253   enc_class cmpfp3(rRegI dst)
2254   %{
2255     int dstenc = $dst$$reg;
2256 
2257     // movl $dst, -1
2258     if (dstenc >= 8) {
2259       emit_opcode(cbuf, Assembler::REX_B);
2260     }
2261     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2262     emit_d32(cbuf, -1);
2263 
2264     // jp,s done
2265     emit_opcode(cbuf, 0x7A);
2266     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2267 
2268     // jb,s done
2269     emit_opcode(cbuf, 0x72);
2270     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2271 
2272     // setne $dst
2273     if (dstenc >= 4) {
2274       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2275     }
2276     emit_opcode(cbuf, 0x0F);
2277     emit_opcode(cbuf, 0x95);
2278     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2279 
2280     // movzbl $dst, $dst
2281     if (dstenc >= 4) {
2282       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2283     }
2284     emit_opcode(cbuf, 0x0F);
2285     emit_opcode(cbuf, 0xB6);
2286     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2287   %}
2288 
2289   enc_class cdql_enc(no_rax_rdx_RegI div)
2290   %{
2291     // Full implementation of Java idiv and irem; checks for
2292     // special case as described in JVM spec., p.243 & p.271.
2293     //
2294     //         normal case                           special case
2295     //
2296     // input : rax: dividend                         min_int
2297     //         reg: divisor                          -1
2298     //
2299     // output: rax: quotient  (= rax idiv reg)       min_int
2300     //         rdx: remainder (= rax irem reg)       0
2301     //
2302     //  Code sequnce:
2303     //
2304     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2305     //    5:   75 07/08                jne    e <normal>
2306     //    7:   33 d2                   xor    %edx,%edx
2307     //  [div >= 8 -> offset + 1]
2308     //  [REX_B]
2309     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2310     //    c:   74 03/04                je     11 <done>
2311     // 000000000000000e <normal>:
2312     //    e:   99                      cltd
2313     //  [div >= 8 -> offset + 1]
2314     //  [REX_B]
2315     //    f:   f7 f9                   idiv   $div
2316     // 0000000000000011 <done>:
2317 
2318     // cmp    $0x80000000,%eax
2319     emit_opcode(cbuf, 0x3d);
2320     emit_d8(cbuf, 0x00);
2321     emit_d8(cbuf, 0x00);
2322     emit_d8(cbuf, 0x00);
2323     emit_d8(cbuf, 0x80);
2324 
2325     // jne    e <normal>
2326     emit_opcode(cbuf, 0x75);
2327     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2328 
2329     // xor    %edx,%edx
2330     emit_opcode(cbuf, 0x33);
2331     emit_d8(cbuf, 0xD2);
2332 
2333     // cmp    $0xffffffffffffffff,%ecx
2334     if ($div$$reg >= 8) {
2335       emit_opcode(cbuf, Assembler::REX_B);
2336     }
2337     emit_opcode(cbuf, 0x83);
2338     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2339     emit_d8(cbuf, 0xFF);
2340 
2341     // je     11 <done>
2342     emit_opcode(cbuf, 0x74);
2343     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2344 
2345     // <normal>
2346     // cltd
2347     emit_opcode(cbuf, 0x99);
2348 
2349     // idivl (note: must be emitted by the user of this rule)
2350     // <done>
2351   %}
2352 
2353   enc_class cdqq_enc(no_rax_rdx_RegL div)
2354   %{
2355     // Full implementation of Java ldiv and lrem; checks for
2356     // special case as described in JVM spec., p.243 & p.271.
2357     //
2358     //         normal case                           special case
2359     //
2360     // input : rax: dividend                         min_long
2361     //         reg: divisor                          -1
2362     //
2363     // output: rax: quotient  (= rax idiv reg)       min_long
2364     //         rdx: remainder (= rax irem reg)       0
2365     //
2366     //  Code sequnce:
2367     //
2368     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2369     //    7:   00 00 80
2370     //    a:   48 39 d0                cmp    %rdx,%rax
2371     //    d:   75 08                   jne    17 <normal>
2372     //    f:   33 d2                   xor    %edx,%edx
2373     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2374     //   15:   74 05                   je     1c <done>
2375     // 0000000000000017 <normal>:
2376     //   17:   48 99                   cqto
2377     //   19:   48 f7 f9                idiv   $div
2378     // 000000000000001c <done>:
2379 
2380     // mov    $0x8000000000000000,%rdx
2381     emit_opcode(cbuf, Assembler::REX_W);
2382     emit_opcode(cbuf, 0xBA);
2383     emit_d8(cbuf, 0x00);
2384     emit_d8(cbuf, 0x00);
2385     emit_d8(cbuf, 0x00);
2386     emit_d8(cbuf, 0x00);
2387     emit_d8(cbuf, 0x00);
2388     emit_d8(cbuf, 0x00);
2389     emit_d8(cbuf, 0x00);
2390     emit_d8(cbuf, 0x80);
2391 
2392     // cmp    %rdx,%rax
2393     emit_opcode(cbuf, Assembler::REX_W);
2394     emit_opcode(cbuf, 0x39);
2395     emit_d8(cbuf, 0xD0);
2396 
2397     // jne    17 <normal>
2398     emit_opcode(cbuf, 0x75);
2399     emit_d8(cbuf, 0x08);
2400 
2401     // xor    %edx,%edx
2402     emit_opcode(cbuf, 0x33);
2403     emit_d8(cbuf, 0xD2);
2404 
2405     // cmp    $0xffffffffffffffff,$div
2406     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2407     emit_opcode(cbuf, 0x83);
2408     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2409     emit_d8(cbuf, 0xFF);
2410 
2411     // je     1e <done>
2412     emit_opcode(cbuf, 0x74);
2413     emit_d8(cbuf, 0x05);
2414 
2415     // <normal>
2416     // cqto
2417     emit_opcode(cbuf, Assembler::REX_W);
2418     emit_opcode(cbuf, 0x99);
2419 
2420     // idivq (note: must be emitted by the user of this rule)
2421     // <done>
2422   %}
2423 
2424   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2425   enc_class OpcSE(immI imm)
2426   %{
2427     // Emit primary opcode and set sign-extend bit
2428     // Check for 8-bit immediate, and set sign extend bit in opcode
2429     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2430       emit_opcode(cbuf, $primary | 0x02);
2431     } else {
2432       // 32-bit immediate
2433       emit_opcode(cbuf, $primary);
2434     }
2435   %}
2436 
2437   enc_class OpcSErm(rRegI dst, immI imm)
2438   %{
2439     // OpcSEr/m
2440     int dstenc = $dst$$reg;
2441     if (dstenc >= 8) {
2442       emit_opcode(cbuf, Assembler::REX_B);
2443       dstenc -= 8;
2444     }
2445     // Emit primary opcode and set sign-extend bit
2446     // Check for 8-bit immediate, and set sign extend bit in opcode
2447     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2448       emit_opcode(cbuf, $primary | 0x02);
2449     } else {
2450       // 32-bit immediate
2451       emit_opcode(cbuf, $primary);
2452     }
2453     // Emit r/m byte with secondary opcode, after primary opcode.
2454     emit_rm(cbuf, 0x3, $secondary, dstenc);
2455   %}
2456 
2457   enc_class OpcSErm_wide(rRegL dst, immI imm)
2458   %{
2459     // OpcSEr/m
2460     int dstenc = $dst$$reg;
2461     if (dstenc < 8) {
2462       emit_opcode(cbuf, Assembler::REX_W);
2463     } else {
2464       emit_opcode(cbuf, Assembler::REX_WB);
2465       dstenc -= 8;
2466     }
2467     // Emit primary opcode and set sign-extend bit
2468     // Check for 8-bit immediate, and set sign extend bit in opcode
2469     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2470       emit_opcode(cbuf, $primary | 0x02);
2471     } else {
2472       // 32-bit immediate
2473       emit_opcode(cbuf, $primary);
2474     }
2475     // Emit r/m byte with secondary opcode, after primary opcode.
2476     emit_rm(cbuf, 0x3, $secondary, dstenc);
2477   %}
2478 
2479   enc_class Con8or32(immI imm)
2480   %{
2481     // Check for 8-bit immediate, and set sign extend bit in opcode
2482     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2483       $$$emit8$imm$$constant;
2484     } else {
2485       // 32-bit immediate
2486       $$$emit32$imm$$constant;
2487     }
2488   %}
2489 
2490   enc_class Lbl(label labl)
2491   %{
2492     // JMP, CALL
2493     Label* l = $labl$$label;
2494     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2495   %}
2496 
2497   enc_class LblShort(label labl)
2498   %{
2499     // JMP, CALL
2500     Label* l = $labl$$label;
2501     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2502     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2503     emit_d8(cbuf, disp);
2504   %}
2505 
  // Emit one byte through emit_cc from the secondary opcode and the
  // encoding of $dst.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
2511 
  // Emit one byte through emit_cc from the tertiary opcode and the
  // encoding of $dst.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
2517 
  // Emit an r/m byte whose middle field is the secondary opcode and whose
  // last field is the low three bits of $div.
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
2523 
2524   enc_class Jcc(cmpOp cop, label labl)
2525   %{
2526     // JCC
2527     Label* l = $labl$$label;
2528     $$$emit8$primary;
2529     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2530     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2531   %}
2532 
2533   enc_class JccShort (cmpOp cop, label labl)
2534   %{
2535   // JCC
2536     Label *l = $labl$$label;
2537     emit_cc(cbuf, $primary, $cop$$cmpcode);
2538     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2539     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2540     emit_d8(cbuf, disp);
2541   %}
2542 
2543   enc_class enc_cmov(cmpOp cop)
2544   %{
2545     // CMOV
2546     $$$emit8$primary;
2547     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2548   %}
2549 
2550   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2551   %{
2552     // Invert sense of branch from sense of cmov
2553     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2554     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2555                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2556                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2557     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2558     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2559     if ($dst$$reg < 8) {
2560       if ($src$$reg >= 8) {
2561         emit_opcode(cbuf, Assembler::REX_B);
2562       }
2563     } else {
2564       if ($src$$reg < 8) {
2565         emit_opcode(cbuf, Assembler::REX_R);
2566       } else {
2567         emit_opcode(cbuf, Assembler::REX_RB);
2568       }
2569     }
2570     emit_opcode(cbuf, 0x0F);
2571     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2572     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2573   %}
2574 
2575   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2576   %{
2577     // Invert sense of branch from sense of cmov
2578     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2579     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2580 
2581     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2582     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2583     if ($dst$$reg < 8) {
2584       if ($src$$reg >= 8) {
2585         emit_opcode(cbuf, Assembler::REX_B);
2586       }
2587     } else {
2588       if ($src$$reg < 8) {
2589         emit_opcode(cbuf, Assembler::REX_R);
2590       } else {
2591         emit_opcode(cbuf, Assembler::REX_RB);
2592       }
2593     }
2594     emit_opcode(cbuf, 0x0F);
2595     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2596     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2597   %}
2598 
2599   enc_class enc_PartialSubtypeCheck()
2600   %{
2601     Register Rrdi = as_Register(RDI_enc); // result register
2602     Register Rrax = as_Register(RAX_enc); // super class
2603     Register Rrcx = as_Register(RCX_enc); // killed
2604     Register Rrsi = as_Register(RSI_enc); // sub class
2605     Label miss;
2606     const bool set_cond_codes = true;
2607 
2608     MacroAssembler _masm(&cbuf);
2609     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2610                                      NULL, &miss,
2611                                      /*set_cond_codes:*/ true);
2612     if ($primary) {
2613       __ xorptr(Rrdi, Rrdi);
2614     }
2615     __ bind(miss);
2616   %}
2617 
2618   enc_class Java_To_Interpreter(method meth)
2619   %{
2620     // CALL Java_To_Interpreter
2621     // This is the instruction starting address for relocation info.
2622     cbuf.set_inst_mark();
2623     $$$emit8$primary;
2624     // CALL directly to the runtime
2625     emit_d32_reloc(cbuf,
2626                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2627                    runtime_call_Relocation::spec(),
2628                    RELOC_DISP32);
2629   %}
2630 
2631   enc_class preserve_SP %{
2632     debug_only(int off0 = cbuf.code_size());
2633     MacroAssembler _masm(&cbuf);
2634     // RBP is preserved across all calls, even compiled calls.
2635     // Use it to preserve RSP in places where the callee might change the SP.
2636     __ movptr(rbp, rsp);
2637     debug_only(int off1 = cbuf.code_size());
2638     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2639   %}
2640 
2641   enc_class restore_SP %{
2642     MacroAssembler _masm(&cbuf);
2643     __ movptr(rsp, rbp);
2644   %}
2645 
2646   enc_class Java_Static_Call(method meth)
2647   %{
2648     // JAVA STATIC CALL
2649     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2650     // determine who we intended to call.
2651     cbuf.set_inst_mark();
2652     $$$emit8$primary;
2653 
2654     if (!_method) {
2655       emit_d32_reloc(cbuf,
2656                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2657                      runtime_call_Relocation::spec(),
2658                      RELOC_DISP32);
2659     } else if (_optimized_virtual) {
2660       emit_d32_reloc(cbuf,
2661                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2662                      opt_virtual_call_Relocation::spec(),
2663                      RELOC_DISP32);
2664     } else {
2665       emit_d32_reloc(cbuf,
2666                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2667                      static_call_Relocation::spec(),
2668                      RELOC_DISP32);
2669     }
2670     if (_method) {
2671       // Emit stub for static call
2672       emit_java_to_interp(cbuf);
2673     }
2674   %}
2675 
2676   enc_class Java_Dynamic_Call(method meth)
2677   %{
2678     // JAVA DYNAMIC CALL
2679     // !!!!!
2680     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2681     // emit_call_dynamic_prologue( cbuf );
2682     cbuf.set_inst_mark();
2683 
2684     // movq rax, -1
2685     emit_opcode(cbuf, Assembler::REX_W);
2686     emit_opcode(cbuf, 0xB8 | RAX_enc);
2687     emit_d64_reloc(cbuf,
2688                    (int64_t) Universe::non_oop_word(),
2689                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2690     address virtual_call_oop_addr = cbuf.inst_mark();
2691     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2692     // who we intended to call.
2693     cbuf.set_inst_mark();
2694     $$$emit8$primary;
2695     emit_d32_reloc(cbuf,
2696                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2697                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2698                    RELOC_DISP32);
2699   %}
2700 
2701   enc_class Java_Compiled_Call(method meth)
2702   %{
2703     // JAVA COMPILED CALL
2704     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2705 
2706     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2707     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2708 
2709     // callq *disp(%rax)
2710     cbuf.set_inst_mark();
2711     $$$emit8$primary;
2712     if (disp < 0x80) {
2713       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2714       emit_d8(cbuf, disp); // Displacement
2715     } else {
2716       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2717       emit_d32(cbuf, disp); // Displacement
2718     }
2719   %}
2720 
2721   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2722   %{
2723     // SAL, SAR, SHR
2724     int dstenc = $dst$$reg;
2725     if (dstenc >= 8) {
2726       emit_opcode(cbuf, Assembler::REX_B);
2727       dstenc -= 8;
2728     }
2729     $$$emit8$primary;
2730     emit_rm(cbuf, 0x3, $secondary, dstenc);
2731     $$$emit8$shift$$constant;
2732   %}
2733 
2734   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2735   %{
2736     // SAL, SAR, SHR
2737     int dstenc = $dst$$reg;
2738     if (dstenc < 8) {
2739       emit_opcode(cbuf, Assembler::REX_W);
2740     } else {
2741       emit_opcode(cbuf, Assembler::REX_WB);
2742       dstenc -= 8;
2743     }
2744     $$$emit8$primary;
2745     emit_rm(cbuf, 0x3, $secondary, dstenc);
2746     $$$emit8$shift$$constant;
2747   %}
2748 
2749   enc_class load_immI(rRegI dst, immI src)
2750   %{
2751     int dstenc = $dst$$reg;
2752     if (dstenc >= 8) {
2753       emit_opcode(cbuf, Assembler::REX_B);
2754       dstenc -= 8;
2755     }
2756     emit_opcode(cbuf, 0xB8 | dstenc);
2757     $$$emit32$src$$constant;
2758   %}
2759 
2760   enc_class load_immL(rRegL dst, immL src)
2761   %{
2762     int dstenc = $dst$$reg;
2763     if (dstenc < 8) {
2764       emit_opcode(cbuf, Assembler::REX_W);
2765     } else {
2766       emit_opcode(cbuf, Assembler::REX_WB);
2767       dstenc -= 8;
2768     }
2769     emit_opcode(cbuf, 0xB8 | dstenc);
2770     emit_d64(cbuf, $src$$constant);
2771   %}
2772 
2773   enc_class load_immUL32(rRegL dst, immUL32 src)
2774   %{
2775     // same as load_immI, but this time we care about zeroes in the high word
2776     int dstenc = $dst$$reg;
2777     if (dstenc >= 8) {
2778       emit_opcode(cbuf, Assembler::REX_B);
2779       dstenc -= 8;
2780     }
2781     emit_opcode(cbuf, 0xB8 | dstenc);
2782     $$$emit32$src$$constant;
2783   %}
2784 
2785   enc_class load_immL32(rRegL dst, immL32 src)
2786   %{
2787     int dstenc = $dst$$reg;
2788     if (dstenc < 8) {
2789       emit_opcode(cbuf, Assembler::REX_W);
2790     } else {
2791       emit_opcode(cbuf, Assembler::REX_WB);
2792       dstenc -= 8;
2793     }
2794     emit_opcode(cbuf, 0xC7);
2795     emit_rm(cbuf, 0x03, 0x00, dstenc);
2796     $$$emit32$src$$constant;
2797   %}
2798 
2799   enc_class load_immP31(rRegP dst, immP32 src)
2800   %{
2801     // same as load_immI, but this time we care about zeroes in the high word
2802     int dstenc = $dst$$reg;
2803     if (dstenc >= 8) {
2804       emit_opcode(cbuf, Assembler::REX_B);
2805       dstenc -= 8;
2806     }
2807     emit_opcode(cbuf, 0xB8 | dstenc);
2808     $$$emit32$src$$constant;
2809   %}
2810 
2811   enc_class load_immP(rRegP dst, immP src)
2812   %{
2813     int dstenc = $dst$$reg;
2814     if (dstenc < 8) {
2815       emit_opcode(cbuf, Assembler::REX_W);
2816     } else {
2817       emit_opcode(cbuf, Assembler::REX_WB);
2818       dstenc -= 8;
2819     }
2820     emit_opcode(cbuf, 0xB8 | dstenc);
2821     // This next line should be generated from ADLC
2822     if ($src->constant_is_oop()) {
2823       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2824     } else {
2825       emit_d64(cbuf, $src$$constant);
2826     }
2827   %}
2828 
2829   enc_class load_immF(regF dst, immF con)
2830   %{
2831     // XXX reg_mem doesn't support RIP-relative addressing yet
2832     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2833     emit_float_constant(cbuf, $con$$constant);
2834   %}
2835 
2836   enc_class load_immD(regD dst, immD con)
2837   %{
2838     // XXX reg_mem doesn't support RIP-relative addressing yet
2839     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2840     emit_double_constant(cbuf, $con$$constant);
2841   %}
2842 
2843   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2844     emit_opcode(cbuf, 0xF3);
2845     if ($dst$$reg >= 8) {
2846       emit_opcode(cbuf, Assembler::REX_R);
2847     }
2848     emit_opcode(cbuf, 0x0F);
2849     emit_opcode(cbuf, 0x10);
2850     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2851     emit_float_constant(cbuf, $con$$constant);
2852   %}
2853 
2854   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2855     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2856     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2857     if ($dst$$reg >= 8) {
2858       emit_opcode(cbuf, Assembler::REX_R);
2859     }
2860     emit_opcode(cbuf, 0x0F);
2861     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2862     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2863     emit_double_constant(cbuf, $con$$constant);
2864   %}
2865 
2866   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2867   enc_class enc_copy(rRegI dst, rRegI src)
2868   %{
2869     encode_copy(cbuf, $dst$$reg, $src$$reg);
2870   %}
2871 
2872   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2873   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2874     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2875   %}
2876 
2877   enc_class enc_copy_always(rRegI dst, rRegI src)
2878   %{
2879     int srcenc = $src$$reg;
2880     int dstenc = $dst$$reg;
2881 
2882     if (dstenc < 8) {
2883       if (srcenc >= 8) {
2884         emit_opcode(cbuf, Assembler::REX_B);
2885         srcenc -= 8;
2886       }
2887     } else {
2888       if (srcenc < 8) {
2889         emit_opcode(cbuf, Assembler::REX_R);
2890       } else {
2891         emit_opcode(cbuf, Assembler::REX_RB);
2892         srcenc -= 8;
2893       }
2894       dstenc -= 8;
2895     }
2896 
2897     emit_opcode(cbuf, 0x8B);
2898     emit_rm(cbuf, 0x3, dstenc, srcenc);
2899   %}
2900 
2901   enc_class enc_copy_wide(rRegL dst, rRegL src)
2902   %{
2903     int srcenc = $src$$reg;
2904     int dstenc = $dst$$reg;
2905 
2906     if (dstenc != srcenc) {
2907       if (dstenc < 8) {
2908         if (srcenc < 8) {
2909           emit_opcode(cbuf, Assembler::REX_W);
2910         } else {
2911           emit_opcode(cbuf, Assembler::REX_WB);
2912           srcenc -= 8;
2913         }
2914       } else {
2915         if (srcenc < 8) {
2916           emit_opcode(cbuf, Assembler::REX_WR);
2917         } else {
2918           emit_opcode(cbuf, Assembler::REX_WRB);
2919           srcenc -= 8;
2920         }
2921         dstenc -= 8;
2922       }
2923       emit_opcode(cbuf, 0x8B);
2924       emit_rm(cbuf, 0x3, dstenc, srcenc);
2925     }
2926   %}
2927 
  // Emits the constant operand as a 32-bit immediate through the ADLC emit32
  // macro.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2933 
2934   enc_class Con64(immL src)
2935   %{
2936     // Output immediate
2937     emit_d64($src$$constant);
2938   %}
2939 
2940   enc_class Con32F_as_bits(immF src)
2941   %{
2942     // Output Float immediate bits
2943     jfloat jf = $src$$constant;
2944     jint jf_as_bits = jint_cast(jf);
2945     emit_d32(cbuf, jf_as_bits);
2946   %}
2947 
  // Emits the constant operand as a 16-bit immediate through the ADLC emit16
  // macro.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2953 
2954   // How is this different from Con32??? XXX
2955   enc_class Con_d32(immI src)
2956   %{
2957     emit_d32(cbuf,$src$$constant);
2958   %}
2959 
2960   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2961     // Output immediate memory reference
2962     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2963     emit_d32(cbuf, 0x00);
2964   %}
2965 
2966   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2967     MacroAssembler masm(&cbuf);
2968 
2969     Register switch_reg = as_Register($switch_val$$reg);
2970     Register dest_reg   = as_Register($dest$$reg);
2971     address table_base  = masm.address_table_constant(_index2label);
2972 
2973     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2974     // to do that and the compiler is using that register as one it can allocate.
2975     // So we build it all by hand.
2976     // Address index(noreg, switch_reg, Address::times_1);
2977     // ArrayAddress dispatch(table, index);
2978 
2979     Address dispatch(dest_reg, switch_reg, Address::times_1);
2980 
2981     masm.lea(dest_reg, InternalAddress(table_base));
2982     masm.jmp(dispatch);
2983   %}
2984 
2985   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2986     MacroAssembler masm(&cbuf);
2987 
2988     Register switch_reg = as_Register($switch_val$$reg);
2989     Register dest_reg   = as_Register($dest$$reg);
2990     address table_base  = masm.address_table_constant(_index2label);
2991 
2992     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2993     // to do that and the compiler is using that register as one it can allocate.
2994     // So we build it all by hand.
2995     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2996     // ArrayAddress dispatch(table, index);
2997 
2998     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2999 
3000     masm.lea(dest_reg, InternalAddress(table_base));
3001     masm.jmp(dispatch);
3002   %}
3003 
3004   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
3005     MacroAssembler masm(&cbuf);
3006 
3007     Register switch_reg = as_Register($switch_val$$reg);
3008     Register dest_reg   = as_Register($dest$$reg);
3009     address table_base  = masm.address_table_constant(_index2label);
3010 
3011     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
3012     // to do that and the compiler is using that register as one it can allocate.
3013     // So we build it all by hand.
3014     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3015     // ArrayAddress dispatch(table, index);
3016 
3017     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3018     masm.lea(dest_reg, InternalAddress(table_base));
3019     masm.jmp(dispatch);
3020 
3021   %}
3022 
3023   enc_class lock_prefix()
3024   %{
3025     if (os::is_MP()) {
3026       emit_opcode(cbuf, 0xF0); // lock
3027     }
3028   %}
3029 
3030   enc_class REX_mem(memory mem)
3031   %{
3032     if ($mem$$base >= 8) {
3033       if ($mem$$index < 8) {
3034         emit_opcode(cbuf, Assembler::REX_B);
3035       } else {
3036         emit_opcode(cbuf, Assembler::REX_XB);
3037       }
3038     } else {
3039       if ($mem$$index >= 8) {
3040         emit_opcode(cbuf, Assembler::REX_X);
3041       }
3042     }
3043   %}
3044 
3045   enc_class REX_mem_wide(memory mem)
3046   %{
3047     if ($mem$$base >= 8) {
3048       if ($mem$$index < 8) {
3049         emit_opcode(cbuf, Assembler::REX_WB);
3050       } else {
3051         emit_opcode(cbuf, Assembler::REX_WXB);
3052       }
3053     } else {
3054       if ($mem$$index < 8) {
3055         emit_opcode(cbuf, Assembler::REX_W);
3056       } else {
3057         emit_opcode(cbuf, Assembler::REX_WX);
3058       }
3059     }
3060   %}
3061 
3062   // for byte regs
3063   enc_class REX_breg(rRegI reg)
3064   %{
3065     if ($reg$$reg >= 4) {
3066       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3067     }
3068   %}
3069 
3070   // for byte regs
3071   enc_class REX_reg_breg(rRegI dst, rRegI src)
3072   %{
3073     if ($dst$$reg < 8) {
3074       if ($src$$reg >= 4) {
3075         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3076       }
3077     } else {
3078       if ($src$$reg < 8) {
3079         emit_opcode(cbuf, Assembler::REX_R);
3080       } else {
3081         emit_opcode(cbuf, Assembler::REX_RB);
3082       }
3083     }
3084   %}
3085 
3086   // for byte regs
3087   enc_class REX_breg_mem(rRegI reg, memory mem)
3088   %{
3089     if ($reg$$reg < 8) {
3090       if ($mem$$base < 8) {
3091         if ($mem$$index >= 8) {
3092           emit_opcode(cbuf, Assembler::REX_X);
3093         } else if ($reg$$reg >= 4) {
3094           emit_opcode(cbuf, Assembler::REX);
3095         }
3096       } else {
3097         if ($mem$$index < 8) {
3098           emit_opcode(cbuf, Assembler::REX_B);
3099         } else {
3100           emit_opcode(cbuf, Assembler::REX_XB);
3101         }
3102       }
3103     } else {
3104       if ($mem$$base < 8) {
3105         if ($mem$$index < 8) {
3106           emit_opcode(cbuf, Assembler::REX_R);
3107         } else {
3108           emit_opcode(cbuf, Assembler::REX_RX);
3109         }
3110       } else {
3111         if ($mem$$index < 8) {
3112           emit_opcode(cbuf, Assembler::REX_RB);
3113         } else {
3114           emit_opcode(cbuf, Assembler::REX_RXB);
3115         }
3116       }
3117     }
3118   %}
3119 
3120   enc_class REX_reg(rRegI reg)
3121   %{
3122     if ($reg$$reg >= 8) {
3123       emit_opcode(cbuf, Assembler::REX_B);
3124     }
3125   %}
3126 
3127   enc_class REX_reg_wide(rRegI reg)
3128   %{
3129     if ($reg$$reg < 8) {
3130       emit_opcode(cbuf, Assembler::REX_W);
3131     } else {
3132       emit_opcode(cbuf, Assembler::REX_WB);
3133     }
3134   %}
3135 
3136   enc_class REX_reg_reg(rRegI dst, rRegI src)
3137   %{
3138     if ($dst$$reg < 8) {
3139       if ($src$$reg >= 8) {
3140         emit_opcode(cbuf, Assembler::REX_B);
3141       }
3142     } else {
3143       if ($src$$reg < 8) {
3144         emit_opcode(cbuf, Assembler::REX_R);
3145       } else {
3146         emit_opcode(cbuf, Assembler::REX_RB);
3147       }
3148     }
3149   %}
3150 
3151   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3152   %{
3153     if ($dst$$reg < 8) {
3154       if ($src$$reg < 8) {
3155         emit_opcode(cbuf, Assembler::REX_W);
3156       } else {
3157         emit_opcode(cbuf, Assembler::REX_WB);
3158       }
3159     } else {
3160       if ($src$$reg < 8) {
3161         emit_opcode(cbuf, Assembler::REX_WR);
3162       } else {
3163         emit_opcode(cbuf, Assembler::REX_WRB);
3164       }
3165     }
3166   %}
3167 
3168   enc_class REX_reg_mem(rRegI reg, memory mem)
3169   %{
3170     if ($reg$$reg < 8) {
3171       if ($mem$$base < 8) {
3172         if ($mem$$index >= 8) {
3173           emit_opcode(cbuf, Assembler::REX_X);
3174         }
3175       } else {
3176         if ($mem$$index < 8) {
3177           emit_opcode(cbuf, Assembler::REX_B);
3178         } else {
3179           emit_opcode(cbuf, Assembler::REX_XB);
3180         }
3181       }
3182     } else {
3183       if ($mem$$base < 8) {
3184         if ($mem$$index < 8) {
3185           emit_opcode(cbuf, Assembler::REX_R);
3186         } else {
3187           emit_opcode(cbuf, Assembler::REX_RX);
3188         }
3189       } else {
3190         if ($mem$$index < 8) {
3191           emit_opcode(cbuf, Assembler::REX_RB);
3192         } else {
3193           emit_opcode(cbuf, Assembler::REX_RXB);
3194         }
3195       }
3196     }
3197   %}
3198 
3199   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3200   %{
3201     if ($reg$$reg < 8) {
3202       if ($mem$$base < 8) {
3203         if ($mem$$index < 8) {
3204           emit_opcode(cbuf, Assembler::REX_W);
3205         } else {
3206           emit_opcode(cbuf, Assembler::REX_WX);
3207         }
3208       } else {
3209         if ($mem$$index < 8) {
3210           emit_opcode(cbuf, Assembler::REX_WB);
3211         } else {
3212           emit_opcode(cbuf, Assembler::REX_WXB);
3213         }
3214       }
3215     } else {
3216       if ($mem$$base < 8) {
3217         if ($mem$$index < 8) {
3218           emit_opcode(cbuf, Assembler::REX_WR);
3219         } else {
3220           emit_opcode(cbuf, Assembler::REX_WRX);
3221         }
3222       } else {
3223         if ($mem$$index < 8) {
3224           emit_opcode(cbuf, Assembler::REX_WRB);
3225         } else {
3226           emit_opcode(cbuf, Assembler::REX_WRXB);
3227         }
3228       }
3229     }
3230   %}
3231 
3232   enc_class reg_mem(rRegI ereg, memory mem)
3233   %{
3234     // High registers handle in encode_RegMem
3235     int reg = $ereg$$reg;
3236     int base = $mem$$base;
3237     int index = $mem$$index;
3238     int scale = $mem$$scale;
3239     int disp = $mem$$disp;
3240     bool disp_is_oop = $mem->disp_is_oop();
3241 
3242     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3243   %}
3244 
3245   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3246   %{
3247     int rm_byte_opcode = $rm_opcode$$constant;
3248 
3249     // High registers handle in encode_RegMem
3250     int base = $mem$$base;
3251     int index = $mem$$index;
3252     int scale = $mem$$scale;
3253     int displace = $mem$$disp;
3254 
3255     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3256                                             // working with static
3257                                             // globals
3258     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3259                   disp_is_oop);
3260   %}
3261 
3262   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3263   %{
3264     int reg_encoding = $dst$$reg;
3265     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3266     int index        = 0x04;            // 0x04 indicates no index
3267     int scale        = 0x00;            // 0x00 indicates no scale
3268     int displace     = $src1$$constant; // 0x00 indicates no displacement
3269     bool disp_is_oop = false;
3270     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3271                   disp_is_oop);
3272   %}
3273 
3274   enc_class neg_reg(rRegI dst)
3275   %{
3276     int dstenc = $dst$$reg;
3277     if (dstenc >= 8) {
3278       emit_opcode(cbuf, Assembler::REX_B);
3279       dstenc -= 8;
3280     }
3281     // NEG $dst
3282     emit_opcode(cbuf, 0xF7);
3283     emit_rm(cbuf, 0x3, 0x03, dstenc);
3284   %}
3285 
3286   enc_class neg_reg_wide(rRegI dst)
3287   %{
3288     int dstenc = $dst$$reg;
3289     if (dstenc < 8) {
3290       emit_opcode(cbuf, Assembler::REX_W);
3291     } else {
3292       emit_opcode(cbuf, Assembler::REX_WB);
3293       dstenc -= 8;
3294     }
3295     // NEG $dst
3296     emit_opcode(cbuf, 0xF7);
3297     emit_rm(cbuf, 0x3, 0x03, dstenc);
3298   %}
3299 
3300   enc_class setLT_reg(rRegI dst)
3301   %{
3302     int dstenc = $dst$$reg;
3303     if (dstenc >= 8) {
3304       emit_opcode(cbuf, Assembler::REX_B);
3305       dstenc -= 8;
3306     } else if (dstenc >= 4) {
3307       emit_opcode(cbuf, Assembler::REX);
3308     }
3309     // SETLT $dst
3310     emit_opcode(cbuf, 0x0F);
3311     emit_opcode(cbuf, 0x9C);
3312     emit_rm(cbuf, 0x3, 0x0, dstenc);
3313   %}
3314 
3315   enc_class setNZ_reg(rRegI dst)
3316   %{
3317     int dstenc = $dst$$reg;
3318     if (dstenc >= 8) {
3319       emit_opcode(cbuf, Assembler::REX_B);
3320       dstenc -= 8;
3321     } else if (dstenc >= 4) {
3322       emit_opcode(cbuf, Assembler::REX);
3323     }
3324     // SETNZ $dst
3325     emit_opcode(cbuf, 0x0F);
3326     emit_opcode(cbuf, 0x95);
3327     emit_rm(cbuf, 0x3, 0x0, dstenc);
3328   %}
3329 
3330   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3331                        rcx_RegI tmp)
3332   %{
3333     // cadd_cmpLT
3334 
3335     int tmpReg = $tmp$$reg;
3336 
3337     int penc = $p$$reg;
3338     int qenc = $q$$reg;
3339     int yenc = $y$$reg;
3340 
3341     // subl $p,$q
3342     if (penc < 8) {
3343       if (qenc >= 8) {
3344         emit_opcode(cbuf, Assembler::REX_B);
3345       }
3346     } else {
3347       if (qenc < 8) {
3348         emit_opcode(cbuf, Assembler::REX_R);
3349       } else {
3350         emit_opcode(cbuf, Assembler::REX_RB);
3351       }
3352     }
3353     emit_opcode(cbuf, 0x2B);
3354     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3355 
3356     // sbbl $tmp, $tmp
3357     emit_opcode(cbuf, 0x1B);
3358     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3359 
3360     // andl $tmp, $y
3361     if (yenc >= 8) {
3362       emit_opcode(cbuf, Assembler::REX_B);
3363     }
3364     emit_opcode(cbuf, 0x23);
3365     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3366 
3367     // addl $p,$tmp
3368     if (penc >= 8) {
3369         emit_opcode(cbuf, Assembler::REX_R);
3370     }
3371     emit_opcode(cbuf, 0x03);
3372     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3373   %}
3374 
3375   // Compare the lonogs and set -1, 0, or 1 into dst
3376   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3377   %{
3378     int src1enc = $src1$$reg;
3379     int src2enc = $src2$$reg;
3380     int dstenc = $dst$$reg;
3381 
3382     // cmpq $src1, $src2
3383     if (src1enc < 8) {
3384       if (src2enc < 8) {
3385         emit_opcode(cbuf, Assembler::REX_W);
3386       } else {
3387         emit_opcode(cbuf, Assembler::REX_WB);
3388       }
3389     } else {
3390       if (src2enc < 8) {
3391         emit_opcode(cbuf, Assembler::REX_WR);
3392       } else {
3393         emit_opcode(cbuf, Assembler::REX_WRB);
3394       }
3395     }
3396     emit_opcode(cbuf, 0x3B);
3397     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3398 
3399     // movl $dst, -1
3400     if (dstenc >= 8) {
3401       emit_opcode(cbuf, Assembler::REX_B);
3402     }
3403     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3404     emit_d32(cbuf, -1);
3405 
3406     // jl,s done
3407     emit_opcode(cbuf, 0x7C);
3408     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3409 
3410     // setne $dst
3411     if (dstenc >= 4) {
3412       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3413     }
3414     emit_opcode(cbuf, 0x0F);
3415     emit_opcode(cbuf, 0x95);
3416     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3417 
3418     // movzbl $dst, $dst
3419     if (dstenc >= 4) {
3420       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3421     }
3422     emit_opcode(cbuf, 0x0F);
3423     emit_opcode(cbuf, 0xB6);
3424     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3425   %}
3426 
3427   enc_class Push_ResultXD(regD dst) %{
3428     int dstenc = $dst$$reg;
3429 
3430     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3431 
3432     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3433     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3434     if (dstenc >= 8) {
3435       emit_opcode(cbuf, Assembler::REX_R);
3436     }
3437     emit_opcode  (cbuf, 0x0F );
3438     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3439     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3440 
3441     // add rsp,8
3442     emit_opcode(cbuf, Assembler::REX_W);
3443     emit_opcode(cbuf,0x83);
3444     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3445     emit_d8(cbuf,0x08);
3446   %}
3447 
3448   enc_class Push_SrcXD(regD src) %{
3449     int srcenc = $src$$reg;
3450 
3451     // subq rsp,#8
3452     emit_opcode(cbuf, Assembler::REX_W);
3453     emit_opcode(cbuf, 0x83);
3454     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3455     emit_d8(cbuf, 0x8);
3456 
3457     // movsd [rsp],src
3458     emit_opcode(cbuf, 0xF2);
3459     if (srcenc >= 8) {
3460       emit_opcode(cbuf, Assembler::REX_R);
3461     }
3462     emit_opcode(cbuf, 0x0F);
3463     emit_opcode(cbuf, 0x11);
3464     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3465 
3466     // fldd [rsp]
3467     emit_opcode(cbuf, 0x66);
3468     emit_opcode(cbuf, 0xDD);
3469     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3470   %}
3471 
3472 
3473   enc_class movq_ld(regD dst, memory mem) %{
3474     MacroAssembler _masm(&cbuf);
3475     __ movq($dst$$XMMRegister, $mem$$Address);
3476   %}
3477 
3478   enc_class movq_st(memory mem, regD src) %{
3479     MacroAssembler _masm(&cbuf);
3480     __ movq($mem$$Address, $src$$XMMRegister);
3481   %}
3482 
3483   enc_class pshufd_8x8(regF dst, regF src) %{
3484     MacroAssembler _masm(&cbuf);
3485 
3486     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3487     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3488     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3489   %}
3490 
3491   enc_class pshufd_4x16(regF dst, regF src) %{
3492     MacroAssembler _masm(&cbuf);
3493 
3494     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3495   %}
3496 
3497   enc_class pshufd(regD dst, regD src, int mode) %{
3498     MacroAssembler _masm(&cbuf);
3499 
3500     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3501   %}
3502 
3503   enc_class pxor(regD dst, regD src) %{
3504     MacroAssembler _masm(&cbuf);
3505 
3506     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3507   %}
3508 
3509   enc_class mov_i2x(regD dst, rRegI src) %{
3510     MacroAssembler _masm(&cbuf);
3511 
3512     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3513   %}
3514 
3515   // obj: object to lock
3516   // box: box address (header location) -- killed
3517   // tmp: rax -- killed
3518   // scr: rbx -- killed
3519   //
3520   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3521   // from i486.ad.  See that file for comments.
3522   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3523   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3524 
3525 
3526   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3527   %{
3528     Register objReg = as_Register((int)$obj$$reg);
3529     Register boxReg = as_Register((int)$box$$reg);
3530     Register tmpReg = as_Register($tmp$$reg);
3531     Register scrReg = as_Register($scr$$reg);
3532     MacroAssembler masm(&cbuf);
3533 
3534     // Verify uniqueness of register assignments -- necessary but not sufficient
3535     assert (objReg != boxReg && objReg != tmpReg &&
3536             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3537 
3538     if (_counters != NULL) {
3539       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3540     }
3541     if (EmitSync & 1) {
3542         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3543         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3544         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3545     } else
3546     if (EmitSync & 2) {
3547         Label DONE_LABEL;
3548         if (UseBiasedLocking) {
3549            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3550           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3551         }
3552         // QQQ was movl...
3553         masm.movptr(tmpReg, 0x1);
3554         masm.orptr(tmpReg, Address(objReg, 0));
3555         masm.movptr(Address(boxReg, 0), tmpReg);
3556         if (os::is_MP()) {
3557           masm.lock();
3558         }
3559         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3560         masm.jcc(Assembler::equal, DONE_LABEL);
3561 
3562         // Recursive locking
3563         masm.subptr(tmpReg, rsp);
3564         masm.andptr(tmpReg, 7 - os::vm_page_size());
3565         masm.movptr(Address(boxReg, 0), tmpReg);
3566 
3567         masm.bind(DONE_LABEL);
3568         masm.nop(); // avoid branch to branch
3569     } else {
3570         Label DONE_LABEL, IsInflated, Egress;
3571 
3572         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3573         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3574         masm.jcc   (Assembler::notZero, IsInflated) ; 
3575          
3576         // it's stack-locked, biased or neutral
3577         // TODO: optimize markword triage order to reduce the number of
3578         // conditional branches in the most common cases.
3579         // Beware -- there's a subtle invariant that fetch of the markword
3580         // at [FETCH], below, will never observe a biased encoding (*101b).
3581         // If this invariant is not held we'll suffer exclusion (safety) failure.
3582 
3583         if (UseBiasedLocking && !UseOptoBiasInlining) {
3584           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3585           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3586         }
3587 
3588         // was q will it destroy high?
3589         masm.orl   (tmpReg, 1) ; 
3590         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3591         if (os::is_MP()) { masm.lock(); } 
3592         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3593         if (_counters != NULL) {
3594            masm.cond_inc32(Assembler::equal,
3595                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3596         }
3597         masm.jcc   (Assembler::equal, DONE_LABEL);
3598 
3599         // Recursive locking
3600         masm.subptr(tmpReg, rsp);
3601         masm.andptr(tmpReg, 7 - os::vm_page_size());
3602         masm.movptr(Address(boxReg, 0), tmpReg);
3603         if (_counters != NULL) {
3604            masm.cond_inc32(Assembler::equal,
3605                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3606         }
3607         masm.jmp   (DONE_LABEL) ;
3608 
3609         masm.bind  (IsInflated) ;
3610         // It's inflated
3611 
3612         // TODO: someday avoid the ST-before-CAS penalty by
3613         // relocating (deferring) the following ST.
3614         // We should also think about trying a CAS without having
3615         // fetched _owner.  If the CAS is successful we may
3616         // avoid an RTO->RTS upgrade on the $line.
3617         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3618         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3619 
3620         masm.mov    (boxReg, tmpReg) ; 
3621         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3622         masm.testptr(tmpReg, tmpReg) ;   
3623         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3624 
3625         // It's inflated and appears unlocked
3626         if (os::is_MP()) { masm.lock(); } 
3627         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3628         // Intentional fall-through into DONE_LABEL ...
3629 
3630         masm.bind  (DONE_LABEL) ;
3631         masm.nop   () ;                 // avoid jmp to jmp
3632     }
3633   %}
3634 
3635   // obj: object to unlock
3636   // box: box address (displaced header location), killed
3637   // RBX: killed tmp; cannot be obj nor box
  // Emits the inline fast-path monitor-exit sequence.  EmitSync selects one of
  // three code shapes:
  //   (EmitSync & 4): only compare rsp against 0
  //   (EmitSync & 8): recursive-unlock check followed by a CAS that restores
  //                   the displaced header
  //   otherwise:      default shape, which also handles inflated monitors
  // In the default shape the successor-check path reports its outcome in ZF
  // (ZF=1 success, ZF=0 failure), as noted at LSuccess / LGoSlowPath below.
  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
  %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    MacroAssembler masm(&cbuf);

    if (EmitSync & 4) {
       masm.cmpptr(rsp, 0) ;
    } else
    if (EmitSync & 8) {
       Label DONE_LABEL;
       if (UseBiasedLocking) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }

       // Check whether the displaced header is 0
       //(=> recursive unlock)
       masm.movptr(tmpReg, Address(boxReg, 0));
       masm.testptr(tmpReg, tmpReg);
       masm.jcc(Assembler::zero, DONE_LABEL);

       // If not recursive lock, reset the header to displaced header
       if (os::is_MP()) {
         masm.lock();
       }
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
       masm.bind(DONE_LABEL);
       masm.nop(); // avoid branch to branch
    } else {
       Label DONE_LABEL, Stacked, CheckSucc ;

       if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }

       // tmpReg = mark word.  A zero displaced header in the box exits
       // immediately; otherwise bit 0x02 of the mark word selects between the
       // inflated path and the Stacked path.
       masm.movptr(tmpReg, Address(objReg, 0)) ;
       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
       masm.jcc   (Assembler::zero, DONE_LABEL) ;
       masm.testl (tmpReg, 0x02) ;
       masm.jcc   (Assembler::zero, Stacked) ;

       // It's inflated
       // boxReg = (owner ^ current thread) | recursions; non-zero exits with ZF=0.
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
       masm.xorptr(boxReg, r15_thread) ;
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
       // boxReg = cxq | EntryList; non-zero goes to the successor check.
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
       masm.jcc   (Assembler::notZero, CheckSucc) ;
       // Both fields are zero: clear the owner field and finish.
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
       masm.jmp   (DONE_LABEL) ;

       if ((EmitSync & 65536) == 0) {
         Label LSuccess, LGoSlowPath ;
         masm.bind  (CheckSucc) ;
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::zero, LGoSlowPath) ;

         // I'd much rather use lock:andl m->_owner, 0 as it's faster than
         // the explicit ST;MEMBAR combination, but masm doesn't currently support
         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
         // are all faster when the write buffer is populated.
         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         if (os::is_MP()) {
            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
         }
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::notZero, LSuccess) ;

         // succ is NULL: CAS the owner field from NULL (in box/rax) to the
         // current thread; a failed CAS branches to LSuccess.
         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
         masm.jcc   (Assembler::notEqual, LSuccess) ;
         // Intentional fall-through into slow-path

         masm.bind  (LGoSlowPath) ;
         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
         masm.jmp   (DONE_LABEL) ;

         masm.bind  (LSuccess) ;
         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
         masm.jmp   (DONE_LABEL) ;
       }

       masm.bind  (Stacked) ;
       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
       if (os::is_MP()) { masm.lock(); }
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box

       // When the successor-check code above is disabled, CheckSucc is bound
       // here so that the branch to it lands on DONE_LABEL.
       if (EmitSync & 65536) {
          masm.bind (CheckSucc) ;
       }
       masm.bind(DONE_LABEL);
       if (EmitSync & 32768) {
          masm.nop();                      // avoid branch to branch
       }
    }
  %}
3738 
3739 
3740   enc_class enc_rethrow()
3741   %{
3742     cbuf.set_inst_mark();
3743     emit_opcode(cbuf, 0xE9); // jmp entry
3744     emit_d32_reloc(cbuf,
3745                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3746                    runtime_call_Relocation::spec(),
3747                    RELOC_DISP32);
3748   %}
3749 
3750   enc_class absF_encoding(regF dst)
3751   %{
3752     int dstenc = $dst$$reg;
3753     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3754 
3755     cbuf.set_inst_mark();
3756     if (dstenc >= 8) {
3757       emit_opcode(cbuf, Assembler::REX_R);
3758       dstenc -= 8;
3759     }
3760     // XXX reg_mem doesn't support RIP-relative addressing yet
3761     emit_opcode(cbuf, 0x0F);
3762     emit_opcode(cbuf, 0x54);
3763     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3764     emit_d32_reloc(cbuf, signmask_address);
3765   %}
3766 
3767   enc_class absD_encoding(regD dst)
3768   %{
3769     int dstenc = $dst$$reg;
3770     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3771 
3772     cbuf.set_inst_mark();
3773     emit_opcode(cbuf, 0x66);
3774     if (dstenc >= 8) {
3775       emit_opcode(cbuf, Assembler::REX_R);
3776       dstenc -= 8;
3777     }
3778     // XXX reg_mem doesn't support RIP-relative addressing yet
3779     emit_opcode(cbuf, 0x0F);
3780     emit_opcode(cbuf, 0x54);
3781     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3782     emit_d32_reloc(cbuf, signmask_address);
3783   %}
3784 
3785   enc_class negF_encoding(regF dst)
3786   %{
3787     int dstenc = $dst$$reg;
3788     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3789 
3790     cbuf.set_inst_mark();
3791     if (dstenc >= 8) {
3792       emit_opcode(cbuf, Assembler::REX_R);
3793       dstenc -= 8;
3794     }
3795     // XXX reg_mem doesn't support RIP-relative addressing yet
3796     emit_opcode(cbuf, 0x0F);
3797     emit_opcode(cbuf, 0x57);
3798     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3799     emit_d32_reloc(cbuf, signflip_address);
3800   %}
3801 
3802   enc_class negD_encoding(regD dst)
3803   %{
3804     int dstenc = $dst$$reg;
3805     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3806 
3807     cbuf.set_inst_mark();
3808     emit_opcode(cbuf, 0x66);
3809     if (dstenc >= 8) {
3810       emit_opcode(cbuf, Assembler::REX_R);
3811       dstenc -= 8;
3812     }
3813     // XXX reg_mem doesn't support RIP-relative addressing yet
3814     emit_opcode(cbuf, 0x0F);
3815     emit_opcode(cbuf, 0x57);
3816     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3817     emit_d32_reloc(cbuf, signflip_address);
3818   %}
3819 
3820   enc_class f2i_fixup(rRegI dst, regF src)
3821   %{
3822     int dstenc = $dst$$reg;
3823     int srcenc = $src$$reg;
3824 
3825     // cmpl $dst, #0x80000000
3826     if (dstenc >= 8) {
3827       emit_opcode(cbuf, Assembler::REX_B);
3828     }
3829     emit_opcode(cbuf, 0x81);
3830     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3831     emit_d32(cbuf, 0x80000000);
3832 
3833     // jne,s done
3834     emit_opcode(cbuf, 0x75);
3835     if (srcenc < 8 && dstenc < 8) {
3836       emit_d8(cbuf, 0xF);
3837     } else if (srcenc >= 8 && dstenc >= 8) {
3838       emit_d8(cbuf, 0x11);
3839     } else {
3840       emit_d8(cbuf, 0x10);
3841     }
3842 
3843     // subq rsp, #8
3844     emit_opcode(cbuf, Assembler::REX_W);
3845     emit_opcode(cbuf, 0x83);
3846     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3847     emit_d8(cbuf, 8);
3848 
3849     // movss [rsp], $src
3850     emit_opcode(cbuf, 0xF3);
3851     if (srcenc >= 8) {
3852       emit_opcode(cbuf, Assembler::REX_R);
3853     }
3854     emit_opcode(cbuf, 0x0F);
3855     emit_opcode(cbuf, 0x11);
3856     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3857 
3858     // call f2i_fixup
3859     cbuf.set_inst_mark();
3860     emit_opcode(cbuf, 0xE8);
3861     emit_d32_reloc(cbuf,
3862                    (int)
3863                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
3864                    runtime_call_Relocation::spec(),
3865                    RELOC_DISP32);
3866 
3867     // popq $dst
3868     if (dstenc >= 8) {
3869       emit_opcode(cbuf, Assembler::REX_B);
3870     }
3871     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3872 
3873     // done:
3874   %}
3875 
3876   enc_class f2l_fixup(rRegL dst, regF src)
3877   %{
3878     int dstenc = $dst$$reg;
3879     int srcenc = $src$$reg;
3880     address const_address = (address) StubRoutines::x86::double_sign_flip();
3881 
3882     // cmpq $dst, [0x8000000000000000]
3883     cbuf.set_inst_mark();
3884     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3885     emit_opcode(cbuf, 0x39);
3886     // XXX reg_mem doesn't support RIP-relative addressing yet
3887     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3888     emit_d32_reloc(cbuf, const_address);
3889 
3890 
3891     // jne,s done
3892     emit_opcode(cbuf, 0x75);
3893     if (srcenc < 8 && dstenc < 8) {
3894       emit_d8(cbuf, 0xF);
3895     } else if (srcenc >= 8 && dstenc >= 8) {
3896       emit_d8(cbuf, 0x11);
3897     } else {
3898       emit_d8(cbuf, 0x10);
3899     }
3900 
3901     // subq rsp, #8
3902     emit_opcode(cbuf, Assembler::REX_W);
3903     emit_opcode(cbuf, 0x83);
3904     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3905     emit_d8(cbuf, 8);
3906 
3907     // movss [rsp], $src
3908     emit_opcode(cbuf, 0xF3);
3909     if (srcenc >= 8) {
3910       emit_opcode(cbuf, Assembler::REX_R);
3911     }
3912     emit_opcode(cbuf, 0x0F);
3913     emit_opcode(cbuf, 0x11);
3914     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3915 
3916     // call f2l_fixup
3917     cbuf.set_inst_mark();
3918     emit_opcode(cbuf, 0xE8);
3919     emit_d32_reloc(cbuf,
3920                    (int)
3921                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
3922                    runtime_call_Relocation::spec(),
3923                    RELOC_DISP32);
3924 
3925     // popq $dst
3926     if (dstenc >= 8) {
3927       emit_opcode(cbuf, Assembler::REX_B);
3928     }
3929     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3930 
3931     // done:
3932   %}
3933 
3934   enc_class d2i_fixup(rRegI dst, regD src)
3935   %{
3936     int dstenc = $dst$$reg;
3937     int srcenc = $src$$reg;
3938 
3939     // cmpl $dst, #0x80000000
3940     if (dstenc >= 8) {
3941       emit_opcode(cbuf, Assembler::REX_B);
3942     }
3943     emit_opcode(cbuf, 0x81);
3944     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3945     emit_d32(cbuf, 0x80000000);
3946 
3947     // jne,s done
3948     emit_opcode(cbuf, 0x75);
3949     if (srcenc < 8 && dstenc < 8) {
3950       emit_d8(cbuf, 0xF);
3951     } else if (srcenc >= 8 && dstenc >= 8) {
3952       emit_d8(cbuf, 0x11);
3953     } else {
3954       emit_d8(cbuf, 0x10);
3955     }
3956 
3957     // subq rsp, #8
3958     emit_opcode(cbuf, Assembler::REX_W);
3959     emit_opcode(cbuf, 0x83);
3960     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3961     emit_d8(cbuf, 8);
3962 
3963     // movsd [rsp], $src
3964     emit_opcode(cbuf, 0xF2);
3965     if (srcenc >= 8) {
3966       emit_opcode(cbuf, Assembler::REX_R);
3967     }
3968     emit_opcode(cbuf, 0x0F);
3969     emit_opcode(cbuf, 0x11);
3970     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3971 
3972     // call d2i_fixup
3973     cbuf.set_inst_mark();
3974     emit_opcode(cbuf, 0xE8);
3975     emit_d32_reloc(cbuf,
3976                    (int)
3977                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
3978                    runtime_call_Relocation::spec(),
3979                    RELOC_DISP32);
3980 
3981     // popq $dst
3982     if (dstenc >= 8) {
3983       emit_opcode(cbuf, Assembler::REX_B);
3984     }
3985     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3986 
3987     // done:
3988   %}
3989 
3990   enc_class d2l_fixup(rRegL dst, regD src)
3991   %{
3992     int dstenc = $dst$$reg;
3993     int srcenc = $src$$reg;
3994     address const_address = (address) StubRoutines::x86::double_sign_flip();
3995 
3996     // cmpq $dst, [0x8000000000000000]
3997     cbuf.set_inst_mark();
3998     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3999     emit_opcode(cbuf, 0x39);
4000     // XXX reg_mem doesn't support RIP-relative addressing yet
4001     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4002     emit_d32_reloc(cbuf, const_address);
4003 
4004 
4005     // jne,s done
4006     emit_opcode(cbuf, 0x75);
4007     if (srcenc < 8 && dstenc < 8) {
4008       emit_d8(cbuf, 0xF);
4009     } else if (srcenc >= 8 && dstenc >= 8) {
4010       emit_d8(cbuf, 0x11);
4011     } else {
4012       emit_d8(cbuf, 0x10);
4013     }
4014 
4015     // subq rsp, #8
4016     emit_opcode(cbuf, Assembler::REX_W);
4017     emit_opcode(cbuf, 0x83);
4018     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4019     emit_d8(cbuf, 8);
4020 
4021     // movsd [rsp], $src
4022     emit_opcode(cbuf, 0xF2);
4023     if (srcenc >= 8) {
4024       emit_opcode(cbuf, Assembler::REX_R);
4025     }
4026     emit_opcode(cbuf, 0x0F);
4027     emit_opcode(cbuf, 0x11);
4028     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4029 
4030     // call d2l_fixup
4031     cbuf.set_inst_mark();
4032     emit_opcode(cbuf, 0xE8);
4033     emit_d32_reloc(cbuf,
4034                    (int)
4035                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4036                    runtime_call_Relocation::spec(),
4037                    RELOC_DISP32);
4038 
4039     // popq $dst
4040     if (dstenc >= 8) {
4041       emit_opcode(cbuf, Assembler::REX_B);
4042     }
4043     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4044 
4045     // done:
4046   %}
4047 
4048   // Safepoint Poll.  This polls the safepoint page, and causes an
4049   // exception if it is not readable. Unfortunately, it kills
4050   // RFLAGS in the process.
  enc_class enc_safepoint_poll
  %{
    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
    // XXX reg_mem doesn't support RIP-relative addressing yet
    cbuf.set_inst_mark();
    // Attach a poll_type relocation at the start of the instruction
    // (the mark set just above).
    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
    emit_opcode(cbuf, 0x85); // testl
    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
    // cbuf.inst_mark() is beginning of instruction
    // The 32-bit displacement targets the address returned by
    // os::get_polling_page().
    emit_d32_reloc(cbuf, os::get_polling_page());
  %}
4063 %}
4064 
4065 
4066 
4067 //----------FRAME--------------------------------------------------------------
4068 // Definition of frame structure and management information.
4069 //
4070 //  S T A C K   L A Y O U T    Allocators stack-slot number
4071 //                             |   (to get allocators register number
4072 //  G  Owned by    |        |  v    add OptoReg::stack0())
4073 //  r   CALLER     |        |
4074 //  o     |        +--------+      pad to even-align allocators stack-slot
4075 //  w     V        |  pad0  |        numbers; owned by CALLER
4076 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4077 //  h     ^        |   in   |  5
4078 //        |        |  args  |  4   Holes in incoming args owned by SELF
4079 //  |     |        |        |  3
4080 //  |     |        +--------+
4081 //  V     |        | old out|      Empty on Intel, window on Sparc
4082 //        |    old |preserve|      Must be even aligned.
4083 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4084 //        |        |   in   |  3   area for Intel ret address
4085 //     Owned by    |preserve|      Empty on Sparc.
4086 //       SELF      +--------+
4087 //        |        |  pad2  |  2   pad to align old SP
4088 //        |        +--------+  1
4089 //        |        | locks  |  0
4090 //        |        +--------+----> OptoReg::stack0(), even aligned
4091 //        |        |  pad1  | 11   pad to align new SP
4092 //        |        +--------+
4093 //        |        |        | 10
4094 //        |        | spills |  9   spills
4095 //        V        |        |  8   (pad0 slot for callee)
4096 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4097 //        ^        |  out   |  7
4098 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4099 //     Owned by    +--------+
4100 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4101 //        |    new |preserve|      Must be even-aligned.
4102 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4103 //        |        |        |
4104 //
4105 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4106 //         known from SELF's arguments and the Java calling convention.
4107 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
4115 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4116 //         even aligned with pad0 as needed.
4117 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4118 //         region 6-11 is even aligned; it may be padded out more so that
4119 //         the region from SP to FP meets the minimum stack alignment.
4120 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4121 //         alignment.  Region 11, pad1, may be dynamically extended so that
4122 //         SP meets the minimum alignment.
4123 
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // NOTE(review): the base value below is 4 slots, while the text above
  // accounts only for the 2-slot return address; presumably the other 2
  // slots hold the saved frame pointer -- confirm.  VerifyStackAtCalls
  // adds 2 more slots when it is set.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              round_to(2 + 2 * VerifyStackAtCalls +
                       Compile::current()->fixed_slots(),
                       WordsPerLong * 2));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Both tables are indexed by ideal_reg: lo[] gives the first register
    // of the returned pair, hi[] the second.  OptoReg::Bad in hi[] marks the
    // types that have no second register.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0_H_num,   // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Guards against the table size falling out of step with the set of
    // ideal register types.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4223 
4224 //----------ATTRIBUTES---------------------------------------------------------
4225 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute (0 here; operands
                             // below set their own value via op_cost(...))

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2); specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction.
4242 
4243 //----------OPERANDS-----------------------------------------------------------
4244 // Operand definitions must precede instruction definitions for correct parsing
4245 // in the ADLC because operands constitute user defined types which are used in
4246 // instruction definitions.
4247 
4248 //----------Simple Operands----------------------------------------------------
4249 // Immediate Operands
// Integer Immediate
// Matches any ConI node; operand cost 10.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4259 
// Constant for test vs zero
// Matches a ConI node whose value is 0; operand cost 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4270 
// Constant for increment
// Matches a ConI node whose value is 1; operand cost 0.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4281 
// Constant for decrement
// Matches a ConI node whose value is -1; operand cost 0.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4292 
// Valid scale values for addressing modes
// Matches a ConI node whose value lies in [0, 3].  No explicit op_cost.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4302 
// Integer Immediate 8-bit
// Matches a ConI node whose value lies in [-0x80, 0x7F]; operand cost 5.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4312 
// Integer Immediate 16-bit
// Matches a ConI node whose value lies in [-32768, 32767]; operand cost 10.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4322 
// Constant for long shifts
// Matches a ConI node whose value is 32; operand cost 0.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4333 
// Constant for long shifts
// Matches a ConI node whose value is 64; operand cost 0.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4344 
// Pointer Immediate
// Matches any ConP node; operand cost 10.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4354 
// NULL Pointer Immediate
// Matches a ConP node whose pointer value is 0; operand cost 5.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4365 
4366 // Narrow Pointer Immediate: any ConN (no predicate restricts the value)
4367 operand immN() %{
4368   match(ConN);
4369 
4370   op_cost(10);
4371   format %{ %}
4372   interface(CONST_INTER);
4373 %}
4374 
4375 // NULL Narrow Pointer Immediate: a ConN whose narrow constant is 0
4376 operand immN0() %{
4377   predicate(n->get_narrowcon() == 0);  // accept only the narrow null
4378   match(ConN);
4379 
4380   op_cost(5);  // lower than immN's cost of 10
4381   format %{ %}
4382   interface(CONST_INTER);
4383 %}
4384 
4385 operand immP31()  // Non-oop pointer immediate with no bits set at or above bit 31
4386 %{
4387   predicate(!n->as_Type()->type()->isa_oopptr()  // reject oop pointers
4388             && (n->get_ptr() >> 31) == 0);       // value >> 31 must be zero
4389   match(ConP);
4390 
4391   op_cost(5);
4392   format %{ %}
4393   interface(CONST_INTER);
4394 %}
4395 
4396 
4397 // Long Immediate: any ConL (no predicate restricts the value)
4398 operand immL()
4399 %{
4400   match(ConL);
4401 
4402   op_cost(20);  // highest cost among the long immediates in this section
4403   format %{ %}
4404   interface(CONST_INTER);
4405 %}
4406 
4407 // Long Immediate 8-bit: value fits in a signed 8-bit field
4408 operand immL8()
4409 %{
4410   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);  // -128..127
4411   match(ConL);
4412 
4413   op_cost(5);
4414   format %{ %}
4415   interface(CONST_INTER);
4416 %}
4417 
4418 // Long Immediate 32-bit unsigned: value survives truncation to unsigned int
4419 operand immUL32()
4420 %{
4421   predicate(n->get_long() == (unsigned int) (n->get_long()));  // round-trips through unsigned int
4422   match(ConL);
4423 
4424   op_cost(10);
4425   format %{ %}
4426   interface(CONST_INTER);
4427 %}
4428 
4429 // Long Immediate 32-bit signed: value survives truncation to int
4430 operand immL32()
4431 %{
4432   predicate(n->get_long() == (int) (n->get_long()));  // round-trips through int
4433   match(ConL);
4434 
4435   op_cost(15);
4436   format %{ %}
4437   interface(CONST_INTER);
4438 %}
4439 
4440 // Long Immediate zero: the constant 0L
4441 operand immL0()
4442 %{
4443   predicate(n->get_long() == 0L);  // accept only the value 0
4444   match(ConL);
4445 
4446   op_cost(10);
4447   format %{ %}
4448   interface(CONST_INTER);
4449 %}
4450 
4451 // Long immediate: the constant 1 (used for increment)
4452 operand immL1()
4453 %{
4454   predicate(n->get_long() == 1);  // accept only the value 1
4455   match(ConL);
4456 
4457   format %{ %}  // note: no explicit op_cost is given for this operand
4458   interface(CONST_INTER);
4459 %}
4460 
4461 // Long immediate: the constant -1 (used for decrement)
4462 operand immL_M1()
4463 %{
4464   predicate(n->get_long() == -1);  // accept only the value -1
4465   match(ConL);
4466 
4467   format %{ %}  // note: no explicit op_cost is given for this operand
4468   interface(CONST_INTER);
4469 %}
4470 
4471 // Long Immediate: the value 10
4472 operand immL10()
4473 %{
4474   predicate(n->get_long() == 10);  // accept only the value 10
4475   match(ConL);
4476 
4477   format %{ %}  // note: no explicit op_cost is given for this operand
4478   interface(CONST_INTER);
4479 %}
4480 
4481 // Long immediate from 0 to 127 (inclusive).
4482 // Used for a shorter form of long mul by 10.
4483 operand immL_127()
4484 %{
4485   predicate(0 <= n->get_long() && n->get_long() < 0x80);  // 0..127
4486   match(ConL);
4487 
4488   op_cost(10);
4489   format %{ %}
4490   interface(CONST_INTER);
4491 %}
4492 
4493 // Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
4494 operand immL_32bits()
4495 %{
4496   predicate(n->get_long() == 0xFFFFFFFFL);  // accept only the mask value
4497   match(ConL);
4498   op_cost(20);
4499 
4500   format %{ %}
4501   interface(CONST_INTER);
4502 %}
4503 
4504 // Float Immediate zero
4505 operand immF0()
4506 %{
4507   predicate(jint_cast(n->getf()) == 0);  // presumably a bit-pattern test (so -0.0f would not match) - confirm jint_cast
4508   match(ConF);
4509 
4510   op_cost(5);  // lower than immF's cost of 15
4511   format %{ %}
4512   interface(CONST_INTER);
4513 %}
4514 
4515 // Float Immediate: any ConF (no predicate restricts the value)
4516 operand immF()
4517 %{
4518   match(ConF);
4519 
4520   op_cost(15);
4521   format %{ %}
4522   interface(CONST_INTER);
4523 %}
4524 
4525 // Double Immediate zero
4526 operand immD0()
4527 %{
4528   predicate(jlong_cast(n->getd()) == 0);  // presumably a bit-pattern test (so -0.0 would not match) - confirm jlong_cast
4529   match(ConD);
4530 
4531   op_cost(5);  // lower than immD's cost of 15
4532   format %{ %}
4533   interface(CONST_INTER);
4534 %}
4535 
4536 // Double Immediate: any ConD (no predicate restricts the value)
4537 operand immD()
4538 %{
4539   match(ConD);
4540 
4541   op_cost(15);
4542   format %{ %}
4543   interface(CONST_INTER);
4544 %}
4545 
4546 // Immediates for special shifts (sign extend)
4547 
4548 // Int immediate: the constant 16
4549 operand immI_16()
4550 %{
4551   predicate(n->get_int() == 16);  // accept only the value 16
4552   match(ConI);
4553 
4554   format %{ %}
4555   interface(CONST_INTER);
4556 %}
4557 
4558 operand immI_24()  // Int immediate: the constant 24
4559 %{
4560   predicate(n->get_int() == 24);  // accept only the value 24
4561   match(ConI);
4562 
4563   format %{ %}
4564   interface(CONST_INTER);
4565 %}
4566 
4567 // Int immediate: the constant 255 (byte-wide mask)
4568 operand immI_255()
4569 %{
4570   predicate(n->get_int() == 255);  // accept only 0xFF
4571   match(ConI);
4572 
4573   format %{ %}
4574   interface(CONST_INTER);
4575 %}
4576 
4577 // Int immediate: the constant 65535 (short-wide mask)
4578 operand immI_65535()
4579 %{
4580   predicate(n->get_int() == 65535);  // accept only 0xFFFF
4581   match(ConI);
4582 
4583   format %{ %}
4584   interface(CONST_INTER);
4585 %}
4586 
4587 // Long immediate: the constant 255 (byte-wide mask)
4588 operand immL_255()
4589 %{
4590   predicate(n->get_long() == 255);  // accept only 0xFF
4591   match(ConL);
4592 
4593   format %{ %}
4594   interface(CONST_INTER);
4595 %}
4596 
4597 // Long immediate: the constant 65535 (short-wide mask)
4598 operand immL_65535()
4599 %{
4600   predicate(n->get_long() == 65535);  // accept only 0xFFFF
4601   match(ConL);
4602 
4603   format %{ %}
4604   interface(CONST_INTER);
4605 %}
4606 
4607 // Register Operands
4608 // Integer Register: general int operand, allocated from class int_reg
4609 operand rRegI()
4610 %{
4611   constraint(ALLOC_IN_RC(int_reg));
4612   match(RegI);
4613 
4614   match(rax_RegI);  // the fixed-register int operands are also accepted here
4615   match(rbx_RegI);
4616   match(rcx_RegI);
4617   match(rdx_RegI);
4618   match(rdi_RegI);
4619 
4620   format %{ %}
4621   interface(REG_INTER);
4622 %}
4623 
4624 // Special Registers: int operand restricted to class int_rax_reg
4625 operand rax_RegI()
4626 %{
4627   constraint(ALLOC_IN_RC(int_rax_reg));
4628   match(RegI);
4629   match(rRegI);  // a general int register operand is accepted as input
4630 
4631   format %{ "RAX" %}
4632   interface(REG_INTER);
4633 %}
4634 
4635 // Special Registers: int operand restricted to class int_rbx_reg
4636 operand rbx_RegI()
4637 %{
4638   constraint(ALLOC_IN_RC(int_rbx_reg));
4639   match(RegI);
4640   match(rRegI);  // a general int register operand is accepted as input
4641 
4642   format %{ "RBX" %}
4643   interface(REG_INTER);
4644 %}
4645 
4646 operand rcx_RegI()  // int operand restricted to class int_rcx_reg
4647 %{
4648   constraint(ALLOC_IN_RC(int_rcx_reg));
4649   match(RegI);
4650   match(rRegI);  // a general int register operand is accepted as input
4651 
4652   format %{ "RCX" %}
4653   interface(REG_INTER);
4654 %}
4655 
4656 operand rdx_RegI()  // int operand restricted to class int_rdx_reg
4657 %{
4658   constraint(ALLOC_IN_RC(int_rdx_reg));
4659   match(RegI);
4660   match(rRegI);  // a general int register operand is accepted as input
4661 
4662   format %{ "RDX" %}
4663   interface(REG_INTER);
4664 %}
4665 
4666 operand rdi_RegI()  // int operand restricted to class int_rdi_reg
4667 %{
4668   constraint(ALLOC_IN_RC(int_rdi_reg));
4669   match(RegI);
4670   match(rRegI);  // a general int register operand is accepted as input
4671 
4672   format %{ "RDI" %}
4673   interface(REG_INTER);
4674 %}
4675 
4676 operand no_rcx_RegI()  // int operand allocated from class int_no_rcx_reg
4677 %{
4678   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4679   match(RegI);
4680   match(rax_RegI);  // fixed-register operands other than rcx_RegI are accepted
4681   match(rbx_RegI);
4682   match(rdx_RegI);
4683   match(rdi_RegI);
4684 
4685   format %{ %}
4686   interface(REG_INTER);
4687 %}
4688 
4689 operand no_rax_rdx_RegI()  // int operand allocated from class int_no_rax_rdx_reg
4690 %{
4691   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4692   match(RegI);
4693   match(rbx_RegI);  // rax_RegI and rdx_RegI are deliberately not listed
4694   match(rcx_RegI);
4695   match(rdi_RegI);
4696 
4697   format %{ %}
4698   interface(REG_INTER);
4699 %}
4700 
4701 // Pointer Register: widest pointer operand, allocated from class any_reg
4702 operand any_RegP()
4703 %{
4704   constraint(ALLOC_IN_RC(any_reg));
4705   match(RegP);
4706   match(rax_RegP);  // every fixed-register pointer operand is accepted ...
4707   match(rbx_RegP);
4708   match(rdi_RegP);
4709   match(rsi_RegP);
4710   match(rbp_RegP);
4711   match(r15_RegP);
4712   match(rRegP);     // ... as is the general pointer operand
4713 
4714   format %{ %}
4715   interface(REG_INTER);
4716 %}
4717 
4718 operand rRegP()  // general pointer operand, allocated from class ptr_reg
4719 %{
4720   constraint(ALLOC_IN_RC(ptr_reg));
4721   match(RegP);
4722   match(rax_RegP);  // fixed-register pointer operands are accepted as inputs
4723   match(rbx_RegP);
4724   match(rdi_RegP);
4725   match(rsi_RegP);
4726   match(rbp_RegP);
4727   match(r15_RegP);  // See Q&A below about r15_RegP.
4728 
4729   format %{ %}
4730   interface(REG_INTER);
4731 %}
4732 
4733 operand rRegN() %{  // narrow-pointer (RegN) operand
4734   constraint(ALLOC_IN_RC(int_reg));  // same register class as rRegI
4735   match(RegN);
4736 
4737   format %{ %}
4738   interface(REG_INTER);
4739 %}
4740 
4741 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4742 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4743 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4744 // The output of an instruction is controlled by the allocator, which respects
4745 // register class masks, not match rules.  Unless an instruction mentions
4746 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4747 // by the allocator as an input.
4748 
4749 operand no_rax_RegP()  // pointer operand allocated from class ptr_no_rax_reg
4750 %{
4751   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4752   match(RegP);
4753   match(rbx_RegP);  // rax_RegP is deliberately not listed
4754   match(rsi_RegP);
4755   match(rdi_RegP);
4756 
4757   format %{ %}
4758   interface(REG_INTER);
4759 %}
4760 
4761 operand no_rbp_RegP()  // pointer operand allocated from class ptr_no_rbp_reg
4762 %{
4763   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4764   match(RegP);
4765   match(rbx_RegP);  // rbp_RegP is deliberately not listed
4766   match(rsi_RegP);
4767   match(rdi_RegP);
4768 
4769   format %{ %}
4770   interface(REG_INTER);
4771 %}
4772 
4773 operand no_rax_rbx_RegP()  // pointer operand allocated from class ptr_no_rax_rbx_reg
4774 %{
4775   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4776   match(RegP);
4777   match(rsi_RegP);  // rax_RegP and rbx_RegP are deliberately not listed
4778   match(rdi_RegP);
4779 
4780   format %{ %}
4781   interface(REG_INTER);
4782 %}
4783 
4784 // Special Registers
4785 // Return a pointer value: pointer operand restricted to class ptr_rax_reg
4786 operand rax_RegP()
4787 %{
4788   constraint(ALLOC_IN_RC(ptr_rax_reg));
4789   match(RegP);
4790   match(rRegP);  // a general pointer operand is accepted as input
4791 
4792   format %{ %}
4793   interface(REG_INTER);
4794 %}
4795 
4796 // Special Registers
4797 // Return a compressed pointer value: narrow operand restricted to class int_rax_reg
4798 operand rax_RegN()
4799 %{
4800   constraint(ALLOC_IN_RC(int_rax_reg));  // same class as rax_RegI
4801   match(RegN);
4802   match(rRegN);  // a general narrow-pointer operand is accepted as input
4803 
4804   format %{ %}
4805   interface(REG_INTER);
4806 %}
4807 
4808 // Used in AtomicAdd: pointer operand restricted to class ptr_rbx_reg
4809 operand rbx_RegP()
4810 %{
4811   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4812   match(RegP);
4813   match(rRegP);  // a general pointer operand is accepted as input
4814 
4815   format %{ %}
4816   interface(REG_INTER);
4817 %}
4818 
4819 operand rsi_RegP()  // pointer operand restricted to class ptr_rsi_reg
4820 %{
4821   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4822   match(RegP);
4823   match(rRegP);  // a general pointer operand is accepted as input
4824 
4825   format %{ %}
4826   interface(REG_INTER);
4827 %}
4828 
4829 // Used in rep stosq: pointer operand restricted to class ptr_rdi_reg
4830 operand rdi_RegP()
4831 %{
4832   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4833   match(RegP);
4834   match(rRegP);  // a general pointer operand is accepted as input
4835 
4836   format %{ %}
4837   interface(REG_INTER);
4838 %}
4839 
4840 operand rbp_RegP()  // pointer operand restricted to class ptr_rbp_reg
4841 %{
4842   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4843   match(RegP);
4844   match(rRegP);  // a general pointer operand is accepted as input
4845 
4846   format %{ %}
4847   interface(REG_INTER);
4848 %}
4849 
4850 operand r15_RegP()  // pointer operand restricted to class ptr_r15_reg (see the r15 Q&A after rRegN)
4851 %{
4852   constraint(ALLOC_IN_RC(ptr_r15_reg));
4853   match(RegP);
4854   match(rRegP);  // a general pointer operand is accepted as input
4855 
4856   format %{ %}
4857   interface(REG_INTER);
4858 %}
4859 
4860 operand rRegL()  // general long operand, allocated from class long_reg
4861 %{
4862   constraint(ALLOC_IN_RC(long_reg));
4863   match(RegL);
4864   match(rax_RegL);  // only the rax and rdx fixed operands are listed (rcx_RegL is not)
4865   match(rdx_RegL);
4866 
4867   format %{ %}
4868   interface(REG_INTER);
4869 %}
4870 
4871 // Special Registers: long operand allocated from class long_no_rax_rdx_reg
4872 operand no_rax_rdx_RegL()
4873 %{
4874   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4875   match(RegL);
4876   match(rRegL);  // a general long operand is accepted as input
4877 
4878   format %{ %}
4879   interface(REG_INTER);
4880 %}
4881 
4882 operand no_rax_RegL()  // long operand; despite its name it is constrained to long_no_rax_rdx_reg
4883 %{
4884   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL - confirm rdx exclusion is intended
4885   match(RegL);
4886   match(rRegL);
4887   match(rdx_RegL);  // rdx_RegL is accepted as an input even though the class above excludes rdx by name
4888 
4889   format %{ %}
4890   interface(REG_INTER);
4891 %}
4892 
4893 operand no_rcx_RegL()  // long operand allocated from class long_no_rcx_reg
4894 %{
4895   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4896   match(RegL);
4897   match(rRegL);  // a general long operand is accepted as input
4898 
4899   format %{ %}
4900   interface(REG_INTER);
4901 %}
4902 
4903 operand rax_RegL()  // long operand restricted to class long_rax_reg
4904 %{
4905   constraint(ALLOC_IN_RC(long_rax_reg));
4906   match(RegL);
4907   match(rRegL);  // a general long operand is accepted as input
4908 
4909   format %{ "RAX" %}  // the only fixed long operand here with a non-empty format
4910   interface(REG_INTER);
4911 %}
4912 
4913 operand rcx_RegL()  // long operand restricted to class long_rcx_reg
4914 %{
4915   constraint(ALLOC_IN_RC(long_rcx_reg));
4916   match(RegL);
4917   match(rRegL);  // a general long operand is accepted as input
4918 
4919   format %{ %}
4920   interface(REG_INTER);
4921 %}
4922 
4923 operand rdx_RegL()  // long operand restricted to class long_rdx_reg
4924 %{
4925   constraint(ALLOC_IN_RC(long_rdx_reg));
4926   match(RegL);
4927   match(rRegL);  // a general long operand is accepted as input
4928 
4929   format %{ %}
4930   interface(REG_INTER);
4931 %}
4932 
4933 // Flags register, used as output of compare instructions
4934 operand rFlagsReg()
4935 %{
4936   constraint(ALLOC_IN_RC(int_flags));  // all three flags operands share class int_flags
4937   match(RegFlags);
4938 
4939   format %{ "RFLAGS" %}
4940   interface(REG_INTER);
4941 %}
4942 
4943 // Flags register, used as output of FLOATING POINT compare instructions
4944 operand rFlagsRegU()
4945 %{
4946   constraint(ALLOC_IN_RC(int_flags));  // same class as rFlagsReg; only the operand type differs
4947   match(RegFlags);
4948 
4949   format %{ "RFLAGS_U" %}
4950   interface(REG_INTER);
4951 %}
4952 
4953 operand rFlagsRegUCF() %{  // flags operand printed as RFLAGS_U_CF
4954   constraint(ALLOC_IN_RC(int_flags));
4955   match(RegFlags);
4956   predicate(false);  // constant-false predicate: presumably keeps the matcher from choosing this operand implicitly - confirm
4957 
4958   format %{ "RFLAGS_U_CF" %}
4959   interface(REG_INTER);
4960 %}
4961 
4962 // Float register operands: allocated from class float_reg
4963 operand regF()
4964 %{
4965   constraint(ALLOC_IN_RC(float_reg));
4966   match(RegF);
4967 
4968   format %{ %}
4969   interface(REG_INTER);
4970 %}
4971 
4972 // Double register operands: allocated from class double_reg
4973 operand regD() 
4974 %{
4975   constraint(ALLOC_IN_RC(double_reg));
4976   match(RegD);
4977 
4978   format %{ %}
4979   interface(REG_INTER);
4980 %}
4981 
4982 
4983 //----------Memory Operands----------------------------------------------------
4984 // Direct Memory Operand
4985 // operand direct(immP addr)
4986 // %{
4987 //   match(addr);
4988 
4989 //   format %{ "[$addr]" %}
4990 //   interface(MEMORY_INTER) %{
4991 //     base(0xFFFFFFFF);
4992 //     index(0x4);
4993 //     scale(0x0);
4994 //     disp($addr);
4995 //   %}
4996 // %}
4997 
4998 // Indirect Memory Operand: the address is the pointer register itself, [reg]
4999 operand indirect(any_RegP reg)
5000 %{
5001   constraint(ALLOC_IN_RC(ptr_reg));
5002   match(reg);
5003 
5004   format %{ "[$reg]" %}
5005   interface(MEMORY_INTER) %{
5006     base($reg);
5007     index(0x4);  // 0x4 = no index (same convention as the stack-slot operands)
5008     scale(0x0);  // no scale
5009     disp(0x0);   // no displacement
5010   %}
5011 %}
5012 
5013 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8
5014 operand indOffset8(any_RegP reg, immL8 off)
5015 %{
5016   constraint(ALLOC_IN_RC(ptr_reg));
5017   match(AddP reg off);
5018 
5019   format %{ "[$reg + $off (8-bit)]" %}
5020   interface(MEMORY_INTER) %{
5021     base($reg);
5022     index(0x4);  // 0x4 = no index
5023     scale(0x0);  // no scale
5024     disp($off);  // the constant offset becomes the displacement
5025   %}
5026 %}
5027 
5028 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32
5029 operand indOffset32(any_RegP reg, immL32 off)
5030 %{
5031   constraint(ALLOC_IN_RC(ptr_reg));
5032   match(AddP reg off);
5033 
5034   format %{ "[$reg + $off (32-bit)]" %}
5035   interface(MEMORY_INTER) %{
5036     base($reg);
5037     index(0x4);  // 0x4 = no index
5038     scale(0x0);  // no scale
5039     disp($off);  // the constant offset becomes the displacement
5040   %}
5041 %}
5042 
5043 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
5044 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5045 %{
5046   constraint(ALLOC_IN_RC(ptr_reg));
5047   match(AddP (AddP reg lreg) off);  // inner AddP adds the index, outer AddP adds the constant
5048 
5049   op_cost(10);
5050   format %{"[$reg + $off + $lreg]" %}
5051   interface(MEMORY_INTER) %{
5052     base($reg);
5053     index($lreg);
5054     scale(0x0);  // index is not scaled
5055     disp($off);
5056   %}
5057 %}
5058 
5059 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset
5060 operand indIndex(any_RegP reg, rRegL lreg)
5061 %{
5062   constraint(ALLOC_IN_RC(ptr_reg));
5063   match(AddP reg lreg);
5064 
5065   op_cost(10);
5066   format %{"[$reg + $lreg]" %}
5067   interface(MEMORY_INTER) %{
5068     base($reg);
5069     index($lreg);
5070     scale(0x0);  // index is not scaled
5071     disp(0x0);   // no displacement
5072   %}
5073 %}
5074 
5075 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)]
5076 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5077 %{
5078   constraint(ALLOC_IN_RC(ptr_reg));
5079   match(AddP reg (LShiftL lreg scale));  // scale is a shift count limited to 0..3 by immI2
5080 
5081   op_cost(10);
5082   format %{"[$reg + $lreg << $scale]" %}
5083   interface(MEMORY_INTER) %{
5084     base($reg);
5085     index($lreg);
5086     scale($scale);
5087     disp(0x0);  // no displacement
5088   %}
5089 %}
5090 
5091 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
5092 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5093 %{
5094   constraint(ALLOC_IN_RC(ptr_reg));
5095   match(AddP (AddP reg (LShiftL lreg scale)) off);  // scale is a shift count limited to 0..3 by immI2
5096 
5097   op_cost(10);
5098   format %{"[$reg + $off + $lreg << $scale]" %}
5099   interface(MEMORY_INTER) %{
5100     base($reg);
5101     index($lreg);
5102     scale($scale);
5103     disp($off);
5104   %}
5105 %}
5106 
5107 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: int index, [reg + (idx << scale) + off]
5108 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5109 %{
5110   constraint(ALLOC_IN_RC(ptr_reg));
5111   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // presumably reaches the ConvI2L node; its long type must have a non-negative lower bound
5112   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5113 
5114   op_cost(10);
5115   format %{"[$reg + $off + $idx << $scale]" %}
5116   interface(MEMORY_INTER) %{
5117     base($reg);
5118     index($idx);  // the int register itself is used as the index; the ConvI2L is folded into the operand
5119     scale($scale);
5120     disp($off);
5121   %}
5122 %}
5123 
5124 // Indirect Narrow Oop Plus Offset Operand: folds the DecodeN into the address
5125 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5126 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
5127 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5128   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // only when narrow oops are in use and shifted
5129   constraint(ALLOC_IN_RC(ptr_reg));
5130   match(AddP (DecodeN reg) off);
5131 
5132   op_cost(10);
5133   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5134   interface(MEMORY_INTER) %{
5135     base(0xc); // R12
5136     index($reg);  // the narrow oop is used as the index
5137     scale(0x3);   // fixed shift by 3, as printed in the format above
5138     disp($off);
5139   %}
5140 %}
5141 
5142 // Indirect Memory Operand on a narrow oop: [reg], with the DecodeN folded away
5143 operand indirectNarrow(rRegN reg)
5144 %{
5145   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5146   constraint(ALLOC_IN_RC(ptr_reg));
5147   match(DecodeN reg);
5148 
5149   format %{ "[$reg]" %}
5150   interface(MEMORY_INTER) %{
5151     base($reg);  // the narrow register is used directly as the base
5152     index(0x4);  // 0x4 = no index
5153     scale(0x0);  // no scale
5154     disp(0x0);   // no displacement
5155   %}
5156 %}
5157 
5158 // Indirect Memory Plus Short Offset Operand on a narrow oop: [reg + off], off is an immL8
5159 operand indOffset8Narrow(rRegN reg, immL8 off)
5160 %{
5161   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5162   constraint(ALLOC_IN_RC(ptr_reg));
5163   match(AddP (DecodeN reg) off);
5164 
5165   format %{ "[$reg + $off (8-bit)]" %}
5166   interface(MEMORY_INTER) %{
5167     base($reg);  // the narrow register is used directly as the base
5168     index(0x4);  // 0x4 = no index
5169     scale(0x0);  // no scale
5170     disp($off);
5171   %}
5172 %}
5173 
5174 // Indirect Memory Plus Long Offset Operand on a narrow oop: [reg + off], off is an immL32
5175 operand indOffset32Narrow(rRegN reg, immL32 off)
5176 %{
5177   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5178   constraint(ALLOC_IN_RC(ptr_reg));
5179   match(AddP (DecodeN reg) off);
5180 
5181   format %{ "[$reg + $off (32-bit)]" %}
5182   interface(MEMORY_INTER) %{
5183     base($reg);  // the narrow register is used directly as the base
5184     index(0x4);  // 0x4 = no index
5185     scale(0x0);  // no scale
5186     disp($off);
5187   %}
5188 %}
5189 
5190 // Indirect Memory Plus Index Register Plus Offset Operand on a narrow oop: [reg + lreg + off]
5191 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5192 %{
5193   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5194   constraint(ALLOC_IN_RC(ptr_reg));
5195   match(AddP (AddP (DecodeN reg) lreg) off);
5196 
5197   op_cost(10);
5198   format %{"[$reg + $off + $lreg]" %}
5199   interface(MEMORY_INTER) %{
5200     base($reg);  // the narrow register is used directly as the base
5201     index($lreg);
5202     scale(0x0);  // index is not scaled
5203     disp($off);
5204   %}
5205 %}
5206 
5207 // Indirect Memory Plus Index Register Operand on a narrow oop: [reg + lreg], no offset
5208 operand indIndexNarrow(rRegN reg, rRegL lreg)
5209 %{
5210   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5211   constraint(ALLOC_IN_RC(ptr_reg));
5212   match(AddP (DecodeN reg) lreg);
5213 
5214   op_cost(10);
5215   format %{"[$reg + $lreg]" %}
5216   interface(MEMORY_INTER) %{
5217     base($reg);  // the narrow register is used directly as the base
5218     index($lreg);
5219     scale(0x0);  // index is not scaled
5220     disp(0x0);   // no displacement
5221   %}
5222 %}
5223 
5224 // Indirect Memory Times Scale Plus Index Register on a narrow oop: [reg + (lreg << scale)]
5225 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5226 %{
5227   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5228   constraint(ALLOC_IN_RC(ptr_reg));
5229   match(AddP (DecodeN reg) (LShiftL lreg scale));
5230 
5231   op_cost(10);
5232   format %{"[$reg + $lreg << $scale]" %}
5233   interface(MEMORY_INTER) %{
5234     base($reg);  // the narrow register is used directly as the base
5235     index($lreg);
5236     scale($scale);
5237     disp(0x0);   // no displacement
5238   %}
5239 %}
5240 
5241 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand on a narrow oop: [reg + (lreg << scale) + off]
5242 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5243 %{
5244   predicate(Universe::narrow_oop_shift() == 0);  // only when narrow oops are unshifted
5245   constraint(ALLOC_IN_RC(ptr_reg));
5246   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5247 
5248   op_cost(10);
5249   format %{"[$reg + $off + $lreg << $scale]" %}
5250   interface(MEMORY_INTER) %{
5251     base($reg);  // the narrow register is used directly as the base
5252     index($lreg);
5253     scale($scale);
5254     disp($off);
5255   %}
5256 %}
5257 
5258 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand on a narrow oop: int index
5259 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5260 %{
5261   constraint(ALLOC_IN_RC(ptr_reg));
5262   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // unshifted narrow oops, and (presumably) the ConvI2L result has a non-negative lower bound
5263   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5264 
5265   op_cost(10);
5266   format %{"[$reg + $off + $idx << $scale]" %}
5267   interface(MEMORY_INTER) %{
5268     base($reg);   // the narrow register is used directly as the base
5269     index($idx);  // the int register itself is used as the index
5270     scale($scale);
5271     disp($off);
5272   %}
5273 %}
5274 
5275 
5276 //----------Special Memory Operands--------------------------------------------
5277 // Stack Slot Operand - This operand is used for loading and storing temporary
5278 //                      values on the stack where a match requires a value to
5279 //                      flow through memory.
5280 operand stackSlotP(sRegP reg)  // stack-slot memory operand taking an sRegP
5281 %{
5282   constraint(ALLOC_IN_RC(stack_slots));
5283   // No match rule: this operand is never matched from the ideal graph, only generated during matching
5284 
5285   format %{ "[$reg]" %}
5286   interface(MEMORY_INTER) %{
5287     base(0x4);   // RSP
5288     index(0x4);  // No Index
5289     scale(0x0);  // No Scale
5290     disp($reg);  // Stack Offset
5291   %}
5292 %}
5293 
5294 operand stackSlotI(sRegI reg)  // stack-slot memory operand taking an sRegI
5295 %{
5296   constraint(ALLOC_IN_RC(stack_slots));
5297   // No match rule: this operand is never matched from the ideal graph, only generated during matching
5298 
5299   format %{ "[$reg]" %}
5300   interface(MEMORY_INTER) %{
5301     base(0x4);   // RSP
5302     index(0x4);  // No Index
5303     scale(0x0);  // No Scale
5304     disp($reg);  // Stack Offset
5305   %}
5306 %}
5307 
5308 operand stackSlotF(sRegF reg)  // stack-slot memory operand taking an sRegF
5309 %{
5310   constraint(ALLOC_IN_RC(stack_slots));
5311   // No match rule: this operand is never matched from the ideal graph, only generated during matching
5312 
5313   format %{ "[$reg]" %}
5314   interface(MEMORY_INTER) %{
5315     base(0x4);   // RSP
5316     index(0x4);  // No Index
5317     scale(0x0);  // No Scale
5318     disp($reg);  // Stack Offset
5319   %}
5320 %}
5321 
5322 operand stackSlotD(sRegD reg)  // stack-slot memory operand taking an sRegD
5323 %{
5324   constraint(ALLOC_IN_RC(stack_slots));
5325   // No match rule: this operand is never matched from the ideal graph, only generated during matching
5326 
5327   format %{ "[$reg]" %}
5328   interface(MEMORY_INTER) %{
5329     base(0x4);   // RSP
5330     index(0x4);  // No Index
5331     scale(0x0);  // No Scale
5332     disp($reg);  // Stack Offset
5333   %}
5334 %}
5335 operand stackSlotL(sRegL reg)  // stack-slot memory operand taking an sRegL
5336 %{
5337   constraint(ALLOC_IN_RC(stack_slots));
5338   // No match rule: this operand is never matched from the ideal graph, only generated during matching
5339 
5340   format %{ "[$reg]" %}
5341   interface(MEMORY_INTER) %{
5342     base(0x4);   // RSP
5343     index(0x4);  // No Index
5344     scale(0x0);  // No Scale
5345     disp($reg);  // Stack Offset
5346   %}
5347 %}
5348 
5349 //----------Conditional Branch Operands----------------------------------------
5350 // Comparison Op  - This is the operation of the comparison, and is limited to
5351 //                  the following set of codes:
5352 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5353 //
5354 // Other attributes of the comparison, such as unsignedness, are specified
5355 // by the comparison instruction that sets a condition code flags register.
5356 // That result is represented by a flags operand whose subtype is appropriate
5357 // to the unsignedness (etc.) of the comparison.
5358 //
5359 // Later, the instruction which matches both the Comparison Op (a Bool) and
5360 // the flags (produced by the Cmp) specifies the coding of the comparison op
5361 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5362 
5363 // Comparison Code, signed compare: each relation is paired with a code and a mnemonic suffix
5364 operand cmpOp()
5365 %{
5366   match(Bool);
5367 
5368   format %{ "" %}
5369   interface(COND_INTER) %{
5370     equal(0x4, "e");
5371     not_equal(0x5, "ne");
5372     less(0xC, "l");            // signed relations use l / ge / le / g
5373     greater_equal(0xD, "ge");
5374     less_equal(0xE, "le");
5375     greater(0xF, "g");
5376   %}
5377 %}
5378 
5379 // Comparison Code, unsigned compare.  Used by FP also, with
5380 // C2 (unordered) turned into GT or LT already.  The other bits
5381 // C0 and C3 are turned into Carry & Zero flags.
5382 operand cmpOpU()
5383 %{
5384   match(Bool);
5385 
5386   format %{ "" %}
5387   interface(COND_INTER) %{
5388     equal(0x4, "e");
5389     not_equal(0x5, "ne");
5390     less(0x2, "b");            // unsigned relations use b / nb / be / nbe
5391     greater_equal(0x3, "nb");
5392     less_equal(0x6, "be");
5393     greater(0x7, "nbe");
5394   %}
5395 %}
5396 
5397 
5398 // Floating comparisons that don't require any fixup for the unordered case
5399 operand cmpOpUCF() %{
5400   match(Bool);
5401   predicate(n->as_Bool()->_test._test == BoolTest::lt ||   // only the four ordering tests
5402             n->as_Bool()->_test._test == BoolTest::ge ||   // (lt, ge, le, gt) are accepted;
5403             n->as_Bool()->_test._test == BoolTest::le ||   // eq and ne are handled by cmpOpUCF2
5404             n->as_Bool()->_test._test == BoolTest::gt);
5405   format %{ "" %}
5406   interface(COND_INTER) %{  // same code table as cmpOpU
5407     equal(0x4, "e");
5408     not_equal(0x5, "ne");
5409     less(0x2, "b");
5410     greater_equal(0x3, "nb");
5411     less_equal(0x6, "be");
5412     greater(0x7, "nbe");
5413   %}
5414 %}
5415 
5416 
5417 // Floating comparisons that can be fixed up with extra conditional jumps
5418 operand cmpOpUCF2() %{
5419   match(Bool);
5420   predicate(n->as_Bool()->_test._test == BoolTest::ne ||   // only the equality tests
5421             n->as_Bool()->_test._test == BoolTest::eq);    // (ne, eq) are accepted
5422   format %{ "" %}
5423   interface(COND_INTER) %{  // same code table as cmpOpU
5424     equal(0x4, "e");
5425     not_equal(0x5, "ne");
5426     less(0x2, "b");
5427     greater_equal(0x3, "nb");
5428     less_equal(0x6, "be");
5429     greater(0x7, "nbe");
5430   %}
5431 %}
5432 
5433 
5434 //----------OPERAND CLASSES----------------------------------------------------
5435 // Operand Classes are groups of operands that are used to simplify
5436 // instruction definitions by not requiring the AD writer to specify separate
5437 // instructions for every form of operand when the instruction accepts
5438 // multiple operand types with the same basic encoding and format.  The classic
5439 // case of this is memory operands.
5440 // memory: all addressing-mode operands defined above (plain, compressed-oop, and *Narrow forms)
5441 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5442                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5443                indCompressedOopOffset,
5444                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5445                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5446                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5447 
5448 //----------PIPELINE-----------------------------------------------------------
5449 // Rules which define the behavior of the target architecture's pipeline.
5450 pipeline %{
5451 
5452 //----------ATTRIBUTES---------------------------------------------------------
5453 attributes %{
5454   variable_size_instructions;        // Instructions are variable-size (not fixed-size)
5455   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5456   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5457   instruction_fetch_unit_size = 16;  // The processor fetches one line
5458   instruction_fetch_units = 1;       // of 16 bytes
5459 
5460   // List of nop instructions
5461   nops( MachNop );
5462 %}
5463 
5464 //----------RESOURCES----------------------------------------------------------
5465 // Resources are the functional units available to the machine
5466 
5467 // Generic P2/P3 pipeline
5468 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5469 // 3 instructions decoded per cycle.
5470 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5471 // 3 ALU op, only ALU0 handles mul instructions.
5472 resources( D0, D1, D2, DECODE = D0 | D1 | D2,   // DECODE = any of the three decoders
5473            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2, // NOTE(review): three MS units are declared although the comment above says 2 load/store ops - confirm
5474            BR, FPU,
5475            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALU = any of the three ALUs
5476 
5477 //----------PIPELINE DESCRIPTION-----------------------------------------------
5478 // Pipeline Description specifies the stages in the machine's pipeline
5479 
5480 // Generic P2/P3 pipeline: six stages, referenced as S0..S5 by the pipe classes below
5481 pipe_desc(S0, S1, S2, S3, S4, S5);
5482 
5483 //----------PIPELINE CLASSES---------------------------------------------------
5484 // Pipeline Classes describe the stages in which input and output are
5485 // referenced by the hardware pipeline.
5486 
5487 // Naming convention: ialu or fpu
5488 // Then: _reg
5489 // Then: _reg if there is a 2nd register
5490 // Then: _long if it's a pair of instructions implementing a long
5491 // Then: _fat if it requires the big decoder
5492 //   Or: _mem if it requires the big decoder and a memory unit.
5493 
5494 // Integer ALU reg operation: one instruction that reads and then rewrites dst
5495 pipe_class ialu_reg(rRegI dst)
5496 %{
5497     single_instruction;
5498     dst    : S4(write);  // result written in stage S4
5499     dst    : S3(read);   // same register read in stage S3
5500     DECODE : S0;        // any decoder
5501     ALU    : S3;        // any alu
5502 %}
5503 
5504 // Long ALU reg operation: counted as two instructions on a single long register
5505 pipe_class ialu_reg_long(rRegL dst)
5506 %{
5507     instruction_count(2);
5508     dst    : S4(write);  // result written in stage S4
5509     dst    : S3(read);   // same register read in stage S3
5510     DECODE : S0(2);     // any 2 decoders
5511     ALU    : S3(2);     // both alus
5512 %}
5513 
5514 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
5515 pipe_class ialu_reg_fat(rRegI dst)
5516 %{
5517     single_instruction;
5518     dst    : S4(write);  // result written in stage S4
5519     dst    : S3(read);   // same register read in stage S3
5520     D0     : S0;        // big decoder only
5521     ALU    : S3;        // any alu
5522 %}
5523 
5524 // Long ALU reg operation using big decoder: two instructions, both through D0
5525 pipe_class ialu_reg_long_fat(rRegL dst)
5526 %{
5527     instruction_count(2);
5528     dst    : S4(write);  // result written in stage S4
5529     dst    : S3(read);   // same register read in stage S3
5530     D0     : S0(2);     // big decoder only; twice
5531     ALU    : S3(2);     // any 2 alus
5532 %}
5533 
5534 // Integer ALU reg-reg operation: one instruction reading src and writing dst
5535 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5536 %{
5537     single_instruction;
5538     dst    : S4(write);  // result written in stage S4
5539     src    : S3(read);   // source read in stage S3
5540     DECODE : S0;        // any decoder
5541     ALU    : S3;        // any alu
5542 %}
5543 
5544 // Long ALU reg-reg operation: counted as two instructions
5545 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5546 %{
5547     instruction_count(2);
5548     dst    : S4(write);  // result written in stage S4
5549     src    : S3(read);   // source read in stage S3
5550     DECODE : S0(2);     // any 2 decoders
5551     ALU    : S3(2);     // both alus
5552 %}
5553 
5554 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
5555 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5556 %{
5557     single_instruction;
5558     dst    : S4(write);  // result written in stage S4
5559     src    : S3(read);   // source read in stage S3; no MEM unit is reserved here
5560     D0     : S0;        // big decoder only
5561     ALU    : S3;        // any alu
5562 %}
5563 
5564 // Long ALU reg-reg operation using big decoder: two instructions, both through D0
5565 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5566 %{
5567     instruction_count(2);
5568     dst    : S4(write);  // result written in stage S4
5569     src    : S3(read);   // source read in stage S3
5570     D0     : S0(2);     // big decoder only; twice
5571     ALU    : S3(2);     // both alus
5572 %}
5573 
5574 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, result in S5
5575 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5576 %{
5577     single_instruction;
5578     dst    : S5(write);  // one stage later than the reg-reg classes (S4)
5579     mem    : S3(read);
5580     D0     : S0;        // big decoder only
5581     ALU    : S4;        // any alu
5582     MEM    : S3;        // any mem
5583 %}
5584 
5585 // Integer mem operation (prefetch): touches memory only, no ALU unit and no register result
5586 pipe_class ialu_mem(memory mem)
5587 %{
5588     single_instruction;
5589     mem    : S3(read);
5590     D0     : S0;        // big decoder only
5591     MEM    : S3;        // any mem
5592 %}
5593 
5594 // Integer Store to Memory: register source stored through a memory operand
5595 pipe_class ialu_mem_reg(memory mem, rRegI src)
5596 %{
5597     single_instruction;
5598     mem    : S3(read);   // address operand read in stage S3
5599     src    : S5(read);   // value to store is read late, in stage S5
5600     D0     : S0;        // big decoder only
5601     ALU    : S4;        // any alu
5602     MEM    : S3;        // any mem
5603 %}
5604 
5605 // // Long Store to Memory
5606 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5607 // %{
5608 //     instruction_count(2);
5609 //     mem    : S3(read);
5610 //     src    : S5(read);
5611 //     D0     : S0(2);          // big decoder only; twice
5612 //     ALU    : S4(2);     // any 2 alus
5613 //     MEM    : S3(2);  // Both mems
5614 // %}
5615 
5616 // Integer Store to Memory of an immediate: same shape as ialu_mem_reg but without a register source
5617 pipe_class ialu_mem_imm(memory mem)
5618 %{
5619     single_instruction;
5620     mem    : S3(read);   // address operand read in stage S3
5621     D0     : S0;        // big decoder only
5622     ALU    : S4;        // any alu
5623     MEM    : S3;        // any mem
5624 %}
5625 
5626 // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to D0 and ALU0
5627 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5628 %{
5629     single_instruction;
5630     dst    : S4(write);  // result written in stage S4
5631     src    : S3(read);   // source read in stage S3
5632     D0     : S0;        // Big decoder only
5633     ALU0   : S3;        // only alu0
5634 %}
5635 
5636 // Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to ALU0
5637 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5638 %{
5639     single_instruction;
5640     dst    : S5(write);  // result written in stage S5
5641     mem    : S3(read);   // memory operand read in stage S3
5642     D0     : S0;        // big decoder only
5643     ALU0   : S4;        // ALU0 only
5644     MEM    : S3;        // any mem
5645 %}
5646 
5647 // Integer ALU reg-reg operation whose result is the flags register (cr), e.g. a compare
5648 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5649 %{
5650     single_instruction;
5651     cr     : S4(write);  // flags written in stage S4
5652     src1   : S3(read);
5653     src2   : S3(read);
5654     DECODE : S0;        // any decoder
5655     ALU    : S3;        // any alu
5656 %}
5657 
5658 // Integer ALU reg-imm operation whose result is the flags register (cr); the immediate is not a pipeline operand
5659 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5660 %{
5661     single_instruction;
5662     cr     : S4(write);  // flags written in stage S4
5663     src1   : S3(read);
5664     DECODE : S0;        // any decoder
5665     ALU    : S3;        // any alu
5666 %}
5667 
5668 // Integer ALU reg-mem operation whose result is the flags register (cr)
5669 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5670 %{
5671     single_instruction;
5672     cr     : S4(write);  // flags written in stage S4, the same stage the ALU is reserved in
5673     src1   : S3(read);
5674     src2   : S3(read);   // memory operand read in stage S3
5675     D0     : S0;        // big decoder only
5676     ALU    : S4;        // any alu
5677     MEM    : S3;        // any mem
5678 %}
5679 
5680 // Conditional move reg-reg -- NOTE(review): header looks copied from pipe_cmov_reg; this class only reads p, q, y over 4 instructions -- confirm intent
5681 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5682 %{
5683     instruction_count(4);
5684     y      : S4(read);   // y is read one stage after p and q
5685     q      : S3(read);
5686     p      : S3(read);
5687     DECODE : S0(4);     // any 4 decoders
5688 %}
5689 
5690 // Conditional move reg-reg: reads the source and the flags, writes dst; no ALU resource is reserved
5691 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5692 %{
5693     single_instruction;
5694     dst    : S4(write);
5695     src    : S3(read);
5696     cr     : S3(read);   // flags select whether the move happens
5697     DECODE : S0;        // any decoder
5698 %}
5699 
5700 // Conditional move reg-mem: as pipe_cmov_reg, with the source coming from memory
5701 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5702 %{
5703     single_instruction;
5704     dst    : S4(write);
5705     src    : S3(read);   // memory operand read in stage S3
5706     cr     : S3(read);
5707     DECODE : S0;        // any decoder
5708     MEM    : S3;        // any mem
5709 %}
5710 
5711 // Conditional move reg-reg long -- NOTE(review): declared single_instruction yet reserves 2 decoders; confirm this is intended
5712 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5713 %{
5714     single_instruction;
5715     dst    : S4(write);
5716     src    : S3(read);
5717     cr     : S3(read);
5718     DECODE : S0(2);     // any 2 decoders
5719 %}
5720 
5721 // XXX
5722 // // Conditional move double reg-reg
5723 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5724 // %{
5725 //     single_instruction;
5726 //     dst    : S4(write);
5727 //     src    : S3(read);
5728 //     cr     : S3(read);
5729 //     DECODE : S0;     // any decoder
5730 // %}
5731 
5732 // Float single-register operation: the one operand is only read; no write is declared
5733 pipe_class fpu_reg(regD dst)
5734 %{
5735     instruction_count(2);
5736     dst    : S3(read);
5737     DECODE : S0(2);     // any 2 decoders
5738     FPU    : S3;        // FPU reserved in stage S3
5739 %}
5740 
5741 // Float reg-reg operation: two instructions, one source register, one destination register
5742 pipe_class fpu_reg_reg(regD dst, regD src)
5743 %{
5744     instruction_count(2);
5745     dst    : S4(write);
5746     src    : S3(read);
5747     DECODE : S0(2);     // any 2 decoders
5748     FPU    : S3;        // FPU reserved in stage S3
5749 %}
5750 
5751 // Float reg-reg-reg operation: three instructions, two source registers, one destination register
5752 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5753 %{
5754     instruction_count(3);
5755     dst    : S4(write);
5756     src1   : S3(read);
5757     src2   : S3(read);
5758     DECODE : S0(3);     // any 3 decoders
5759     FPU    : S3(2);     // FPU reserved with count 2 in stage S3
5760 %}
5761 
5762 // Float reg-reg-reg-reg operation: four instructions, three source registers, one destination register
5763 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5764 %{
5765     instruction_count(4);
5766     dst    : S4(write);
5767     src1   : S3(read);
5768     src2   : S3(read);
5769     src3   : S3(read);
5770     DECODE : S0(4);     // any 4 decoders
5771     FPU    : S3(2);     // FPU reserved with count 2 in stage S3
5772 %}
5773 
5774 // Float reg-mem-reg-reg operation: like fpu_reg_reg_reg_reg but src1 comes from memory
5775 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5776 %{
5777     instruction_count(4);
5778     dst    : S4(write);
5779     src1   : S3(read);   // memory operand read in stage S3
5780     src2   : S3(read);
5781     src3   : S3(read);
5782     DECODE : S1(3);     // any 3 decoders
5783     D0     : S0;        // Big decoder only
5784     FPU    : S3(2);     // FPU reserved with count 2 in stage S3
5785     MEM    : S3;        // any mem
5786 %}
5787 
5788 // Float reg-mem operation: load through D0 followed by a second instruction on any decoder
5789 pipe_class fpu_reg_mem(regD dst, memory mem)
5790 %{
5791     instruction_count(2);
5792     dst    : S5(write);  // result written in stage S5, after the FPU stage (S4)
5793     mem    : S3(read);
5794     D0     : S0;        // big decoder only
5795     DECODE : S1;        // any decoder for FPU POP
5796     FPU    : S4;        // FPU reserved in stage S4
5797     MEM    : S3;        // any mem
5798 %}
5799 
5800 // Float reg-reg-mem operation: three instructions, one register source plus one memory source
5801 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5802 %{
5803     instruction_count(3);
5804     dst    : S5(write);  // result written in stage S5
5805     src1   : S3(read);
5806     mem    : S3(read);
5807     D0     : S0;        // big decoder only
5808     DECODE : S1(2);     // any decoder for FPU POP
5809     FPU    : S4;        // FPU reserved in stage S4
5810     MEM    : S3;        // any mem
5811 %}
5812 
5813 // Float mem-reg operation (store direction): both operands are declared as reads, none as writes
5814 pipe_class fpu_mem_reg(memory mem, regD src)
5815 %{
5816     instruction_count(2);
5817     src    : S5(read);   // register value read late, in stage S5
5818     mem    : S3(read);
5819     DECODE : S0;        // any decoder for FPU PUSH
5820     D0     : S1;        // big decoder only
5821     FPU    : S4;        // FPU reserved in stage S4
5822     MEM    : S3;        // any mem
5823 %}
5824 
5825 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5826 %{
5827     instruction_count(3);  // Float mem-reg-reg operation: two register sources and a memory operand, all reads
5828     src1   : S3(read);
5829     src2   : S3(read);
5830     mem    : S3(read);
5831     DECODE : S0(2);     // any decoder for FPU PUSH
5832     D0     : S1;        // big decoder only
5833     FPU    : S4;        // FPU reserved in stage S4
5834     MEM    : S3;        // any mem
5835 %}
5836 
5837 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5838 %{
5839     instruction_count(3);  // Float mem-reg-mem operation: one register and two memory operands, all reads
5840     src1   : S3(read);
5841     src2   : S3(read);
5842     mem    : S4(read);   // 'mem' is read one stage after the two sources
5843     DECODE : S0;        // any decoder for FPU PUSH
5844     D0     : S0(2);     // big decoder only
5845     FPU    : S4;        // FPU reserved in stage S4
5846     MEM    : S3(2);     // any mem
5847 %}
5848 
5849 pipe_class fpu_mem_mem(memory dst, memory src1)
5850 %{
5851     instruction_count(2);  // mem-mem operation: two memory operands; no FPU resource is reserved here
5852     src1   : S3(read);
5853     dst    : S4(read);   // the destination memory operand is declared as a read, one stage after src1
5854     D0     : S0(2);     // big decoder only
5855     MEM    : S3(2);     // any mem
5856 %}
5857 
5858 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5859 %{
5860     instruction_count(3);  // Float mem-mem-mem operation: three memory operands, three D0 and three MEM reservations
5861     src1   : S3(read);
5862     src2   : S3(read);
5863     dst    : S4(read);   // the destination memory operand is declared as a read, one stage after the sources
5864     D0     : S0(3);     // big decoder only
5865     FPU    : S4;        // FPU reserved in stage S4
5866     MEM    : S3(3);     // any mem
5867 %}
5868 
5869 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5870 %{
5871     instruction_count(3);  // Float mem-reg operation; the 'con' (constant) is not a pipeline operand of this class
5872     src1   : S4(read);
5873     mem    : S4(read);
5874     DECODE : S0;        // any decoder for FPU PUSH
5875     D0     : S0(2);     // big decoder only
5876     FPU    : S4;        // FPU reserved in stage S4
5877     MEM    : S3(2);     // any mem
5878 %}
5879 
5880 // Float load constant: the constant is fetched through MEM, so the shape mirrors fpu_reg_mem without a memory operand
5881 pipe_class fpu_reg_con(regD dst)
5882 %{
5883     instruction_count(2);
5884     dst    : S5(write);  // result written in stage S5
5885     D0     : S0;        // big decoder only for the load
5886     DECODE : S1;        // any decoder for FPU POP
5887     FPU    : S4;        // FPU reserved in stage S4
5888     MEM    : S3;        // any mem
5889 %}
5890 
5891 // Float operation on a register and a loaded constant: three instructions, one register source
5892 pipe_class fpu_reg_reg_con(regD dst, regD src)
5893 %{
5894     instruction_count(3);
5895     dst    : S5(write);  // result written in stage S5
5896     src    : S3(read);
5897     D0     : S0;        // big decoder only for the load
5898     DECODE : S1(2);     // any decoder for FPU POP
5899     FPU    : S4;        // FPU reserved in stage S4
5900     MEM    : S3;        // any mem
5901 %}
5902 
5903 // Unconditional branch: only the branch unit is reserved; no decoder or operand reads are declared
5904 pipe_class pipe_jmp(label labl)
5905 %{
5906     single_instruction;
5907     BR   : S3;           // branch unit in stage S3
5908 %}
5909 
5910 // Conditional branch: reads the flags register early, then uses the branch unit
5911 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5912 %{
5913     single_instruction;
5914     cr    : S1(read);    // flags read in stage S1
5915     BR    : S3;          // branch unit in stage S3
5916 %}
5917 
5918 // Allocation idiom (compare-and-exchange on the heap pointer): serializing, fixed latency of 6
5919 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5920 %{
5921     instruction_count(1); force_serialization;
5922     fixed_latency(6);
5923     heap_ptr : S3(read);   // heap pointer read in stage S3
5924     DECODE   : S0(3);
5925     D0       : S2;
5926     MEM      : S3;
5927     ALU      : S3(2);
5928     dst      : S5(write);  // result written in stage S5
5929     BR       : S5;         // branch unit reserved in the same stage as the result write
5930 %}
5931 
5932 // Generic big/slow expanded idiom: catch-all class with a deliberately pessimistic model
5933 pipe_class pipe_slow()
5934 %{
5935     instruction_count(10); multiple_bundles; force_serialization;
5936     fixed_latency(100);    // fixed latency of 100, regardless of operands
5937     D0  : S0(2);
5938     MEM : S3(2);
5939 %}
5940 
5941 // The real do-nothing guy: zero instructions and no resources
5942 pipe_class empty()
5943 %{
5944     instruction_count(0);
5945 %}
5946 
5947 // Define the pipeline class for the Nop node: MachNop uses the 'empty' class above
5948 define
5949 %{
5950    MachNop = empty;
5951 %}
5952 
5953 %}
5954 
5955 //----------INSTRUCTIONS-------------------------------------------------------
5956 //
5957 // match      -- States which machine-independent subtree may be replaced
5958 //               by this instruction.
5959 // ins_cost   -- The estimated cost of this instruction is used by instruction
5960 //               selection to identify a minimum cost tree of machine
5961 //               instructions that matches a tree of machine-independent
5962 //               instructions.
5963 // format     -- A string providing the disassembly for this instruction.
5964 //               The value of an instruction's operand may be inserted
5965 //               by referring to it with a '$' prefix.
5966 // opcode     -- Three instruction opcodes may be provided.  These are referred
5967 //               to within an encode class as $primary, $secondary, and $tertiary
5968 //               respectively.  The primary opcode is commonly used to
5969 //               indicate the type of machine instruction, while secondary
5970 //               and tertiary are often used for prefix options or addressing
5971 //               modes.
5972 // ins_encode -- A list of encode classes with parameters. The encode class
5973 //               name must have been defined in an 'enc_class' specification
5974 //               in the encode section of the architecture description.
5975 
5976 
5977 //----------Load/Store/Move Instructions---------------------------------------
5978 //----------Load Instructions--------------------------------------------------
5979 
5980 // Load Byte (8 bit signed): matches LoadB and emits movsbl from memory into an int register
5981 instruct loadB(rRegI dst, memory mem)
5982 %{
5983   match(Set dst (LoadB mem));
5984 
5985   ins_cost(125);
5986   format %{ "movsbl  $dst, $mem\t# byte" %}
5987 
5988   ins_encode %{
5989     __ movsbl($dst$$Register, $mem$$Address);
5990   %}
5991 
5992   ins_pipe(ialu_reg_mem);
5993 %}
5994 
5995 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a single movsbq
5996 instruct loadB2L(rRegL dst, memory mem)
5997 %{
5998   match(Set dst (ConvI2L (LoadB mem)));
5999 
6000   ins_cost(125);
6001   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6002 
6003   ins_encode %{
6004     __ movsbq($dst$$Register, $mem$$Address);
6005   %}
6006 
6007   ins_pipe(ialu_reg_mem);
6008 %}
6009 
6010 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl from memory into an int register
6011 instruct loadUB(rRegI dst, memory mem)
6012 %{
6013   match(Set dst (LoadUB mem));
6014 
6015   ins_cost(125);
6016   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6017 
6018   ins_encode %{
6019     __ movzbl($dst$$Register, $mem$$Address);
6020   %}
6021 
6022   ins_pipe(ialu_reg_mem);
6023 %}
6024 
6025 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into a single movzbq
6026 instruct loadUB2L(rRegL dst, memory mem)
6027 %{
6028   match(Set dst (ConvI2L (LoadUB mem)));
6029 
6030   ins_cost(125);
6031   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6032 
6033   ins_encode %{
6034     __ movzbq($dst$$Register, $mem$$Address);
6035   %}
6036 
6037   ins_pipe(ialu_reg_mem);
6038 %}
6039 
6040 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register: movzbq followed by andl; flags are killed
6041 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6042   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6043   effect(KILL cr);
6044 
6045   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6046             "andl    $dst, $mask" %}
6047   ins_encode %{
6048     Register Rdst = $dst$$Register;
6049     __ movzbq(Rdst, $mem$$Address);
6050     __ andl(Rdst, $mask$$constant);
6051   %}
6052   ins_pipe(ialu_reg_mem);  // no ins_cost is given, unlike the plain loads above
6053 %}
6054 
6055 // Load Short (16 bit signed): matches LoadS and emits movswl from memory into an int register
6056 instruct loadS(rRegI dst, memory mem)
6057 %{
6058   match(Set dst (LoadS mem));
6059 
6060   ins_cost(125);
6061   format %{ "movswl $dst, $mem\t# short" %}
6062 
6063   ins_encode %{
6064     __ movswl($dst$$Register, $mem$$Address);
6065   %}
6066 
6067   ins_pipe(ialu_reg_mem);
6068 %}
6069 
6070 // Load Short (16 bit signed) to Byte (8 bit signed): folds the left/right shift pair by the immI_24 operand into one movsbl
6071 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6072   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6073 
6074   ins_cost(125);
6075   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6076   ins_encode %{
6077     __ movsbl($dst$$Register, $mem$$Address);
6078   %}
6079   ins_pipe(ialu_reg_mem);
6080 %}
6081 
6082 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a single movswq
6083 instruct loadS2L(rRegL dst, memory mem)
6084 %{
6085   match(Set dst (ConvI2L (LoadS mem)));
6086 
6087   ins_cost(125);
6088   format %{ "movswq $dst, $mem\t# short -> long" %}
6089 
6090   ins_encode %{
6091     __ movswq($dst$$Register, $mem$$Address);
6092   %}
6093 
6094   ins_pipe(ialu_reg_mem);
6095 %}
6096 
6097 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits movzwl from memory into an int register
6098 instruct loadUS(rRegI dst, memory mem)
6099 %{
6100   match(Set dst (LoadUS mem));
6101 
6102   ins_cost(125);
6103   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6104 
6105   ins_encode %{
6106     __ movzwl($dst$$Register, $mem$$Address);
6107   %}
6108 
6109   ins_pipe(ialu_reg_mem);
6110 %}
6111 
6112 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): folds the shift pair by the immI_24 operand into one movsbl
6113 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6114   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6115 
6116   ins_cost(125);
6117   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6118   ins_encode %{
6119     __ movsbl($dst$$Register, $mem$$Address);
6120   %}
6121   ins_pipe(ialu_reg_mem);
6122 %}
6123 
6124 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into a single movzwq
6125 instruct loadUS2L(rRegL dst, memory mem)
6126 %{
6127   match(Set dst (ConvI2L (LoadUS mem)));
6128 
6129   ins_cost(125);
6130   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6131 
6132   ins_encode %{
6133     __ movzwq($dst$$Register, $mem$$Address);
6134   %}
6135 
6136   ins_pipe(ialu_reg_mem);
6137 %}
6138 
6139 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the mask is absorbed by a byte-wide movzbq
6140 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6141   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6142 
6143   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6144   ins_encode %{
6145     __ movzbq($dst$$Register, $mem$$Address);
6146   %}
6147   ins_pipe(ialu_reg_mem);  // no separate and is emitted, so no flags effect is declared
6148 %}
6149 
6150 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register: movzwq followed by andl; flags are killed
6151 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6152   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6153   effect(KILL cr);
6154 
6155   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6156             "andl    $dst, $mask" %}
6157   ins_encode %{
6158     Register Rdst = $dst$$Register;
6159     __ movzwq(Rdst, $mem$$Address);
6160     __ andl(Rdst, $mask$$constant);
6161   %}
6162   ins_pipe(ialu_reg_mem);
6163 %}
6164 
6165 // Load Integer: matches LoadI and emits movl from memory into an int register
6166 instruct loadI(rRegI dst, memory mem)
6167 %{
6168   match(Set dst (LoadI mem));
6169 
6170   ins_cost(125);
6171   format %{ "movl    $dst, $mem\t# int" %}
6172 
6173   ins_encode %{
6174     __ movl($dst$$Register, $mem$$Address);
6175   %}
6176 
6177   ins_pipe(ialu_reg_mem);
6178 %}
6179 
6180 // Load Integer (32 bit signed) to Byte (8 bit signed): folds the shift pair by the immI_24 operand into one movsbl
6181 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6182   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6183 
6184   ins_cost(125);
6185   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6186   ins_encode %{
6187     __ movsbl($dst$$Register, $mem$$Address);
6188   %}
6189   ins_pipe(ialu_reg_mem);
6190 %}
6191 
6192 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): AndI with the immI_255 mask becomes one movzbl
6193 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6194   match(Set dst (AndI (LoadI mem) mask));
6195 
6196   ins_cost(125);
6197   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6198   ins_encode %{
6199     __ movzbl($dst$$Register, $mem$$Address);
6200   %}
6201   ins_pipe(ialu_reg_mem);
6202 %}
6203 
6204 // Load Integer (32 bit signed) to Short (16 bit signed): folds the shift pair by the immI_16 operand into one movswl
6205 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6206   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6207 
6208   ins_cost(125);
6209   format %{ "movswl  $dst, $mem\t# int -> short" %}
6210   ins_encode %{
6211     __ movswl($dst$$Register, $mem$$Address);
6212   %}
6213   ins_pipe(ialu_reg_mem);
6214 %}
6215 
6216 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): AndI with the immI_65535 mask becomes one movzwl
6217 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6218   match(Set dst (AndI (LoadI mem) mask));
6219 
6220   ins_cost(125);
6221   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6222   ins_encode %{
6223     __ movzwl($dst$$Register, $mem$$Address);
6224   %}
6225   ins_pipe(ialu_reg_mem);
6226 %}
6227 
6228 // Load Integer into Long Register: folds ConvI2L(LoadI) into a single movslq
6229 instruct loadI2L(rRegL dst, memory mem)
6230 %{
6231   match(Set dst (ConvI2L (LoadI mem)));
6232 
6233   ins_cost(125);
6234   format %{ "movslq  $dst, $mem\t# int -> long" %}
6235 
6236   ins_encode %{
6237     __ movslq($dst$$Register, $mem$$Address);
6238   %}
6239 
6240   ins_pipe(ialu_reg_mem);
6241 %}
6242 
6243 // Load Integer with mask 0xFF into Long Register: the mask is absorbed by a byte-wide movzbq
6244 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6245   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6246 
6247   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6248   ins_encode %{
6249     __ movzbq($dst$$Register, $mem$$Address);
6250   %}
6251   ins_pipe(ialu_reg_mem);  // no separate and is emitted, so no flags effect is declared
6252 %}
6253 
6254 // Load Integer with mask 0xFFFF into Long Register: the mask is absorbed by a word-wide movzwq
6255 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6256   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6257 
6258   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6259   ins_encode %{
6260     __ movzwq($dst$$Register, $mem$$Address);
6261   %}
6262   ins_pipe(ialu_reg_mem);  // no separate and is emitted, so no flags effect is declared
6263 %}
6264 
6265 // Load Integer with a 32-bit mask into Long Register: movl followed by andl; flags are killed
6266 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6267   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6268   effect(KILL cr);
6269 
6270   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6271             "andl    $dst, $mask" %}
6272   ins_encode %{
6273     Register Rdst = $dst$$Register;
6274     __ movl(Rdst, $mem$$Address);
6275     __ andl(Rdst, $mask$$constant);
6276   %}
6277   ins_pipe(ialu_reg_mem);
6278 %}
6279 
6280 // Load Unsigned Integer into Long Register: matches LoadUI2L and emits a plain movl into the long register
6281 instruct loadUI2L(rRegL dst, memory mem)
6282 %{
6283   match(Set dst (LoadUI2L mem));
6284 
6285   ins_cost(125);
6286   format %{ "movl    $dst, $mem\t# uint -> long" %}
6287 
6288   ins_encode %{
6289     __ movl($dst$$Register, $mem$$Address);
6290   %}
6291 
6292   ins_pipe(ialu_reg_mem);
6293 %}
6294 
6295 // Load Long: matches LoadL and emits movq from memory into a long register
6296 instruct loadL(rRegL dst, memory mem)
6297 %{
6298   match(Set dst (LoadL mem));
6299 
6300   ins_cost(125);
6301   format %{ "movq    $dst, $mem\t# long" %}
6302 
6303   ins_encode %{
6304     __ movq($dst$$Register, $mem$$Address);
6305   %}
6306 
6307   ins_pipe(ialu_reg_mem); // XXX
6308 %}
6309 
6310 // Load Range: matches LoadRange into an int register; encoded by hand with primary opcode 0x8B (movl per the format)
6311 instruct loadRange(rRegI dst, memory mem)
6312 %{
6313   match(Set dst (LoadRange mem));
6314 
6315   ins_cost(125); // XXX
6316   format %{ "movl    $dst, $mem\t# range" %}
6317   opcode(0x8B);
6318   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6319   ins_pipe(ialu_reg_mem);
6320 %}
6321 
6322 // Load Pointer: matches LoadP; same 0x8B opcode as loadRange but with the REX_reg_mem_wide prefix class (movq per the format)
6323 instruct loadP(rRegP dst, memory mem)
6324 %{
6325   match(Set dst (LoadP mem));
6326 
6327   ins_cost(125); // XXX
6328   format %{ "movq    $dst, $mem\t# ptr" %}
6329   opcode(0x8B);
6330   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6331   ins_pipe(ialu_reg_mem); // XXX
6332 %}
6333 
6334 // Load Compressed Pointer: matches LoadN and emits a 32-bit movl into a narrow-oop register
6335 instruct loadN(rRegN dst, memory mem)
6336 %{
6337    match(Set dst (LoadN mem));
6338 
6339    ins_cost(125); // XXX
6340    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6341    ins_encode %{
6342      __ movl($dst$$Register, $mem$$Address);
6343    %}
6344    ins_pipe(ialu_reg_mem); // XXX
6345 %}
6346 
6347 
6348 // Load Klass Pointer: matches LoadKlass; encoded exactly like loadP (0x8B with REX_reg_mem_wide)
6349 instruct loadKlass(rRegP dst, memory mem)
6350 %{
6351   match(Set dst (LoadKlass mem));
6352 
6353   ins_cost(125); // XXX
6354   format %{ "movq    $dst, $mem\t# class" %}
6355   opcode(0x8B);
6356   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6357   ins_pipe(ialu_reg_mem); // XXX
6358 %}
6359 
6360 // Load narrow Klass Pointer: matches LoadNKlass and emits a 32-bit movl, like loadN
6361 instruct loadNKlass(rRegN dst, memory mem)
6362 %{
6363   match(Set dst (LoadNKlass mem));
6364 
6365   ins_cost(125); // XXX
6366   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6367   ins_encode %{
6368     __ movl($dst$$Register, $mem$$Address);
6369   %}
6370   ins_pipe(ialu_reg_mem); // XXX
6371 %}
6372 
6373 // Load Float: matches LoadF; hand-encoded as F3 0F 10 (movss per the format), prefix byte emitted before the REX prefix
6374 instruct loadF(regF dst, memory mem)
6375 %{
6376   match(Set dst (LoadF mem));
6377 
6378   ins_cost(145); // XXX
6379   format %{ "movss   $dst, $mem\t# float" %}
6380   opcode(0xF3, 0x0F, 0x10);
6381   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6382   ins_pipe(pipe_slow); // XXX
6383 %}
6384 
6385 // Load Double, variant selected when UseXmmLoadAndClearUpper is false: encoded as 66 0F 12 (movlpd per the format)
6386 instruct loadD_partial(regD dst, memory mem)
6387 %{
6388   predicate(!UseXmmLoadAndClearUpper);
6389   match(Set dst (LoadD mem));
6390 
6391   ins_cost(145); // XXX
6392   format %{ "movlpd  $dst, $mem\t# double" %}
6393   opcode(0x66, 0x0F, 0x12);
6394   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6395   ins_pipe(pipe_slow); // XXX
6396 %}
6397 
6398 instruct loadD(regD dst, memory mem)
6399 %{
6400   predicate(UseXmmLoadAndClearUpper);
6401   match(Set dst (LoadD mem));
6402 
6403   ins_cost(145); // XXX -- Load Double, variant selected when UseXmmLoadAndClearUpper is true (complement of loadD_partial)
6404   format %{ "movsd   $dst, $mem\t# double" %}
6405   opcode(0xF2, 0x0F, 0x10);
6406   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6407   ins_pipe(pipe_slow); // XXX
6408 %}
6409 
6410 // Load Aligned Packed Byte to XMM register: matches Load8B and uses the shared movq_ld encoding
6411 instruct loadA8B(regD dst, memory mem) %{
6412   match(Set dst (Load8B mem));
6413   ins_cost(125);
6414   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6415   ins_encode( movq_ld(dst, mem));
6416   ins_pipe( pipe_slow );
6417 %}
6418 
6419 // Load Aligned Packed Short to XMM register: matches Load4S and uses the shared movq_ld encoding
6420 instruct loadA4S(regD dst, memory mem) %{
6421   match(Set dst (Load4S mem));
6422   ins_cost(125);
6423   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6424   ins_encode( movq_ld(dst, mem));
6425   ins_pipe( pipe_slow );
6426 %}
6427 
6428 // Load Aligned Packed Char to XMM register: matches Load4C and uses the shared movq_ld encoding
6429 instruct loadA4C(regD dst, memory mem) %{
6430   match(Set dst (Load4C mem));
6431   ins_cost(125);
6432   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6433   ins_encode( movq_ld(dst, mem));
6434   ins_pipe( pipe_slow );
6435 %}
6436 
6437 // Load Aligned Packed Integer to XMM register: matches Load2I and uses the shared movq_ld encoding (name departs from the loadA* siblings)
6438 instruct load2IU(regD dst, memory mem) %{
6439   match(Set dst (Load2I mem));
6440   ins_cost(125);
6441   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6442   ins_encode( movq_ld(dst, mem));
6443   ins_pipe( pipe_slow );
6444 %}
6445 
6446 // Load Aligned Packed Single to XMM: matches Load2F and uses the shared movq_ld encoding; costed 145 rather than 125
6447 instruct loadA2F(regD dst, memory mem) %{
6448   match(Set dst (Load2F mem));
6449   ins_cost(145);
6450   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6451   ins_encode( movq_ld(dst, mem));
6452   ins_pipe( pipe_slow );
6453 %}
6454 
6455 // Load Effective Address: materializes an indOffset8 address operand into a pointer register via opcode 0x8D (leaq)
6456 instruct leaP8(rRegP dst, indOffset8 mem)
6457 %{
6458   match(Set dst mem);
6459 
6460   ins_cost(110); // XXX
6461   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6462   opcode(0x8D);
6463   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6464   ins_pipe(ialu_reg_reg_fat);
6465 %}
6466 
6467 instruct leaP32(rRegP dst, indOffset32 mem)
6468 %{
6469   match(Set dst mem);
6470 
6471   ins_cost(110); // leaq of an indOffset32 address operand; same encoding as leaP8
6472   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6473   opcode(0x8D);
6474   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6475   ins_pipe(ialu_reg_reg_fat);
6476 %}
6477 
6478 // instruct leaPIdx(rRegP dst, indIndex mem)
6479 // %{
6480 //   match(Set dst mem);
6481 
6482 //   ins_cost(110);
6483 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6484 //   opcode(0x8D);
6485 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6486 //   ins_pipe(ialu_reg_reg_fat);
6487 // %}
6488 
6489 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6490 %{
6491   match(Set dst mem);
6492 
6493   ins_cost(110); // leaq of an indIndexOffset address operand; same encoding as leaP8
6494   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6495   opcode(0x8D);
6496   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6497   ins_pipe(ialu_reg_reg_fat);
6498 %}
6499 
6500 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6501 %{
6502   match(Set dst mem);
6503 
6504   ins_cost(110); // leaq of an indIndexScale address operand; same encoding as leaP8
6505   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6506   opcode(0x8D);
6507   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6508   ins_pipe(ialu_reg_reg_fat);
6509 %}
6510 
6511 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6512 %{
6513   match(Set dst mem);
6514 
6515   ins_cost(110); // leaq of an indIndexScaleOffset address operand; same encoding as leaP8
6516   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6517   opcode(0x8D);
6518   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6519   ins_pipe(ialu_reg_reg_fat);
6520 %}
6521 
6522 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6523 %{
6524   match(Set dst mem);
6525 
6526   ins_cost(110); // leaq of an indPosIndexScaleOffset address operand; same encoding as leaP8
6527   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6528   opcode(0x8D);
6529   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6530   ins_pipe(ialu_reg_reg_fat);
6531 %}
6532 
6533 // Load Effective Address which uses Narrow (32-bits) oop; only enabled with compressed oops and a non-zero narrow-oop shift
6534 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6535 %{
6536   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6537   match(Set dst mem);
6538 
6539   ins_cost(110);
6540   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6541   opcode(0x8D);
6542   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6543   ins_pipe(ialu_reg_reg_fat);
6544 %}
6545 
6546 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6547 %{
6548   predicate(Universe::narrow_oop_shift() == 0);
6549   match(Set dst mem);
6550 
6551   ins_cost(110); // XXX -- leaq of an indOffset8Narrow operand; only enabled when the narrow-oop shift is zero
6552   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6553   opcode(0x8D);
6554   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6555   ins_pipe(ialu_reg_reg_fat);
6556 %}
6557 
6558 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6559 %{
6560   predicate(Universe::narrow_oop_shift() == 0);
6561   match(Set dst mem);
6562 
6563   ins_cost(110); // leaq of an indOffset32Narrow operand; only enabled when the narrow-oop shift is zero
6564   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6565   opcode(0x8D);
6566   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6567   ins_pipe(ialu_reg_reg_fat);
6568 %}
6569 
6570 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6571 %{
6572   predicate(Universe::narrow_oop_shift() == 0);
6573   match(Set dst mem);
6574 
6575   ins_cost(110); // leaq of an indIndexOffsetNarrow operand; only enabled when the narrow-oop shift is zero
6576   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6577   opcode(0x8D);
6578   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6579   ins_pipe(ialu_reg_reg_fat);
6580 %}
6581 
6582 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6583 %{
6584   predicate(Universe::narrow_oop_shift() == 0);
6585   match(Set dst mem);
6586 
6587   ins_cost(110); // leaq of an indIndexScaleNarrow operand; only enabled when the narrow-oop shift is zero
6588   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6589   opcode(0x8D);
6590   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6591   ins_pipe(ialu_reg_reg_fat);
6592 %}
6593 
6594 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6595 %{
6596   predicate(Universe::narrow_oop_shift() == 0);
6597   match(Set dst mem);
6598 
6599   ins_cost(110); // leaq of an indIndexScaleOffsetNarrow operand; only enabled when the narrow-oop shift is zero
6600   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6601   opcode(0x8D);
6602   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6603   ins_pipe(ialu_reg_reg_fat);
6604 %}
6605 
6606 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6607 %{
6608   predicate(Universe::narrow_oop_shift() == 0);
6609   match(Set dst mem);
6610 
6611   ins_cost(110); // leaq of an indPosIndexScaleOffsetNarrow operand; only enabled when the narrow-oop shift is zero
6612   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6613   opcode(0x8D);
6614   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6615   ins_pipe(ialu_reg_reg_fat);
6616 %}
6617 
6618 instruct loadConI(rRegI dst, immI src)
6619 %{
6620   match(Set dst src);
6621 
6622   format %{ "movl    $dst, $src\t# int" %}
6623   ins_encode(load_immI(dst, src));
6624   ins_pipe(ialu_reg_fat); // XXX -- general int constant load; no ins_cost given, loadConI0 (cost 50) covers zero
6625 %}
6626 
6627 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6628 %{
6629   match(Set dst src);
6630   effect(KILL cr);
6631 
6632   ins_cost(50); // int zero constant: xor dst with itself (opcode 0x33) instead of moving an immediate; flags are killed
6633   format %{ "xorl    $dst, $dst\t# int" %}
6634   opcode(0x33); /* + rd */
6635   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6636   ins_pipe(ialu_reg);
6637 %}
6638 
6639 instruct loadConL(rRegL dst, immL src)
6640 %{
6641   match(Set dst src);
6642 
6643   ins_cost(150); // general long constant load; the most expensive of the loadConL* variants (50/60/70/150)
6644   format %{ "movq    $dst, $src\t# long" %}
6645   ins_encode(load_immL(dst, src));
6646   ins_pipe(ialu_reg);
6647 %}
6648 
6649 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6650 %{
6651   match(Set dst src);
6652   effect(KILL cr);
6653 
6654   ins_cost(50); // long zero constant via a 32-bit xor (non-wide REX class); presumably relies on 32-bit writes zero-extending -- confirm
6655   format %{ "xorl    $dst, $dst\t# long" %}
6656   opcode(0x33); /* + rd */
6657   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6658   ins_pipe(ialu_reg); // XXX
6659 %}
6660 
// Long constant that fits the immUL32 operand (unsigned 32-bit per the format
// annotation).  Encoded by load_immUL32 and shown as a movl; ins_cost 60.
6661 instruct loadConUL32(rRegL dst, immUL32 src)
6662 %{
6663   match(Set dst src);
6664 
6665   ins_cost(60);
6666   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6667   ins_encode(load_immUL32(dst, src));
6668   ins_pipe(ialu_reg);
6669 %}
6670 
// Long constant that fits the immL32 operand.  Encoded by load_immL32 and
// shown as a movq; ins_cost 70 places it between loadConUL32 and loadConL.
6671 instruct loadConL32(rRegL dst, immL32 src)
6672 %{
6673   match(Set dst src);
6674 
6675   ins_cost(70);
6676   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6677   ins_encode(load_immL32(dst, src));
6678   ins_pipe(ialu_reg);
6679 %}
6680 
// Materializes an arbitrary pointer constant (immP) in $dst via the load_immP
// enc_class; the format string shows a movq.
6681 instruct loadConP(rRegP dst, immP src)
6682 %{
6683   match(Set dst src);
6684 
6685   format %{ "movq    $dst, $src\t# ptr" %}
6686   ins_encode(load_immP(dst, src));
6687   ins_pipe(ialu_reg_fat); // XXX
6688 %}
6689 
// Special case for the NULL pointer constant (immP0): xors $dst with itself
// using the 32-bit form (opcode 0x33, non-wide REX_reg_reg).  Flags are
// declared killed.
6690 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6691 %{
6692   match(Set dst src);
6693   effect(KILL cr);
6694 
6695   ins_cost(50);
6696   format %{ "xorl    $dst, $dst\t# ptr" %}
6697   opcode(0x33); /* + rd */
6698   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6699   ins_pipe(ialu_reg);
6700 %}
6701 
// Pointer constant that fits the immP31 operand ("positive 32-bit" per the
// format annotation); encoded by load_immP31 and shown as a movl.
// NOTE(review): KILL cr is declared although the format shows a plain movl;
// confirm against the load_immP31 enc_class whether the kill is required.
6702 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6703 %{
6704   match(Set dst src);
6705   effect(KILL cr);
6706 
6707   ins_cost(60);
6708   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6709   ins_encode(load_immP31(dst, src));
6710   ins_pipe(ialu_reg);
6711 %}
6712 
// Loads a float constant (immF) into an XMM register.  The format string
// shows a movss from memory ([$src]); the bytes come from the load_conF
// enc_class.
6713 instruct loadConF(regF dst, immF src)
6714 %{
6715   match(Set dst src);
6716   ins_cost(125);
6717 
6718   format %{ "movss   $dst, [$src]" %}
6719   ins_encode(load_conF(dst, src));
6720   ins_pipe(pipe_slow);
6721 %}
6722 
// Loads the compressed NULL pointer (immN0) by xor-ing $dst with itself; the
// flags register is declared killed.  The format string names $dst for both
// operands so the printed listing matches the xorq that is actually emitted
// (the constant operand $src never appears in the instruction).
6723 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6724   match(Set dst src);
6725   effect(KILL cr);
6726   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6727   ins_encode %{
6728     __ xorq($dst$$Register, $dst$$Register);
6729   %}
6730   ins_pipe(ialu_reg);
6731 %}
6732 
// Loads a non-NULL compressed oop constant (immN) through
// MacroAssembler::set_narrow_oop.  A NULL constant is treated as a matcher
// error here (ShouldNotReachHere); presumably loadConN0 is expected to have
// matched that case.
6733 instruct loadConN(rRegN dst, immN src) %{
6734   match(Set dst src);
6735 
6736   ins_cost(125);
6737   format %{ "movl    $dst, $src\t# compressed ptr" %}
6738   ins_encode %{
6739     address con = (address)$src$$constant;
6740     if (con == NULL) {
6741       ShouldNotReachHere();
6742     } else {
6743       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6744     }
6745   %}
6746   ins_pipe(ialu_reg_fat); // XXX
6747 %}
6748 
// Special case for the float constant 0.0 (immF0): xors the XMM register with
// itself (opcode bytes 0F 57, xorps per the format string).  ins_cost 100
// makes it cheaper than loadConF.
6749 instruct loadConF0(regF dst, immF0 src)
6750 %{
6751   match(Set dst src);
6752   ins_cost(100);
6753 
6754   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6755   opcode(0x0F, 0x57);
6756   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6757   ins_pipe(pipe_slow);
6758 %}
6759 
6760 // Use the same format since predicate() can not be used here.
// Loads a double constant (immD) into an XMM register.  The format string
// shows a movsd from memory ([$src]); the bytes come from the load_conD
// enc_class.
6761 instruct loadConD(regD dst, immD src)
6762 %{
6763   match(Set dst src);
6764   ins_cost(125);
6765 
6766   format %{ "movsd   $dst, [$src]" %}
6767   ins_encode(load_conD(dst, src));
6768   ins_pipe(pipe_slow);
6769 %}
6770 
// Special case for the double constant 0.0 (immD0): xors the XMM register
// with itself (xorpd per the format string).  The 0x66 prefix (OpcP) is
// emitted before the REX prefix, followed by 0F 57.
6771 instruct loadConD0(regD dst, immD0 src)
6772 %{
6773   match(Set dst src);
6774   ins_cost(100);
6775 
6776   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6777   opcode(0x66, 0x0F, 0x57);
6778   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6779   ins_pipe(pipe_slow);
6780 %}
6781 
// Reloads an int from a stack slot into a general register: opcode 0x8B
// (movl per the format string) with a non-wide REX prefix.
6782 instruct loadSSI(rRegI dst, stackSlotI src)
6783 %{
6784   match(Set dst src);
6785 
6786   ins_cost(125);
6787   format %{ "movl    $dst, $src\t# int stk" %}
6788   opcode(0x8B);
6789   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6790   ins_pipe(ialu_reg_mem);
6791 %}
6792 
// Reloads a long from a stack slot into a general register: opcode 0x8B
// (movq per the format string) with the REX_reg_mem_wide prefix.
6793 instruct loadSSL(rRegL dst, stackSlotL src)
6794 %{
6795   match(Set dst src);
6796 
6797   ins_cost(125);
6798   format %{ "movq    $dst, $src\t# long stk" %}
6799   opcode(0x8B);
6800   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6801   ins_pipe(ialu_reg_mem);
6802 %}
6803 
// Reloads a pointer from a stack slot into a general register; identical
// encoding to loadSSL (opcode 0x8B with the REX_reg_mem_wide prefix).
6804 instruct loadSSP(rRegP dst, stackSlotP src)
6805 %{
6806   match(Set dst src);
6807 
6808   ins_cost(125);
6809   format %{ "movq    $dst, $src\t# ptr stk" %}
6810   opcode(0x8B);
6811   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6812   ins_pipe(ialu_reg_mem);
6813 %}
6814 
// Reloads a float from a stack slot into an XMM register: F3 0F 10 (movss per
// the format string), with the F3 prefix emitted ahead of the REX prefix.
6815 instruct loadSSF(regF dst, stackSlotF src)
6816 %{
6817   match(Set dst src);
6818 
6819   ins_cost(125);
6820   format %{ "movss   $dst, $src\t# float stk" %}
6821   opcode(0xF3, 0x0F, 0x10);
6822   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6823   ins_pipe(pipe_slow); // XXX
6824 %}
6825 
6826 // Use the same format since predicate() can not be used here.
// Reloads a double from a stack slot into an XMM register.  Unlike the other
// loadSS* rules this one goes through MacroAssembler::movdbl, addressing the
// slot as rsp + $src$$disp, so the exact instruction is chosen by movdbl.
6827 instruct loadSSD(regD dst, stackSlotD src)
6828 %{
6829   match(Set dst src);
6830 
6831   ins_cost(125);
6832   format %{ "movsd   $dst, $src\t# double stk" %}
6833   ins_encode  %{
6834     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6835   %}
6836   ins_pipe(pipe_slow); // XXX
6837 %}
6838 
6839 // Prefetch instructions.
6840 // Must be safe to execute with invalid address (cannot fault).
6841 
// PrefetchRead variant chosen when ReadPrefetchInstr == 3: emits 0F 0D with
// ModRM opcode extension /0 (PREFETCHR per the format string).
6842 instruct prefetchr( memory mem ) %{
6843   predicate(ReadPrefetchInstr==3);
6844   match(PrefetchRead mem);
6845   ins_cost(125);
6846 
6847   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6848   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6849   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6850   ins_pipe(ialu_mem);
6851 %}
6852 
// PrefetchRead variant chosen when ReadPrefetchInstr == 0: emits 0F 18 with
// ModRM opcode extension /0 (PREFETCHNTA per the format string).
6853 instruct prefetchrNTA( memory mem ) %{
6854   predicate(ReadPrefetchInstr==0);
6855   match(PrefetchRead mem);
6856   ins_cost(125);
6857 
6858   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6859   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6860   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6861   ins_pipe(ialu_mem);
6862 %}
6863 
// PrefetchRead variant chosen when ReadPrefetchInstr == 1: emits 0F 18 with
// ModRM opcode extension /1 (PREFETCHT0 per the format string).
6864 instruct prefetchrT0( memory mem ) %{
6865   predicate(ReadPrefetchInstr==1);
6866   match(PrefetchRead mem);
6867   ins_cost(125);
6868 
6869   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6870   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6871   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6872   ins_pipe(ialu_mem);
6873 %}
6874 
// PrefetchRead variant chosen when ReadPrefetchInstr == 2: emits 0F 18 with
// ModRM opcode extension /3 (PREFETCHT2 per the format string).
6875 instruct prefetchrT2( memory mem ) %{
6876   predicate(ReadPrefetchInstr==2);
6877   match(PrefetchRead mem);
6878   ins_cost(125);
6879 
6880   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6881   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6882   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6883   ins_pipe(ialu_mem);
6884 %}
6885 
// PrefetchWrite variant chosen when AllocatePrefetchInstr == 3: emits 0F 0D
// with ModRM opcode extension /1 (PREFETCHW per the format string).
6886 instruct prefetchw( memory mem ) %{
6887   predicate(AllocatePrefetchInstr==3);
6888   match(PrefetchWrite mem);
6889   ins_cost(125);
6890 
6891   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6892   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6893   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6894   ins_pipe(ialu_mem);
6895 %}
6896 
// PrefetchWrite variant chosen when AllocatePrefetchInstr == 0: same bytes as
// prefetchrNTA (0F 18 /0), but matched on PrefetchWrite.
6897 instruct prefetchwNTA( memory mem ) %{
6898   predicate(AllocatePrefetchInstr==0);
6899   match(PrefetchWrite mem);
6900   ins_cost(125);
6901 
6902   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6903   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6904   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6905   ins_pipe(ialu_mem);
6906 %}
6907 
// PrefetchWrite variant chosen when AllocatePrefetchInstr == 1: same bytes as
// prefetchrT0 (0F 18 /1), but matched on PrefetchWrite.
6908 instruct prefetchwT0( memory mem ) %{
6909   predicate(AllocatePrefetchInstr==1);
6910   match(PrefetchWrite mem);
6911   ins_cost(125);
6912 
6913   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6914   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6915   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6916   ins_pipe(ialu_mem);
6917 %}
6918 
// PrefetchWrite variant chosen when AllocatePrefetchInstr == 2: same bytes as
// prefetchrT2 (0F 18 /3), but matched on PrefetchWrite.
6919 instruct prefetchwT2( memory mem ) %{
6920   predicate(AllocatePrefetchInstr==2);
6921   match(PrefetchWrite mem);
6922   ins_cost(125);
6923 
6924   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6925   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6926   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6927   ins_pipe(ialu_mem);
6928 %}
6929 
6930 //----------Store Instructions-------------------------------------------------
6931 
6932 // Store Byte
// Matches StoreB of an int register: opcode 0x88 (movb per the format
// string).  Uses the byte-register REX helper REX_breg_mem rather than
// REX_reg_mem.
6933 instruct storeB(memory mem, rRegI src)
6934 %{
6935   match(Set mem (StoreB mem src));
6936 
6937   ins_cost(125); // XXX
6938   format %{ "movb    $mem, $src\t# byte" %}
6939   opcode(0x88);
6940   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6941   ins_pipe(ialu_mem_reg);
6942 %}
6943 
6944 // Store Char/Short
// Matches StoreC of an int register: SizePrefix is emitted first, then the
// REX prefix and opcode 0x89 (movw per the format string).
6945 instruct storeC(memory mem, rRegI src)
6946 %{
6947   match(Set mem (StoreC mem src));
6948 
6949   ins_cost(125); // XXX
6950   format %{ "movw    $mem, $src\t# char/short" %}
6951   opcode(0x89);
6952   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6953   ins_pipe(ialu_mem_reg);
6954 %}
6955 
6956 // Store Integer
// Matches StoreI of an int register: opcode 0x89 with a non-wide REX prefix
// (movl per the format string).
6957 instruct storeI(memory mem, rRegI src)
6958 %{
6959   match(Set mem (StoreI mem src));
6960 
6961   ins_cost(125); // XXX
6962   format %{ "movl    $mem, $src\t# int" %}
6963   opcode(0x89);
6964   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6965   ins_pipe(ialu_mem_reg);
6966 %}
6967 
6968 // Store Long
// Matches StoreL of a long register: opcode 0x89 with the REX_reg_mem_wide
// prefix (movq per the format string).
6969 instruct storeL(memory mem, rRegL src)
6970 %{
6971   match(Set mem (StoreL mem src));
6972 
6973   ins_cost(125); // XXX
6974   format %{ "movq    $mem, $src\t# long" %}
6975   opcode(0x89);
6976   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6977   ins_pipe(ialu_mem_reg); // XXX
6978 %}
6979 
6980 // Store Pointer
// Matches StoreP of a pointer register (operand class any_RegP): same bytes
// as storeL, opcode 0x89 with the REX_reg_mem_wide prefix.
6981 instruct storeP(memory mem, any_RegP src)
6982 %{
6983   match(Set mem (StoreP mem src));
6984 
6985   ins_cost(125); // XXX
6986   format %{ "movq    $mem, $src\t# ptr" %}
6987   opcode(0x89);
6988   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6989   ins_pipe(ialu_mem_reg);
6990 %}
6991 
// Stores a NULL pointer by writing r12 with a movq.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero in that configuration.
6992 instruct storeImmP0(memory mem, immP0 zero)
6993 %{
6994   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6995   match(Set mem (StoreP mem zero));
6996 
6997   ins_cost(125); // XXX
6998   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6999   ins_encode %{
7000     __ movq($mem$$Address, r12);
7001   %}
7002   ins_pipe(ialu_mem_reg);
7003 %}
7004 
7005 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matches StoreP of an immP31 constant: opcode C7 /0 with a wide REX prefix,
// followed by the constant as a 32-bit immediate (Con32).  ins_cost 150 makes
// it more expensive than the register and R12 forms.
7006 instruct storeImmP(memory mem, immP31 src)
7007 %{
7008   match(Set mem (StoreP mem src));
7009 
7010   ins_cost(150); // XXX
7011   format %{ "movq    $mem, $src\t# ptr" %}
7012   opcode(0xC7); /* C7 /0 */
7013   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7014   ins_pipe(ialu_mem_imm);
7015 %}
7016 
7017 // Store Compressed Pointer
// Matches StoreN of a compressed-oop register; emitted through
// MacroAssembler::movl as a 32-bit store.
7018 instruct storeN(memory mem, rRegN src)
7019 %{
7020   match(Set mem (StoreN mem src));
7021 
7022   ins_cost(125); // XXX
7023   format %{ "movl    $mem, $src\t# compressed ptr" %}
7024   ins_encode %{
7025     __ movl($mem$$Address, $src$$Register);
7026   %}
7027   ins_pipe(ialu_mem_reg);
7028 %}
7029 
// Stores a compressed NULL by writing r12 with a movl.  Enabled only when
// narrow_oop_base() is NULL (note: unlike the sibling *0 rules the predicate
// does not test UseCompressedOops); the format annotation indicates r12 is
// zero then.
7030 instruct storeImmN0(memory mem, immN0 zero)
7031 %{
7032   predicate(Universe::narrow_oop_base() == NULL);
7033   match(Set mem (StoreN mem zero));
7034 
7035   ins_cost(125); // XXX
7036   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7037   ins_encode %{
7038     __ movl($mem$$Address, r12);
7039   %}
7040   ins_pipe(ialu_mem_reg);
7041 %}
7042 
// Matches StoreN of a compressed-oop constant.  A NULL constant is written as
// a 32-bit zero immediate; any other constant is emitted through
// MacroAssembler::set_narrow_oop on the memory address.
7043 instruct storeImmN(memory mem, immN src)
7044 %{
7045   match(Set mem (StoreN mem src));
7046 
7047   ins_cost(150); // XXX
7048   format %{ "movl    $mem, $src\t# compressed ptr" %}
7049   ins_encode %{
7050     address con = (address)$src$$constant;
7051     if (con == NULL) {
7052       __ movl($mem$$Address, (int32_t)0);
7053     } else {
7054       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7055     }
7056   %}
7057   ins_pipe(ialu_mem_imm);
7058 %}
7059 
7060 // Store Integer Immediate
// Stores int zero by writing r12 with a movl.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero in that configuration.
7061 instruct storeImmI0(memory mem, immI0 zero)
7062 %{
7063   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7064   match(Set mem (StoreI mem zero));
7065 
7066   ins_cost(125); // XXX
7067   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7068   ins_encode %{
7069     __ movl($mem$$Address, r12);
7070   %}
7071   ins_pipe(ialu_mem_reg);
7072 %}
7073 
// Matches StoreI of an int constant: opcode C7 /0 with a non-wide REX prefix,
// followed by the 32-bit immediate (Con32).
7074 instruct storeImmI(memory mem, immI src)
7075 %{
7076   match(Set mem (StoreI mem src));
7077 
7078   ins_cost(150);
7079   format %{ "movl    $mem, $src\t# int" %}
7080   opcode(0xC7); /* C7 /0 */
7081   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7082   ins_pipe(ialu_mem_imm);
7083 %}
7084 
7085 // Store Long Immediate
// Stores long zero by writing r12 with a movq.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero in that configuration.
7086 instruct storeImmL0(memory mem, immL0 zero)
7087 %{
7088   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7089   match(Set mem (StoreL mem zero));
7090 
7091   ins_cost(125); // XXX
7092   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7093   ins_encode %{
7094     __ movq($mem$$Address, r12);
7095   %}
7096   ins_pipe(ialu_mem_reg);
7097 %}
7098 
// Matches StoreL of a long constant that fits immL32: opcode C7 /0 with a
// wide REX prefix, followed by a 32-bit immediate (Con32).
7099 instruct storeImmL(memory mem, immL32 src)
7100 %{
7101   match(Set mem (StoreL mem src));
7102 
7103   ins_cost(150);
7104   format %{ "movq    $mem, $src\t# long" %}
7105   opcode(0xC7); /* C7 /0 */
7106   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7107   ins_pipe(ialu_mem_imm);
7108 %}
7109 
7110 // Store Short/Char Immediate
// Stores a zero short/char by writing r12 with a movw.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero in that configuration.
7111 instruct storeImmC0(memory mem, immI0 zero)
7112 %{
7113   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7114   match(Set mem (StoreC mem zero));
7115 
7116   ins_cost(125); // XXX
7117   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7118   ins_encode %{
7119     __ movw($mem$$Address, r12);
7120   %}
7121   ins_pipe(ialu_mem_reg);
7122 %}
7123 
// Matches StoreC of a 16-bit constant (immI16); only enabled when
// UseStoreImmI16 is set.  Emits SizePrefix, then C7 /0 and a 16-bit
// immediate (Con16).
7124 instruct storeImmI16(memory mem, immI16 src)
7125 %{
7126   predicate(UseStoreImmI16);
7127   match(Set mem (StoreC mem src));
7128 
7129   ins_cost(150);
7130   format %{ "movw    $mem, $src\t# short/char" %}
7131   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7132   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7133   ins_pipe(ialu_mem_imm);
7134 %}
7135 
7136 // Store Byte Immediate
// Stores a zero byte by writing r12 with a movb.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero in that configuration.
// The format annotation says "byte" to agree with the StoreB match rule and
// the movb mnemonic.
7137 instruct storeImmB0(memory mem, immI0 zero)
7138 %{
7139   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7140   match(Set mem (StoreB mem zero));
7141 
7142   ins_cost(125); // XXX
7143   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7144   ins_encode %{
7145     __ movb($mem$$Address, r12);
7146   %}
7147   ins_pipe(ialu_mem_reg);
7148 %}
7149 
// Matches StoreB of an 8-bit constant (immI8): opcode C6 /0 followed by the
// immediate produced by the Con8or32 enc_class.
7150 instruct storeImmB(memory mem, immI8 src)
7151 %{
7152   match(Set mem (StoreB mem src));
7153 
7154   ins_cost(150); // XXX
7155   format %{ "movb    $mem, $src\t# byte" %}
7156   opcode(0xC6); /* C6 /0 */
7157   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7158   ins_pipe(ialu_mem_imm);
7159 %}
7160 
7161 // Store Aligned Packed Byte XMM register to memory
// Matches Store8B: the packed value lives in a regD operand and is written
// with the movq_st enc_class (MOVQ per the format string).
7162 instruct storeA8B(memory mem, regD src) %{
7163   match(Set mem (Store8B mem src));
7164   ins_cost(145);
7165   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7166   ins_encode( movq_st(mem, src));
7167   ins_pipe( pipe_slow );
7168 %}
7169 
7170 // Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C: same movq_st encoding as storeA8B, differing only in the
// ideal node matched and the format annotation.
7171 instruct storeA4C(memory mem, regD src) %{
7172   match(Set mem (Store4C mem src));
7173   ins_cost(145);
7174   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7175   ins_encode( movq_st(mem, src));
7176   ins_pipe( pipe_slow );
7177 %}
7178 
7179 // Store Aligned Packed Integer XMM register to memory
// Matches Store2I: same movq_st encoding as storeA8B, differing only in the
// ideal node matched and the format annotation.
7180 instruct storeA2I(memory mem, regD src) %{
7181   match(Set mem (Store2I mem src));
7182   ins_cost(145);
7183   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7184   ins_encode( movq_st(mem, src));
7185   ins_pipe( pipe_slow );
7186 %}
7187 
7188 // Store CMS card-mark Immediate
// Card-mark store of zero (StoreCM) done by writing r12 with a movb.  Only
// enabled with UseCompressedOops and a NULL narrow_oop_base; the format
// annotation (R12_heapbase==0) indicates r12 holds zero then.
7189 instruct storeImmCM0_reg(memory mem, immI0 zero)
7190 %{
7191   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7192   match(Set mem (StoreCM mem zero));
7193 
7194   ins_cost(125); // XXX
7195   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7196   ins_encode %{
7197     __ movb($mem$$Address, r12);
7198   %}
7199   ins_pipe(ialu_mem_reg);
7200 %}
7201 
// Card-mark store of zero (StoreCM) with an immediate: opcode C6 /0 followed
// by the immediate from Con8or32.  No predicate, so it is always available;
// ins_cost 150 ranks it behind storeImmCM0_reg when that rule is enabled.
7202 instruct storeImmCM0(memory mem, immI0 src)
7203 %{
7204   match(Set mem (StoreCM mem src));
7205 
7206   ins_cost(150); // XXX
7207   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7208   opcode(0xC6); /* C6 /0 */
7209   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7210   ins_pipe(ialu_mem_imm);
7211 %}
7212 
7213 // Store Aligned Packed Single Float XMM register to memory
// Matches Store2F: same movq_st encoding as the other packed stores
// (storeA8B, storeA4C, storeA2I).
7214 instruct storeA2F(memory mem, regD src) %{
7215   match(Set mem (Store2F mem src));
7216   ins_cost(145);
7217   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7218   ins_encode( movq_st(mem, src));
7219   ins_pipe( pipe_slow );
7220 %}
7221 
7222 // Store Float
// Matches StoreF from an XMM register: F3 0F 11 (movss per the format
// string), with the F3 prefix emitted ahead of the REX prefix.
7223 instruct storeF(memory mem, regF src)
7224 %{
7225   match(Set mem (StoreF mem src));
7226 
7227   ins_cost(95); // XXX
7228   format %{ "movss   $mem, $src\t# float" %}
7229   opcode(0xF3, 0x0F, 0x11);
7230   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7231   ins_pipe(pipe_slow); // XXX
7232 %}
7233 
7234 // Store immediate Float value (it is faster than store from XMM register)
// Stores float 0.0 by writing r12 with a 32-bit movl.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero then.  ins_cost 25 ranks it
// ahead of storeF_imm.
7235 instruct storeF0(memory mem, immF0 zero)
7236 %{
7237   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7238   match(Set mem (StoreF mem zero));
7239 
7240   ins_cost(25); // XXX
7241   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7242   ins_encode %{
7243     __ movl($mem$$Address, r12);
7244   %}
7245   ins_pipe(ialu_mem_reg);
7246 %}
7247 
// Matches StoreF of a float constant, written as an integer immediate store:
// opcode C7 /0 followed by the value from the Con32F_as_bits enc_class.
7248 instruct storeF_imm(memory mem, immF src)
7249 %{
7250   match(Set mem (StoreF mem src));
7251 
7252   ins_cost(50);
7253   format %{ "movl    $mem, $src\t# float" %}
7254   opcode(0xC7); /* C7 /0 */
7255   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7256   ins_pipe(ialu_mem_imm);
7257 %}
7258 
7259 // Store Double
// Matches StoreD from an XMM register: F2 0F 11 (movsd per the format
// string), with the F2 prefix emitted ahead of the REX prefix.
7260 instruct storeD(memory mem, regD src)
7261 %{
7262   match(Set mem (StoreD mem src));
7263 
7264   ins_cost(95); // XXX
7265   format %{ "movsd   $mem, $src\t# double" %}
7266   opcode(0xF2, 0x0F, 0x11);
7267   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7268   ins_pipe(pipe_slow); // XXX
7269 %}
7270 
7271 // Store immediate double 0.0 (it is faster than store from XMM register)
// Stores double 0.0 as an immediate: wide C7 /0 followed by the value from
// Con32F_as_bits.  Its predicate is the exact complement of storeD0's, so
// this rule applies whenever the R12-based form is not available.
7272 instruct storeD0_imm(memory mem, immD0 src)
7273 %{
7274   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7275   match(Set mem (StoreD mem src));
7276 
7277   ins_cost(50);
7278   format %{ "movq    $mem, $src\t# double 0." %}
7279   opcode(0xC7); /* C7 /0 */
7280   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7281   ins_pipe(ialu_mem_imm);
7282 %}
7283 
// Stores double 0.0 by writing r12 with a movq.  Only enabled with
// UseCompressedOops and a NULL narrow_oop_base; the format annotation
// (R12_heapbase==0) indicates r12 holds zero then.
7284 instruct storeD0(memory mem, immD0 zero)
7285 %{
7286   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7287   match(Set mem (StoreD mem zero));
7288 
7289   ins_cost(25); // XXX
7290   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7291   ins_encode %{
7292     __ movq($mem$$Address, r12);
7293   %}
7294   ins_pipe(ialu_mem_reg);
7295 %}
7296 
// Spills an int register to a stack slot: opcode 0x89 with a non-wide REX
// prefix (movl per the format string).
7297 instruct storeSSI(stackSlotI dst, rRegI src)
7298 %{
7299   match(Set dst src);
7300 
7301   ins_cost(100);
7302   format %{ "movl    $dst, $src\t# int stk" %}
7303   opcode(0x89);
7304   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7305   ins_pipe( ialu_mem_reg );
7306 %}
7307 
// Spills a long register to a stack slot: opcode 0x89 with the
// REX_reg_mem_wide prefix (movq per the format string).
7308 instruct storeSSL(stackSlotL dst, rRegL src)
7309 %{
7310   match(Set dst src);
7311 
7312   ins_cost(100);
7313   format %{ "movq    $dst, $src\t# long stk" %}
7314   opcode(0x89);
7315   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7316   ins_pipe(ialu_mem_reg);
7317 %}
7318 
// Spills a pointer register to a stack slot; identical encoding to storeSSL
// (opcode 0x89 with the REX_reg_mem_wide prefix).
7319 instruct storeSSP(stackSlotP dst, rRegP src)
7320 %{
7321   match(Set dst src);
7322 
7323   ins_cost(100);
7324   format %{ "movq    $dst, $src\t# ptr stk" %}
7325   opcode(0x89);
7326   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7327   ins_pipe(ialu_mem_reg);
7328 %}
7329 
// Spills a float XMM register to a stack slot: F3 0F 11 (movss per the
// format string), the same bytes as storeF.
7330 instruct storeSSF(stackSlotF dst, regF src)
7331 %{
7332   match(Set dst src);
7333 
7334   ins_cost(95); // XXX
7335   format %{ "movss   $dst, $src\t# float stk" %}
7336   opcode(0xF3, 0x0F, 0x11);
7337   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7338   ins_pipe(pipe_slow); // XXX
7339 %}
7340 
// Spills a double XMM register to a stack slot: F2 0F 11 (movsd per the
// format string), the same bytes as storeD.
7341 instruct storeSSD(stackSlotD dst, regD src)
7342 %{
7343   match(Set dst src);
7344 
7345   ins_cost(95); // XXX
7346   format %{ "movsd   $dst, $src\t# double stk" %}
7347   opcode(0xF2, 0x0F, 0x11);
7348   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7349   ins_pipe(pipe_slow); // XXX
7350 %}
7351 
7352 //----------BSWAP Instructions-------------------------------------------------
// Matches ReverseBytesI with source and destination in the same register
// ($dst is both input and result).  Emits 0F followed by C8 combined with the
// register via opc2_reg (bswapl per the format string).
7353 instruct bytes_reverse_int(rRegI dst) %{
7354   match(Set dst (ReverseBytesI dst));
7355 
7356   format %{ "bswapl  $dst" %}
7357   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7358   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7359   ins_pipe( ialu_reg );
7360 %}
7361 
// Matches ReverseBytesL in place on $dst.  Same opcode bytes as
// bytes_reverse_int but with the wide REX prefix (bswapq per the format
// string).
7362 instruct bytes_reverse_long(rRegL dst) %{
7363   match(Set dst (ReverseBytesL dst));
7364 
7365   format %{ "bswapq  $dst" %}
7366 
7367   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7368   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7369   ins_pipe( ialu_reg);
7370 %}
7371 
// Fuses ReverseBytesI(LoadI) into two back-to-back instructions: a load
// (opcode 0x8B) into $dst, followed by a byte swap of $dst (0F, then C8
// combined with the register via opc3_reg).
7372 instruct loadI_reversed(rRegI dst, memory src) %{
7373   match(Set dst (ReverseBytesI (LoadI src)));
7374 
7375   format %{ "bswap_movl $dst, $src" %}
7376   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7377   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7378   ins_pipe( ialu_reg_mem );
7379 %}
7380 
// Long counterpart of loadI_reversed: fuses ReverseBytesL(LoadL) into a
// wide load (opcode 0x8B) followed by a wide byte swap of $dst.
7381 instruct loadL_reversed(rRegL dst, memory src) %{
7382   match(Set dst (ReverseBytesL (LoadL src)));
7383 
7384   format %{ "bswap_movq $dst, $src" %}
7385   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7386   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7387   ins_pipe( ialu_reg_mem );
7388 %}
7389 
// Fuses StoreI(ReverseBytesI src): first a byte swap applied to $src (0F, C8
// via opc2_reg), then a store of $src (opcode 0x89) to $dst.
// NOTE(review): the swap is emitted on the input register $src itself and no
// effect() declares $src as modified -- confirm that later uses of the
// original value cannot observe the swapped register.
7390 instruct storeI_reversed(memory dst, rRegI src) %{
7391   match(Set dst (StoreI dst (ReverseBytesI  src)));
7392 
7393   format %{ "movl_bswap $dst, $src" %}
7394   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7395   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7396   ins_pipe( ialu_mem_reg );
7397 %}
7398 
// Long counterpart of storeI_reversed: a wide byte swap applied to $src
// followed by a wide store (opcode 0x89) of $src to $dst.
// NOTE(review): as in storeI_reversed, the swap is emitted on the input
// register $src with no declared effect -- confirm this is safe.
7399 instruct storeL_reversed(memory dst, rRegL src) %{
7400   match(Set dst (StoreL dst (ReverseBytesL  src)));
7401 
7402   format %{ "movq_bswap $dst, $src" %}
7403   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7404   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7405   ins_pipe( ialu_mem_reg );
7406 %}
7407 
7408 
7409 //---------- Zeros Count Instructions ------------------------------------------
7410 
// CountLeadingZerosI as a single lzcntl; used only when
// UseCountLeadingZerosInstruction is set.  Flags are declared killed.
7411 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7412   predicate(UseCountLeadingZerosInstruction);
7413   match(Set dst (CountLeadingZerosI src));
7414   effect(KILL cr);
7415 
7416   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7417   ins_encode %{
7418     __ lzcntl($dst$$Register, $src$$Register);
7419   %}
7420   ins_pipe(ialu_reg);
7421 %}
7422 
// CountLeadingZerosI fallback used when UseCountLeadingZerosInstruction is
// off.  bsrl leaves the index of the highest set bit in $dst; when the
// not-zero branch is not taken (zero source), $dst is forced to -1.  The
// result is then (BitsPerInt - 1) - $dst, so a zero input yields 32.
7423 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7424   predicate(!UseCountLeadingZerosInstruction);
7425   match(Set dst (CountLeadingZerosI src));
7426   effect(KILL cr);
7427 
7428   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7429             "jnz     skip\n\t"
7430             "movl    $dst, -1\n"
7431       "skip:\n\t"
7432             "negl    $dst\n\t"
7433             "addl    $dst, 31" %}
7434   ins_encode %{
7435     Register Rdst = $dst$$Register;
7436     Register Rsrc = $src$$Register;
7437     Label skip;
7438     __ bsrl(Rdst, Rsrc);
7439     __ jccb(Assembler::notZero, skip);
7440     __ movl(Rdst, -1);
7441     __ bind(skip);
7442     __ negl(Rdst);
7443     __ addl(Rdst, BitsPerInt - 1);
7444   %}
7445   ins_pipe(ialu_reg);
7446 %}
7447 
// CountLeadingZerosL as a single lzcntq; used only when
// UseCountLeadingZerosInstruction is set.  The result is an int register even
// though the source is a long.  Flags are declared killed.
7448 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7449   predicate(UseCountLeadingZerosInstruction);
7450   match(Set dst (CountLeadingZerosL src));
7451   effect(KILL cr);
7452 
7453   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7454   ins_encode %{
7455     __ lzcntq($dst$$Register, $src$$Register);
7456   %}
7457   ins_pipe(ialu_reg);
7458 %}
7459 
// CountLeadingZerosL fallback used when UseCountLeadingZerosInstruction is
// off.  Same sequence as countLeadingZerosI_bsr but with bsrq and
// BitsPerLong: result is (BitsPerLong - 1) - index, and a zero input (the
// not-zero branch not taken) yields 64.
7460 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7461   predicate(!UseCountLeadingZerosInstruction);
7462   match(Set dst (CountLeadingZerosL src));
7463   effect(KILL cr);
7464 
7465   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7466             "jnz     skip\n\t"
7467             "movl    $dst, -1\n"
7468       "skip:\n\t"
7469             "negl    $dst\n\t"
7470             "addl    $dst, 63" %}
7471   ins_encode %{
7472     Register Rdst = $dst$$Register;
7473     Register Rsrc = $src$$Register;
7474     Label skip;
7475     __ bsrq(Rdst, Rsrc);
7476     __ jccb(Assembler::notZero, skip);
7477     __ movl(Rdst, -1);
7478     __ bind(skip);
7479     __ negl(Rdst);
7480     __ addl(Rdst, BitsPerLong - 1);
7481   %}
7482   ins_pipe(ialu_reg);
7483 %}
7484 
// CountTrailingZerosI via bsfl.  When the not-zero branch is not taken (zero
// source), $dst is set to BitsPerInt (32); otherwise the bsfl result is used
// as is.  Flags are declared killed.
7485 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7486   match(Set dst (CountTrailingZerosI src));
7487   effect(KILL cr);
7488 
7489   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7490             "jnz     done\n\t"
7491             "movl    $dst, 32\n"
7492       "done:" %}
7493   ins_encode %{
7494     Register Rdst = $dst$$Register;
7495     Label done;
7496     __ bsfl(Rdst, $src$$Register);
7497     __ jccb(Assembler::notZero, done);
7498     __ movl(Rdst, BitsPerInt);
7499     __ bind(done);
7500   %}
7501   ins_pipe(ialu_reg);
7502 %}
7503 
// CountTrailingZerosL via bsfq.  When the not-zero branch is not taken (zero
// source), $dst is set to BitsPerLong (64); otherwise the bsfq result is used
// as is.  Flags are declared killed.
7504 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7505   match(Set dst (CountTrailingZerosL src));
7506   effect(KILL cr);
7507 
7508   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7509             "jnz     done\n\t"
7510             "movl    $dst, 64\n"
7511       "done:" %}
7512   ins_encode %{
7513     Register Rdst = $dst$$Register;
7514     Label done;
7515     __ bsfq(Rdst, $src$$Register);
7516     __ jccb(Assembler::notZero, done);
7517     __ movl(Rdst, BitsPerLong);
7518     __ bind(done);
7519   %}
7520   ins_pipe(ialu_reg);
7521 %}
7522 
7523 
7524 //---------- Population Count Instructions -------------------------------------
7525 
// PopCountI on a register operand, emitted as popcntl.  Only available when
// UsePopCountInstruction is set.
7526 instruct popCountI(rRegI dst, rRegI src) %{
7527   predicate(UsePopCountInstruction);
7528   match(Set dst (PopCountI src));
7529 
7530   format %{ "popcnt  $dst, $src" %}
7531   ins_encode %{
7532     __ popcntl($dst$$Register, $src$$Register);
7533   %}
7534   ins_pipe(ialu_reg);
7535 %}
7536 
// PopCountI with the LoadI folded in: popcntl reads its source directly from
// memory.  Only available when UsePopCountInstruction is set.
7537 instruct popCountI_mem(rRegI dst, memory mem) %{
7538   predicate(UsePopCountInstruction);
7539   match(Set dst (PopCountI (LoadI mem)));
7540 
7541   format %{ "popcnt  $dst, $mem" %}
7542   ins_encode %{
7543     __ popcntl($dst$$Register, $mem$$Address);
7544   %}
7545   ins_pipe(ialu_reg);
7546 %}
7547 
7548 // Note: Long.bitCount(long) returns an int.
// PopCountL on a long register, emitted as popcntq; the result goes to an
// int register.  Only available when UsePopCountInstruction is set.
7549 instruct popCountL(rRegI dst, rRegL src) %{
7550   predicate(UsePopCountInstruction);
7551   match(Set dst (PopCountL src));
7552 
7553   format %{ "popcnt  $dst, $src" %}
7554   ins_encode %{
7555     __ popcntq($dst$$Register, $src$$Register);
7556   %}
7557   ins_pipe(ialu_reg);
7558 %}
7559 
7560 // Note: Long.bitCount(long) returns an int.
// PopCountL with the LoadL folded in: popcntq reads its source directly from
// memory; the result goes to an int register.  Only available when
// UsePopCountInstruction is set.
7561 instruct popCountL_mem(rRegI dst, memory mem) %{
7562   predicate(UsePopCountInstruction);
7563   match(Set dst (PopCountL (LoadL mem)));
7564 
7565   format %{ "popcnt  $dst, $mem" %}
7566   ins_encode %{
7567     __ popcntq($dst$$Register, $mem$$Address);
7568   %}
7569   ins_pipe(ialu_reg);
7570 %}
7571 
7572 
7573 //----------MemBar Instructions-----------------------------------------------
7574 // Memory barrier flavors
7575 
// MemBarAcquire emits no code on this platform: size(0), empty ins_encode and
// zero cost.
7576 instruct membar_acquire()
7577 %{
7578   match(MemBarAcquire);
7579   ins_cost(0);
7580 
7581   size(0);
7582   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7583   ins_encode();
7584   ins_pipe(empty);
7585 %}
7586 
// MemBarAcquire variant selected when Matcher::prior_fast_lock(n) holds.
// Also emits nothing; it differs from membar_acquire only in the predicate
// and in the format text, which attributes the ordering to the CMPXCHG of
// the preceding FastLock.
7587 instruct membar_acquire_lock()
7588 %{
7589   match(MemBarAcquire);
7590   predicate(Matcher::prior_fast_lock(n));
7591   ins_cost(0);
7592 
7593   size(0);
7594   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7595   ins_encode();
7596   ins_pipe(empty);
7597 %}
7598 
// MemBarRelease emits no code on this platform: size(0), empty ins_encode and
// zero cost.
7599 instruct membar_release()
7600 %{
7601   match(MemBarRelease);
7602   ins_cost(0);
7603 
7604   size(0);
7605   format %{ "MEMBAR-release ! (empty encoding)" %}
7606   ins_encode();
7607   ins_pipe(empty);
7608 %}
7609 
// MemBarRelease variant selected when Matcher::post_fast_unlock(n) holds.
// Also emits nothing; it differs from membar_release only in the predicate
// and in the format text, which notes that a FastUnlock follows.
7610 instruct membar_release_lock()
7611 %{
7612   match(MemBarRelease);
7613   predicate(Matcher::post_fast_unlock(n));
7614   ins_cost(0);
7615 
7616   size(0);
7617   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7618   ins_encode();
7619   ins_pipe(empty);
7620 %}
7621 
// MemBarVolatile: the only barrier rule here with a real encoding.  It calls
// MacroAssembler::membar(Assembler::StoreLoad) and declares the flags killed.
// The format template prints a "lock addl [rsp + #0], 0" when os::is_MP() and
// an empty-encoding note otherwise.  ins_cost 400 is well above the zero-cost
// unnecessary_membar_volatile rule.
7622 instruct membar_volatile(rFlagsReg cr) %{
7623   match(MemBarVolatile);
7624   effect(KILL cr);
7625   ins_cost(400);
7626 
7627   format %{ 
7628     $$template
7629     if (os::is_MP()) {
7630       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7631     } else {
7632       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7633     }
7634   %}
7635   ins_encode %{
7636     __ membar(Assembler::StoreLoad);
7637   %}
7638   ins_pipe(pipe_slow);
7639 %}
7640 
// MemBarVolatile variant selected when Matcher::post_store_load_barrier(n)
// holds: emits nothing (size 0, empty ins_encode) at zero cost.
7641 instruct unnecessary_membar_volatile()
7642 %{
7643   match(MemBarVolatile);
7644   predicate(Matcher::post_store_load_barrier(n));
7645   ins_cost(0);
7646 
7647   size(0);
7648   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7649   ins_encode();
7650   ins_pipe(empty);
7651 %}
7652 
7653 //----------Move Instructions--------------------------------------------------
7654 
// CastX2P (long to pointer): a register copy done by the enc_copy_wide
// enc_class, shown as a movq.
7655 instruct castX2P(rRegP dst, rRegL src)
7656 %{
7657   match(Set dst (CastX2P src));
7658 
7659   format %{ "movq    $dst, $src\t# long->ptr" %}
7660   ins_encode(enc_copy_wide(dst, src));
7661   ins_pipe(ialu_reg_reg); // XXX
7662 %}
7663 
// CastP2X (pointer to long): the mirror of castX2P, using the same
// enc_copy_wide register copy.
7664 instruct castP2X(rRegL dst, rRegP src)
7665 %{
7666   match(Set dst (CastP2X src));
7667 
7668   format %{ "movq    $dst, $src\t# ptr -> long" %}
7669   ins_encode(enc_copy_wide(dst, src));
7670   ins_pipe(ialu_reg_reg); // XXX
7671 %}
7672 
7673 
7674 // Convert oop pointer into compressed form
// EncodeP for pointers whose type is not known to be NotNull.  Copies $src
// into $dst when they are different registers, then compresses $dst in place
// with MacroAssembler::encode_heap_oop.  Flags are declared killed.
7675 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7676   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7677   match(Set dst (EncodeP src));
7678   effect(KILL cr);
7679   format %{ "encode_heap_oop $dst,$src" %}
7680   ins_encode %{
7681     Register s = $src$$Register;
7682     Register d = $dst$$Register;
7683     if (s != d) {
7684       __ movq(d, s);
7685     }
7686     __ encode_heap_oop(d);
7687   %}
7688   ins_pipe(ialu_reg_long);
7689 %}
7690 
// EncodeP for pointers whose type is NotNull: delegates to the two-register
// form of MacroAssembler::encode_heap_oop_not_null.  Flags are declared
// killed.
7691 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7692   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7693   match(Set dst (EncodeP src));
7694   effect(KILL cr);
7695   format %{ "encode_heap_oop_not_null $dst,$src" %}
7696   ins_encode %{
7697     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7698   %}
7699   ins_pipe(ialu_reg_long);
7700 %}
7701 
// DecodeN for results whose oop type is neither NotNull nor Constant.
// Copies $src into $dst when they are different registers, then expands
// $dst in place with MacroAssembler::decode_heap_oop.  Flags are declared
// killed.
7702 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7703   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7704             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7705   match(Set dst (DecodeN src));
7706   effect(KILL cr);
7707   format %{ "decode_heap_oop $dst,$src" %}
7708   ins_encode %{
7709     Register s = $src$$Register;
7710     Register d = $dst$$Register;
7711     if (s != d) {
7712       __ movq(d, s);
7713     }
7714     __ decode_heap_oop(d);
7715   %}
7716   ins_pipe(ialu_reg_long);
7717 %}
7718 
7719 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{  // DecodeN when the result type is NotNull or Constant; declares no flags effect
7720   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7721             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7722   match(Set dst (DecodeN src));
7723   format %{ "decode_heap_oop_not_null $dst,$src" %}
7724   ins_encode %{
7725     Register s = $src$$Register;
7726     Register d = $dst$$Register;
7727     if (s != d) {  // distinct registers: use the two-register assembler form
7728       __ decode_heap_oop_not_null(d, s);
7729     } else {  // same register: use the in-place form
7730       __ decode_heap_oop_not_null(d);
7731     }
7732   %}
7733   ins_pipe(ialu_reg_long);
7734 %}
7735 
7736 
7737 //----------Conditional Move---------------------------------------------------
7738 // Jump
7739 // dummy instruction for generating temp registers
7740 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{  // table Jump indexed by a left-shifted switch value
7741   match(Jump (LShiftL switch_val shift));
7742   ins_cost(350);
7743   predicate(false);  // constant-false predicate: this rule is never selected by the matcher
7744   effect(TEMP dest);  // dest is a scratch register, not a result
7745 
7746   format %{ "leaq    $dest, table_base\n\t"
7747             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7748   ins_encode(jump_enc_offset(switch_val, shift, dest));
7749   ins_pipe(pipe_jmp);
7750   ins_pc_relative(1);
7751 %}
7752 
7753 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{  // table Jump indexed by shifted switch value plus a constant offset
7754   match(Jump (AddL (LShiftL switch_val shift) offset));
7755   ins_cost(350);
7756   effect(TEMP dest);  // dest is a scratch register that receives the table base (see format)
7757 
7758   format %{ "leaq    $dest, table_base\n\t"
7759             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7760   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7761   ins_pipe(pipe_jmp);
7762   ins_pc_relative(1);
7763 %}
7764 
7765 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{  // table Jump indexed directly by the switch value
7766   match(Jump switch_val);
7767   ins_cost(350);
7768   effect(TEMP dest);  // dest is a scratch register that receives the table base (see format)
7769 
7770   format %{ "leaq    $dest, table_base\n\t"
7771             "jmp     [$dest + $switch_val]\n\t" %}
7772   ins_encode(jump_enc(switch_val, dest));
7773   ins_pipe(pipe_jmp);
7774   ins_pc_relative(1);
7775 %}
7776 
7777 // Conditional move
7778 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)  // CMoveI, register source, signed condition
7779 %{
7780   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7781 
7782   ins_cost(200); // XXX
7783   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7784   opcode(0x0F, 0x40);  // 0F 40 is the base of the x86 CMOVcc opcodes; enc_cmov (defined elsewhere) presumably adds the condition code
7785   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7786   ins_pipe(pipe_cmov_reg);
7787 %}
7788 
7789 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{  // CMoveI, register source, unsigned condition
7790   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7791 
7792   ins_cost(200); // XXX
7793   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7794   opcode(0x0F, 0x40);  // same CMOVcc base opcode as the signed form; only the operand classes differ
7795   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7796   ins_pipe(pipe_cmov_reg);
7797 %}
7798 
7799 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{  // CMoveI on cmpOpUCF/rFlagsRegUCF operands
7800   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7801   ins_cost(200);
7802   expand %{  // no encoding of its own: expands to the unsigned register form
7803     cmovI_regU(cop, cr, dst, src);
7804   %}
7805 %}
7806 
7807 // Conditional move
7808 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{  // CMoveI with the LoadI folded in as a memory operand, signed condition
7809   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7810 
7811   ins_cost(250); // XXX
7812   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7813   opcode(0x0F, 0x40);  // CMOVcc base opcode
7814   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7815   ins_pipe(pipe_cmov_mem);
7816 %}
7817 
7818 // Conditional move
7819 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)  // CMoveI with the LoadI folded in as a memory operand, unsigned condition
7820 %{
7821   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7822 
7823   ins_cost(250); // XXX
7824   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7825   opcode(0x0F, 0x40);  // CMOVcc base opcode
7826   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7827   ins_pipe(pipe_cmov_mem);
7828 %}
7829 
7830 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{  // memory-source CMoveI on cmpOpUCF/rFlagsRegUCF operands
7831   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7832   ins_cost(250);
7833   expand %{  // no encoding of its own: expands to the unsigned memory form
7834     cmovI_memU(cop, cr, dst, src);
7835   %}
7836 %}
7837 
7838 // Conditional move
7839 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)  // CMoveN (compressed pointer), register source, signed condition
7840 %{
7841   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7842 
7843   ins_cost(200); // XXX
7844   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7845   opcode(0x0F, 0x40);  // CMOVcc base opcode; same non-wide encoding classes as cmovI_reg
7846   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7847   ins_pipe(pipe_cmov_reg);
7848 %}
7849 
7850 // Conditional move
7851 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)  // CMoveN (compressed pointer), register source, unsigned condition
7852 %{
7853   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7854 
7855   ins_cost(200); // XXX
7856   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7857   opcode(0x0F, 0x40);  // CMOVcc base opcode
7858   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7859   ins_pipe(pipe_cmov_reg);
7860 %}
7861 
7862 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{  // CMoveN on cmpOpUCF/rFlagsRegUCF operands
7863   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7864   ins_cost(200);
7865   expand %{  // no encoding of its own: expands to the unsigned register form
7866     cmovN_regU(cop, cr, dst, src);
7867   %}
7868 %}
7869 
7870 // Conditional move
7871 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)  // CMoveP, register source, signed condition
7872 %{
7873   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7874 
7875   ins_cost(200); // XXX
7876   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7877   opcode(0x0F, 0x40);  // CMOVcc base opcode
7878   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // _wide REX class (defined elsewhere) for the 64-bit cmovq form
7879   ins_pipe(pipe_cmov_reg);  // XXX
7880 %}
7881 
7882 // Conditional move
7883 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)  // CMoveP, register source, unsigned condition
7884 %{
7885   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7886 
7887   ins_cost(200); // XXX
7888   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7889   opcode(0x0F, 0x40);  // CMOVcc base opcode
7890   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // _wide REX class for the 64-bit cmovq form
7891   ins_pipe(pipe_cmov_reg); // XXX
7892 %}
7893 
7894 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{  // CMoveP on cmpOpUCF/rFlagsRegUCF operands
7895   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7896   ins_cost(200);
7897   expand %{  // no encoding of its own: expands to the unsigned register form
7898     cmovP_regU(cop, cr, dst, src);
7899   %}
7900 %}
7901 
7902 // DISABLED: Requires the ADLC to emit a bottom_type call that
7903 // correctly meets the two pointer arguments; one is an incoming
7904 // register but the other is a memory operand.  ALSO appears to
7905 // be buggy with implicit null checks.
7906 //
7907 //// Conditional move
7908 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7909 //%{
7910 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7911 //  ins_cost(250);
7912 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7913 //  opcode(0x0F,0x40);
7914 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7915 //  ins_pipe( pipe_cmov_mem );
7916 //%}
7917 //
7918 //// Conditional move
7919 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7920 //%{
7921 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7922 //  ins_cost(250);
7923 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7924 //  opcode(0x0F,0x40);
7925 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7926 //  ins_pipe( pipe_cmov_mem );
7927 //%}
7928 
7929 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)  // CMoveL, register source, signed condition
7930 %{
7931   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7932 
7933   ins_cost(200); // XXX
7934   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7935   opcode(0x0F, 0x40);  // CMOVcc base opcode
7936   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // _wide REX class for the 64-bit cmovq form
7937   ins_pipe(pipe_cmov_reg);  // XXX
7938 %}
7939 
7940 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)  // CMoveL with the LoadL folded in as a memory operand, signed condition
7941 %{
7942   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7943 
7944   ins_cost(200); // XXX  NOTE(review): cmovI_mem uses 250 for its memory form; confirm 200 is intended here
7945   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7946   opcode(0x0F, 0x40);  // CMOVcc base opcode
7947   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7948   ins_pipe(pipe_cmov_mem);  // XXX
7949 %}
7950 
7951 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)  // CMoveL, register source, unsigned condition
7952 %{
7953   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7954 
7955   ins_cost(200); // XXX
7956   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7957   opcode(0x0F, 0x40);  // CMOVcc base opcode
7958   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // _wide REX class for the 64-bit cmovq form
7959   ins_pipe(pipe_cmov_reg); // XXX
7960 %}
7961 
7962 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{  // CMoveL on cmpOpUCF/rFlagsRegUCF operands
7963   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7964   ins_cost(200);
7965   expand %{  // no encoding of its own: expands to the unsigned register form
7966     cmovL_regU(cop, cr, dst, src);
7967   %}
7968 %}
7969 
7970 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)  // CMoveL with the LoadL folded in as a memory operand, unsigned condition
7971 %{
7972   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7973 
7974   ins_cost(200); // XXX  NOTE(review): cmovI_memU uses 250 for its memory form; confirm 200 is intended here
7975   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7976   opcode(0x0F, 0x40);  // CMOVcc base opcode
7977   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7978   ins_pipe(pipe_cmov_mem); // XXX
7979 %}
7980 
7981 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{  // memory-source CMoveL on cmpOpUCF/rFlagsRegUCF operands
7982   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7983   ins_cost(200);
7984   expand %{  // no encoding of its own: expands to the unsigned memory form
7985     cmovL_memU(cop, cr, dst, src);
7986   %}
7987 %}
7988 
7989 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)  // CMoveF, signed condition; per the format, a branch around a movss
7990 %{
7991   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7992 
7993   ins_cost(200); // XXX
7994   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7995             "movss     $dst, $src\n"
7996     "skip:" %}
7997   ins_encode(enc_cmovf_branch(cop, dst, src));  // encoding class defined elsewhere in this file
7998   ins_pipe(pipe_slow);
7999 %}
8000 
8001 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8002 // %{
8003 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8004 
8005 //   ins_cost(200); // XXX
8006 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8007 //             "movss     $dst, $src\n"
8008 //     "skip:" %}
8009 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8010 //   ins_pipe(pipe_slow);
8011 // %}
8012 
8013 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)  // CMoveF, unsigned condition; per the format, a branch around a movss
8014 %{
8015   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8016 
8017   ins_cost(200); // XXX
8018   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8019             "movss     $dst, $src\n"
8020     "skip:" %}
8021   ins_encode(enc_cmovf_branch(cop, dst, src));  // same encoding class as the signed form
8022   ins_pipe(pipe_slow);
8023 %}
8024 
8025 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{  // CMoveF on cmpOpUCF/rFlagsRegUCF operands
8026   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8027   ins_cost(200);
8028   expand %{  // no encoding of its own: expands to the unsigned register form
8029     cmovF_regU(cop, cr, dst, src);
8030   %}
8031 %}
8032 
8033 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)  // CMoveD, signed condition; per the format, a branch around a movsd
8034 %{
8035   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8036 
8037   ins_cost(200); // XXX
8038   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8039             "movsd     $dst, $src\n"
8040     "skip:" %}
8041   ins_encode(enc_cmovd_branch(cop, dst, src));  // encoding class defined elsewhere in this file
8042   ins_pipe(pipe_slow);
8043 %}
8044 
8045 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)  // CMoveD, unsigned condition; per the format, a branch around a movsd
8046 %{
8047   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8048 
8049   ins_cost(200); // XXX
8050   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8051             "movsd     $dst, $src\n"
8052     "skip:" %}
8053   ins_encode(enc_cmovd_branch(cop, dst, src));  // same encoding class as the signed form
8054   ins_pipe(pipe_slow);
8055 %}
8056 
8057 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{  // CMoveD on cmpOpUCF/rFlagsRegUCF operands
8058   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8059   ins_cost(200);
8060   expand %{  // no encoding of its own: expands to the unsigned register form
8061     cmovD_regU(cop, cr, dst, src);
8062   %}
8063 %}
8064 
8065 //----------Arithmetic Instructions--------------------------------------------
8066 //----------Addition Instructions----------------------------------------------
8067 
8068 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)  // int add, register += register
8069 %{
8070   match(Set dst (AddI dst src));  // two-address form: dst is also the first input
8071   effect(KILL cr);  // condition flags are declared clobbered
8072 
8073   format %{ "addl    $dst, $src\t# int" %}
8074   opcode(0x03);  // x86 ADD r32, r/m32
8075   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8076   ins_pipe(ialu_reg_reg);
8077 %}
8078 
8079 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)  // int add, register += immediate
8080 %{
8081   match(Set dst (AddI dst src));
8082   effect(KILL cr);  // condition flags are declared clobbered
8083 
8084   format %{ "addl    $dst, $src\t# int" %}
8085   opcode(0x81, 0x00); /* /0 id */
8086   ins_encode(OpcSErm(dst, src), Con8or32(src));  // encoding classes defined elsewhere; names suggest an 8- or 32-bit immediate is chosen by value
8087   ins_pipe( ialu_reg );
8088 %}
8089 
8090 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)  // int add, register += memory (LoadI folded into the add)
8091 %{
8092   match(Set dst (AddI dst (LoadI src)));
8093   effect(KILL cr);  // condition flags are declared clobbered
8094 
8095   ins_cost(125); // XXX
8096   format %{ "addl    $dst, $src\t# int" %}
8097   opcode(0x03);  // x86 ADD r32, r/m32
8098   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8099   ins_pipe(ialu_reg_mem);
8100 %}
8101 
8102 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)  // int add, memory += register (load, add and store matched as one instruction)
8103 %{
8104   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load and store must name the same memory operand
8105   effect(KILL cr);  // condition flags are declared clobbered
8106 
8107   ins_cost(150); // XXX
8108   format %{ "addl    $dst, $src\t# int" %}
8109   opcode(0x01); /* Opcode 01 /r */
8110   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
8111   ins_pipe(ialu_mem_reg);
8112 %}
8113 
8114 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)  // int add, memory += immediate
8115 %{
8116   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load and store must name the same memory operand
8117   effect(KILL cr);  // condition flags are declared clobbered
8118 
8119   ins_cost(125); // XXX
8120   format %{ "addl    $dst, $src\t# int" %}
8121   opcode(0x81); /* Opcode 81 /0 id */
8122   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // 0x00 is the /0 opcode extension named in the opcode comment
8123   ins_pipe(ialu_mem_imm);
8124 %}
8125 
8126 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)  // int add of an immI1 operand emitted as incl
8127 %{
8128   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8129   match(Set dst (AddI dst src));
8130   effect(KILL cr);  // condition flags are declared clobbered
8131 
8132   format %{ "incl    $dst\t# int" %}
8133   opcode(0xFF, 0x00); // FF /0
8134   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // src is not encoded; the operand class already fixes its value
8135   ins_pipe(ialu_reg);
8136 %}
8137 
8138 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)  // int memory += immI1 operand, emitted as incl on memory
8139 %{
8140   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8141   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8142   effect(KILL cr);  // condition flags are declared clobbered
8143 
8144   ins_cost(125); // XXX
8145   format %{ "incl    $dst\t# int" %}
8146   opcode(0xFF); /* Opcode FF /0 */
8147   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));  // 0x00 is the /0 opcode extension
8148   ins_pipe(ialu_mem_imm);
8149 %}
8150 
8151 // XXX why does that use AddI
8152 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)  // int add of an immI_M1 operand emitted as decl
8153 %{
8154   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8155   match(Set dst (AddI dst src));  // matched as an add of the immI_M1 constant rather than as a SubI
8156   effect(KILL cr);  // condition flags are declared clobbered
8157 
8158   format %{ "decl    $dst\t# int" %}
8159   opcode(0xFF, 0x01); // FF /1
8160   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8161   ins_pipe(ialu_reg);
8162 %}
8163 
8164 // XXX why does that use AddI
8165 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)  // int memory += immI_M1 operand, emitted as decl on memory
8166 %{
8167   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8169   effect(KILL cr);  // condition flags are declared clobbered
8170 
8171   ins_cost(125); // XXX
8172   format %{ "decl    $dst\t# int" %}
8173   opcode(0xFF); /* Opcode FF /1 */
8174   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));  // 0x01 is the /1 opcode extension
8175   ins_pipe(ialu_mem_imm);
8176 %}
8177 
8178 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)  // three-operand int add via lea; declares no flags effect
8179 %{
8180   match(Set dst (AddI src0 src1));  // dst may differ from src0, unlike the two-address addI forms
8181 
8182   ins_cost(110);
8183   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8184   opcode(0x8D); /* 0x8D /r */
8185   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  0x67 is the address-size prefix shown as addr32 in the format
8186   ins_pipe(ialu_reg_reg);
8187 %}
8188 
8189 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)  // long add, register += register
8190 %{
8191   match(Set dst (AddL dst src));  // two-address form: dst is also the first input
8192   effect(KILL cr);  // condition flags are declared clobbered
8193 
8194   format %{ "addq    $dst, $src\t# long" %}
8195   opcode(0x03);  // x86 ADD r, r/m; the _wide REX class selects the 64-bit addq form
8196   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8197   ins_pipe(ialu_reg_reg);
8198 %}
8199 
8200 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)  // long add, register += immL32 immediate
8201 %{
8202   match(Set dst (AddL dst src));
8203   effect(KILL cr);  // condition flags are declared clobbered
8204 
8205   format %{ "addq    $dst, $src\t# long" %}
8206   opcode(0x81, 0x00); /* /0 id */
8207   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // 64-bit counterpart of the OpcSErm class used by addI_rReg_imm
8208   ins_pipe( ialu_reg );
8209 %}
8210 
8211 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)  // long add, register += memory (LoadL folded into the add)
8212 %{
8213   match(Set dst (AddL dst (LoadL src)));
8214   effect(KILL cr);  // condition flags are declared clobbered
8215 
8216   ins_cost(125); // XXX
8217   format %{ "addq    $dst, $src\t# long" %}
8218   opcode(0x03);  // x86 ADD r, r/m
8219   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8220   ins_pipe(ialu_reg_mem);
8221 %}
8222 
8223 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)  // long add, memory += register
8224 %{
8225   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load and store must name the same memory operand
8226   effect(KILL cr);  // condition flags are declared clobbered
8227 
8228   ins_cost(150); // XXX
8229   format %{ "addq    $dst, $src\t# long" %}
8230   opcode(0x01); /* Opcode 01 /r */
8231   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
8232   ins_pipe(ialu_mem_reg);
8233 %}
8234 
8235 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)  // long add, memory += immL32 immediate
8236 %{
8237   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load and store must name the same memory operand
8238   effect(KILL cr);  // condition flags are declared clobbered
8239 
8240   ins_cost(125); // XXX
8241   format %{ "addq    $dst, $src\t# long" %}
8242   opcode(0x81); /* Opcode 81 /0 id */
8243   ins_encode(REX_mem_wide(dst),
8244              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // 0x00 is the /0 opcode extension
8245   ins_pipe(ialu_mem_imm);
8246 %}
8247 
8248 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)  // long add of an immL1 operand emitted as incq; dst is a long register like decL_rReg (it was rRegI)
8249 %{
8250   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8251   match(Set dst (AddL dst src));  // AddL works on long values, so dst must be in the long register class
8252   effect(KILL cr);  // condition flags are declared clobbered
8253 
8254   format %{ "incq    $dst\t# long" %}
8255   opcode(0xFF, 0x00); // FF /0
8256   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // _wide REX class for the 64-bit incq form; src is not encoded
8257   ins_pipe(ialu_reg);
8258 %}
8259 
8260 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)  // long memory += immL1 operand, emitted as incq on memory
8261 %{
8262   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8263   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8264   effect(KILL cr);  // condition flags are declared clobbered
8265 
8266   ins_cost(125); // XXX
8267   format %{ "incq    $dst\t# long" %}
8268   opcode(0xFF); /* Opcode FF /0 */
8269   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));  // 0x00 is the /0 opcode extension
8270   ins_pipe(ialu_mem_imm);
8271 %}
8272 
8273 // XXX why does that use AddL
8274 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)  // long add of an immL_M1 operand emitted as decq
8275 %{
8276   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8277   match(Set dst (AddL dst src));  // matched as an add of the immL_M1 constant rather than as a SubL
8278   effect(KILL cr);  // condition flags are declared clobbered
8279 
8280   format %{ "decq    $dst\t# long" %}
8281   opcode(0xFF, 0x01); // FF /1
8282   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8283   ins_pipe(ialu_reg);
8284 %}
8285 
8286 // XXX why does that use AddL
8287 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)  // long memory += immL_M1 operand, emitted as decq on memory
8288 %{
8289   predicate(UseIncDec);  // only selectable when the UseIncDec flag is set
8290   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8291   effect(KILL cr);  // condition flags are declared clobbered
8292 
8293   ins_cost(125); // XXX
8294   format %{ "decq    $dst\t# long" %}
8295   opcode(0xFF); /* Opcode FF /1 */
8296   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));  // 0x01 is the /1 opcode extension
8297   ins_pipe(ialu_mem_imm);
8298 %}
8299 
8300 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)  // three-operand long add via leaq; declares no flags effect
8301 %{
8302   match(Set dst (AddL src0 src1));  // dst may differ from src0, unlike the two-address addL forms
8303 
8304   ins_cost(110);
8305   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8306   opcode(0x8D); /* 0x8D /r */
8307   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8308   ins_pipe(ialu_reg_reg);
8309 %}
8310 
8311 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)  // pointer add, pointer register += long register
8312 %{
8313   match(Set dst (AddP dst src));
8314   effect(KILL cr);  // condition flags are declared clobbered
8315 
8316   format %{ "addq    $dst, $src\t# ptr" %}
8317   opcode(0x03);  // x86 ADD r, r/m; same encoding shape as addL_rReg
8318   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8319   ins_pipe(ialu_reg_reg);
8320 %}
8321 
8322 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)  // pointer add, pointer register += immL32 immediate
8323 %{
8324   match(Set dst (AddP dst src));
8325   effect(KILL cr);  // condition flags are declared clobbered
8326 
8327   format %{ "addq    $dst, $src\t# ptr" %}
8328   opcode(0x81, 0x00); /* /0 id */
8329   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // same encoding shape as addL_rReg_imm
8330   ins_pipe( ialu_reg );
8331 %}
8332 
8333 // XXX addP mem ops ????
8334 
8335 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)  // three-operand pointer add via leaq; declares no flags effect
8336 %{
8337   match(Set dst (AddP src0 src1));  // dst may differ from src0, unlike the two-address addP forms
8338 
8339   ins_cost(110);
8340   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8341   opcode(0x8D); /* 0x8D /r */
8342   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8343   ins_pipe(ialu_reg_reg);
8344 %}
8345 
8346 instruct checkCastPP(rRegP dst)  // CheckCastPP: matched in place on dst and emits no machine code
8347 %{
8348   match(Set dst (CheckCastPP dst));
8349 
8350   size(0);  // zero bytes of code, consistent with the empty encoding below
8351   format %{ "# checkcastPP of $dst" %}
8352   ins_encode(/* empty encoding */);
8353   ins_pipe(empty);
8354 %}
8355 
8356 instruct castPP(rRegP dst)  // CastPP: matched in place on dst and emits no machine code
8357 %{
8358   match(Set dst (CastPP dst));
8359 
8360   size(0);  // zero bytes of code, consistent with the empty encoding below
8361   format %{ "# castPP of $dst" %}
8362   ins_encode(/* empty encoding */);
8363   ins_pipe(empty);
8364 %}
8365 
8366 instruct castII(rRegI dst)  // CastII: matched in place on dst and emits no machine code
8367 %{
8368   match(Set dst (CastII dst));
8369 
8370   size(0);  // zero bytes of code, consistent with the empty encoding below
8371   format %{ "# castII of $dst" %}
8372   ins_encode(/* empty encoding */);
8373   ins_cost(0);  // explicit zero cost; checkCastPP and castPP leave the cost unspecified
8374   ins_pipe(empty);
8375 %}
8376 
8377 // LoadP-locked same as a regular LoadP when used with compare-swap
8378 instruct loadPLocked(rRegP dst, memory mem)  // LoadPLocked emitted as an ordinary 64-bit load
8379 %{
8380   match(Set dst (LoadPLocked mem));
8381 
8382   ins_cost(125); // XXX
8383   format %{ "movq    $dst, $mem\t# ptr locked" %}
8384   opcode(0x8B);  // x86 MOV r, r/m; no lock prefix appears in the encoding
8385   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8386   ins_pipe(ialu_reg_mem); // XXX
8387 %}
8388 
8389 // LoadL-locked - same as a regular LoadL when used with compare-swap
8390 instruct loadLLocked(rRegL dst, memory mem)  // LoadLLocked emitted as an ordinary 64-bit load
8391 %{
8392   match(Set dst (LoadLLocked mem));
8393 
8394   ins_cost(125); // XXX
8395   format %{ "movq    $dst, $mem\t# long locked" %}
8396   opcode(0x8B);  // x86 MOV r, r/m; no lock prefix appears in the encoding
8397   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8398   ins_pipe(ialu_reg_mem); // XXX
8399 %}
8400 
8401 // Conditional-store of the updated heap-top.
8402 // Used during allocation of the shared heap.
8403 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8404 
8405 instruct storePConditional(memory heap_top_ptr,
8406                            rax_RegP oldval, rRegP newval,
8407                            rFlagsReg cr)
8408 %{  // StorePConditional: lock cmpxchgq on the heap-top word; the result is the flags register
8409   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));  // oldval is pinned to rax by its operand class
8410  
8411   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8412             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8413   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8414   ins_encode(lock_prefix,
8415              REX_reg_mem_wide(newval, heap_top_ptr),
8416              OpcP, OpcS,
8417              reg_mem(newval, heap_top_ptr));
8418   ins_pipe(pipe_cmpxchg);  // NOTE(review): storeIConditional and storeLConditional declare KILL oldval but this rule does not; confirm that is intended
8419 %}
8420 
8421 // Conditional-store of an int value.
8422 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8423 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)  // StoreIConditional: lock cmpxchgl; the result is the flags register
8424 %{
8425   match(Set cr (StoreIConditional mem (Binary oldval newval)));  // oldval is pinned to rax by its operand class
8426   effect(KILL oldval);  // x86 CMPXCHG writes the current memory value into rax when the compare fails
8427 
8428   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8429   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8430   ins_encode(lock_prefix,
8431              REX_reg_mem(newval, mem),
8432              OpcP, OpcS,
8433              reg_mem(newval, mem));
8434   ins_pipe(pipe_cmpxchg);
8435 %}
8436 
8437 // Conditional-store of a long value.
8438 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8439 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)  // StoreLConditional: lock cmpxchgq; the result is the flags register
8440 %{
8441   match(Set cr (StoreLConditional mem (Binary oldval newval)));  // oldval is pinned to rax by its operand class
8442   effect(KILL oldval);  // x86 CMPXCHG writes the current memory value into rax when the compare fails
8443 
8444   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8445   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8446   ins_encode(lock_prefix,
8447              REX_reg_mem_wide(newval, mem),
8448              OpcP, OpcS,
8449              reg_mem(newval, mem));
8450   ins_pipe(pipe_cmpxchg);
8451 %}
8452 
8453 
8454 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8455 instruct compareAndSwapP(rRegI res,
8456                          memory mem_ptr,
8457                          rax_RegP oldval, rRegP newval,
8458                          rFlagsReg cr)
8459 %{  // CompareAndSwapP: lock cmpxchgq, then the success flag is materialized into res as 0 or 1
8460   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8461   effect(KILL cr, KILL oldval);  // flags are overwritten; x86 CMPXCHG writes rax when the compare fails
8462 
8463   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8464             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8465             "sete    $res\n\t"
8466             "movzbl  $res, $res" %}
8467   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8468   ins_encode(lock_prefix,
8469              REX_reg_mem_wide(newval, mem_ptr),
8470              OpcP, OpcS,
8471              reg_mem(newval, mem_ptr),
8472              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8473              REX_reg_breg(res, res), // movzbl
8474              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8475   ins_pipe( pipe_cmpxchg );
8476 %}
8477 
8478 instruct compareAndSwapL(rRegI res,
8479                          memory mem_ptr,
8480                          rax_RegL oldval, rRegL newval,
8481                          rFlagsReg cr)
8482 %{  // CompareAndSwapL: lock cmpxchgq, then the success flag is materialized into res as 0 or 1
8483   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8484   effect(KILL cr, KILL oldval);  // flags are overwritten; x86 CMPXCHG writes rax when the compare fails
8485 
8486   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8487             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8488             "sete    $res\n\t"
8489             "movzbl  $res, $res" %}
8490   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8491   ins_encode(lock_prefix,
8492              REX_reg_mem_wide(newval, mem_ptr),
8493              OpcP, OpcS,
8494              reg_mem(newval, mem_ptr),
8495              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8496              REX_reg_breg(res, res), // movzbl
8497              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8498   ins_pipe( pipe_cmpxchg );
8499 %}
8500 
8501 instruct compareAndSwapI(rRegI res,
8502                          memory mem_ptr,
8503                          rax_RegI oldval, rRegI newval,
8504                          rFlagsReg cr)
8505 %{  // CompareAndSwapI: lock cmpxchgl, then the success flag is materialized into res as 0 or 1
8506   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8507   effect(KILL cr, KILL oldval);  // flags are overwritten; x86 CMPXCHG writes rax when the compare fails
8508 
8509   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8510             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8511             "sete    $res\n\t"
8512             "movzbl  $res, $res" %}
8513   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8514   ins_encode(lock_prefix,
8515              REX_reg_mem(newval, mem_ptr),
8516              OpcP, OpcS,
8517              reg_mem(newval, mem_ptr),
8518              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8519              REX_reg_breg(res, res), // movzbl
8520              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8521   ins_pipe( pipe_cmpxchg );
8522 %}
8523 
8524 
8525 instruct compareAndSwapN(rRegI res,
8526                           memory mem_ptr,
8527                           rax_RegN oldval, rRegN newval,
8528                           rFlagsReg cr) %{  // CompareAndSwapN: lock cmpxchgl on a compressed pointer, then the success flag is materialized into res
8529   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8530   effect(KILL cr, KILL oldval);  // flags are overwritten; x86 CMPXCHG writes rax when the compare fails
8531 
8532   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8533             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8534             "sete    $res\n\t"
8535             "movzbl  $res, $res" %}
8536   opcode(0x0F, 0xB1);  // x86 CMPXCHG r/m, r
8537   ins_encode(lock_prefix,
8538              REX_reg_mem(newval, mem_ptr),
8539              OpcP, OpcS,
8540              reg_mem(newval, mem_ptr),
8541              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8542              REX_reg_breg(res, res), // movzbl
8543              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8544   ins_pipe( pipe_cmpxchg );
8545 %}
8546 
8547 //----------Subtraction Instructions-------------------------------------------
8548 
8549 // Integer Subtraction Instructions
8550 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)  // int subtract, register -= register
8551 %{
8552   match(Set dst (SubI dst src));  // two-address form: dst is also the first input
8553   effect(KILL cr);  // condition flags are declared clobbered
8554 
8555   format %{ "subl    $dst, $src\t# int" %}
8556   opcode(0x2B);  // x86 SUB r32, r/m32
8557   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8558   ins_pipe(ialu_reg_reg);
8559 %}
8560 
8561 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)  // int subtract, register -= immediate
8562 %{
8563   match(Set dst (SubI dst src));
8564   effect(KILL cr);  // condition flags are declared clobbered
8565 
8566   format %{ "subl    $dst, $src\t# int" %}
8567   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8568   ins_encode(OpcSErm(dst, src), Con8or32(src));  // same encoding classes as addI_rReg_imm with the /5 extension
8569   ins_pipe(ialu_reg);
8570 %}
8571 
8572 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)  // int subtract, register -= memory (LoadI folded in)
8573 %{
8574   match(Set dst (SubI dst (LoadI src)));
8575   effect(KILL cr);  // condition flags are declared clobbered
8576 
8577   ins_cost(125);
8578   format %{ "subl    $dst, $src\t# int" %}
8579   opcode(0x2B);  // x86 SUB r32, r/m32
8580   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8581   ins_pipe(ialu_reg_mem);
8582 %}
8583 
8584 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)  // int subtract, memory -= register
8585 %{
8586   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load and store must name the same memory operand
8587   effect(KILL cr);  // condition flags are declared clobbered
8588 
8589   ins_cost(150);
8590   format %{ "subl    $dst, $src\t# int" %}
8591   opcode(0x29); /* Opcode 29 /r */
8592   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
8593   ins_pipe(ialu_mem_reg);
8594 %}
8595 
8596 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)  // int subtract, memory -= immediate
8597 %{
8598   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load and store must name the same memory operand
8599   effect(KILL cr);  // condition flags are declared clobbered
8600 
8601   ins_cost(125); // XXX
8602   format %{ "subl    $dst, $src\t# int" %}
8603   opcode(0x81); /* Opcode 81 /5 id */
8604   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // 0x05 is the /5 opcode extension
8605   ins_pipe(ialu_mem_imm);
8606 %}
8607 
8608 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)  // long subtract, register -= register
8609 %{
8610   match(Set dst (SubL dst src));  // two-address form: dst is also the first input
8611   effect(KILL cr);  // condition flags are declared clobbered
8612 
8613   format %{ "subq    $dst, $src\t# long" %}
8614   opcode(0x2B);  // x86 SUB r, r/m; the _wide REX class selects the 64-bit subq form
8615   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8616   ins_pipe(ialu_reg_reg);
8617 %}
8618 
8619 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)  // long subtract, register -= immL32 immediate; dst is a long register like subL_rReg (it was rRegI)
8620 %{
8621   match(Set dst (SubL dst src));  // SubL works on long values, so dst must be in the long register class
8622   effect(KILL cr);  // condition flags are declared clobbered
8623 
8624   format %{ "subq    $dst, $src\t# long" %}
8625   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8626   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // same encoding classes as addL_rReg_imm with the /5 extension
8627   ins_pipe(ialu_reg);
8628 %}
8629 
8630 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)  // long subtract, register -= memory (LoadL folded in)
8631 %{
8632   match(Set dst (SubL dst (LoadL src)));
8633   effect(KILL cr);  // condition flags are declared clobbered
8634 
8635   ins_cost(125);
8636   format %{ "subq    $dst, $src\t# long" %}
8637   opcode(0x2B);  // x86 SUB r, r/m
8638   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8639   ins_pipe(ialu_reg_mem);
8640 %}
8641 
8642 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)  // long subtract, memory -= register
8643 %{
8644   match(Set dst (StoreL dst (SubL (LoadL dst) src)));  // load and store must name the same memory operand
8645   effect(KILL cr);  // condition flags are declared clobbered
8646 
8647   ins_cost(150);
8648   format %{ "subq    $dst, $src\t# long" %}
8649   opcode(0x29); /* Opcode 29 /r */
8650   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
8651   ins_pipe(ialu_mem_reg);
8652 %}
8653 
8654 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr) // long in memory -= immL32 immediate
8655 %{
8656   match(Set dst (StoreL dst (SubL (LoadL dst) src))); // load from dst, subtract src, store back to the same dst
8657   effect(KILL cr); // the flags register is declared clobbered
8658 
8659   ins_cost(125); // XXX
8660   format %{ "subq    $dst, $src\t# long" %}
8661   opcode(0x81); /* Opcode 81 /5 id */
8662   ins_encode(REX_mem_wide(dst),
8663              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8664   ins_pipe(ialu_mem_imm);
8665 %}
8666 
8667 // Subtract an int from a pointer: matches AddP of dst with (zero - src).
8668 // XXX hmpf???
8669 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8670 %{
8671   match(Set dst (AddP dst (SubI zero src))); // adding the negated int is emitted as a single subtract
8672   effect(KILL cr); // the flags register is declared clobbered
8673 
8674   format %{ "subq    $dst, $src\t# ptr - int" %}
8675   opcode(0x2B);
8676   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // NOTE(review): wide encoding applied to an rRegI src -- confirm the upper half of src is what is intended
8677   ins_pipe(ialu_reg_reg);
8678 %}
8679 
8680 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr) // negate an int register in place
8681 %{
8682   match(Set dst (SubI zero dst)); // (zero - dst) is recognised as a negate
8683   effect(KILL cr); // the flags register is declared clobbered
8684 
8685   format %{ "negl    $dst\t# int" %}
8686   opcode(0xF7, 0x03);  // Opcode F7 /3
8687   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8688   ins_pipe(ialu_reg);
8689 %}
8690 
8691 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr) // negate an int in memory in place
8692 %{
8693   match(Set dst (StoreI dst (SubI zero (LoadI dst)))); // stores (zero - loaded value) back to the same dst
8694   effect(KILL cr); // the flags register is declared clobbered
8695 
8696   format %{ "negl    $dst\t# int" %}
8697   opcode(0xF7, 0x03);  // Opcode F7 /3
8698   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x03
8699   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory-destination rule; nearby memory read-modify-write rules use ialu_mem_* -- confirm
8700 %}
8701 
8702 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr) // negate a long register in place
8703 %{
8704   match(Set dst (SubL zero dst)); // (zero - dst) is recognised as a negate
8705   effect(KILL cr); // the flags register is declared clobbered
8706 
8707   format %{ "negq    $dst\t# long" %}
8708   opcode(0xF7, 0x03);  // Opcode F7 /3
8709   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8710   ins_pipe(ialu_reg);
8711 %}
8712 
8713 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr) // negate a long in memory in place
8714 %{
8715   match(Set dst (StoreL dst (SubL zero (LoadL dst)))); // stores (zero - loaded value) back to the same dst
8716   effect(KILL cr); // the flags register is declared clobbered
8717 
8718   format %{ "negq    $dst\t# long" %}
8719   opcode(0xF7, 0x03);  // Opcode F7 /3
8720   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x03
8721   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory-destination rule; nearby memory read-modify-write rules use ialu_mem_* -- confirm
8722 %}
8723 
8724 
8725 //----------Multiplication/Division Instructions-------------------------------
8726 // Integer Multiplication Instructions
8727 // Multiply Register
8728 
8729 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr) // int register *= int register
8730 %{
8731   match(Set dst (MulI dst src)); // two-address form: dst is both the left input and the result
8732   effect(KILL cr); // the flags register is declared clobbered
8733 
8734   ins_cost(300);
8735   format %{ "imull   $dst, $src\t# int" %}
8736   opcode(0x0F, 0xAF); // two opcode bytes, emitted via OpcP then OpcS below
8737   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8738   ins_pipe(ialu_reg_reg_alu0);
8739 %}
8740 
8741 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) // int dst = int register * int immediate
8742 %{
8743   match(Set dst (MulI src imm)); // three-operand form: dst is not an input of the multiply
8744   effect(KILL cr); // the flags register is declared clobbered
8745 
8746   ins_cost(300);
8747   format %{ "imull   $dst, $src, $imm\t# int" %}
8748   opcode(0x69); /* 69 /r id */
8749   ins_encode(REX_reg_reg(dst, src),
8750              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8751   ins_pipe(ialu_reg_reg_alu0);
8752 %}
8753 
8754 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr) // int register *= int loaded from memory
8755 %{
8756   match(Set dst (MulI dst (LoadI src))); // the load of src is folded into the multiply
8757   effect(KILL cr); // the flags register is declared clobbered
8758 
8759   ins_cost(350);
8760   format %{ "imull   $dst, $src\t# int" %}
8761   opcode(0x0F, 0xAF); // two opcode bytes, emitted via OpcP then OpcS below
8762   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8763   ins_pipe(ialu_reg_mem_alu0);
8764 %}
8765 
8766 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) // int dst = int loaded from memory * int immediate
8767 %{
8768   match(Set dst (MulI (LoadI src) imm)); // the load of src is folded into the multiply; dst is not an input
8769   effect(KILL cr); // the flags register is declared clobbered
8770 
8771   ins_cost(300);
8772   format %{ "imull   $dst, $src, $imm\t# int" %}
8773   opcode(0x69); /* 69 /r id */
8774   ins_encode(REX_reg_mem(dst, src),
8775              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8776   ins_pipe(ialu_reg_mem_alu0);
8777 %}
8778 
8779 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr) // long register *= long register
8780 %{
8781   match(Set dst (MulL dst src)); // two-address form: dst is both the left input and the result
8782   effect(KILL cr); // the flags register is declared clobbered
8783 
8784   ins_cost(300);
8785   format %{ "imulq   $dst, $src\t# long" %}
8786   opcode(0x0F, 0xAF); // two opcode bytes, emitted via OpcP then OpcS below
8787   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8788   ins_pipe(ialu_reg_reg_alu0);
8789 %}
8790 
8791 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) // long dst = long register * immL32 immediate
8792 %{
8793   match(Set dst (MulL src imm)); // three-operand form: dst is not an input of the multiply
8794   effect(KILL cr); // the flags register is declared clobbered
8795 
8796   ins_cost(300);
8797   format %{ "imulq   $dst, $src, $imm\t# long" %}
8798   opcode(0x69); /* 69 /r id */
8799   ins_encode(REX_reg_reg_wide(dst, src),
8800              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8801   ins_pipe(ialu_reg_reg_alu0);
8802 %}
8803 
8804 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr) // long register *= long loaded from memory
8805 %{
8806   match(Set dst (MulL dst (LoadL src))); // the load of src is folded into the multiply
8807   effect(KILL cr); // the flags register is declared clobbered
8808 
8809   ins_cost(350);
8810   format %{ "imulq   $dst, $src\t# long" %}
8811   opcode(0x0F, 0xAF); // two opcode bytes, emitted via OpcP then OpcS below
8812   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8813   ins_pipe(ialu_reg_mem_alu0);
8814 %}
8815 
8816 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) // long dst = long loaded from memory * immL32 immediate
8817 %{
8818   match(Set dst (MulL (LoadL src) imm)); // the load of src is folded into the multiply; dst is not an input
8819   effect(KILL cr); // the flags register is declared clobbered
8820 
8821   ins_cost(300);
8822   format %{ "imulq   $dst, $src, $imm\t# long" %}
8823   opcode(0x69); /* 69 /r id */
8824   ins_encode(REX_reg_mem_wide(dst, src),
8825              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8826   ins_pipe(ialu_reg_mem_alu0);
8827 %}
8828 
8829 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr) // MulHiL of src and rax; result operand is of class rdx_RegL
8830 %{
8831   match(Set dst (MulHiL src rax));
8832   effect(USE_KILL rax, KILL cr); // rax is consumed as an input and then clobbered; flags are clobbered too
8833 
8834   ins_cost(300);
8835   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8836   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8837   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // only src is encoded; the other operands are fixed by their register classes
8838   ins_pipe(ialu_reg_reg_alu0);
8839 %}
8840 
8841 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8842                    rFlagsReg cr)
8843 %{ // int division: the quotient is set in rax; rdx is clobbered
8844   match(Set rax (DivI rax div));
8845   effect(KILL rdx, KILL cr); // rdx and the flags register are declared clobbered
8846 
8847   ins_cost(30*100+10*100); // XXX
8848   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8849             "jne,s   normal\n\t"
8850             "xorl    rdx, rdx\n\t"
8851             "cmpl    $div, -1\n\t"
8852             "je,s    done\n"
8853     "normal: cdql\n\t"
8854             "idivl   $div\n"
8855     "done:"        %}
8856   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8857   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // per the format text, idivl is skipped when rax is 0x80000000 and $div is -1; presumably cdql_enc emits that guard -- confirm in its enc_class
8858   ins_pipe(ialu_reg_reg_alu0);
8859 %}
8860 
8861 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8862                    rFlagsReg cr)
8863 %{ // long division: the quotient is set in rax; rdx is clobbered
8864   match(Set rax (DivL rax div));
8865   effect(KILL rdx, KILL cr); // rdx and the flags register are declared clobbered
8866 
8867   ins_cost(30*100+10*100); // XXX
8868   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8869             "cmpq    rax, rdx\n\t"
8870             "jne,s   normal\n\t"
8871             "xorl    rdx, rdx\n\t"
8872             "cmpq    $div, -1\n\t"
8873             "je,s    done\n"
8874     "normal: cdqq\n\t"
8875             "idivq   $div\n"
8876     "done:"        %}
8877   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8878   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // per the format text, idivq is skipped when rax is 0x8000000000000000 and $div is -1; presumably cdqq_enc emits that guard -- confirm in its enc_class
8879   ins_pipe(ialu_reg_reg_alu0);
8880 %}
8881 
8882 // Integer DIVMOD with Register: one rule yields both the quotient and the mod result
8883 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8884                              rFlagsReg cr)
8885 %{
8886   match(DivModI rax div); // no Set here, unlike divI_rReg / modI_rReg
8887   effect(KILL cr); // only the flags are killed; neither rax nor rdx is declared clobbered
8888 
8889   ins_cost(30*100+10*100); // XXX
8890   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8891             "jne,s   normal\n\t"
8892             "xorl    rdx, rdx\n\t"
8893             "cmpl    $div, -1\n\t"
8894             "je,s    done\n"
8895     "normal: cdql\n\t"
8896             "idivl   $div\n"
8897     "done:"        %}
8898   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8899   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // same encoding list as divI_rReg
8900   ins_pipe(pipe_slow);
8901 %}
8902 
8903 // Long DIVMOD with Register: one rule yields both the quotient and the mod result
8904 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8905                              rFlagsReg cr)
8906 %{
8907   match(DivModL rax div); // no Set here, unlike divL_rReg / modL_rReg
8908   effect(KILL cr); // only the flags are killed; neither rax nor rdx is declared clobbered
8909 
8910   ins_cost(30*100+10*100); // XXX
8911   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8912             "cmpq    rax, rdx\n\t"
8913             "jne,s   normal\n\t"
8914             "xorl    rdx, rdx\n\t"
8915             "cmpq    $div, -1\n\t"
8916             "je,s    done\n"
8917     "normal: cdqq\n\t"
8918             "idivq   $div\n"
8919     "done:"        %}
8920   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8921   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // same encoding list as divL_rReg
8922   ins_pipe(pipe_slow);
8923 %}
8924 
8925 //----------- DivL-By-Constant-Expansions--------------------------------------
8926 // DivI cases are handled by the compiler
8927 
8928 // Magic constant 0x6666666666666667 used by the divL_10 expansion (reciprocal of 10)
8929 instruct loadConL_0x6666666666666667(rRegL dst)
8930 %{
8931   effect(DEF dst); // no match rule: this rule is only instantiated from an expand block
8932 
8933   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8934   ins_encode(load_immL(dst, 0x6666666666666667)); // the format text above now shows the same 16-digit constant that is loaded here
8935   ins_pipe(ialu_reg);
8936 %}
8937 
8938 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr) // expand-only helper for divL_10; same opcode and encoding as mulHiL_rReg
8939 %{
8940   effect(DEF dst, USE src, USE_KILL rax, KILL cr); // no match rule: dst is defined, src is read, rax is read then clobbered, flags are clobbered
8941 
8942   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8943   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8944   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8945   ins_pipe(ialu_reg_reg_alu0);
8946 %}
8947 
8948 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr) // expand-only helper for divL_10: shift dst right by the fixed count 0x3F
8949 %{
8950   effect(USE_DEF dst, KILL cr); // no match rule: dst is modified in place and the flags are clobbered
8951 
8952   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8953   opcode(0xC1, 0x7); /* C1 /7 ib */
8954   ins_encode(reg_opc_imm_wide(dst, 0x3F)); // 0x3F == 63, matching the format text
8955   ins_pipe(ialu_reg);
8956 %}
8957 
8958 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr) // expand-only helper for divL_10: shift dst right by the fixed count 2
8959 %{
8960   effect(USE_DEF dst, KILL cr); // no match rule: dst is modified in place and the flags are clobbered
8961 
8962   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8963   opcode(0xC1, 0x7); /* C1 /7 ib */
8964   ins_encode(reg_opc_imm_wide(dst, 0x2));
8965   ins_pipe(ialu_reg);
8966 %}
8967 
8968 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div) // long division by the immL10 constant, expanded into multiply/shift/subtract helpers
8969 %{
8970   match(Set dst (DivL src div));
8971 
8972   ins_cost((5+8)*100);
8973   expand %{
8974     rax_RegL rax;                     // Killed temp
8975     rFlagsReg cr;                     // Killed
8976     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8977     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8978     sarL_rReg_63(src, cr);            // sarq  src, 63 -- NOTE(review): the helper is USE_DEF, so src is overwritten here; confirm that is intended
8979     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8980     subL_rReg(dst, src, cr);          // subq  rdx, src
8981   %}
8982 %}
8983 
8984 //-----------------------------------------------------------------------------
8985 
8986 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8987                    rFlagsReg cr)
8988 %{ // int remainder: the result is set in rdx; rax is clobbered
8989   match(Set rdx (ModI rax div));
8990   effect(KILL rax, KILL cr); // rax and the flags register are declared clobbered
8991 
8992   ins_cost(300); // XXX
8993   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8994             "jne,s   normal\n\t"
8995             "xorl    rdx, rdx\n\t"
8996             "cmpl    $div, -1\n\t"
8997             "je,s    done\n"
8998     "normal: cdql\n\t"
8999             "idivl   $div\n"
9000     "done:"        %}
9001   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9002   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // same encoding list as divI_rReg; only the Set target and the killed register differ
9003   ins_pipe(ialu_reg_reg_alu0);
9004 %}
9005 
9006 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9007                    rFlagsReg cr)
9008 %{ // long remainder: the result is set in rdx; rax is clobbered
9009   match(Set rdx (ModL rax div));
9010   effect(KILL rax, KILL cr); // rax and the flags register are declared clobbered
9011 
9012   ins_cost(300); // XXX
9013   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9014             "cmpq    rax, rdx\n\t"
9015             "jne,s   normal\n\t"
9016             "xorl    rdx, rdx\n\t"
9017             "cmpq    $div, -1\n\t"
9018             "je,s    done\n"
9019     "normal: cdqq\n\t"
9020             "idivq   $div\n"
9021     "done:"        %}
9022   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9023   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // same encoding list as divL_rReg; only the Set target and the killed register differ
9024   ins_pipe(ialu_reg_reg_alu0);
9025 %}
9026 
9027 // Integer Shift Instructions
9028 // Shift an int register left by one (immI1 shift count)
9029 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9030 %{
9031   match(Set dst (LShiftI dst shift));
9032   effect(KILL cr); // the flags register is declared clobbered
9033 
9034   format %{ "sall    $dst, $shift" %}
9035   opcode(0xD1, 0x4); /* D1 /4 */
9036   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not passed to the encoding
9037   ins_pipe(ialu_reg);
9038 %}
9039 
9040 // Shift an int in memory left by one (load, shift, store back to the same address)
9041 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9042 %{
9043   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9044   effect(KILL cr); // the flags register is declared clobbered
9045 
9046   format %{ "sall    $dst, $shift\t" %}
9047   opcode(0xD1, 0x4); /* D1 /4 */
9048   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x4
9049   ins_pipe(ialu_mem_imm);
9050 %}
9051 
9052 // Shift an int register left by an 8-bit immediate
9053 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9054 %{
9055   match(Set dst (LShiftI dst shift));
9056   effect(KILL cr); // the flags register is declared clobbered
9057 
9058   format %{ "sall    $dst, $shift" %}
9059   opcode(0xC1, 0x4); /* C1 /4 ib */
9060   ins_encode(reg_opc_imm(dst, shift)); // a single enc_class receives both the register and the shift count
9061   ins_pipe(ialu_reg);
9062 %}
9063 
9064 // Shift an int in memory left by an 8-bit immediate (load, shift, store back)
9065 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9066 %{
9067   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9068   effect(KILL cr); // the flags register is declared clobbered
9069 
9070   format %{ "sall    $dst, $shift" %}
9071   opcode(0xC1, 0x4); /* C1 /4 ib */
9072   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift count is appended after the memory operand
9073   ins_pipe(ialu_mem_imm);
9074 %}
9075 
9076 // Shift an int register left by a variable count held in an rcx_RegI operand
9077 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9078 %{
9079   match(Set dst (LShiftI dst shift));
9080   effect(KILL cr); // the flags register is declared clobbered
9081 
9082   format %{ "sall    $dst, $shift" %}
9083   opcode(0xD3, 0x4); /* D3 /4 */
9084   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9085   ins_pipe(ialu_reg_reg);
9086 %}
9087 
9088 // Shift an int in memory left by a variable count held in an rcx_RegI operand
9089 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9090 %{
9091   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); // load, shift, store back to the same dst
9092   effect(KILL cr); // the flags register is declared clobbered
9093 
9094   format %{ "sall    $dst, $shift" %}
9095   opcode(0xD3, 0x4); /* D3 /4 */
9096   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9097   ins_pipe(ialu_mem_reg);
9098 %}
9099 
9100 // Arithmetic shift right of an int register by one (immI1 shift count)
9101 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9102 %{
9103   match(Set dst (RShiftI dst shift));
9104   effect(KILL cr); // the flags register is declared clobbered
9105 
9106   format %{ "sarl    $dst, $shift" %}
9107   opcode(0xD1, 0x7); /* D1 /7 */
9108   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not passed to the encoding
9109   ins_pipe(ialu_reg);
9110 %}
9111 
9112 // Arithmetic shift right of an int in memory by one (load, shift, store back)
9113 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9114 %{
9115   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9116   effect(KILL cr); // the flags register is declared clobbered
9117 
9118   format %{ "sarl    $dst, $shift" %}
9119   opcode(0xD1, 0x7); /* D1 /7 */
9120   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x7
9121   ins_pipe(ialu_mem_imm);
9122 %}
9123 
9124 // Arithmetic shift right of an int register by an 8-bit immediate
9125 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9126 %{
9127   match(Set dst (RShiftI dst shift));
9128   effect(KILL cr); // the flags register is declared clobbered
9129 
9130   format %{ "sarl    $dst, $shift" %}
9131   opcode(0xC1, 0x7); /* C1 /7 ib */
9132   ins_encode(reg_opc_imm(dst, shift));
9133   ins_pipe(ialu_reg); // register destination: same pipe class as salI_rReg_imm and shrI_rReg_imm
9134 %}
9135 
9136 // Arithmetic shift right of an int in memory by an 8-bit immediate (load, shift, store back)
9137 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9138 %{
9139   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9140   effect(KILL cr); // the flags register is declared clobbered
9141 
9142   format %{ "sarl    $dst, $shift" %}
9143   opcode(0xC1, 0x7); /* C1 /7 ib */
9144   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift count is appended after the memory operand
9145   ins_pipe(ialu_mem_imm);
9146 %}
9147 
9148 // Arithmetic shift right of an int register by a variable count held in an rcx_RegI operand
9149 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9150 %{
9151   match(Set dst (RShiftI dst shift));
9152   effect(KILL cr); // the flags register is declared clobbered
9153 
9154   format %{ "sarl    $dst, $shift" %}
9155   opcode(0xD3, 0x7); /* D3 /7 */
9156   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9157   ins_pipe(ialu_reg_reg);
9158 %}
9159 
9160 // Arithmetic shift right of an int in memory by a variable count held in an rcx_RegI operand
9161 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9162 %{
9163   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); // load, shift, store back to the same dst
9164   effect(KILL cr); // the flags register is declared clobbered
9165 
9166   format %{ "sarl    $dst, $shift" %}
9167   opcode(0xD3, 0x7); /* D3 /7 */
9168   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9169   ins_pipe(ialu_mem_reg);
9170 %}
9171 
9172 // Logical (unsigned) shift right of an int register by one
9173 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9174 %{
9175   match(Set dst (URShiftI dst shift));
9176   effect(KILL cr); // the flags register is declared clobbered
9177 
9178   format %{ "shrl    $dst, $shift" %}
9179   opcode(0xD1, 0x5); /* D1 /5 */
9180   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not passed to the encoding
9181   ins_pipe(ialu_reg);
9182 %}
9183 
9184 // Logical (unsigned) shift right of an int in memory by one (load, shift, store back)
9185 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9186 %{
9187   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9188   effect(KILL cr); // the flags register is declared clobbered
9189 
9190   format %{ "shrl    $dst, $shift" %}
9191   opcode(0xD1, 0x5); /* D1 /5 */
9192   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x5
9193   ins_pipe(ialu_mem_imm);
9194 %}
9195 
9196 // Logical (unsigned) shift right of an int register by an 8-bit immediate
9197 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9198 %{
9199   match(Set dst (URShiftI dst shift));
9200   effect(KILL cr); // the flags register is declared clobbered
9201 
9202   format %{ "shrl    $dst, $shift" %}
9203   opcode(0xC1, 0x5); /* C1 /5 ib */
9204   ins_encode(reg_opc_imm(dst, shift)); // a single enc_class receives both the register and the shift count
9205   ins_pipe(ialu_reg);
9206 %}
9207 
9208 // Logical (unsigned) shift right of an int in memory by an 8-bit immediate (load, shift, store back)
9209 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9210 %{
9211   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9212   effect(KILL cr); // the flags register is declared clobbered
9213 
9214   format %{ "shrl    $dst, $shift" %}
9215   opcode(0xC1, 0x5); /* C1 /5 ib */
9216   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift count is appended after the memory operand
9217   ins_pipe(ialu_mem_imm);
9218 %}
9219 
9220 // Logical (unsigned) shift right of an int register by a variable count held in an rcx_RegI operand
9221 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9222 %{
9223   match(Set dst (URShiftI dst shift));
9224   effect(KILL cr); // the flags register is declared clobbered
9225 
9226   format %{ "shrl    $dst, $shift" %}
9227   opcode(0xD3, 0x5); /* D3 /5 */
9228   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9229   ins_pipe(ialu_reg_reg);
9230 %}
9231 
9232 // Logical (unsigned) shift right of an int in memory by a variable count held in an rcx_RegI operand
9233 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9234 %{
9235   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // load, shift, store back to the same dst
9236   effect(KILL cr); // the flags register is declared clobbered
9237 
9238   format %{ "shrl    $dst, $shift" %}
9239   opcode(0xD3, 0x5); /* D3 /5 */
9240   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9241   ins_pipe(ialu_mem_reg);
9242 %}
9243 
9244 // Long Shift Instructions
9245 // Shift a long register left by one (the shift count is an int immediate, immI1)
9246 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9247 %{
9248   match(Set dst (LShiftL dst shift));
9249   effect(KILL cr); // the flags register is declared clobbered
9250 
9251   format %{ "salq    $dst, $shift" %}
9252   opcode(0xD1, 0x4); /* D1 /4 */
9253   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as salI_rReg_1, with the _wide prefix encoding
9254   ins_pipe(ialu_reg);
9255 %}
9256 
9257 // Shift a long in memory left by one (load, shift, store back to the same address)
9258 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9259 %{
9260   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9261   effect(KILL cr); // the flags register is declared clobbered
9262 
9263   format %{ "salq    $dst, $shift" %}
9264   opcode(0xD1, 0x4); /* D1 /4 */
9265   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x4
9266   ins_pipe(ialu_mem_imm);
9267 %}
9268 
9269 // Shift a long register left by an 8-bit immediate
9270 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9271 %{
9272   match(Set dst (LShiftL dst shift));
9273   effect(KILL cr); // the flags register is declared clobbered
9274 
9275   format %{ "salq    $dst, $shift" %}
9276   opcode(0xC1, 0x4); /* C1 /4 ib */
9277   ins_encode(reg_opc_imm_wide(dst, shift)); // a single enc_class receives both the register and the shift count
9278   ins_pipe(ialu_reg);
9279 %}
9280 
9281 // Shift a long in memory left by an 8-bit immediate (load, shift, store back)
9282 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9283 %{
9284   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9285   effect(KILL cr); // the flags register is declared clobbered
9286 
9287   format %{ "salq    $dst, $shift" %}
9288   opcode(0xC1, 0x4); /* C1 /4 ib */
9289   ins_encode(REX_mem_wide(dst), OpcP,
9290              RM_opc_mem(secondary, dst), Con8or32(shift));
9291   ins_pipe(ialu_mem_imm);
9292 %}
9293 
9294 // Shift a long register left by a variable count held in an rcx_RegI operand
9295 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9296 %{
9297   match(Set dst (LShiftL dst shift));
9298   effect(KILL cr); // the flags register is declared clobbered
9299 
9300   format %{ "salq    $dst, $shift" %}
9301   opcode(0xD3, 0x4); /* D3 /4 */
9302   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9303   ins_pipe(ialu_reg_reg);
9304 %}
9305 
9306 // Shift a long in memory left by a variable count held in an rcx_RegI operand
9307 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9308 %{
9309   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // load, shift, store back to the same dst
9310   effect(KILL cr); // the flags register is declared clobbered
9311 
9312   format %{ "salq    $dst, $shift" %}
9313   opcode(0xD3, 0x4); /* D3 /4 */
9314   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9315   ins_pipe(ialu_mem_reg);
9316 %}
9317 
9318 // Arithmetic shift right of a long register by one
9319 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9320 %{
9321   match(Set dst (RShiftL dst shift));
9322   effect(KILL cr); // the flags register is declared clobbered
9323 
9324   format %{ "sarq    $dst, $shift" %}
9325   opcode(0xD1, 0x7); /* D1 /7 */
9326   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the shift operand is not passed to the encoding
9327   ins_pipe(ialu_reg);
9328 %}
9329 
9330 // Arithmetic shift right of a long in memory by one (load, shift, store back)
9331 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9332 %{
9333   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9334   effect(KILL cr); // the flags register is declared clobbered
9335 
9336   format %{ "sarq    $dst, $shift" %}
9337   opcode(0xD1, 0x7); /* D1 /7 */
9338   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x7
9339   ins_pipe(ialu_mem_imm);
9340 %}
9341 
9342 // Arithmetic shift right of a long register by an 8-bit immediate
9343 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9344 %{
9345   match(Set dst (RShiftL dst shift));
9346   effect(KILL cr); // the flags register is declared clobbered
9347 
9348   format %{ "sarq    $dst, $shift" %}
9349   opcode(0xC1, 0x7); /* C1 /7 ib */
9350   ins_encode(reg_opc_imm_wide(dst, shift));
9351   ins_pipe(ialu_reg); // register destination: same pipe class as salL_rReg_imm and shrL_rReg_imm
9352 %}
9353 
9354 // Arithmetic shift right of a long in memory by an 8-bit immediate (load, shift, store back)
9355 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9356 %{
9357   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9358   effect(KILL cr); // the flags register is declared clobbered
9359 
9360   format %{ "sarq    $dst, $shift" %}
9361   opcode(0xC1, 0x7); /* C1 /7 ib */
9362   ins_encode(REX_mem_wide(dst), OpcP,
9363              RM_opc_mem(secondary, dst), Con8or32(shift));
9364   ins_pipe(ialu_mem_imm);
9365 %}
9366 
9367 // Arithmetic shift right of a long register by a variable count held in an rcx_RegI operand
9368 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9369 %{
9370   match(Set dst (RShiftL dst shift));
9371   effect(KILL cr); // the flags register is declared clobbered
9372 
9373   format %{ "sarq    $dst, $shift" %}
9374   opcode(0xD3, 0x7); /* D3 /7 */
9375   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9376   ins_pipe(ialu_reg_reg);
9377 %}
9378 
9379 // Arithmetic shift right of a long in memory by a variable count held in an rcx_RegI operand
9380 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9381 %{
9382   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // load, shift, store back to the same dst
9383   effect(KILL cr); // the flags register is declared clobbered
9384 
9385   format %{ "sarq    $dst, $shift" %}
9386   opcode(0xD3, 0x7); /* D3 /7 */
9387   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9388   ins_pipe(ialu_mem_reg);
9389 %}
9390 
9391 // Logical (unsigned) shift right of a long register by one
9392 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9393 %{
9394   match(Set dst (URShiftL dst shift));
9395   effect(KILL cr); // the flags register is declared clobbered
9396 
9397   format %{ "shrq    $dst, $shift" %}
9398   opcode(0xD1, 0x5); /* D1 /5 */
9399   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst )); // the shift operand is not passed to the encoding
9400   ins_pipe(ialu_reg);
9401 %}
9402 
9403 // Logical (unsigned) shift right of a long in memory by one (load, shift, store back)
9404 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9405 %{
9406   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9407   effect(KILL cr); // the flags register is declared clobbered
9408 
9409   format %{ "shrq    $dst, $shift" %}
9410   opcode(0xD1, 0x5); /* D1 /5 */
9411   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value, 0x5
9412   ins_pipe(ialu_mem_imm);
9413 %}
9414 
9415 // Logical (unsigned) shift right of a long register by an 8-bit immediate
9416 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9417 %{
9418   match(Set dst (URShiftL dst shift));
9419   effect(KILL cr); // the flags register is declared clobbered
9420 
9421   format %{ "shrq    $dst, $shift" %}
9422   opcode(0xC1, 0x5); /* C1 /5 ib */
9423   ins_encode(reg_opc_imm_wide(dst, shift)); // a single enc_class receives both the register and the shift count
9424   ins_pipe(ialu_reg);
9425 %}
9426 
9427 
9428 // Logical (unsigned) shift right of a long in memory by an 8-bit immediate (load, shift, store back)
9429 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9430 %{
9431   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9432   effect(KILL cr); // the flags register is declared clobbered
9433 
9434   format %{ "shrq    $dst, $shift" %}
9435   opcode(0xC1, 0x5); /* C1 /5 ib */
9436   ins_encode(REX_mem_wide(dst), OpcP,
9437              RM_opc_mem(secondary, dst), Con8or32(shift));
9438   ins_pipe(ialu_mem_imm);
9439 %}
9440 
9441 // Logical (unsigned) shift right of a long register by a variable count held in an rcx_RegI operand
9442 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9443 %{
9444   match(Set dst (URShiftL dst shift));
9445   effect(KILL cr); // the flags register is declared clobbered
9446 
9447   format %{ "shrq    $dst, $shift" %}
9448   opcode(0xD3, 0x5); /* D3 /5 */
9449   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; its register is fixed by the operand class
9450   ins_pipe(ialu_reg_reg);
9451 %}
9452 
9453 // Logical (unsigned) shift right of a long in memory by a variable count held in an rcx_RegI operand
9454 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9455 %{
9456   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // load, shift, store back to the same dst
9457   effect(KILL cr); // the flags register is declared clobbered
9458 
9459   format %{ "shrq    $dst, $shift" %}
9460   opcode(0xD3, 0x5); /* D3 /5 */
9461   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9462   ins_pipe(ialu_mem_reg);
9463 %}
9464 
9465 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9466 // This idiom is used by the compiler for the i2b bytecode.
9467 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9468 %{
9469   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // inner node is the left shift, outer node the arithmetic right shift
9470 
9471   format %{ "movsbl  $dst, $src\t# i2b" %}
9472   opcode(0x0F, 0xBE); // two opcode bytes, emitted via OpcP then OpcS below
9473   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // uses REX_reg_breg where i2s uses REX_reg_reg
9474   ins_pipe(ialu_reg_reg);
9475 %}
9476 
9477 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9478 // This idiom is used by the compiler for the i2s bytecode.
9479 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9480 %{
9481   match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // inner node is the left shift, outer node the arithmetic right shift
9482 
9483   format %{ "movswl  $dst, $src\t# i2s" %}
9484   opcode(0x0F, 0xBF); // two opcode bytes, emitted via OpcP then OpcS below
9485   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9486   ins_pipe(ialu_reg_reg);
9487 %}
9488 
9489 // ROL/ROR instructions
9490 
9491 // ROL expand: helper rules without a match rule, instantiated from the expand blocks of the int rotate-left rules below
9492 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{ // rotate an int register left; no shift operand
9493   effect(KILL cr, USE_DEF dst); // dst is modified in place and the flags are clobbered
9494 
9495   format %{ "roll    $dst" %}
9496   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9497   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9498   ins_pipe(ialu_reg);
9499 %}
9500 
9501 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{ // expand-only helper: rotate an int register left by an 8-bit immediate
9502   effect(USE_DEF dst, USE shift, KILL cr); // dst is modified in place, shift is read, flags are clobbered
9503 
9504   format %{ "roll    $dst, $shift" %}
9505   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9506   ins_encode( reg_opc_imm(dst, shift) );
9507   ins_pipe(ialu_reg);
9508 %}
9509 
9510 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr) // expand-only helper: rotate an int register left by a variable count
9511 %{
9512   effect(USE_DEF dst, USE shift, KILL cr); // dst is modified in place, shift is read, flags are clobbered
9513 
9514   format %{ "roll    $dst, $shift" %}
9515   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9516   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; dst uses a register class that excludes rcx by name
9517   ins_pipe(ialu_reg_reg);
9518 %}
9519 // end of ROL expand
9520 
9521 // Rotate an int register left by one: recognises (dst << lshift) | (dst >>> rshift)
9522 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9523 %{
9524   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); // lshift is immI1 and rshift is immI_M1 (presumably the constant -1 -- confirm the operand definition)
9525 
9526   expand %{
9527     rolI_rReg_imm1(dst, cr); // neither shift operand is forwarded to the helper
9528   %}
9529 %}
9530 
9531 // Rotate an int register left by an 8-bit immediate: recognises (dst << lshift) | (dst >>> rshift)
9532 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9533 %{
9534   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two shift counts sum to a multiple of 32
9535   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9536 
9537   expand %{
9538     rolI_rReg_imm8(dst, lshift, cr); // the left-shift count becomes the rotate count
9539   %}
9540 %}
9541 
9542 // Rotate an int register left by a variable count: recognises (dst << shift) | (dst >>> (0 - shift))
9543 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9544 %{
9545   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9546 
9547   expand %{
9548     rolI_rReg_CL(dst, shift, cr); // the zero operand is not forwarded to the helper
9549   %}
9550 %}
9551 
9552 // Rotate an int register left by a variable count: recognises (dst << shift) | (dst >>> (c32 - shift))
9553 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9554 %{
9555   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9556 
9557   expand %{
9558     rolI_rReg_CL(dst, shift, cr); // same helper as the C0 variant; the c32 operand is not forwarded
9559   %}
9560 %}
9561 
9562 // ROR expand: helper rules without a match rule, instantiated from the expand blocks of the int rotate-right rules below
9563 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr) // rotate an int register right; no shift operand
9564 %{
9565   effect(USE_DEF dst, KILL cr); // dst is modified in place and the flags are clobbered
9566 
9567   format %{ "rorl    $dst" %}
9568   opcode(0xD1, 0x1); /* D1 /1 */
9569   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9570   ins_pipe(ialu_reg);
9571 %}
9572 
9573 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) // expand-only helper: rotate an int register right by an 8-bit immediate
9574 %{
9575   effect(USE_DEF dst, USE shift, KILL cr); // dst is modified in place, shift is read, flags are clobbered
9576 
9577   format %{ "rorl    $dst, $shift" %}
9578   opcode(0xC1, 0x1); /* C1 /1 ib */
9579   ins_encode(reg_opc_imm(dst, shift));
9580   ins_pipe(ialu_reg);
9581 %}
9582 
9583 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr) // expand-only helper: rotate an int register right by a variable count
9584 %{
9585   effect(USE_DEF dst, USE shift, KILL cr); // dst is modified in place, shift is read, flags are clobbered
9586 
9587   format %{ "rorl    $dst, $shift" %}
9588   opcode(0xD3, 0x1); /* D3 /1 */
9589   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; dst uses a register class that excludes rcx by name
9590   ins_pipe(ialu_reg_reg);
9591 %}
9592 // end of ROR expand
9593 
9594 // Rotate an int register right by one: recognises (dst >>> rshift) | (dst << lshift)
9595 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9596 %{
9597   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); // rshift is immI1 and lshift is immI_M1 (presumably the constant -1 -- confirm the operand definition)
9598 
9599   expand %{
9600     rorI_rReg_imm1(dst, cr); // neither shift operand is forwarded to the helper
9601   %}
9602 %}
9603 
9604 // Rotate an int register right by an 8-bit immediate: recognises (dst >>> rshift) | (dst << lshift)
9605 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9606 %{
9607   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two shift counts sum to a multiple of 32
9608   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9609 
9610   expand %{
9611     rorI_rReg_imm8(dst, rshift, cr); // the right-shift count becomes the rotate count
9612   %}
9613 %}
9614 
9615 // Rotate an int register right by a variable count: recognises (dst >>> shift) | (dst << (0 - shift))
9616 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9617 %{
9618   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9619 
9620   expand %{
9621     rorI_rReg_CL(dst, shift, cr); // the zero operand is not forwarded to the helper
9622   %}
9623 %}
9624 
9625 // Rotate an int register right by a variable count: recognises (dst >>> shift) | (dst << (c32 - shift))
9626 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9627 %{
9628   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9629 
9630   expand %{
9631     rorI_rReg_CL(dst, shift, cr); // same helper as the C0 variant; the c32 operand is not forwarded
9632   %}
9633 %}
9634 
9635 // Long rotate rules
9636 // ROL expand: helper rules without a match rule, instantiated from the expand blocks of the long rotate-left rules below
9637 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{ // rotate a long register left; no shift operand
9638   effect(USE_DEF dst, KILL cr); // dst is modified in place and the flags are clobbered
9639 
9640   format %{ "rolq    $dst" %}
9641   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9642   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9643   ins_pipe(ialu_reg);
9644 %}
9645 
// 64-bit rotate-left by an 8-bit immediate count (rolq, C1 /0 ib).
// No match rule; referenced by the expand in rolL_rReg_i8.
9646 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9647   effect(USE_DEF dst, USE shift, KILL cr);
9648 
9649   format %{ "rolq    $dst, $shift" %}
9650   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9651   ins_encode( reg_opc_imm_wide(dst, shift) );
9652   ins_pipe(ialu_reg);
9653 %}
9654 
// 64-bit rotate-left by a variable count (rolq, D3 /0). The count is an int
// operand in the rcx register class even though the value rotated is a
// long. No match rule; referenced by the expands in rolL_rReg_Var_C0 and
// rolL_rReg_Var_C64.
9655 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9656 %{
9657   effect(USE_DEF dst, USE shift, KILL cr);
9658 
9659   format %{ "rolq    $dst, $shift" %}
9660   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9661   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9662   ins_pipe(ialu_reg_reg);
9663 %}
9664 // end of ROL expand
9665 
9666 // Rotate Left by one
// Matches (dst << lshift) | (dst >>> rshift) on a long where lshift is an
// immI1 and rshift an immI_M1 operand, and expands to rolL_rReg_imm1.
// NOTE(review): immI1 / immI_M1 are declared outside this section;
// presumably the constants 1 and -1 (a 64-bit shift by -1 behaves as a
// shift by 63) - confirm against the operand definitions.
9667 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9668 %{
9669   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9670 
9671   expand %{
9672     rolL_rReg_imm1(dst, cr);
9673   %}
9674 %}
9675 
9676 // Rotate Left by 8-bit immediate
// Matches (dst << lshift) | (dst >>> rshift) on a long for two 8-bit
// immediate counts. The predicate accepts the pattern only when the low six
// bits of the sum of the two shift constants are zero (the counts add up to
// a multiple of 64), i.e. when the OR forms a rotate. Expands to
// rolL_rReg_imm8 with the left-shift count.
9677 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9678 %{
9679   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9680   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9681 
9682   expand %{
9683     rolL_rReg_imm8(dst, lshift, cr);
9684   %}
9685 %}
9686 
9687 // Rotate Left by variable
// Variable 64-bit rotate-left written as
// (dst << shift) | (dst >>> (0 - shift)). Expands to rolL_rReg_CL; the
// "zero" operand only matches the constant in the SubI.
9688 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9689 %{
9690   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9691 
9692   expand %{
9693     rolL_rReg_CL(dst, shift, cr);
9694   %}
9695 %}
9696 
9697 // Rotate Left by variable
// Variable 64-bit rotate-left written as
// (dst << shift) | (dst >>> (64 - shift)). Expands to rolL_rReg_CL; c64
// only matches the constant in the SubI.
9698 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9699 %{
9700   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9701 
9702   expand %{
9703     rolL_rReg_CL(dst, shift, cr);
9704   %}
9705 %}
9706 
9707 // ROR expand
// 64-bit rotate-right by one bit (rorq, D1 /1). No match rule; referenced
// by the expand in rorL_rReg_i1. dst is USE_DEF, flags are declared killed.
9708 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9709 %{
9710   effect(USE_DEF dst, KILL cr);
9711 
9712   format %{ "rorq    $dst" %}
9713   opcode(0xD1, 0x1); /* D1 /1 */
9714   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9715   ins_pipe(ialu_reg);
9716 %}
9717 
// 64-bit rotate-right by an 8-bit immediate count (rorq, C1 /1 ib).
// No match rule; referenced by the expand in rorL_rReg_i8.
9718 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9719 %{
9720   effect(USE_DEF dst, USE shift, KILL cr);
9721 
9722   format %{ "rorq    $dst, $shift" %}
9723   opcode(0xC1, 0x1); /* C1 /1 ib */
9724   ins_encode(reg_opc_imm_wide(dst, shift));
9725   ins_pipe(ialu_reg);
9726 %}
9727 
// 64-bit rotate-right by a variable count (rorq, D3 /1) with the count in
// the rcx_RegI operand. No match rule; referenced by the expands in
// rorL_rReg_Var_C0 and rorL_rReg_Var_C64.
9728 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9729 %{
9730   effect(USE_DEF dst, USE shift, KILL cr);
9731 
9732   format %{ "rorq    $dst, $shift" %}
9733   opcode(0xD3, 0x1); /* D3 /1 */
9734   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9735   ins_pipe(ialu_reg_reg);
9736 %}
9737 // end of ROR expand
9738 
9739 // Rotate Right by one
// Matches (dst >>> rshift) | (dst << lshift) on a long where rshift is an
// immI1 and lshift an immI_M1 operand, and expands to rorL_rReg_imm1.
// NOTE(review): immI1 / immI_M1 are declared outside this section;
// presumably the constants 1 and -1 - confirm against the operand
// definitions.
9740 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9741 %{
9742   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9743 
9744   expand %{
9745     rorL_rReg_imm1(dst, cr);
9746   %}
9747 %}
9748 
9749 // Rotate Right by 8-bit immediate
// Matches (dst >>> rshift) | (dst << lshift) on a long for two 8-bit
// immediate counts. The predicate accepts the pattern only when the low six
// bits of the sum of the two shift constants are zero (the counts add up to
// a multiple of 64). Expands to rorL_rReg_imm8 with the right-shift count.
9750 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9751 %{
9752   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9753   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9754 
9755   expand %{
9756     rorL_rReg_imm8(dst, rshift, cr);
9757   %}
9758 %}
9759 
9760 // Rotate Right by variable
// Variable 64-bit rotate-right written as
// (dst >>> shift) | (dst << (0 - shift)). Expands to rorL_rReg_CL; the
// "zero" operand only matches the constant in the SubI.
9761 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9762 %{
9763   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9764 
9765   expand %{
9766     rorL_rReg_CL(dst, shift, cr);
9767   %}
9768 %}
9769 
9770 // Rotate Right by variable
// Variable 64-bit rotate-right written as
// (dst >>> shift) | (dst << (64 - shift)). Expands to rorL_rReg_CL; c64
// only matches the constant in the SubI.
9771 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9772 %{
9773   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9774 
9775   expand %{
9776     rorL_rReg_CL(dst, shift, cr);
9777   %}
9778 %}
9779 
9780 // Logical Instructions
9781 
9782 // Integer Logical Instructions
9783 
9784 // And Instructions
9785 // And Register with Register
// dst = dst & src on 32-bit registers (andl, opcode 0x23 with dst in the
// reg field). The flags register is declared killed.
9786 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9787 %{
9788   match(Set dst (AndI dst src));
9789   effect(KILL cr);
9790 
9791   format %{ "andl    $dst, $src\t# int" %}
9792   opcode(0x23);
9793   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9794   ins_pipe(ialu_reg_reg);
9795 %}
9796 
9797 // And Register with Immediate 255
// dst & 0xFF implemented as a zero-extending byte move of dst onto itself
// (movzbl, 0F B6) instead of an andl. No flags operand or KILL effect is
// declared for this form.
// NOTE(review): REX_reg_breg is used instead of REX_reg_reg, presumably so
// that a REX prefix is emitted whenever the byte-sized source register
// requires one - confirm against the encoding class.
9798 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9799 %{
9800   match(Set dst (AndI dst src));
9801 
9802   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9803   opcode(0x0F, 0xB6);
9804   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9805   ins_pipe(ialu_reg);
9806 %}
9807 
9808 // And Register with Immediate 255 and promote to long
// (long)(src & 0xFF): the mask and the int-to-long conversion matched here
// are both covered by one movzbl from src into dst (0F B6). Unlike the
// in-place form, source and destination may be different registers.
9809 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9810 %{
9811   match(Set dst (ConvI2L (AndI src mask)));
9812 
9813   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9814   opcode(0x0F, 0xB6);
9815   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9816   ins_pipe(ialu_reg);
9817 %}
9818 
9819 // And Register with Immediate 65535
// dst & 0xFFFF implemented as a zero-extending 16-bit move of dst onto
// itself (movzwl, 0F B7). No flags operand or KILL effect is declared.
9820 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9821 %{
9822   match(Set dst (AndI dst src));
9823 
9824   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9825   opcode(0x0F, 0xB7);
9826   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9827   ins_pipe(ialu_reg);
9828 %}
9829 
9830 // And Register with Immediate 65535 and promote to long
// (long)(src & 0xFFFF): mask plus int-to-long conversion done by a single
// movzwl from src into dst (0F B7).
9831 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9832 %{
9833   match(Set dst (ConvI2L (AndI src mask)));
9834 
9835   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9836   opcode(0x0F, 0xB7);
9837   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9838   ins_pipe(ialu_reg);
9839 %}
9840 
9841 // And Register with Immediate
// dst = dst & constant on a 32-bit register (andl, opcode 81 /4).
// Flags are declared killed.
// NOTE(review): OpcSErm and Con8or32 are defined elsewhere; their names
// suggest the sign-extended 8-bit immediate form is selected when the
// constant fits - confirm against the encoding classes.
9842 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9843 %{
9844   match(Set dst (AndI dst src));
9845   effect(KILL cr);
9846 
9847   format %{ "andl    $dst, $src\t# int" %}
9848   opcode(0x81, 0x04); /* Opcode 81 /4 */
9849   ins_encode(OpcSErm(dst, src), Con8or32(src));
9850   ins_pipe(ialu_reg);
9851 %}
9852 
9853 // And Register with Memory
// dst = dst & [src]: the LoadI is folded into the andl as its memory
// operand (opcode 0x23). Costed at 125; flags are declared killed.
9854 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9855 %{
9856   match(Set dst (AndI dst (LoadI src)));
9857   effect(KILL cr);
9858 
9859   ins_cost(125);
9860   format %{ "andl    $dst, $src\t# int" %}
9861   opcode(0x23);
9862   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9863   ins_pipe(ialu_reg_mem);
9864 %}
9865 
9866 // And Memory with Register
// Read-modify-write form: [dst] = [dst] & src, matched as a StoreI of an
// AndI whose LoadI uses the same memory operand. Opcode 0x21 has the
// memory operand as destination, so src is passed as the register argument
// of REX_reg_mem / reg_mem. Costed at 150; flags are declared killed.
9867 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9868 %{
9869   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9870   effect(KILL cr);
9871 
9872   ins_cost(150);
9873   format %{ "andl    $dst, $src\t# int" %}
9874   opcode(0x21); /* Opcode 21 /r */
9875   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9876   ins_pipe(ialu_mem_reg);
9877 %}
9878 
9879 // And Memory with Immediate
// Read-modify-write form with a constant: [dst] = [dst] & src
// (andl, opcode 81 /4). The /4 opcode extension is passed as "secondary"
// to RM_opc_mem. Costed at 125; flags are declared killed.
9880 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9881 %{
9882   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9883   effect(KILL cr);
9884 
9885   ins_cost(125);
9886   format %{ "andl    $dst, $src\t# int" %}
9887   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9888   ins_encode(REX_mem(dst), OpcSE(src),
9889              RM_opc_mem(secondary, dst), Con8or32(src));
9890   ins_pipe(ialu_mem_imm);
9891 %}
9892 
9893 // Or Instructions
9894 // Or Register with Register
// dst = dst | src on 32-bit registers (orl, opcode 0x0B with dst in the
// reg field). The flags register is declared killed.
9895 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9896 %{
9897   match(Set dst (OrI dst src));
9898   effect(KILL cr);
9899 
9900   format %{ "orl     $dst, $src\t# int" %}
9901   opcode(0x0B);
9902   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9903   ins_pipe(ialu_reg_reg);
9904 %}
9905 
9906 // Or Register with Immediate
// dst = dst | constant on a 32-bit register (orl, opcode 81 /1).
// Uses the same OpcSErm / Con8or32 encoding pair as the other
// register-immediate logical forms; flags are declared killed.
9907 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9908 %{
9909   match(Set dst (OrI dst src));
9910   effect(KILL cr);
9911 
9912   format %{ "orl     $dst, $src\t# int" %}
9913   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9914   ins_encode(OpcSErm(dst, src), Con8or32(src));
9915   ins_pipe(ialu_reg);
9916 %}
9917 
9918 // Or Register with Memory
// dst = dst | [src]: the LoadI is folded into the orl as its memory operand
// (opcode 0x0B). Costed at 125; flags are declared killed.
9919 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9920 %{
9921   match(Set dst (OrI dst (LoadI src)));
9922   effect(KILL cr);
9923 
9924   ins_cost(125);
9925   format %{ "orl     $dst, $src\t# int" %}
9926   opcode(0x0B);
9927   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9928   ins_pipe(ialu_reg_mem);
9929 %}
9930 
9931 // Or Memory with Register
// Read-modify-write form: [dst] = [dst] | src, matched as a StoreI of an
// OrI whose LoadI uses the same memory operand. Opcode 0x09 has the memory
// operand as destination, so src is the register argument of the encoding
// classes. Costed at 150; flags are declared killed.
9932 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9933 %{
9934   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9935   effect(KILL cr);
9936 
9937   ins_cost(150);
9938   format %{ "orl     $dst, $src\t# int" %}
9939   opcode(0x09); /* Opcode 09 /r */
9940   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9941   ins_pipe(ialu_mem_reg);
9942 %}
9943 
9944 // Or Memory with Immediate
// Read-modify-write form with a constant: [dst] = [dst] | src
// (orl, opcode 81 /1). The /1 opcode extension is passed as "secondary" to
// RM_opc_mem. Costed at 125; flags are declared killed.
9945 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9946 %{
9947   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9948   effect(KILL cr);
9949 
9950   ins_cost(125);
9951   format %{ "orl     $dst, $src\t# int" %}
9952   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9953   ins_encode(REX_mem(dst), OpcSE(src),
9954              RM_opc_mem(secondary, dst), Con8or32(src));
9955   ins_pipe(ialu_mem_imm);
9956 %}
9957 
9958 // Xor Instructions
9959 // Xor Register with Register
// dst = dst ^ src on 32-bit registers (xorl, opcode 0x33 with dst in the
// reg field). The flags register is declared killed.
9960 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9961 %{
9962   match(Set dst (XorI dst src));
9963   effect(KILL cr);
9964 
9965   format %{ "xorl    $dst, $src\t# int" %}
9966   opcode(0x33);
9967   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9968   ins_pipe(ialu_reg_reg);
9969 %}
9970 
9971 // Xor Register with Immediate -1
// dst ^ -1 is a bitwise complement, so it is emitted as a single notl
// through the macro assembler. No flags operand or KILL effect is declared
// for this form, unlike the general xorl-with-immediate instruction.
// The format now prints "notl" to match the instruction actually emitted
// and the width-suffixed mnemonics used by the rest of the file.
9972 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9973   match(Set dst (XorI dst imm));
9974 
9975   format %{ "notl   $dst" %}
9976   ins_encode %{
9977      __ notl($dst$$Register);
9978   %}
9979   ins_pipe(ialu_reg);
9980 %}
9981 
9982 // Xor Register with Immediate
// dst = dst ^ constant on a 32-bit register (xorl, opcode 81 /6).
// Uses the same OpcSErm / Con8or32 encoding pair as the other
// register-immediate logical forms; flags are declared killed.
9983 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9984 %{
9985   match(Set dst (XorI dst src));
9986   effect(KILL cr);
9987 
9988   format %{ "xorl    $dst, $src\t# int" %}
9989   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9990   ins_encode(OpcSErm(dst, src), Con8or32(src));
9991   ins_pipe(ialu_reg);
9992 %}
9993 
9994 // Xor Register with Memory
// dst = dst ^ [src]: the LoadI is folded into the xorl as its memory
// operand (opcode 0x33). Costed at 125; flags are declared killed.
9995 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9996 %{
9997   match(Set dst (XorI dst (LoadI src)));
9998   effect(KILL cr);
9999 
10000   ins_cost(125);
10001   format %{ "xorl    $dst, $src\t# int" %}
10002   opcode(0x33);
10003   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10004   ins_pipe(ialu_reg_mem);
10005 %}
10006 
10007 // Xor Memory with Register
// Read-modify-write form: [dst] = [dst] ^ src, matched as a StoreI of a
// XorI whose LoadI uses the same memory operand. Opcode 0x31 has the memory
// operand as destination, so src is the register argument of the encoding
// classes. Costed at 150; flags are declared killed.
10008 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10009 %{
10010   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10011   effect(KILL cr);
10012 
10013   ins_cost(150);
10014   format %{ "xorl    $dst, $src\t# int" %}
10015   opcode(0x31); /* Opcode 31 /r */
10016   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10017   ins_pipe(ialu_mem_reg);
10018 %}
10019 
10020 // Xor Memory with Immediate
// Read-modify-write form with a constant: [dst] = [dst] ^ src
// (xorl, opcode 81 /6). The /6 opcode extension is passed as "secondary" to
// RM_opc_mem. Costed at 125; flags are declared killed.
10021 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10022 %{
10023   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10024   effect(KILL cr);
10025 
10026   ins_cost(125);
10027   format %{ "xorl    $dst, $src\t# int" %}
10028   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10029   ins_encode(REX_mem(dst), OpcSE(src),
10030              RM_opc_mem(secondary, dst), Con8or32(src));
10031   ins_pipe(ialu_mem_imm);
10032 %}
10033 
10034 
10035 // Long Logical Instructions
10036 
10037 // And Instructions
10038 // And Register with Register
// dst = dst & src on 64-bit registers (andq). Same opcode 0x23 as the int
// form; the 64-bit variant differs by using the REX_reg_reg_wide prefix
// encoding. The flags register is declared killed.
10039 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10040 %{
10041   match(Set dst (AndL dst src));
10042   effect(KILL cr);
10043 
10044   format %{ "andq    $dst, $src\t# long" %}
10045   opcode(0x23);
10046   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10047   ins_pipe(ialu_reg_reg);
10048 %}
10049 
10050 // And Register with Immediate 255
// long dst & 0xFF implemented as a zero-extending byte move of dst onto
// itself (movzbq, 0F B6 with the wide REX prefix encoding). No flags
// operand or KILL effect is declared.
10051 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10052 %{
10053   match(Set dst (AndL dst src));
10054 
10055   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10056   opcode(0x0F, 0xB6);
10057   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10058   ins_pipe(ialu_reg);
10059 %}
10060 
10061 // And Register with Immediate 65535
// long dst & 0xFFFF implemented as a zero-extending 16-bit move of dst onto
// itself (movzwq, 0F B7 with the wide REX prefix encoding). No flags
// operand or KILL effect is declared.
10062 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10063 %{
10064   match(Set dst (AndL dst src));
10065 
10066   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10067   opcode(0x0F, 0xB7);
10068   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10069   ins_pipe(ialu_reg);
10070 %}
10071 
10072 // And Register with Immediate
// dst = dst & constant on a 64-bit register (andq, opcode 81 /4).
// The constant operand is an immL32; flags are declared killed.
// NOTE(review): immL32 is presumably restricted to long constants that fit
// a sign-extended 32-bit immediate - confirm against the operand
// definition.
10073 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10074 %{
10075   match(Set dst (AndL dst src));
10076   effect(KILL cr);
10077 
10078   format %{ "andq    $dst, $src\t# long" %}
10079   opcode(0x81, 0x04); /* Opcode 81 /4 */
10080   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10081   ins_pipe(ialu_reg);
10082 %}
10083 
10084 // And Register with Memory
// dst = dst & [src] on a long: the LoadL is folded into the andq as its
// memory operand (opcode 0x23, wide REX prefix encoding). Costed at 125;
// flags are declared killed.
10085 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10086 %{
10087   match(Set dst (AndL dst (LoadL src)));
10088   effect(KILL cr);
10089 
10090   ins_cost(125);
10091   format %{ "andq    $dst, $src\t# long" %}
10092   opcode(0x23);
10093   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10094   ins_pipe(ialu_reg_mem);
10095 %}
10096 
10097 // And Memory with Register
// Read-modify-write form on a long: [dst] = [dst] & src, matched as a
// StoreL of an AndL whose LoadL uses the same memory operand (andq, opcode
// 0x21 with the memory operand as destination). Costed at 150; flags are
// declared killed.
10098 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10099 %{
10100   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10101   effect(KILL cr);
10102 
10103   ins_cost(150);
10104   format %{ "andq    $dst, $src\t# long" %}
10105   opcode(0x21); /* Opcode 21 /r */
10106   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10107   ins_pipe(ialu_mem_reg);
10108 %}
10109 
10110 // And Memory with Immediate
// Read-modify-write form on a long with an immL32 constant:
// [dst] = [dst] & src (andq, opcode 81 /4). The /4 opcode extension is
// passed as "secondary" to RM_opc_mem. Costed at 125; flags are declared
// killed.
10111 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10112 %{
10113   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10114   effect(KILL cr);
10115 
10116   ins_cost(125);
10117   format %{ "andq    $dst, $src\t# long" %}
10118   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10119   ins_encode(REX_mem_wide(dst), OpcSE(src),
10120              RM_opc_mem(secondary, dst), Con8or32(src));
10121   ins_pipe(ialu_mem_imm);
10122 %}
10123 
10124 // Or Instructions
10125 // Or Register with Register
// dst = dst | src on 64-bit registers (orq, opcode 0x0B with the wide REX
// prefix encoding). The flags register is declared killed.
10126 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10127 %{
10128   match(Set dst (OrL dst src));
10129   effect(KILL cr);
10130 
10131   format %{ "orq     $dst, $src\t# long" %}
10132   opcode(0x0B);
10133   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10134   ins_pipe(ialu_reg_reg);
10135 %}
10136 
10137 // Use any_RegP to match R15 (TLS register) without spilling.
// dst = dst | (pointer src reinterpreted as a long): the CastP2X is
// absorbed into the match, so the pointer register is used directly as the
// second operand of orq. The encoding is identical to the plain
// register-register orq; only the source operand class differs.
10138 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10139   match(Set dst (OrL dst (CastP2X src)));
10140   effect(KILL cr);
10141 
10142   format %{ "orq     $dst, $src\t# long" %}
10143   opcode(0x0B);
10144   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10145   ins_pipe(ialu_reg_reg);
10146 %}
10147 
10148 
10149 // Or Register with Immediate
// dst = dst | constant on a 64-bit register (orq, opcode 81 /1) with an
// immL32 constant operand. Flags are declared killed.
10150 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10151 %{
10152   match(Set dst (OrL dst src));
10153   effect(KILL cr);
10154 
10155   format %{ "orq     $dst, $src\t# long" %}
10156   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10157   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10158   ins_pipe(ialu_reg);
10159 %}
10160 
10161 // Or Register with Memory
// dst = dst | [src] on a long: the LoadL is folded into the orq as its
// memory operand (opcode 0x0B, wide REX prefix encoding). Costed at 125;
// flags are declared killed.
10162 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10163 %{
10164   match(Set dst (OrL dst (LoadL src)));
10165   effect(KILL cr);
10166 
10167   ins_cost(125);
10168   format %{ "orq     $dst, $src\t# long" %}
10169   opcode(0x0B);
10170   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10171   ins_pipe(ialu_reg_mem);
10172 %}
10173 
10174 // Or Memory with Register
// Read-modify-write form on a long: [dst] = [dst] | src, matched as a
// StoreL of an OrL whose LoadL uses the same memory operand (orq, opcode
// 0x09 with the memory operand as destination). Costed at 150; flags are
// declared killed.
10175 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10176 %{
10177   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10178   effect(KILL cr);
10179 
10180   ins_cost(150);
10181   format %{ "orq     $dst, $src\t# long" %}
10182   opcode(0x09); /* Opcode 09 /r */
10183   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10184   ins_pipe(ialu_mem_reg);
10185 %}
10186 
10187 // Or Memory with Immediate
// Read-modify-write form on a long with an immL32 constant:
// [dst] = [dst] | src (orq, opcode 81 /1). The /1 opcode extension is
// passed as "secondary" to RM_opc_mem. Costed at 125; flags are declared
// killed.
10188 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10189 %{
10190   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10191   effect(KILL cr);
10192 
10193   ins_cost(125);
10194   format %{ "orq     $dst, $src\t# long" %}
10195   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10196   ins_encode(REX_mem_wide(dst), OpcSE(src),
10197              RM_opc_mem(secondary, dst), Con8or32(src));
10198   ins_pipe(ialu_mem_imm);
10199 %}
10200 
10201 // Xor Instructions
10202 // Xor Register with Register
// dst = dst ^ src on 64-bit registers (xorq, opcode 0x33 with the wide REX
// prefix encoding). The flags register is declared killed.
10203 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10204 %{
10205   match(Set dst (XorL dst src));
10206   effect(KILL cr);
10207 
10208   format %{ "xorq    $dst, $src\t# long" %}
10209   opcode(0x33);
10210   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10211   ins_pipe(ialu_reg_reg);
10212 %}
10213 
10214 // Xor Register with Immediate -1
// long dst ^ -1 is a bitwise complement, so it is emitted as a single notq
// through the macro assembler. No flags operand or KILL effect is declared
// for this form, unlike the general xorq-with-immediate instruction.
10215 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10216   match(Set dst (XorL dst imm));  
10217 
10218   format %{ "notq   $dst" %}  
10219   ins_encode %{
10220      __ notq($dst$$Register);
10221   %}
10222   ins_pipe(ialu_reg);
10223 %}
10224 
10225 // Xor Register with Immediate
// dst = dst ^ constant on a 64-bit register (xorq, opcode 81 /6) with an
// immL32 constant operand. Flags are declared killed.
10226 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10227 %{
10228   match(Set dst (XorL dst src));
10229   effect(KILL cr);
10230 
10231   format %{ "xorq    $dst, $src\t# long" %}
10232   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10233   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10234   ins_pipe(ialu_reg);
10235 %}
10236 
10237 // Xor Register with Memory
// dst = dst ^ [src] on a long: the LoadL is folded into the xorq as its
// memory operand (opcode 0x33, wide REX prefix encoding). Costed at 125;
// flags are declared killed.
10238 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10239 %{
10240   match(Set dst (XorL dst (LoadL src)));
10241   effect(KILL cr);
10242 
10243   ins_cost(125);
10244   format %{ "xorq    $dst, $src\t# long" %}
10245   opcode(0x33);
10246   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10247   ins_pipe(ialu_reg_mem);
10248 %}
10249 
10250 // Xor Memory with Register
// Read-modify-write form on a long: [dst] = [dst] ^ src, matched as a
// StoreL of a XorL whose LoadL uses the same memory operand (xorq, opcode
// 0x31 with the memory operand as destination). Costed at 150; flags are
// declared killed.
10251 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10252 %{
10253   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10254   effect(KILL cr);
10255 
10256   ins_cost(150);
10257   format %{ "xorq    $dst, $src\t# long" %}
10258   opcode(0x31); /* Opcode 31 /r */
10259   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10260   ins_pipe(ialu_mem_reg);
10261 %}
10262 
10263 // Xor Memory with Immediate
// Read-modify-write form on a long with an immL32 constant:
// [dst] = [dst] ^ src (xorq, opcode 81 /6). The /6 opcode extension is
// passed as "secondary" to RM_opc_mem. Costed at 125; flags are declared
// killed.
10264 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10265 %{
10266   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10267   effect(KILL cr);
10268 
10269   ins_cost(125);
10270   format %{ "xorq    $dst, $src\t# long" %}
10271   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10272   ins_encode(REX_mem_wide(dst), OpcSE(src),
10273              RM_opc_mem(secondary, dst), Con8or32(src));
10274   ins_pipe(ialu_mem_imm);
10275 %}
10276 
10277 // Convert Int to Boolean
// Conv2B on an int: dst = (src != 0) ? 1 : 0.
// Three-instruction sequence, as listed in the format: testl src,src sets
// the flags, setnz writes the byte result into dst, and movzbl dst,dst
// zero-extends that byte to the full 32-bit register. Flags are declared
// killed.
10278 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10279 %{
10280   match(Set dst (Conv2B src));
10281   effect(KILL cr);
10282 
10283   format %{ "testl   $src, $src\t# ci2b\n\t"
10284             "setnz   $dst\n\t"
10285             "movzbl  $dst, $dst" %}
10286   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10287              setNZ_reg(dst),
10288              REX_reg_breg(dst, dst), // movzbl
10289              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10290   ins_pipe(pipe_slow); // XXX
10291 %}
10292 
10293 // Convert Pointer to Boolean
// Conv2B on a pointer: dst = (src != NULL) ? 1 : 0.
// Same sequence as the int form except that the test is 64 bits wide
// (testq, via REX_reg_reg_wide); setnz and movzbl then build the 0/1 int
// result in dst. Flags are declared killed.
10294 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10295 %{
10296   match(Set dst (Conv2B src));
10297   effect(KILL cr);
10298 
10299   format %{ "testq   $src, $src\t# cp2b\n\t"
10300             "setnz   $dst\n\t"
10301             "movzbl  $dst, $dst" %}
10302   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10303              setNZ_reg(dst),
10304              REX_reg_breg(dst, dst), // movzbl
10305              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10306   ins_pipe(pipe_slow); // XXX
10307 %}
10308 
// CmpLTMask: dst = (p < q) ? -1 : 0 for signed ints.
// Per the format: cmpl p,q sets the flags, setlt/movzbl produce 0 or 1 in
// dst, and the final negl turns 1 into all-ones (-1) while leaving 0
// unchanged. Costed at 400; flags are declared killed.
10309 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10310 %{
10311   match(Set dst (CmpLTMask p q));
10312   effect(KILL cr);
10313 
10314   ins_cost(400); // XXX
10315   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10316             "setlt   $dst\n\t"
10317             "movzbl  $dst, $dst\n\t"
10318             "negl    $dst" %}
10319   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10320              setLT_reg(dst),
10321              REX_reg_breg(dst, dst), // movzbl
10322              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10323              neg_reg(dst));
10324   ins_pipe(pipe_slow);
10325 %}
10326 
// CmpLTMask against zero: (dst < 0) ? -1 : 0 is the sign bit of dst
// replicated across the word, so a single arithmetic right shift by 31
// (sarl, C1 /7 with immediate 0x1F) replaces the compare/set/negate
// sequence. Costed at 100; flags are declared killed.
10327 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10328 %{
10329   match(Set dst (CmpLTMask dst zero));
10330   effect(KILL cr);
10331 
10332   ins_cost(100); // XXX
10333   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10334   opcode(0xC1, 0x7);  /* C1 /7 ib */
10335   ins_encode(reg_opc_imm(dst, 0x1F));
10336   ins_pipe(ialu_reg);
10337 %}
10338 
10339 
// Conditional add: p = (p - q) + ((p < q) ? y : 0), matched as
// AddI (AndI (CmpLTMask p q) y) (SubI p q).
// Per the format: subl p,q performs the subtraction, sbbl tmp,tmp turns
// the resulting borrow into 0 / -1 in tmp, that mask is ANDed with y and
// the result added back into p. tmp is a TEMP register; flags are declared
// killed.
// NOTE(review): the bytes are produced by enc_cmpLTP, defined elsewhere;
// the format string is the only description of the sequence visible here.
10340 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10341                          rRegI tmp,
10342                          rFlagsReg cr)
10343 %{
10344   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10345   effect(TEMP tmp, KILL cr);
10346 
10347   ins_cost(400); // XXX
10348   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10349             "sbbl    $tmp, $tmp\n\t"
10350             "andl    $tmp, $y\n\t"
10351             "addl    $p, $tmp" %}
10352   ins_encode(enc_cmpLTP(p, q, y, tmp));
10353   ins_pipe(pipe_cmplt);
10354 %}
10355 
10356 /* If I enable this, I encourage spilling in the inner loop of compress.
10357 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10358 %{
10359   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10360   effect( TEMP tmp, KILL cr );
10361   ins_cost(400);
10362 
10363   format %{ "SUB    $p,$q\n\t"
10364             "SBB    RCX,RCX\n\t"
10365             "AND    RCX,$y\n\t"
10366             "ADD    $p,RCX" %}
10367   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10368 %}
10369 */
10370 
10371 //---------- FP Instructions------------------------------------------------
10372 
// Float compare of two registers into the rFlagsRegU flags operand.
// ucomiss (0F 2E) is followed by the cmpfp_fixup encoding. Per the format
// text, the fix-up is skipped when PF is clear (jnp); when a NaN was seen
// the flags are pushed, masked with 0xffffff2b (keeps CF, clears PF, AF, ZF
// and SF) and popped back, so an unordered result reads as "below".
// Costed at 145 because of the extra sequence.
10373 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10374 %{
10375   match(Set cr (CmpF src1 src2));
10376 
10377   ins_cost(145);
10378   format %{ "ucomiss $src1, $src2\n\t"
10379             "jnp,s   exit\n\t"
10380             "pushfq\t# saw NaN, set CF\n\t"
10381             "andq    [rsp], #0xffffff2b\n\t"
10382             "popfq\n"
10383     "exit:   nop\t# avoid branch to branch" %}
10384   opcode(0x0F, 0x2E);
10385   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10386              cmpfp_fixup);
10387   ins_pipe(pipe_slow);
10388 %}
10389 
// Float compare of two registers into the rFlagsRegUCF flags operand: a
// bare ucomiss with no NaN fix-up sequence.
// The cost is 100, matching every other *CF compare in this group
// (cmpF_cc_memCF, cmpF_cc_immCF and the three cmpD_cc_*CF forms); the
// previous value of 145 was the cost of the variants that carry the extra
// pushfq/andq/popfq fix-up, which this instruction does not emit.
// NOTE(review): presumably consumers of rFlagsRegUCF only test conditions
// that do not need the fix-up - confirm against the operand definition.
10390 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10391   match(Set cr (CmpF src1 src2));
10392 
10393   ins_cost(100);
10394   format %{ "ucomiss $src1, $src2" %}
10395   ins_encode %{
10396     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10397   %}
10398   ins_pipe(pipe_slow);
10399 %}
10400 
// Float compare of a register with a float loaded from memory, into the
// rFlagsRegU flags operand. The LoadF is folded into ucomiss (0F 2E) as its
// memory operand, and the cmpfp_fixup encoding follows. Per the format
// text, when a NaN was seen (PF set) the flags are pushed, masked with
// 0xffffff2b (keeps CF, clears PF, AF, ZF and SF) and popped back.
10401 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10402 %{
10403   match(Set cr (CmpF src1 (LoadF src2)));
10404 
10405   ins_cost(145);
10406   format %{ "ucomiss $src1, $src2\n\t"
10407             "jnp,s   exit\n\t"
10408             "pushfq\t# saw NaN, set CF\n\t"
10409             "andq    [rsp], #0xffffff2b\n\t"
10410             "popfq\n"
10411     "exit:   nop\t# avoid branch to branch" %}
10412   opcode(0x0F, 0x2E);
10413   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10414              cmpfp_fixup);
10415   ins_pipe(pipe_slow);
10416 %}
10417 
// Float compare of a register with a float loaded from memory, into the
// rFlagsRegUCF flags operand: ucomiss (0F 2E) with the LoadF folded in and
// no NaN fix-up sequence. Costed at 100.
10418 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10419   match(Set cr (CmpF src1 (LoadF src2)));
10420 
10421   ins_cost(100);
10422   format %{ "ucomiss $src1, $src2" %}
10423   opcode(0x0F, 0x2E);
10424   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10425   ins_pipe(pipe_slow);
10426 %}
10427 
// Float compare of a register with a float constant, into the rFlagsRegU
// flags operand. The constant is referenced through the load_immF
// encoding, i.e. as a memory operand of ucomiss (0F 2E), so the format
// shows it in brackets like cmpD_cc_imm and cmpF_imm do. The cmpfp_fixup
// encoding follows; per the format text, when a NaN was seen (PF set) the
// flags are pushed, masked with 0xffffff2b (keeps CF, clears PF, AF, ZF and
// SF) and popped back.
10428 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10429 %{
10430   match(Set cr (CmpF src1 src2));
10431 
10432   ins_cost(145);
10433   format %{ "ucomiss $src1, [$src2]\n\t"
10434             "jnp,s   exit\n\t"
10435             "pushfq\t# saw NaN, set CF\n\t"
10436             "andq    [rsp], #0xffffff2b\n\t"
10437             "popfq\n"
10438     "exit:   nop\t# avoid branch to branch" %}
10439   opcode(0x0F, 0x2E);
10440   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10441              cmpfp_fixup);
10442   ins_pipe(pipe_slow);
10443 %}
10444 
// Float compare of a register with a float constant, into the rFlagsRegUCF
// flags operand: ucomiss (0F 2E) with the constant referenced through
// load_immF and no NaN fix-up sequence. The format shows the constant in
// brackets, consistent with cmpD_cc_immCF. Costed at 100.
10445 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10446   match(Set cr (CmpF src1 src2));
10447 
10448   ins_cost(100);
10449   format %{ "ucomiss $src1, [$src2]" %}
10450   opcode(0x0F, 0x2E);
10451   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10452   ins_pipe(pipe_slow);
10453 %}
10454 
// Double compare of two registers into the rFlagsRegU flags operand.
// ucomisd is 66 0F 2E; the encoding lists OpcP before the REX prefix class
// and OpcS/OpcT after it. The cmpfp_fixup encoding follows; per the format
// text, when a NaN was seen (PF set) the flags are pushed, masked with
// 0xffffff2b (keeps CF, clears PF, AF, ZF and SF) and popped back.
10455 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10456 %{
10457   match(Set cr (CmpD src1 src2));
10458 
10459   ins_cost(145);
10460   format %{ "ucomisd $src1, $src2\n\t"
10461             "jnp,s   exit\n\t"
10462             "pushfq\t# saw NaN, set CF\n\t"
10463             "andq    [rsp], #0xffffff2b\n\t"
10464             "popfq\n"
10465     "exit:   nop\t# avoid branch to branch" %}
10466   opcode(0x66, 0x0F, 0x2E);
10467   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10468              cmpfp_fixup);
10469   ins_pipe(pipe_slow);
10470 %}
10471 
// Double compare of two registers into the rFlagsRegUCF flags operand: a
// bare ucomisd emitted through the macro assembler, with no NaN fix-up
// sequence. Costed at 100.
// The format string no longer carries the stray trailing word "test",
// which was not part of the instruction and only polluted the printed
// assembly.
10472 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10473   match(Set cr (CmpD src1 src2));
10474 
10475   ins_cost(100);
10476   format %{ "ucomisd $src1, $src2" %}
10477   ins_encode %{
10478     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10479   %}
10480   ins_pipe(pipe_slow);
10481 %}
10482 
// Double compare of a register with a double loaded from memory, into the
// rFlagsRegU flags operand. The LoadD is folded into ucomisd (66 0F 2E) as
// its memory operand and the cmpfp_fixup encoding follows. Per the format
// text, when a NaN was seen (PF set) the flags are pushed, masked with
// 0xffffff2b (keeps CF, clears PF, AF, ZF and SF) and popped back.
10483 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10484 %{
10485   match(Set cr (CmpD src1 (LoadD src2)));
10486 
10487   ins_cost(145);
10488   format %{ "ucomisd $src1, $src2\n\t"
10489             "jnp,s   exit\n\t"
10490             "pushfq\t# saw NaN, set CF\n\t"
10491             "andq    [rsp], #0xffffff2b\n\t"
10492             "popfq\n"
10493     "exit:   nop\t# avoid branch to branch" %}
10494   opcode(0x66, 0x0F, 0x2E);
10495   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10496              cmpfp_fixup);
10497   ins_pipe(pipe_slow);
10498 %}
10499 
// Double compare of a register with a double loaded from memory, into the
// rFlagsRegUCF flags operand: ucomisd (66 0F 2E) with the LoadD folded in
// and no NaN fix-up sequence. Costed at 100.
10500 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10501   match(Set cr (CmpD src1 (LoadD src2)));
10502 
10503   ins_cost(100);
10504   format %{ "ucomisd $src1, $src2" %}
10505   opcode(0x66, 0x0F, 0x2E);
10506   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10507   ins_pipe(pipe_slow);
10508 %}
10509 
// Double compare of a register with a double constant, into the rFlagsRegU
// flags operand. The constant is referenced through the load_immD encoding
// (shown in brackets in the format) as the memory operand of ucomisd
// (66 0F 2E), and the cmpfp_fixup encoding follows. Per the format text,
// when a NaN was seen (PF set) the flags are pushed, masked with 0xffffff2b
// (keeps CF, clears PF, AF, ZF and SF) and popped back.
10510 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10511 %{
10512   match(Set cr (CmpD src1 src2));
10513 
10514   ins_cost(145);
10515   format %{ "ucomisd $src1, [$src2]\n\t"
10516             "jnp,s   exit\n\t"
10517             "pushfq\t# saw NaN, set CF\n\t"
10518             "andq    [rsp], #0xffffff2b\n\t"
10519             "popfq\n"
10520     "exit:   nop\t# avoid branch to branch" %}
10521   opcode(0x66, 0x0F, 0x2E);
10522   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10523              cmpfp_fixup);
10524   ins_pipe(pipe_slow);
10525 %}
10526 
// Double compare of a register with a double constant, into the
// rFlagsRegUCF flags operand: ucomisd (66 0F 2E) with the constant
// referenced through load_immD and no NaN fix-up sequence. Costed at 100.
10527 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10528   match(Set cr (CmpD src1 src2));
10529 
10530   ins_cost(100);
10531   format %{ "ucomisd $src1, [$src2]" %}
10532   opcode(0x66, 0x0F, 0x2E);
10533   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10534   ins_pipe(pipe_slow);
10535 %}
10536 
10537 // Compare into -1,0,1
// Three-way float compare of two registers (CmpF3) into an int register.
// Per the format: after ucomiss, dst is preset to -1; an unordered result
// (jp) or "below" (jb) keeps -1, otherwise setne/movzbl leave 1 when the
// operands differ and 0 when they are equal. The tail after ucomiss is
// emitted by the cmpfp3 encoding. Flags are declared killed.
10538 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10539 %{
10540   match(Set dst (CmpF3 src1 src2));
10541   effect(KILL cr);
10542 
10543   ins_cost(275);
10544   format %{ "ucomiss $src1, $src2\n\t"
10545             "movl    $dst, #-1\n\t"
10546             "jp,s    done\n\t"
10547             "jb,s    done\n\t"
10548             "setne   $dst\n\t"
10549             "movzbl  $dst, $dst\n"
10550     "done:" %}
10551 
10552   opcode(0x0F, 0x2E);
10553   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10554              cmpfp3(dst));
10555   ins_pipe(pipe_slow);
10556 %}
10557 
10558 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register with a float loaded from
// memory; the LoadF is folded into ucomiss as its memory operand.
// Per the format: dst is preset to -1 and kept for unordered (jp) or
// "below" (jb); otherwise setne/movzbl leave 1 for not-equal and 0 for
// equal. Flags are declared killed.
10559 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10560 %{
10561   match(Set dst (CmpF3 src1 (LoadF src2)));
10562   effect(KILL cr);
10563 
10564   ins_cost(275);
10565   format %{ "ucomiss $src1, $src2\n\t"
10566             "movl    $dst, #-1\n\t"
10567             "jp,s    done\n\t"
10568             "jb,s    done\n\t"
10569             "setne   $dst\n\t"
10570             "movzbl  $dst, $dst\n"
10571     "done:" %}
10572 
10573   opcode(0x0F, 0x2E);
10574   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10575              cmpfp3(dst));
10576   ins_pipe(pipe_slow);
10577 %}
10578 
10579 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register with a float constant; the
// constant is referenced through the load_immF encoding (shown in brackets
// in the format). Per the format: dst is preset to -1 and kept for
// unordered (jp) or "below" (jb); otherwise setne/movzbl leave 1 for
// not-equal and 0 for equal. Flags are declared killed.
10580 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10581 %{
10582   match(Set dst (CmpF3 src1 src2));
10583   effect(KILL cr);
10584 
10585   ins_cost(275);
10586   format %{ "ucomiss $src1, [$src2]\n\t"
10587             "movl    $dst, #-1\n\t"
10588             "jp,s    done\n\t"
10589             "jb,s    done\n\t"
10590             "setne   $dst\n\t"
10591             "movzbl  $dst, $dst\n"
10592     "done:" %}
10593 
10594   opcode(0x0F, 0x2E);
10595   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10596              cmpfp3(dst));
10597   ins_pipe(pipe_slow);
10598 %}
10599 
10600 // Compare into -1,0,1
// Three-way double compare of two registers (CmpD3) into an int register.
// Per the format: after ucomisd (66 0F 2E), dst is preset to -1; an
// unordered result (jp) or "below" (jb) keeps -1, otherwise setne/movzbl
// leave 1 when the operands differ and 0 when they are equal. The tail is
// emitted by the cmpfp3 encoding. Flags are declared killed.
10601 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10602 %{
10603   match(Set dst (CmpD3 src1 src2));
10604   effect(KILL cr);
10605 
10606   ins_cost(275);
10607   format %{ "ucomisd $src1, $src2\n\t"
10608             "movl    $dst, #-1\n\t"
10609             "jp,s    done\n\t"
10610             "jb,s    done\n\t"
10611             "setne   $dst\n\t"
10612             "movzbl  $dst, $dst\n"
10613     "done:" %}
10614 
10615   opcode(0x66, 0x0F, 0x2E);
10616   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10617              cmpfp3(dst));
10618   ins_pipe(pipe_slow);
10619 %}
10620 
10621 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register with a double loaded from
// memory; the LoadD is folded into ucomisd as its memory operand.
// Per the format: dst is preset to -1 and kept for unordered (jp) or
// "below" (jb); otherwise setne/movzbl leave 1 for not-equal and 0 for
// equal. Flags are declared killed.
10622 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10623 %{
10624   match(Set dst (CmpD3 src1 (LoadD src2)));
10625   effect(KILL cr);
10626 
10627   ins_cost(275);
10628   format %{ "ucomisd $src1, $src2\n\t"
10629             "movl    $dst, #-1\n\t"
10630             "jp,s    done\n\t"
10631             "jb,s    done\n\t"
10632             "setne   $dst\n\t"
10633             "movzbl  $dst, $dst\n"
10634     "done:" %}
10635 
10636   opcode(0x66, 0x0F, 0x2E);
10637   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10638              cmpfp3(dst));
10639   ins_pipe(pipe_slow);
10640 %}
10641 
10642 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of a register against a double
      // constant, shown in the format as a memory reference [$src2].
      // Result: -1 for unordered/below, else 0 or 1 (per the format text).
10643 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10644 %{
10645   match(Set dst (CmpD3 src1 src2));
10646   effect(KILL cr);
10647 
10648   ins_cost(275);
10649   format %{ "ucomisd $src1, [$src2]\n\t"
10650             "movl    $dst, #-1\n\t"
10651             "jp,s    done\n\t"
10652             "jb,s    done\n\t"
10653             "setne   $dst\n\t"
10654             "movzbl  $dst, $dst\n"
10655     "done:" %}
10656 
10657   opcode(0x66, 0x0F, 0x2E);
        // load_immD and cmpfp3 are defined elsewhere; presumably they address
        // the constant in memory and emit the -1/0/1 tail -- confirm.
10658   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10659              cmpfp3(dst));
10660   ins_pipe(pipe_slow);
10661 %}
10662 
10663 instruct addF_reg(regF dst, regF src)
10664 %{
        // Float add, register-register form: dst = dst + src (AddF).
        // Opcode bytes F3 0F 58, printed as addss; F3 (OpcP) precedes the REX prefix.
10665   match(Set dst (AddF dst src));
10666 
10667   format %{ "addss   $dst, $src" %}
10668   ins_cost(150); // XXX
10669   opcode(0xF3, 0x0F, 0x58);
10670   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10671   ins_pipe(pipe_slow);
10672 %}
10673 
10674 instruct addF_mem(regF dst, memory src)
10675 %{
        // Float add with the second operand folded from memory:
        // dst = dst + LoadF(src). Same opcode bytes as the register form.
10676   match(Set dst (AddF dst (LoadF src)));
10677 
10678   format %{ "addss   $dst, $src" %}
10679   ins_cost(150); // XXX
10680   opcode(0xF3, 0x0F, 0x58);
10681   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10682   ins_pipe(pipe_slow);
10683 %}
10684 
10685 instruct addF_imm(regF dst, immF src)
10686 %{
        // Float add of a float constant: dst = dst + src (immF).
        // The format prints the constant as a memory reference [$src];
        // load_immF is defined elsewhere and presumably addresses it -- confirm.
10687   match(Set dst (AddF dst src));
10688 
10689   format %{ "addss   $dst, [$src]" %}
10690   ins_cost(150); // XXX
10691   opcode(0xF3, 0x0F, 0x58);
10692   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10693   ins_pipe(pipe_slow);
10694 %}
10695 
10696 instruct addD_reg(regD dst, regD src)
10697 %{
        // Double add, register-register form: dst = dst + src (AddD).
        // Opcode bytes F2 0F 58, printed as addsd; F2 (OpcP) precedes the REX prefix.
10698   match(Set dst (AddD dst src));
10699 
10700   format %{ "addsd   $dst, $src" %}
10701   ins_cost(150); // XXX
10702   opcode(0xF2, 0x0F, 0x58);
10703   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10704   ins_pipe(pipe_slow);
10705 %}
10706 
10707 instruct addD_mem(regD dst, memory src)
10708 %{
        // Double add with the second operand folded from memory:
        // dst = dst + LoadD(src). Same opcode bytes as the register form.
10709   match(Set dst (AddD dst (LoadD src)));
10710 
10711   format %{ "addsd   $dst, $src" %}
10712   ins_cost(150); // XXX
10713   opcode(0xF2, 0x0F, 0x58);
10714   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10715   ins_pipe(pipe_slow);
10716 %}
10717 
10718 instruct addD_imm(regD dst, immD src)
10719 %{
        // Double add of a double constant: dst = dst + src (immD).
        // The format prints the constant as a memory reference [$src];
        // load_immD is defined elsewhere and presumably addresses it -- confirm.
10720   match(Set dst (AddD dst src));
10721 
10722   format %{ "addsd   $dst, [$src]" %}
10723   ins_cost(150); // XXX
10724   opcode(0xF2, 0x0F, 0x58);
10725   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10726   ins_pipe(pipe_slow);
10727 %}
10728 
10729 instruct subF_reg(regF dst, regF src)
10730 %{
        // Float subtract, register-register form: dst = dst - src (SubF).
        // Opcode bytes F3 0F 5C, printed as subss; F3 (OpcP) precedes the REX prefix.
10731   match(Set dst (SubF dst src));
10732 
10733   format %{ "subss   $dst, $src" %}
10734   ins_cost(150); // XXX
10735   opcode(0xF3, 0x0F, 0x5C);
10736   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10737   ins_pipe(pipe_slow);
10738 %}
10739 
10740 instruct subF_mem(regF dst, memory src)
10741 %{
        // Float subtract with the second operand folded from memory:
        // dst = dst - LoadF(src). Same opcode bytes as the register form.
10742   match(Set dst (SubF dst (LoadF src)));
10743 
10744   format %{ "subss   $dst, $src" %}
10745   ins_cost(150); // XXX
10746   opcode(0xF3, 0x0F, 0x5C);
10747   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10748   ins_pipe(pipe_slow);
10749 %}
10750 
10751 instruct subF_imm(regF dst, immF src)
10752 %{
        // Float subtract of a float constant: dst = dst - src (immF).
        // The format prints the constant as a memory reference [$src];
        // load_immF is defined elsewhere and presumably addresses it -- confirm.
10753   match(Set dst (SubF dst src));
10754 
10755   format %{ "subss   $dst, [$src]" %}
10756   ins_cost(150); // XXX
10757   opcode(0xF3, 0x0F, 0x5C);
10758   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10759   ins_pipe(pipe_slow);
10760 %}
10761 
10762 instruct subD_reg(regD dst, regD src)
10763 %{
        // Double subtract, register-register form: dst = dst - src (SubD).
        // Opcode bytes F2 0F 5C, printed as subsd; F2 (OpcP) precedes the REX prefix.
10764   match(Set dst (SubD dst src));
10765 
10766   format %{ "subsd   $dst, $src" %}
10767   ins_cost(150); // XXX
10768   opcode(0xF2, 0x0F, 0x5C);
10769   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10770   ins_pipe(pipe_slow);
10771 %}
10772 
10773 instruct subD_mem(regD dst, memory src)
10774 %{
        // Double subtract with the second operand folded from memory:
        // dst = dst - LoadD(src). Same opcode bytes as the register form.
10775   match(Set dst (SubD dst (LoadD src)));
10776 
10777   format %{ "subsd   $dst, $src" %}
10778   ins_cost(150); // XXX
10779   opcode(0xF2, 0x0F, 0x5C);
10780   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10781   ins_pipe(pipe_slow);
10782 %}
10783 
10784 instruct subD_imm(regD dst, immD src)
10785 %{
        // Double subtract of a double constant: dst = dst - src (immD).
        // The format prints the constant as a memory reference [$src];
        // load_immD is defined elsewhere and presumably addresses it -- confirm.
10786   match(Set dst (SubD dst src));
10787 
10788   format %{ "subsd   $dst, [$src]" %}
10789   ins_cost(150); // XXX
10790   opcode(0xF2, 0x0F, 0x5C);
10791   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10792   ins_pipe(pipe_slow);
10793 %}
10794 
10795 instruct mulF_reg(regF dst, regF src)
10796 %{
        // Float multiply, register-register form: dst = dst * src (MulF).
        // Opcode bytes F3 0F 59, printed as mulss; F3 (OpcP) precedes the REX prefix.
10797   match(Set dst (MulF dst src));
10798 
10799   format %{ "mulss   $dst, $src" %}
10800   ins_cost(150); // XXX
10801   opcode(0xF3, 0x0F, 0x59);
10802   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10803   ins_pipe(pipe_slow);
10804 %}
10805 
10806 instruct mulF_mem(regF dst, memory src)
10807 %{
        // Float multiply with the second operand folded from memory:
        // dst = dst * LoadF(src). Same opcode bytes as the register form.
10808   match(Set dst (MulF dst (LoadF src)));
10809 
10810   format %{ "mulss   $dst, $src" %}
10811   ins_cost(150); // XXX
10812   opcode(0xF3, 0x0F, 0x59);
10813   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10814   ins_pipe(pipe_slow);
10815 %}
10816 
10817 instruct mulF_imm(regF dst, immF src)
10818 %{
        // Float multiply by a float constant: dst = dst * src (immF).
        // The format prints the constant as a memory reference [$src];
        // load_immF is defined elsewhere and presumably addresses it -- confirm.
10819   match(Set dst (MulF dst src));
10820 
10821   format %{ "mulss   $dst, [$src]" %}
10822   ins_cost(150); // XXX
10823   opcode(0xF3, 0x0F, 0x59);
10824   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10825   ins_pipe(pipe_slow);
10826 %}
10827 
10828 instruct mulD_reg(regD dst, regD src)
10829 %{
        // Double multiply, register-register form: dst = dst * src (MulD).
        // Opcode bytes F2 0F 59, printed as mulsd; F2 (OpcP) precedes the REX prefix.
10830   match(Set dst (MulD dst src));
10831 
10832   format %{ "mulsd   $dst, $src" %}
10833   ins_cost(150); // XXX
10834   opcode(0xF2, 0x0F, 0x59);
10835   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10836   ins_pipe(pipe_slow);
10837 %}
10838 
10839 instruct mulD_mem(regD dst, memory src)
10840 %{
        // Double multiply with the second operand folded from memory:
        // dst = dst * LoadD(src). Same opcode bytes as the register form.
10841   match(Set dst (MulD dst (LoadD src)));
10842 
10843   format %{ "mulsd   $dst, $src" %}
10844   ins_cost(150); // XXX
10845   opcode(0xF2, 0x0F, 0x59);
10846   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10847   ins_pipe(pipe_slow);
10848 %}
10849 
10850 instruct mulD_imm(regD dst, immD src)
10851 %{
        // Double multiply by a double constant: dst = dst * src (immD).
        // The format prints the constant as a memory reference [$src];
        // load_immD is defined elsewhere and presumably addresses it -- confirm.
10852   match(Set dst (MulD dst src));
10853 
10854   format %{ "mulsd   $dst, [$src]" %}
10855   ins_cost(150); // XXX
10856   opcode(0xF2, 0x0F, 0x59);
10857   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10858   ins_pipe(pipe_slow);
10859 %}
10860 
10861 instruct divF_reg(regF dst, regF src)
10862 %{
        // Float divide, register-register form: dst = dst / src (DivF).
        // Opcode bytes F3 0F 5E, printed as divss; F3 (OpcP) precedes the REX prefix.
10863   match(Set dst (DivF dst src));
10864 
10865   format %{ "divss   $dst, $src" %}
10866   ins_cost(150); // XXX
10867   opcode(0xF3, 0x0F, 0x5E);
10868   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10869   ins_pipe(pipe_slow);
10870 %}
10871 
10872 instruct divF_mem(regF dst, memory src)
10873 %{
        // Float divide with the divisor folded from memory:
        // dst = dst / LoadF(src). Same opcode bytes as the register form.
10874   match(Set dst (DivF dst (LoadF src)));
10875 
10876   format %{ "divss   $dst, $src" %}
10877   ins_cost(150); // XXX
10878   opcode(0xF3, 0x0F, 0x5E);
10879   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10880   ins_pipe(pipe_slow);
10881 %}
10882 
10883 instruct divF_imm(regF dst, immF src)
10884 %{
        // Float divide by a float constant: dst = dst / src (immF).
        // The format prints the constant as a memory reference [$src];
        // load_immF is defined elsewhere and presumably addresses it -- confirm.
10885   match(Set dst (DivF dst src));
10886 
10887   format %{ "divss   $dst, [$src]" %}
10888   ins_cost(150); // XXX
10889   opcode(0xF3, 0x0F, 0x5E);
10890   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10891   ins_pipe(pipe_slow);
10892 %}
10893 
10894 instruct divD_reg(regD dst, regD src)
10895 %{
        // Double divide, register-register form: dst = dst / src (DivD).
        // Opcode bytes F2 0F 5E, printed as divsd; F2 (OpcP) precedes the REX prefix.
10896   match(Set dst (DivD dst src));
10897 
10898   format %{ "divsd   $dst, $src" %}
10899   ins_cost(150); // XXX
10900   opcode(0xF2, 0x0F, 0x5E);
10901   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10902   ins_pipe(pipe_slow);
10903 %}
10904 
10905 instruct divD_mem(regD dst, memory src)
10906 %{
        // Double divide with the divisor folded from memory:
        // dst = dst / LoadD(src). Same opcode bytes as the register form.
10907   match(Set dst (DivD dst (LoadD src)));
10908 
10909   format %{ "divsd   $dst, $src" %}
10910   ins_cost(150); // XXX
10911   opcode(0xF2, 0x0F, 0x5E);
10912   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10913   ins_pipe(pipe_slow);
10914 %}
10915 
10916 instruct divD_imm(regD dst, immD src)
10917 %{
        // Double divide by a double constant: dst = dst / src (immD).
        // The format prints the constant as a memory reference [$src];
        // load_immD is defined elsewhere and presumably addresses it -- confirm.
10918   match(Set dst (DivD dst src));
10919 
10920   format %{ "divsd   $dst, [$src]" %}
10921   ins_cost(150); // XXX
10922   opcode(0xF2, 0x0F, 0x5E);
10923   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10924   ins_pipe(pipe_slow);
10925 %}
10926 
10927 instruct sqrtF_reg(regF dst, regF src)
10928 %{
        // Float square root. The rule matches the whole ideal chain
        // ConvD2F(SqrtD(ConvF2D src)) and replaces it with the single
        // instruction printed as sqrtss (opcode bytes F3 0F 51).
10929   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10930 
10931   format %{ "sqrtss  $dst, $src" %}
10932   ins_cost(150); // XXX
10933   opcode(0xF3, 0x0F, 0x51);
10934   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10935   ins_pipe(pipe_slow);
10936 %}
10937 
10938 instruct sqrtF_mem(regF dst, memory src)
10939 %{
        // Float square root with the operand folded from memory: matches
        // ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits one sqrtss with a
        // memory source (opcode bytes F3 0F 51).
10940   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10941 
10942   format %{ "sqrtss  $dst, $src" %}
10943   ins_cost(150); // XXX
10944   opcode(0xF3, 0x0F, 0x51);
10945   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10946   ins_pipe(pipe_slow);
10947 %}
10948 
10949 instruct sqrtF_imm(regF dst, immF src)
10950 %{
        // Float square root of a float constant: matches
        // ConvD2F(SqrtD(ConvF2D src)) with an immF operand. The format prints
        // the constant as [$src]; load_immF is defined elsewhere and
        // presumably addresses it in memory -- confirm.
10951   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10952 
10953   format %{ "sqrtss  $dst, [$src]" %}
10954   ins_cost(150); // XXX
10955   opcode(0xF3, 0x0F, 0x51);
10956   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10957   ins_pipe(pipe_slow);
10958 %}
10959 
10960 instruct sqrtD_reg(regD dst, regD src)
10961 %{
        // Double square root, register-register form: dst = SqrtD(src).
        // Opcode bytes F2 0F 51, printed as sqrtsd.
10962   match(Set dst (SqrtD src));
10963 
10964   format %{ "sqrtsd  $dst, $src" %}
10965   ins_cost(150); // XXX
10966   opcode(0xF2, 0x0F, 0x51);
10967   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10968   ins_pipe(pipe_slow);
10969 %}
10970 
10971 instruct sqrtD_mem(regD dst, memory src)
10972 %{
        // Double square root with the operand folded from memory:
        // dst = SqrtD(LoadD(src)). Same opcode bytes as the register form.
10973   match(Set dst (SqrtD (LoadD src)));
10974 
10975   format %{ "sqrtsd  $dst, $src" %}
10976   ins_cost(150); // XXX
10977   opcode(0xF2, 0x0F, 0x51);
10978   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10979   ins_pipe(pipe_slow);
10980 %}
10981 
10982 instruct sqrtD_imm(regD dst, immD src)
10983 %{
        // Double square root of a double constant: dst = SqrtD(src) with an
        // immD operand. The format prints the constant as [$src]; load_immD is
        // defined elsewhere and presumably addresses it in memory -- confirm.
10984   match(Set dst (SqrtD src));
10985 
10986   format %{ "sqrtsd  $dst, [$src]" %}
10987   ins_cost(150); // XXX
10988   opcode(0xF2, 0x0F, 0x51);
10989   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10990   ins_pipe(pipe_slow);
10991 %}
10992 
10993 instruct absF_reg(regF dst)
10994 %{
        // In-place float absolute value (AbsF). The format describes an andps
        // with a 0x7fffffff mask, i.e. clearing the sign bit; the actual bytes
        // come from absF_encoding, which is defined elsewhere in this file.
10995   match(Set dst (AbsF dst));
10996 
10997   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10998   ins_encode(absF_encoding(dst));
10999   ins_pipe(pipe_slow);
11000 %}
11001 
11002 instruct absD_reg(regD dst)
11003 %{
        // In-place double absolute value (AbsD). The format describes an andpd
        // with a 0x7fffffffffffffff mask, i.e. clearing the sign bit; the bytes
        // come from absD_encoding, which is defined elsewhere in this file.
11004   match(Set dst (AbsD dst));
11005 
11006   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11007             "# abs double by sign masking" %}
11008   ins_encode(absD_encoding(dst));
11009   ins_pipe(pipe_slow);
11010 %}
11011 
11012 instruct negF_reg(regF dst)
11013 %{
        // In-place float negation (NegF). The format describes an xorps with a
        // 0x80000000 mask, i.e. flipping the sign bit; the bytes come from
        // negF_encoding, which is defined elsewhere in this file.
11014   match(Set dst (NegF dst));
11015 
11016   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11017   ins_encode(negF_encoding(dst));
11018   ins_pipe(pipe_slow);
11019 %}
11020 
11021 instruct negD_reg(regD dst)
11022 %{
        // In-place double negation (NegD). The format describes an xorpd with a
        // 0x8000000000000000 mask, i.e. flipping the sign bit; the bytes come
        // from negD_encoding, which is defined elsewhere in this file.
11023   match(Set dst (NegD dst));
11024 
11025   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11026             "# neg double by sign flipping" %}
11027   ins_encode(negD_encoding(dst));
11028   ins_pipe(pipe_slow);
11029 %}
11030 
11031 // -----------Trig and Transcendental Instructions-----------------------------
11032 instruct cosD_reg(regD dst) %{
        // In-place double cosine (CosD). The two opcode bytes D9 FF (x87 fcos)
        // are bracketed by Push_SrcXD / Push_ResultXD, which are defined
        // elsewhere; presumably they move dst onto and back off the x87 stack
        // -- confirm against their enc_class definitions.
11033   match(Set dst (CosD dst));
11034 
11035   format %{ "dcos   $dst\n\t" %}
11036   opcode(0xD9, 0xFF);
11037   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11038   ins_pipe( pipe_slow );
11039 %}
11040 
11041 instruct sinD_reg(regD dst) %{
        // In-place double sine (SinD). The two opcode bytes D9 FE (x87 fsin)
        // are bracketed by Push_SrcXD / Push_ResultXD, which are defined
        // elsewhere; presumably they move dst onto and back off the x87 stack
        // -- confirm against their enc_class definitions.
11042   match(Set dst (SinD dst));
11043 
11044   format %{ "dsin   $dst\n\t" %}
11045   opcode(0xD9, 0xFE);
11046   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11047   ins_pipe( pipe_slow );
11048 %}
11049 
11050 instruct tanD_reg(regD dst) %{
        // In-place double tangent (TanD). Emits fptan followed by "fstp st"
        // (bytes given inline below) between Push_SrcXD and Push_ResultXD.
        // NOTE(review): the fstp presumably discards the extra 1.0 that fptan
        // pushes, leaving the tangent for Push_ResultXD -- confirm.
11051   match(Set dst (TanD dst));
11052 
11053   format %{ "dtan   $dst\n\t" %}
11054   ins_encode( Push_SrcXD(dst),
11055               Opcode(0xD9), Opcode(0xF2),   //fptan
11056               Opcode(0xDD), Opcode(0xD8),   //fstp st
11057               Push_ResultXD(dst) );
11058   ins_pipe( pipe_slow );
11059 %}
11060 
11061 instruct log10D_reg(regD dst) %{
        // In-place base-10 logarithm (Log10D). The constant is pushed first
        // (fldlg2), then the operand (Push_SrcXD), so that fyl2x multiplies
        // the constant by log_2(x); Push_ResultXD then returns the result.
11062   // The source and result Double operands in XMM registers
11063   match(Set dst (Log10D dst));
11064   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11065   // fyl2x        ; compute log_10(2) * log_2(x)
11066   format %{ "fldlg2\t\t\t#Log10\n\t"
11067             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11068          %}
11069    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11070               Push_SrcXD(dst),
11071               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11072               Push_ResultXD(dst));
11073 
11074   ins_pipe( pipe_slow );
11075 %}
11076 
11077 instruct logD_reg(regD dst) %{
        // In-place natural logarithm (LogD). The constant is pushed first
        // (fldln2), then the operand (Push_SrcXD), so that fyl2x multiplies
        // the constant by log_2(x); Push_ResultXD then returns the result.
11078   // The source and result Double operands in XMM registers
11079   match(Set dst (LogD dst));
11080   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11081   // fyl2x        ; compute log_e(2) * log_2(x)
11082   format %{ "fldln2\t\t\t#Log_e\n\t"
11083             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11084          %}
11085   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11086               Push_SrcXD(dst),
11087               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11088               Push_ResultXD(dst));
11089   ins_pipe( pipe_slow );
11090 %}
11091 
11092 
11093 
11094 //----------Arithmetic Conversion Instructions---------------------------------
11095 
11096 instruct roundFloat_nop(regF dst)
11097 %{
        // RoundFloat is matched in place with zero cost and an empty encoding,
        // so no machine code is emitted for it.
11098   match(Set dst (RoundFloat dst));
11099 
11100   ins_cost(0);
11101   ins_encode();
11102   ins_pipe(empty);
11103 %}
11104 
11105 instruct roundDouble_nop(regD dst)
11106 %{
        // RoundDouble is matched in place with zero cost and an empty encoding,
        // so no machine code is emitted for it.
11107   match(Set dst (RoundDouble dst));
11108 
11109   ins_cost(0);
11110   ins_encode();
11111   ins_pipe(empty);
11112 %}
11113 
11114 instruct convF2D_reg_reg(regD dst, regF src)
11115 %{
        // Float-to-double conversion (ConvF2D), register source.
        // Opcode bytes F3 0F 5A, printed as cvtss2sd.
11116   match(Set dst (ConvF2D src));
11117 
11118   format %{ "cvtss2sd $dst, $src" %}
11119   opcode(0xF3, 0x0F, 0x5A);
11120   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11121   ins_pipe(pipe_slow); // XXX
11122 %}
11123 
11124 instruct convF2D_reg_mem(regD dst, memory src)
11125 %{
        // Float-to-double conversion with the float loaded from memory:
        // matches ConvF2D(LoadF src). Same opcode bytes as the register form.
11126   match(Set dst (ConvF2D (LoadF src)));
11127 
11128   format %{ "cvtss2sd $dst, $src" %}
11129   opcode(0xF3, 0x0F, 0x5A);
11130   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11131   ins_pipe(pipe_slow); // XXX
11132 %}
11133 
11134 instruct convD2F_reg_reg(regF dst, regD src)
11135 %{
        // Double-to-float conversion (ConvD2F), register source.
        // Opcode bytes F2 0F 5A, printed as cvtsd2ss.
11136   match(Set dst (ConvD2F src));
11137 
11138   format %{ "cvtsd2ss $dst, $src" %}
11139   opcode(0xF2, 0x0F, 0x5A);
11140   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11141   ins_pipe(pipe_slow); // XXX
11142 %}
11143 
11144 instruct convD2F_reg_mem(regF dst, memory src)
11145 %{
        // Double-to-float conversion with the double loaded from memory:
        // matches ConvD2F(LoadD src). Same opcode bytes as the register form.
11146   match(Set dst (ConvD2F (LoadD src)));
11147 
11148   format %{ "cvtsd2ss $dst, $src" %}
11149   opcode(0xF2, 0x0F, 0x5A);
11150   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11151   ins_pipe(pipe_slow); // XXX
11152 %}
11153 
11154 // XXX do mem variants
      // Float-to-int conversion (ConvF2I). Per the format text: a truncating
      // cvttss2si is tried first; if the result equals 0x80000000 the source
      // is spilled to the stack and f2i_fixup is called to produce the final
      // value. Flags are clobbered (KILL cr).
11155 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11156 %{
11157   match(Set dst (ConvF2I src));
11158   effect(KILL cr);
11159 
11160   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11161             "cmpl    $dst, #0x80000000\n\t"
11162             "jne,s   done\n\t"
11163             "subq    rsp, #8\n\t"
11164             "movss   [rsp], $src\n\t"
11165             "call    f2i_fixup\n\t"
11166             "popq    $dst\n"
11167     "done:   "%}
11168   opcode(0xF3, 0x0F, 0x2C);
        // The f2i_fixup enc_class is defined elsewhere; it presumably emits the
        // compare-and-call tail shown in the format -- confirm.
11169   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11170              f2i_fixup(dst, src));
11171   ins_pipe(pipe_slow);
11172 %}
11173 
11174 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11175 %{
        // Float-to-long conversion (ConvF2L). Per the format text: a truncating
        // cvttss2siq is tried first; if the result equals 0x8000000000000000
        // the source is spilled and f2l_fixup is called. Flags are clobbered.
11176   match(Set dst (ConvF2L src));
11177   effect(KILL cr);
11178 
11179   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11180             "cmpq    $dst, [0x8000000000000000]\n\t"
11181             "jne,s   done\n\t"
11182             "subq    rsp, #8\n\t"
11183             "movss   [rsp], $src\n\t"
11184             "call    f2l_fixup\n\t"
11185             "popq    $dst\n"
11186     "done:   "%}
11187   opcode(0xF3, 0x0F, 0x2C);
        // Uses the _wide REX helper (64-bit destination, per the "q" suffix in
        // the format); f2l_fixup is defined elsewhere in this file.
11188   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11189              f2l_fixup(dst, src));
11190   ins_pipe(pipe_slow);
11191 %}
11192 
11193 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11194 %{
        // Double-to-int conversion (ConvD2I). Per the format text: a truncating
        // cvttsd2si is tried first; if the result equals 0x80000000 the source
        // is spilled and d2i_fixup is called. Flags are clobbered (KILL cr).
11195   match(Set dst (ConvD2I src));
11196   effect(KILL cr);
11197 
11198   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11199             "cmpl    $dst, #0x80000000\n\t"
11200             "jne,s   done\n\t"
11201             "subq    rsp, #8\n\t"
11202             "movsd   [rsp], $src\n\t"
11203             "call    d2i_fixup\n\t"
11204             "popq    $dst\n"
11205     "done:   "%}
11206   opcode(0xF2, 0x0F, 0x2C);
        // The d2i_fixup enc_class is defined elsewhere; it presumably emits the
        // compare-and-call tail shown in the format -- confirm.
11207   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11208              d2i_fixup(dst, src));
11209   ins_pipe(pipe_slow);
11210 %}
11211 
11212 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11213 %{
        // Double-to-long conversion (ConvD2L). Per the format text: a truncating
        // cvttsd2siq is tried first; if the result equals 0x8000000000000000
        // the source is spilled and d2l_fixup is called. Flags are clobbered.
11214   match(Set dst (ConvD2L src));
11215   effect(KILL cr);
11216 
11217   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11218             "cmpq    $dst, [0x8000000000000000]\n\t"
11219             "jne,s   done\n\t"
11220             "subq    rsp, #8\n\t"
11221             "movsd   [rsp], $src\n\t"
11222             "call    d2l_fixup\n\t"
11223             "popq    $dst\n"
11224     "done:   "%}
11225   opcode(0xF2, 0x0F, 0x2C);
        // Uses the _wide REX helper (64-bit destination, per the "q" suffix in
        // the format); d2l_fixup is defined elsewhere in this file.
11226   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11227              d2l_fixup(dst, src));
11228   ins_pipe(pipe_slow);
11229 %}
11230 
11231 instruct convI2F_reg_reg(regF dst, rRegI src)
11232 %{
        // Int-to-float conversion (ConvI2F) from a general register, printed
        // as cvtsi2ssl. Selected only when UseXmmI2F is off; convXI2F_reg
        // carries the complementary predicate.
11233   predicate(!UseXmmI2F);
11234   match(Set dst (ConvI2F src));
11235 
11236   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11237   opcode(0xF3, 0x0F, 0x2A);
11238   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11239   ins_pipe(pipe_slow); // XXX
11240 %}
11241 
11242 instruct convI2F_reg_mem(regF dst, memory src)
11243 %{
        // Int-to-float conversion with the int loaded from memory: matches
        // ConvI2F(LoadI src). Unlike the register form, this rule carries no
        // UseXmmI2F predicate.
11244   match(Set dst (ConvI2F (LoadI src)));
11245 
11246   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11247   opcode(0xF3, 0x0F, 0x2A);
11248   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11249   ins_pipe(pipe_slow); // XXX
11250 %}
11251 
11252 instruct convI2D_reg_reg(regD dst, rRegI src)
11253 %{
        // Int-to-double conversion (ConvI2D) from a general register, printed
        // as cvtsi2sdl. Selected only when UseXmmI2D is off; convXI2D_reg
        // carries the complementary predicate.
11254   predicate(!UseXmmI2D);
11255   match(Set dst (ConvI2D src));
11256 
11257   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11258   opcode(0xF2, 0x0F, 0x2A);
11259   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11260   ins_pipe(pipe_slow); // XXX
11261 %}
11262 
11263 instruct convI2D_reg_mem(regD dst, memory src)
11264 %{
        // Int-to-double conversion with the int loaded from memory: matches
        // ConvI2D(LoadI src). Unlike the register form, this rule carries no
        // UseXmmI2D predicate.
11265   match(Set dst (ConvI2D (LoadI src)));
11266 
11267   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11268   opcode(0xF2, 0x0F, 0x2A);
11269   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11270   ins_pipe(pipe_slow); // XXX
11271 %}
11272 
11273 instruct convXI2F_reg(regF dst, rRegI src)
11274 %{
        // Alternative int-to-float conversion used when UseXmmI2F is on:
        // the int is first moved into the XMM register (movdl) and then
        // converted in place with cvtdq2ps.
11275   predicate(UseXmmI2F);
11276   match(Set dst (ConvI2F src));
11277 
11278   format %{ "movdl $dst, $src\n\t"
11279             "cvtdq2psl $dst, $dst\t# i2f" %}
11280   ins_encode %{
11281     __ movdl($dst$$XMMRegister, $src$$Register);
11282     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11283   %}
11284   ins_pipe(pipe_slow); // XXX
11285 %}
11286 
11287 instruct convXI2D_reg(regD dst, rRegI src)
11288 %{
        // Alternative int-to-double conversion used when UseXmmI2D is on:
        // the int is first moved into the XMM register (movdl) and then
        // converted in place with cvtdq2pd.
11289   predicate(UseXmmI2D);
11290   match(Set dst (ConvI2D src));
11291 
11292   format %{ "movdl $dst, $src\n\t"
11293             "cvtdq2pdl $dst, $dst\t# i2d" %}
11294   ins_encode %{
11295     __ movdl($dst$$XMMRegister, $src$$Register);
11296     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11297   %}
11298   ins_pipe(pipe_slow); // XXX
11299 %}
11300 
11301 instruct convL2F_reg_reg(regF dst, rRegL src)
11302 %{
        // Long-to-float conversion (ConvL2F) from a general register, printed
        // as cvtsi2ssq. Same opcode bytes as the int form, but encoded with
        // the _wide REX helper for the 64-bit source.
11303   match(Set dst (ConvL2F src));
11304 
11305   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11306   opcode(0xF3, 0x0F, 0x2A);
11307   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11308   ins_pipe(pipe_slow); // XXX
11309 %}
11310 
11311 instruct convL2F_reg_mem(regF dst, memory src)
11312 %{
        // Long-to-float conversion with the long loaded from memory: matches
        // ConvL2F(LoadL src); encoded with the _wide REX helper.
11313   match(Set dst (ConvL2F (LoadL src)));
11314 
11315   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11316   opcode(0xF3, 0x0F, 0x2A);
11317   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11318   ins_pipe(pipe_slow); // XXX
11319 %}
11320 
11321 instruct convL2D_reg_reg(regD dst, rRegL src)
11322 %{
        // Long-to-double conversion (ConvL2D) from a general register, printed
        // as cvtsi2sdq. Same opcode bytes as the int form, but encoded with
        // the _wide REX helper for the 64-bit source.
11323   match(Set dst (ConvL2D src));
11324 
11325   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11326   opcode(0xF2, 0x0F, 0x2A);
11327   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11328   ins_pipe(pipe_slow); // XXX
11329 %}
11330 
11331 instruct convL2D_reg_mem(regD dst, memory src)
11332 %{
        // Long-to-double conversion with the long loaded from memory: matches
        // ConvL2D(LoadL src); encoded with the _wide REX helper.
11333   match(Set dst (ConvL2D (LoadL src)));
11334 
11335   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11336   opcode(0xF2, 0x0F, 0x2A);
11337   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11338   ins_pipe(pipe_slow); // XXX
11339 %}
11340 
11341 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11342 %{
        // Int-to-long conversion (ConvI2L) between general registers, emitted
        // through the assembler as a single movslq (sign-extending move).
11343   match(Set dst (ConvI2L src));
11344 
11345   ins_cost(125);
11346   format %{ "movslq  $dst, $src\t# i2l" %}
11347   ins_encode %{
11348     __ movslq($dst$$Register, $src$$Register);
11349   %}
11350   ins_pipe(ialu_reg_reg);
11351 %}
11352 
11353 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11354 // %{
11355 //   match(Set dst (ConvI2L src));
11356 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11357 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11358 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11359 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11360 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11361 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11362 
11363 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11364 //   ins_encode(enc_copy(dst, src));
11365 // //   opcode(0x63); // needs REX.W
11366 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11367 //   ins_pipe(ialu_reg_reg);
11368 // %}
11369 
11370 // Zero-extend convert int to long
      // Matches ConvI2L followed by an AndL with an immL_32bits mask and
      // replaces the pair with a single 32-bit movl (per the format text).
      // enc_copy is defined elsewhere in this file.
11371 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11372 %{
11373   match(Set dst (AndL (ConvI2L src) mask));
11374 
11375   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11376   ins_encode(enc_copy(dst, src));
11377   ins_pipe(ialu_reg_reg);
11378 %}
11379 
11380 // Zero-extend convert int to long
      // Memory form: matches AndL(ConvI2L(LoadI src), mask) with an
      // immL_32bits mask and replaces it with a single 32-bit load
      // (opcode 0x8B, printed as movl).
11381 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11382 %{
11383   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11384 
11385   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11386   opcode(0x8B);
11387   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11388   ins_pipe(ialu_reg_mem);
11389 %}
11390 
11391 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11392 %{
        // Matches a long AndL with an immL_32bits mask and replaces it with a
        // 32-bit movl (per the format text: "zero-extend long").
        // enc_copy_always is defined elsewhere; presumably, unlike enc_copy,
        // it emits the move even when dst and src coincide -- confirm.
11393   match(Set dst (AndL src mask));
11394 
11395   format %{ "movl    $dst, $src\t# zero-extend long" %}
11396   ins_encode(enc_copy_always(dst, src));
11397   ins_pipe(ialu_reg_reg);
11398 %}
11399 
11400 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11401 %{
        // Long-to-int conversion (ConvL2I) between general registers, printed
        // as a 32-bit movl. enc_copy_always is defined elsewhere in this file.
11402   match(Set dst (ConvL2I src));
11403 
11404   format %{ "movl    $dst, $src\t# l2i" %}
11405   ins_encode(enc_copy_always(dst, src));
11406   ins_pipe(ialu_reg_reg);
11407 %}
11408 
11409 
11410 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot into an int register, done with a
        // plain 32-bit integer load (opcode 0x8B, printed as movl).
11411   match(Set dst (MoveF2I src));
11412   effect(DEF dst, USE src);
11413 
11414   ins_cost(125);
11415   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11416   opcode(0x8B);
11417   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11418   ins_pipe(ialu_reg_mem);
11419 %}
11420 
11421 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot into a float register, done with a
        // scalar float load (opcode bytes F3 0F 10, printed as movss).
11422   match(Set dst (MoveI2F src));
11423   effect(DEF dst, USE src);
11424 
11425   ins_cost(125);
11426   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11427   opcode(0xF3, 0x0F, 0x10);
11428   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11429   ins_pipe(pipe_slow);
11430 %}
11431 
11432 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot into a long register, done with a
        // plain 64-bit integer load (opcode 0x8B with the _wide REX helper,
        // printed as movq).
11433   match(Set dst (MoveD2L src));
11434   effect(DEF dst, USE src);
11435 
11436   ins_cost(125);
11437   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11438   opcode(0x8B);
11439   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11440   ins_pipe(ialu_reg_mem);
11441 %}
11442 
11443 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot into a double register, used when
        // UseXmmLoadAndClearUpper is off: loads with opcode bytes 66 0F 12
        // (printed as movlpd). MoveL2D_stack_reg covers the other setting.
        // Note: the format tag reads "MoveL2D_stack_reg", without "_partial".
11444   predicate(!UseXmmLoadAndClearUpper);
11445   match(Set dst (MoveL2D src));
11446   effect(DEF dst, USE src);
11447 
11448   ins_cost(125);
11449   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11450   opcode(0x66, 0x0F, 0x12);
11451   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11452   ins_pipe(pipe_slow);
11453 %}
11454 
11455 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot into a double register, used when
        // UseXmmLoadAndClearUpper is on: loads with opcode bytes F2 0F 10
        // (printed as movsd). MoveL2D_stack_reg_partial covers the other setting.
11456   predicate(UseXmmLoadAndClearUpper);
11457   match(Set dst (MoveL2D src));
11458   effect(DEF dst, USE src);
11459 
11460   ins_cost(125);
11461   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11462   opcode(0xF2, 0x0F, 0x10);
11463   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11464   ins_pipe(pipe_slow);
11465 %}
11466 
11467 
11468 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I from a float register into an int stack slot, done with a
        // scalar float store (opcode bytes F3 0F 11, printed as movss).
11469   match(Set dst (MoveF2I src));
11470   effect(DEF dst, USE src);
11471 
11472   ins_cost(95); // XXX
11473   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11474   opcode(0xF3, 0x0F, 0x11);
        // Helpers take (register, memory): for a store the register operand is
        // src and the memory operand is the dst stack slot.
11475   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11476   ins_pipe(pipe_slow);
11477 %}
11478 
11479 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register into a float stack slot, done with a
        // plain 32-bit integer store (opcode 0x89, printed as movl).
11480   match(Set dst (MoveI2F src));
11481   effect(DEF dst, USE src);
11482 
11483   ins_cost(100);
11484   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11485   opcode(0x89);
        // Helpers take (register, memory): for a store the register operand is
        // src and the memory operand is the dst stack slot.
11486   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11487   ins_pipe( ialu_mem_reg );
11488 %}
11489 
11490 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L from a double register into a long stack slot, done with a
        // scalar double store (opcode bytes F2 0F 11, printed as movsd).
11491   match(Set dst (MoveD2L src));
11492   effect(DEF dst, USE src);
11493 
11494   ins_cost(95); // XXX
        // The trailing tag names this rule in printed assembly. It previously
        // read "MoveL2D_reg_stack", which mislabelled the node and collided
        // with the tag of the real MoveL2D_reg_stack rule.
11495   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11496   opcode(0xF2, 0x0F, 0x11);
        // Helpers take (register, memory): for a store the register operand is
        // src and the memory operand is the dst stack slot.
11497   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11498   ins_pipe(pipe_slow);
11499 %}
11500 
11501 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D from a long register into a double stack slot, done with a
        // plain 64-bit integer store (opcode 0x89 with the _wide REX helper,
        // printed as movq).
11502   match(Set dst (MoveL2D src));
11503   effect(DEF dst, USE src);
11504 
11505   ins_cost(100);
11506   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11507   opcode(0x89);
        // Helpers take (register, memory): for a store the register operand is
        // src and the memory operand is the dst stack slot.
11508   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11509   ins_pipe(ialu_mem_reg);
11510 %}
11511 
11512 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to a general register via the
        // assembler's movdl. Cost 85 is below the stack-based forms (95-125).
11513   match(Set dst (MoveF2I src));
11514   effect(DEF dst, USE src);
11515   ins_cost(85);
11516   format %{ "movd    $dst,$src\t# MoveF2I" %}
11517   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11518   ins_pipe( pipe_slow );
11519 %}
11520 
11521 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L directly from an XMM register to a general register via the
        // assembler's movdq (the format prints it as "movd").
        // Cost 85 is below the stack-based forms (95-125).
11522   match(Set dst (MoveD2L src));
11523   effect(DEF dst, USE src);
11524   ins_cost(85);
11525   format %{ "movd    $dst,$src\t# MoveD2L" %}
11526   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11527   ins_pipe( pipe_slow );
11528 %}
11529 
11530 // The next instructions have long latency and use Int unit. Set high cost.
      // MoveI2F directly from a general register to an XMM register via the
      // assembler's movdl. Cost 300 makes the matcher prefer the cheaper
      // stack-based forms (100-125) when both apply.
11531 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11532   match(Set dst (MoveI2F src));
11533   effect(DEF dst, USE src);
11534   ins_cost(300);
11535   format %{ "movd    $dst,$src\t# MoveI2F" %}
11536   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11537   ins_pipe( pipe_slow );
11538 %}
11539 
11540 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D directly from a general register to an XMM register via the
        // assembler's movdq (the format prints it as "movd"). Cost 300 is
        // well above the stack-based forms (100-125).
11541   match(Set dst (MoveL2D src));
11542   effect(DEF dst, USE src);
11543   ins_cost(300);
11544   format %{ "movd    $dst,$src\t# MoveL2D" %}
11545   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11546   ins_pipe( pipe_slow );
11547 %}
11548 
11549 // Replicate scalar to packed byte (1 byte) values in xmm
      // Source already lives in an XMM register. The format lists a
      // MOVDQA / PUNPCKLBW / PSHUFLW sequence; the bytes come from the
      // pshufd_8x8 enc_class, defined elsewhere in this file.
11550 instruct Repl8B_reg(regD dst, regD src) %{
11551   match(Set dst (Replicate8B src));
11552   format %{ "MOVDQA  $dst,$src\n\t"
11553             "PUNPCKLBW $dst,$dst\n\t"
11554             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11555   ins_encode( pshufd_8x8(dst, src));
11556   ins_pipe( pipe_slow );
11557 %}
11558 
11559 // Replicate scalar to packed byte (1 byte) values in xmm
      // Source is a general register: it is first moved into dst (mov_i2x,
      // printed as MOVD) and then replicated in place with pshufd_8x8(dst, dst).
      // Both enc_classes are defined elsewhere in this file.
11560 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11561   match(Set dst (Replicate8B src));
11562   format %{ "MOVD    $dst,$src\n\t"
11563             "PUNPCKLBW $dst,$dst\n\t"
11564             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11565   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11566   ins_pipe( pipe_slow );
11567 %}
11568 
11569 // Replicate scalar zero to packed byte (1 byte) values in xmm
      // A replicated zero needs no shuffle: dst is cleared by XOR-ing it with
      // itself (PXOR dst,dst).
11570 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11571   match(Set dst (Replicate8B zero));
11572   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11573   ins_encode( pxor(dst, dst));
11574   ins_pipe( fpu_reg_reg );
11575 %}
11576 
11577 // Replicate scalar to packed short (2 byte) values in xmm
      // Source already lives in an XMM register; printed as a single PSHUFLW
      // with selector 0x00. The bytes come from the pshufd_4x16 enc_class,
      // defined elsewhere in this file.
11578 instruct Repl4S_reg(regD dst, regD src) %{
11579   match(Set dst (Replicate4S src));
11580   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11581   ins_encode( pshufd_4x16(dst, src));
11582   ins_pipe( fpu_reg_reg );
11583 %}
11584 
11585 // Replicate scalar to packed short (2 byte) values in xmm
      // Source is a general register: it is first moved into dst (mov_i2x,
      // printed as MOVD) and then replicated in place with pshufd_4x16(dst, dst).
11586 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11587   match(Set dst (Replicate4S src));
11588   format %{ "MOVD    $dst,$src\n\t"
11589             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11590   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11591   ins_pipe( fpu_reg_reg );
11592 %}
11593 
11594 // Replicate scalar zero to packed short (2 byte) values in xmm
      // A replicated zero needs no shuffle: dst is cleared by XOR-ing it with
      // itself (PXOR dst,dst).
11595 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11596   match(Set dst (Replicate4S zero));
11597   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11598   ins_encode( pxor(dst, dst));
11599   ins_pipe( fpu_reg_reg );
11600 %}
11601 
11602 // Replicate scalar to packed char (2 byte) values in xmm
      // Source already lives in an XMM register; uses the same pshufd_4x16
      // encoding as the packed-short rule, printed as PSHUFLW with selector 0x00.
11603 instruct Repl4C_reg(regD dst, regD src) %{
11604   match(Set dst (Replicate4C src));
11605   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11606   ins_encode( pshufd_4x16(dst, src));
11607   ins_pipe( fpu_reg_reg );
11608 %}
11609 
11610 // Replicate scalar to packed char (2 byte) values in xmm
      // The source is a general-purpose int register: move it into $dst
      // (mov_i2x), then shuffle $dst in place (pshufd_4x16).
11611 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11612   match(Set dst (Replicate4C src));
11613   format %{ "MOVD    $dst,$src\n\t"
11614             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11615   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11616   ins_pipe( fpu_reg_reg );
11617 %}
11618 
11619 // Replicate scalar zero to packed char (2 byte) values in xmm
      // Encoded as a PXOR of $dst with itself; the matched zero operand is not
      // referenced by the encoding.
11620 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11621   match(Set dst (Replicate4C zero));
11622   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11623   ins_encode( pxor(dst, dst));
11624   ins_pipe( fpu_reg_reg );
11625 %}
11626 
11627 // Replicate scalar to packed integer (4 byte) values in xmm
      // The source is already in an xmm register (regD); one PSHUFD with
      // immediate 0x00.
11628 instruct Repl2I_reg(regD dst, regD src) %{
11629   match(Set dst (Replicate2I src));
11630   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11631   ins_encode( pshufd(dst, src, 0x00));
11632   ins_pipe( fpu_reg_reg );
11633 %}
11634 
11635 // Replicate scalar to packed integer (4 byte) values in xmm
      // The source is a general-purpose int register: move it into $dst
      // (mov_i2x), then PSHUFD $dst in place with immediate 0x00.
11636 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11637   match(Set dst (Replicate2I src));
11638   format %{ "MOVD   $dst,$src\n\t"
11639             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11640   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11641   ins_pipe( fpu_reg_reg );
11642 %}
11643 
11644 // Replicate scalar zero to packed integer (4 byte) values in xmm
      // Encoded as a PXOR of $dst with itself; the matched zero operand is not
      // referenced by the encoding.
11645 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11646   match(Set dst (Replicate2I zero));
11647   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11648   ins_encode( pxor(dst, dst));
11649   ins_pipe( fpu_reg_reg );
11650 %}
11651 
11652 // Replicate scalar to packed single precision floating point values in xmm
      // The source operand is a regD; one PSHUFD with immediate 0xe0.
11653 instruct Repl2F_reg(regD dst, regD src) %{
11654   match(Set dst (Replicate2F src));
11655   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11656   ins_encode( pshufd(dst, src, 0xe0));
11657   ins_pipe( fpu_reg_reg );
11658 %}
11659 
11660 // Replicate scalar to packed single precision floating point values in xmm
      // Same encoding as Repl2F_reg, but the source operand is a regF.
11661 instruct Repl2F_regF(regD dst, regF src) %{
11662   match(Set dst (Replicate2F src));
11663   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11664   ins_encode( pshufd(dst, src, 0xe0));
11665   ins_pipe( fpu_reg_reg );
11666 %}
11667 
11668 // Replicate scalar zero to packed single precision floating point values in xmm
      // Encoded as a PXOR of $dst with itself; the matched zero operand is not
      // referenced by the encoding.
11669 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11670   match(Set dst (Replicate2F zero));
11671   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11672   ins_encode( pxor(dst, dst));
11673   ins_pipe( fpu_reg_reg );
11674 %}
11675 
11676 
11677 // =======================================================================
11678 // fast clearing of an array
      // Matches ClearArray(cnt, base).  cnt and base are consumed and clobbered
      // (USE_KILL); zero and the flags are clobbered (KILL).  The operand
      // classes (rcx_RegL, rdi_RegP, rax_RegI) presumably pin the operands to
      // the registers REP STOS uses implicitly -- confirm against their
      // definitions elsewhere in this file.
11679 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11680                   rFlagsReg cr)
11681 %{
11682   match(Set dummy (ClearArray cnt base));
11683   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11684 
11685   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11686             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11687   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11688              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11689   ins_pipe(pipe_slow);
11690 %}
11691 
11692 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11693                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11694 %{
        // Matches StrComp on two (pointer, count) pairs and delegates code
        // generation to the macro assembler's string_compare.  Both pointers
        // and both counts are consumed and clobbered (USE_KILL); tmp1 and tmp2
        // are xmm temporaries; the flags are clobbered.
11695   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11696   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11697 
11698   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11699   ins_encode %{
11700     __ string_compare($str1$$Register, $str2$$Register,
11701                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11702                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11703   %}
11704   ins_pipe( pipe_slow );
11705 %}
11706 
11707 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11708                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11709 %{
        // Matches StrIndexOf, but only when UseSSE42Intrinsics is set, and
        // delegates to the macro assembler's string_indexof.  Inputs are
        // consumed and clobbered (USE_KILL).  tmp1 is an xmm TEMP, whereas
        // tmp2 is a fixed-class general register declared KILL.
11710   predicate(UseSSE42Intrinsics);
11711   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11712   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11713 
11714   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11715   ins_encode %{
11716     __ string_indexof($str1$$Register, $str2$$Register,
11717                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11718                       $tmp1$$XMMRegister, $tmp2$$Register);
11719   %}
11720   ins_pipe( pipe_slow );
11721 %}
11722 
11723 // fast string equals
      // Matches StrEquals and calls char_arrays_equals with its first argument
      // false (array_equals passes true).  str1, str2 and cnt are consumed and
      // clobbered; tmp1/tmp2 are xmm temporaries; tmp3 and the flags are killed.
11724 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11725                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11726 %{
11727   match(Set result (StrEquals (Binary str1 str2) cnt));
11728   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11729 
11730   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11731   ins_encode %{
11732     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11733                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11734                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11735   %}
11736   ins_pipe( pipe_slow );
11737 %}
11738 
11739 // fast array equals
      // Matches AryEq and calls char_arrays_equals with its first argument
      // true.  There is no count operand: tmp3 is passed in the argument slot
      // where string_equals passes cnt.  ary1/ary2 are consumed and clobbered;
      // tmp3, tmp4 and the flags are killed.
11740 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11741                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11742 %{
11743   match(Set result (AryEq ary1 ary2));
11744   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11745   //ins_cost(300);
11746 
11747   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11748   ins_encode %{
11749     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11750                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11751                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11752   %}
11753   ins_pipe( pipe_slow );
11754 %}
11755 
11756 //----------Control Flow Instructions------------------------------------------
11757 // Signed compare Instructions
11758 
11759 // XXX more variants!!
      // Signed int compare of two registers: CmpI -> cmpl (opcode 3B /r),
      // producing the flags in cr.
11760 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11761 %{
11762   match(Set cr (CmpI op1 op2));
11763   effect(DEF cr, USE op1, USE op2);
11764 
11765   format %{ "cmpl    $op1, $op2" %}
11766   opcode(0x3B);  /* Opcode 3B /r */
11767   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11768   ins_pipe(ialu_cr_reg_reg);
11769 %}
11770 
11771 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11772 %{
        // Signed int compare of a register with an immediate (opcode 81 /7).
        // OpcSErm/Con8or32 presumably select the short sign-extended imm8 form
        // when the constant fits -- confirm in the encoding classes.
11773   match(Set cr (CmpI op1 op2));
11774 
11775   format %{ "cmpl    $op1, $op2" %}
11776   opcode(0x81, 0x07); /* Opcode 81 /7 */
11777   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11778   ins_pipe(ialu_cr_reg_imm);
11779 %}
11780 
11781 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11782 %{
        // Signed int compare of a register with an int loaded from memory;
        // the LoadI is folded into the cmpl memory operand.
11783   match(Set cr (CmpI op1 (LoadI op2)));
11784 
11785   ins_cost(500); // XXX
11786   format %{ "cmpl    $op1, $op2" %}
11787   opcode(0x3B); /* Opcode 3B /r */
11788   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11789   ins_pipe(ialu_cr_reg_mem);
11790 %}
11791 
11792 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11793 %{
        // Int compare against zero, encoded as testl of the register with
        // itself (opcode 85); the zero operand is not referenced by the encoding.
11794   match(Set cr (CmpI src zero));
11795 
11796   format %{ "testl   $src, $src" %}
11797   opcode(0x85);
11798   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11799   ins_pipe(ialu_cr_reg_imm);
11800 %}
11801 
11802 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11803 %{
        // (src & con) compared against zero, folded into one testl with a
        // 32-bit immediate (opcode F7 /0, Con32).
11804   match(Set cr (CmpI (AndI src con) zero));
11805 
11806   format %{ "testl   $src, $con" %}
11807   opcode(0xF7, 0x00);
11808   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11809   ins_pipe(ialu_cr_reg_imm);
11810 %}
11811 
11812 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11813 %{
        // (src & loaded int) compared against zero, folded into one testl
        // with a memory operand (opcode 85).
11814   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11815 
11816   format %{ "testl   $src, $mem" %}
11817   opcode(0x85);
11818   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11819   ins_pipe(ialu_cr_reg_mem);
11820 %}
11821 
11822 // Unsigned compare Instructions; really, same as signed except they
11823 // produce an rFlagsRegU instead of rFlagsReg.
      // compU_rReg: CmpU of two int registers -> cmpl (opcode 3B /r).
11824 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11825 %{
11826   match(Set cr (CmpU op1 op2));
11827 
11828   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11829   opcode(0x3B); /* Opcode 3B /r */
11830   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11831   ins_pipe(ialu_cr_reg_reg);
11832 %}
11833 
11834 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11835 %{
        // Unsigned int compare of a register with an immediate; same encoding
        // as compI_rReg_imm (opcode 81 /7) but the result is an rFlagsRegU.
11836   match(Set cr (CmpU op1 op2));
11837 
11838   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11839   opcode(0x81,0x07); /* Opcode 81 /7 */
11840   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11841   ins_pipe(ialu_cr_reg_imm);
11842 %}
11843 
11844 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11845 %{
        // Unsigned int compare of a register with an int loaded from memory;
        // the LoadI is folded into the cmpl memory operand.
11846   match(Set cr (CmpU op1 (LoadI op2)));
11847 
11848   ins_cost(500); // XXX
11849   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11850   opcode(0x3B); /* Opcode 3B /r */
11851   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11852   ins_pipe(ialu_cr_reg_mem);
11853 %}
11854 
11855 // // // Cisc-spilled version of cmpU_rReg
11856 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11857 // //%{
11858 // //  match(Set cr (CmpU (LoadI op1) op2));
11859 // //
11860 // //  format %{ "CMPu   $op1,$op2" %}
11861 // //  ins_cost(500);
11862 // //  opcode(0x39);  /* Opcode 39 /r */
11863 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11864 // //%}
11865 
11866 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11867 %{
        // Unsigned int compare against zero, encoded as testl of the register
        // with itself (opcode 85).
11868   match(Set cr (CmpU src zero));
11869 
11870   format %{ "testl  $src, $src\t# unsigned" %}
11871   opcode(0x85);
11872   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11873   ins_pipe(ialu_cr_reg_imm);
11874 %}
11875 
11876 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11877 %{
        // Pointer compare of two registers: CmpP -> cmpq (opcode 3B /r with a
        // wide REX prefix), producing unsigned flags (rFlagsRegU).
11878   match(Set cr (CmpP op1 op2));
11879 
11880   format %{ "cmpq    $op1, $op2\t# ptr" %}
11881   opcode(0x3B); /* Opcode 3B /r */
11882   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11883   ins_pipe(ialu_cr_reg_reg);
11884 %}
11885 
11886 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11887 %{
        // Pointer compare of a register with a pointer loaded from memory;
        // the LoadP is folded into the cmpq memory operand.
11888   match(Set cr (CmpP op1 (LoadP op2)));
11889 
11890   ins_cost(500); // XXX
11891   format %{ "cmpq    $op1, $op2\t# ptr" %}
11892   opcode(0x3B); /* Opcode 3B /r */
11893   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11894   ins_pipe(ialu_cr_reg_mem);
11895 %}
11896 
11897 // // // Cisc-spilled version of cmpP_rReg
11898 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11899 // //%{
11900 // //  match(Set cr (CmpP (LoadP op1) op2));
11901 // //
11902 // //  format %{ "CMPu   $op1,$op2" %}
11903 // //  ins_cost(500);
11904 // //  opcode(0x39);  /* Opcode 39 /r */
11905 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11906 // //%}
11907 
11908 // XXX this is generalized by compP_rReg_mem???
11909 // Compare raw pointer (used in out-of-heap check).
11910 // Only works because non-oop pointers must be raw pointers
11911 // and raw pointers have no anti-dependencies.
      // The predicate restricts this rule to loads whose type is not an oop
      // pointer.  NOTE(review): despite the name, the operand order
      // (register, memory), match rule and encoding are the same as
      // compP_rReg_mem; only the predicate and the missing ins_cost differ.
11912 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11913 %{
11914   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11915   match(Set cr (CmpP op1 (LoadP op2)));
11916 
11917   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11918   opcode(0x3B); /* Opcode 3B /r */
11919   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11920   ins_pipe(ialu_cr_reg_mem);
11921 %}
11922 
11923 // This will generate a signed flags result. This should be OK since
11924 // any compare to a zero should be eq/neq.
      // Pointer compare against NULL, encoded as testq of the register with
      // itself (opcode 85 with a wide REX prefix).
11925 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11926 %{
11927   match(Set cr (CmpP src zero));
11928 
11929   format %{ "testq   $src, $src\t# ptr" %}
11930   opcode(0x85);
11931   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11932   ins_pipe(ialu_cr_reg_imm);
11933 %}
11934 
11935 // This will generate a signed flags result. This should be OK since
11936 // any compare to a zero should be eq/neq.
      // Tests a pointer in memory against NULL without loading it into a
      // register.  Used when compressed oops are off or the narrow-oop base is
      // non-NULL; testP_mem_reg0 covers the remaining case.
      // NOTE(review): the format shows a 64-bit all-ones mask while the
      // encoding emits the 32-bit constant 0xFFFFFFFF after a wide REX prefix;
      // presumably the immediate is sign-extended by the CPU -- confirm.
11937 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11938 %{
11939   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11940   match(Set cr (CmpP (LoadP op) zero));
11941 
11942   ins_cost(500); // XXX
11943   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11944   opcode(0xF7); /* Opcode F7 /0 */
11945   ins_encode(REX_mem_wide(op),
11946              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11947   ins_pipe(ialu_cr_reg_imm);
11948 %}
11949 
11950 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11951 %{
        // Pointer-in-memory compare against NULL for the case where compressed
        // oops are on and the narrow-oop base is NULL.  Per the format string,
        // R12 (heap base) is then zero, so the compare uses r12 as the zero.
11952   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11953   match(Set cr (CmpP (LoadP mem) zero));
11954 
11955   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11956   ins_encode %{
11957     __ cmpq(r12, $mem$$Address);
11958   %}
11959   ins_pipe(ialu_cr_reg_mem);
11960 %}
11961 
11962 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11963 %{
        // Compressed-pointer compare of two registers: CmpN -> cmpl,
        // producing unsigned flags (rFlagsRegU).
11964   match(Set cr (CmpN op1 op2));
11965 
11966   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11967   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11968   ins_pipe(ialu_cr_reg_reg);
11969 %}
11970 
11971 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11972 %{
        // Compressed-pointer compare of a register with a narrow oop loaded
        // from memory; the LoadN is folded into the cmpl memory operand.
11973   match(Set cr (CmpN src (LoadN mem)));
11974 
11975   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11976   ins_encode %{
11977     __ cmpl($src$$Register, $mem$$Address);
11978   %}
11979   ins_pipe(ialu_cr_reg_mem);
11980 %}
11981 
11982 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compressed-pointer compare of a register with a narrow-oop constant;
        // the constant is handed to cmp_narrow_oop as a jobject.
11983   match(Set cr (CmpN op1 op2));
11984 
11985   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11986   ins_encode %{
11987     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11988   %}
11989   ins_pipe(ialu_cr_reg_imm);
11990 %}
11991 
11992 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11993 %{
        // Compressed-pointer compare of a narrow-oop constant with a narrow
        // oop in memory.  The match rule lists the constant first, while the
        // emitted compare takes the memory operand first.
11994   match(Set cr (CmpN src (LoadN mem)));
11995 
11996   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11997   ins_encode %{
11998     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11999   %}
12000   ins_pipe(ialu_cr_reg_mem);
12001 %}
12002 
12003 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Compressed-pointer compare against zero, encoded as testl of the
        // register with itself; the zero operand is not referenced.
12004   match(Set cr (CmpN src zero));
12005 
12006   format %{ "testl   $src, $src\t# compressed ptr" %}
12007   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12008   ins_pipe(ialu_cr_reg_imm);
12009 %}
12010 
12011 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12012 %{
        // Compares a narrow oop in memory against zero; only matched when the
        // narrow-oop base is non-NULL (testN_mem_reg0 covers the NULL case).
        // NOTE(review): the format prints "testl $mem, 0xffffffff" but the
        // encoding below emits cmpl against (int)0xFFFFFFFF.  TEST and CMP set
        // the zero flag under different conditions -- confirm which is intended.
12013   predicate(Universe::narrow_oop_base() != NULL);
12014   match(Set cr (CmpN (LoadN mem) zero));
12015 
12016   ins_cost(500); // XXX
12017   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12018   ins_encode %{
12019     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
12020   %}
12021   ins_pipe(ialu_cr_reg_mem);
12022 %}
12023 
12024 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12025 %{
        // Narrow-oop-in-memory compare against zero for a NULL narrow-oop
        // base.  Per the format string, R12 (heap base) is then zero, so the
        // compare uses r12 as the zero value.
12026   predicate(Universe::narrow_oop_base() == NULL);
12027   match(Set cr (CmpN (LoadN mem) zero));
12028 
12029   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12030   ins_encode %{
12031     __ cmpl(r12, $mem$$Address);
12032   %}
12033   ins_pipe(ialu_cr_reg_mem);
12034 %}
12035 
12036 // Yanked all unsigned pointer compare operations.
12037 // Pointer compares are done with CmpP which is already unsigned.
12038 
12039 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12040 %{
        // Signed long compare of two registers: CmpL -> cmpq (opcode 3B /r
        // with a wide REX prefix).
12041   match(Set cr (CmpL op1 op2));
12042 
12043   format %{ "cmpq    $op1, $op2" %}
12044   opcode(0x3B);  /* Opcode 3B /r */
12045   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12046   ins_pipe(ialu_cr_reg_reg);
12047 %}
12048 
12049 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12050 %{
        // Signed long compare of a register with an immL32 constant
        // (opcode 81 /7, wide form).
12051   match(Set cr (CmpL op1 op2));
12052 
12053   format %{ "cmpq    $op1, $op2" %}
12054   opcode(0x81, 0x07); /* Opcode 81 /7 */
12055   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12056   ins_pipe(ialu_cr_reg_imm);
12057 %}
12058 
12059 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12060 %{
        // Signed long compare of a register with a long loaded from memory;
        // the LoadL is folded into the cmpq memory operand.
12061   match(Set cr (CmpL op1 (LoadL op2)));
12062 
12063   format %{ "cmpq    $op1, $op2" %}
12064   opcode(0x3B); /* Opcode 3B /r */
12065   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12066   ins_pipe(ialu_cr_reg_mem);
12067 %}
12068 
12069 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12070 %{
        // Long compare against zero, encoded as testq of the register with
        // itself (opcode 85 with a wide REX prefix).
12071   match(Set cr (CmpL src zero));
12072 
12073   format %{ "testq   $src, $src" %}
12074   opcode(0x85);
12075   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12076   ins_pipe(ialu_cr_reg_imm);
12077 %}
12078 
12079 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12080 %{
        // (src & con) compared against zero for longs, folded into one testq
        // with an immL32 constant (opcode F7 /0, Con32).
12081   match(Set cr (CmpL (AndL src con) zero));
12082 
12083   format %{ "testq   $src, $con\t# long" %}
12084   opcode(0xF7, 0x00);
12085   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12086   ins_pipe(ialu_cr_reg_imm);
12087 %}
12088 
12089 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12090 %{
        // (src & loaded long) compared against zero, folded into one testq
        // with a memory operand (opcode 85).
12091   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12092 
12093   format %{ "testq   $src, $mem" %}
12094   opcode(0x85);
12095   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12096   ins_pipe(ialu_cr_reg_mem);
12097 %}
12098 
12099 // Manifest a CmpL result in an integer register.  Very painful.
12100 // This is the test to avoid.
      // Matches CmpL3 and writes the outcome to an int register; the flags
      // are clobbered.  Per the format: dst starts at -1 and is kept when
      // src1 < src2, otherwise it becomes the setne result (0 or 1).
12101 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12102 %{
12103   match(Set dst (CmpL3 src1 src2));
12104   effect(KILL flags);
12105 
12106   ins_cost(275); // XXX
12107   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12108             "movl    $dst, -1\n\t"
12109             "jl,s    done\n\t"
12110             "setne   $dst\n\t"
12111             "movzbl  $dst, $dst\n\t"
12112     "done:" %}
12113   ins_encode(cmpl3_flag(src1, src2, dst));
12114   ins_pipe(pipe_slow);
12115 %}
12116 
12117 //----------Max and Min--------------------------------------------------------
12118 // Min Instructions
12119 
12120 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12121 %{
        // Helper with no match rule; it is only instantiated by the expand in
        // minI_rReg.  Conditional move on "greater" (opcode 0F 4F): dst is
        // both read and written, src and the flags are read.
12122   effect(USE_DEF dst, USE src, USE cr);
12123 
12124   format %{ "cmovlgt $dst, $src\t# min" %}
12125   opcode(0x0F, 0x4F);
12126   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12127   ins_pipe(pipe_cmov_reg);
12128 %}
12129 
12130 
12131 instruct minI_rReg(rRegI dst, rRegI src)
12132 %{
        // MinI in place on dst, expanded into two instructions: compare dst
        // with src, then move src into dst when dst was greater.
12133   match(Set dst (MinI dst src));
12134 
12135   ins_cost(200);
12136   expand %{
12137     rFlagsReg cr;
12138     compI_rReg(cr, dst, src);
12139     cmovI_reg_g(dst, src, cr);
12140   %}
12141 %}
12142 
12143 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12144 %{
        // Helper with no match rule; it is only instantiated by the expand in
        // maxI_rReg.  Conditional move on "less" (opcode 0F 4C): dst is both
        // read and written, src and the flags are read.
12145   effect(USE_DEF dst, USE src, USE cr);
12146 
12147   format %{ "cmovllt $dst, $src\t# max" %}
12148   opcode(0x0F, 0x4C);
12149   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12150   ins_pipe(pipe_cmov_reg);
12151 %}
12152 
12153 
12154 instruct maxI_rReg(rRegI dst, rRegI src)
12155 %{
        // MaxI in place on dst, expanded into two instructions: compare dst
        // with src, then move src into dst when dst was less.
12156   match(Set dst (MaxI dst src));
12157 
12158   ins_cost(200);
12159   expand %{
12160     rFlagsReg cr;
12161     compI_rReg(cr, dst, src);
12162     cmovI_reg_l(dst, src, cr);
12163   %}
12164 %}
12165 
12166 // ============================================================================
12167 // Branch Instructions
12168 
12169 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional jump (matches Goto): opcode E9 followed
      // by the label, with a declared fixed size of 5 bytes.
12170 instruct jmpDir(label labl)
12171 %{
12172   match(Goto);
12173   effect(USE labl);
12174 
12175   ins_cost(300);
12176   format %{ "jmp     $labl" %}
12177   size(5);
12178   opcode(0xE9);
12179   ins_encode(OpcP, Lbl(labl));
12180   ins_pipe(pipe_jmp);
12181   ins_pc_relative(1);
12182 %}
12183 
12184 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the conditional jump for an If on signed flags
      // (cmpOp / rFlagsReg): opcode 0F 80 base, declared fixed size of 6 bytes.
12185 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12186 %{
12187   match(If cop cr);
12188   effect(USE labl);
12189 
12190   ins_cost(300);
12191   format %{ "j$cop     $labl" %}
12192   size(6);
12193   opcode(0x0F, 0x80);
12194   ins_encode(Jcc(cop, labl));
12195   ins_pipe(pipe_jcc);
12196   ins_pc_relative(1);
12197 %}
12198 
12199 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matches CountedLoopEnd instead of If.
12200 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12201 %{
12202   match(CountedLoopEnd cop cr);
12203   effect(USE labl);
12204 
12205   ins_cost(300);
12206   format %{ "j$cop     $labl\t# loop end" %}
12207   size(6);
12208   opcode(0x0F, 0x80);
12209   ins_encode(Jcc(cop, labl));
12210   ins_pipe(pipe_jcc);
12211   ins_pc_relative(1);
12212 %}
12213 
12214 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd on unsigned flags (cmpOpU / rFlagsRegU); long form,
      // declared fixed size of 6 bytes.
12215 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12216   match(CountedLoopEnd cop cmp);
12217   effect(USE labl);
12218 
12219   ins_cost(300);
12220   format %{ "j$cop,u   $labl\t# loop end" %}
12221   size(6);
12222   opcode(0x0F, 0x80);
12223   ins_encode(Jcc(cop, labl));
12224   ins_pipe(pipe_jcc);
12225   ins_pc_relative(1);
12226 %}
12227 
12228 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd on the cmpOpUCF / rFlagsRegUCF operand classes; same
        // encoding as jmpLoopEndU but with a lower cost (200 instead of 300).
12229   match(CountedLoopEnd cop cmp);
12230   effect(USE labl);
12231 
12232   ins_cost(200);
12233   format %{ "j$cop,u   $labl\t# loop end" %}
12234   size(6);
12235   opcode(0x0F, 0x80);
12236   ins_encode(Jcc(cop, labl));
12237   ins_pipe(pipe_jcc);
12238   ins_pc_relative(1);
12239 %}
12240 
12241 // Jump Direct Conditional - using unsigned comparison
      // Long form of the conditional jump for an If on unsigned flags
      // (cmpOpU / rFlagsRegU); declared fixed size of 6 bytes.
12242 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12243   match(If cop cmp);
12244   effect(USE labl);
12245 
12246   ins_cost(300);
12247   format %{ "j$cop,u  $labl" %}
12248   size(6);
12249   opcode(0x0F, 0x80);
12250   ins_encode(Jcc(cop, labl));
12251   ins_pipe(pipe_jcc);
12252   ins_pc_relative(1);
12253 %}
12254 
12255 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If on the cmpOpUCF / rFlagsRegUCF operand classes; same encoding as
        // jmpConU but with a lower cost (200 instead of 300).
12256   match(If cop cmp);
12257   effect(USE labl);
12258 
12259   ins_cost(200);
12260   format %{ "j$cop,u  $labl" %}
12261   size(6);
12262   opcode(0x0F, 0x80);
12263   ins_encode(Jcc(cop, labl));
12264   ins_pipe(pipe_jcc);
12265   ins_pc_relative(1);
12266 %}
12267 
12268 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Two-jump sequence for cmpOpUCF2 conditions: a jump on parity followed
        // by the jump on $cop.  Only equal and notEqual are handled; any other
        // condition reaches ShouldNotReachHere().
12269   match(If cop cmp);
12270   effect(USE labl);
12271 
12272   ins_cost(200);
12273   format %{ $$template
12274     if ($cop$$cmpcode == Assembler::notEqual) {
12275       $$emit$$"jp,u   $labl\n\t"
12276       $$emit$$"j$cop,u   $labl"
12277     } else {
12278       $$emit$$"jp,u   done\n\t"
12279       $$emit$$"j$cop,u   $labl\n\t"
12280       $$emit$$"done:"
12281     }
12282   %}
12283   size(12); // two jumps of 6 bytes each
12284   opcode(0x0F, 0x80);
12285   ins_encode %{
12286     Label* l = $labl$$label;
          // First jump: on parity.
12287     $$$emit8$primary;
12288     emit_cc(cbuf, $secondary, Assembler::parity);
12289     int parity_disp = -1;
12290     if ($cop$$cmpcode == Assembler::notEqual) {
             // notEqual: parity also branches to $labl (displacement 0 when
             // no label is available).
12291        // the two jumps 6 bytes apart so the jump distances are too
12292        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12293     } else if ($cop$$cmpcode == Assembler::equal) {
             // equal: parity skips over the 6-byte second jump ("done").
12294        parity_disp = 6;
12295     } else {
12296        ShouldNotReachHere();
12297     }
12298     emit_d32(cbuf, parity_disp);
          // Second jump: on the requested condition, to $labl.
12299     $$$emit8$primary;
12300     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12301     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12302     emit_d32(cbuf, disp);
12303   %}
12304   ins_pipe(pipe_jcc);
12305   ins_pc_relative(1);
12306 %}
12307 
12308 // ============================================================================
12309 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12310 // superklass array for an instance of the superklass.  Set a hidden
12311 // internal cache on a hit (cache is checked with exposed code in
12312 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12313 // encoding ALSO sets flags.
12314 
12315 instruct partialSubtypeCheck(rdi_RegP result,
12316                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12317                              rFlagsReg cr)
12318 %{
        // Matches PartialSubtypeCheck with the result delivered as a value in
        // the result operand; rcx and the flags are clobbered.  The format
        // string below is the documentation of the emitted sequence.
        // opcode(0x1) is presumably read by enc_PartialSubtypeCheck as the
        // switch that adds the final XOR of rdi -- confirm in the encoding.
12319   match(Set result (PartialSubtypeCheck sub super));
12320   effect(KILL rcx, KILL cr);
12321 
12322   ins_cost(1100);  // slightly larger than the next version
12323   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12324             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12325             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12326             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12327             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12328             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12329             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
12330     "miss:\t" %}
12331 
12332   opcode(0x1); // Force a XOR of RDI
12333   ins_encode(enc_PartialSubtypeCheck());
12334   ins_pipe(pipe_slow);
12335 %}
12336 
12337 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12338                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12339                                      immP0 zero,
12340                                      rdi_RegP result)
12341 %{
        // Variant for a PartialSubtypeCheck whose value is only compared with
        // zero: the flags register is the result, while rcx and the result
        // operand (rdi) are merely clobbered.  It shares
        // enc_PartialSubtypeCheck with partialSubtypeCheck but uses
        // opcode(0x0), and it is slightly cheaper (1000 vs 1100).
12342   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12343   effect(KILL rcx, KILL result);
12344 
12345   ins_cost(1000);
12346   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12347             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12348             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12349             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12350             "jne,s   miss\t\t# Missed: flags nz\n\t"
12351             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12352     "miss:\t" %}
12353 
12354   opcode(0x0); // No need to XOR RDI
12355   ins_encode(enc_PartialSubtypeCheck());
12356   ins_pipe(pipe_slow);
12357 %}
12358 
12359 // ============================================================================
12360 // Branch Instructions -- short offset versions
12361 //
12362 // These instructions are used to replace jumps of a long offset (the default
12363 // match) with jumps of a shorter offset.  These instructions are all tagged
12364 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12365 // match rules in general matching.  Instead, the ADLC generates a conversion
12366 // method in the MachNode which can be used to do in-place replacement of the
12367 // long variant with the shorter variant.  The compiler will determine if a
12368 // branch can be taken by the is_short_branch_offset() predicate in the machine
12369 // specific code section of the file.
12370 
12371 // Jump Direct - Label defines a relative address from JMP+1
      // Short form of jmpDir: opcode EB with a short label, declared fixed
      // size of 2 bytes, tagged ins_short_branch.
12372 instruct jmpDir_short(label labl) %{
12373   match(Goto);
12374   effect(USE labl);
12375 
12376   ins_cost(300);
12377   format %{ "jmp,s   $labl" %}
12378   size(2);
12379   opcode(0xEB);
12380   ins_encode(OpcP, LblShort(labl));
12381   ins_pipe(pipe_jmp);
12382   ins_pc_relative(1);
12383   ins_short_branch(1);
12384 %}
12385 
12386 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpCon: opcode 70 base, declared fixed size of 2 bytes,
      // tagged ins_short_branch.
12387 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12388   match(If cop cr);
12389   effect(USE labl);
12390 
12391   ins_cost(300);
12392   format %{ "j$cop,s   $labl" %}
12393   size(2);
12394   opcode(0x70);
12395   ins_encode(JccShort(cop, labl));
12396   ins_pipe(pipe_jcc);
12397   ins_pc_relative(1);
12398   ins_short_branch(1);
12399 %}
12400 
12401 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpLoopEnd: opcode 70 base, declared fixed size of
      // 2 bytes, tagged ins_short_branch.
12402 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12403   match(CountedLoopEnd cop cr);
12404   effect(USE labl);
12405 
12406   ins_cost(300);
12407   format %{ "j$cop,s   $labl\t# loop end" %}
12408   size(2);
12409   opcode(0x70);
12410   ins_encode(JccShort(cop, labl));
12411   ins_pipe(pipe_jcc);
12412   ins_pc_relative(1);
12413   ins_short_branch(1);
12414 %}
12415 
12416 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpLoopEndU: opcode 70 base, declared fixed size of
      // 2 bytes, tagged ins_short_branch.
12417 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12418   match(CountedLoopEnd cop cmp);
12419   effect(USE labl);
12420 
12421   ins_cost(300);
12422   format %{ "j$cop,us  $labl\t# loop end" %}
12423   size(2);
12424   opcode(0x70);
12425   ins_encode(JccShort(cop, labl));
12426   ins_pipe(pipe_jcc);
12427   ins_pc_relative(1);
12428   ins_short_branch(1);
12429 %}
12430 
12431 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpLoopEndUCF: opcode 70 base, declared fixed size of
        // 2 bytes, tagged ins_short_branch.  Unlike the long form (cost 200),
        // this one declares cost 300.
12432   match(CountedLoopEnd cop cmp);
12433   effect(USE labl);
12434 
12435   ins_cost(300);
12436   format %{ "j$cop,us  $labl\t# loop end" %}
12437   size(2);
12438   opcode(0x70);
12439   ins_encode(JccShort(cop, labl));
12440   ins_pipe(pipe_jcc);
12441   ins_pc_relative(1);
12442   ins_short_branch(1);
12443 %}
12444 
12445 // Jump Direct Conditional - using unsigned comparison
      // Short form of jmpConU: opcode 70 base, declared fixed size of 2 bytes,
      // tagged ins_short_branch.
12446 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12447   match(If cop cmp);
12448   effect(USE labl);
12449 
12450   ins_cost(300);
12451   format %{ "j$cop,us  $labl" %}
12452   size(2);
12453   opcode(0x70);
12454   ins_encode(JccShort(cop, labl));
12455   ins_pipe(pipe_jcc);
12456   ins_pc_relative(1);
12457   ins_short_branch(1);
12458 %}
12459 
12460 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF: opcode 70 base, declared fixed size of
        // 2 bytes, tagged ins_short_branch.  Unlike the long form (cost 200),
        // this one declares cost 300.
12461   match(If cop cmp);
12462   effect(USE labl);
12463 
12464   ins_cost(300);
12465   format %{ "j$cop,us  $labl" %}
12466   size(2);
12467   opcode(0x70);
12468   ins_encode(JccShort(cop, labl));
12469   ins_pipe(pipe_jcc);
12470   ins_pc_relative(1);
12471   ins_short_branch(1);
12472 %}
12473 
12474 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF2: a parity jump followed by the jump on
        // $cop, each with an 8-bit displacement.  Only equal and notEqual are
        // handled; any other condition reaches ShouldNotReachHere().
12475   match(If cop cmp);
12476   effect(USE labl);
12477 
12478   ins_cost(300);
12479   format %{ $$template
12480     if ($cop$$cmpcode == Assembler::notEqual) {
12481       $$emit$$"jp,u,s   $labl\n\t"
12482       $$emit$$"j$cop,u,s   $labl"
12483     } else {
12484       $$emit$$"jp,u,s   done\n\t"
12485       $$emit$$"j$cop,u,s  $labl\n\t"
12486       $$emit$$"done:"
12487     }
12488   %}
12489   size(4); // two jumps of 2 bytes each
12490   opcode(0x70);
12491   ins_encode %{
12492     Label* l = $labl$$label;
          // First jump: on parity.
12493     emit_cc(cbuf, $primary, Assembler::parity);
12494     int parity_disp = -1;
12495     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: parity also branches to $labl (displacement 0 when
            // no label is available).
12496       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12497     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity skips over the 2-byte second jump ("done").
12498       parity_disp = 2;
12499     } else {
12500       ShouldNotReachHere();
12501     }
12502     emit_d8(cbuf, parity_disp);
          // Second jump: on the requested condition, to $labl.
12503     emit_cc(cbuf, $primary, $cop$$cmpcode);
12504     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12505     emit_d8(cbuf, disp);
          // Range checks run after both displacements have been emitted.
12506     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12507     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12508   %}
12509   ins_pipe(pipe_jcc);
12510   ins_pc_relative(1);
12511   ins_short_branch(1);
12512 %}
12513 
12514 // ============================================================================
12515 // inlined locking and unlocking
12516 
12517 instruct cmpFastLock(rFlagsReg cr,
12518                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12519 %{
        // Matches a FastLock node on (object, box) and produces its result in
        // the flags register cr.  The code is generated by the Fast_Lock
        // encoding, which receives two scratch operands: tmp (declared with
        // the rax_RegI operand type) and scr.
12520   match(Set cr (FastLock object box));
12521   effect(TEMP tmp, TEMP scr);
12522 
12523   ins_cost(300);
12524   format %{ "fastlock $object,$box,$tmp,$scr" %}
12525   ins_encode(Fast_Lock(object, box, tmp, scr));
12526   ins_pipe(pipe_slow);
12527   ins_pc_relative(1);
12528 %}
12529 
12530 instruct cmpFastUnlock(rFlagsReg cr,
12531                        rRegP object, rax_RegP box, rRegP tmp)
12532 %{
        // Matches a FastUnlock node on (object, box) and produces its result
        // in the flags register cr.  The box input is declared with the
        // rax_RegP operand type; tmp is a scratch operand for the Fast_Unlock
        // encoding.
12533   match(Set cr (FastUnlock object box));
12534   effect(TEMP tmp);
12535 
12536   ins_cost(300);
12537   format %{ "fastunlock $object, $box, $tmp" %}
12538   ins_encode(Fast_Unlock(object, box, tmp));
12539   ins_pipe(pipe_slow);
12540   ins_pc_relative(1);
12541 %}
12542 
12543 
12544 // ============================================================================
12545 // Safepoint Instructions
12546 instruct safePoint_poll(rFlagsReg cr)
12547 %{
        // Matches a SafePoint node.  Per the format string the generated code
        // is a rip-relative testl against the poll page; the flags register
        // is declared as killed.
12548   match(SafePoint);
12549   effect(KILL cr);
12550 
12551   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12552             "# Safepoint: poll for GC" %}
12553   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12554   ins_cost(125);
12555   ins_encode(enc_safepoint_poll);
12556   ins_pipe(ialu_reg_mem);
12557 %}
12558 
12559 // ============================================================================
12560 // Procedure Call/Return Instructions
12561 // Call Java Static Instruction
12562 // Note: If this code changes, the corresponding ret_addr_offset() and
12563 //       compute_padding() functions will have to be adjusted.
12564 instruct CallStaticJavaDirect(method meth) %{
        // Static Java call for CallStaticJava nodes that are NOT method-handle
        // invokes (see the predicate).  Emits the Java_Static_Call encoding
        // followed by call_epilog.
12565   match(CallStaticJava);
12566   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12567   effect(USE meth);
12568 
12569   ins_cost(300);
12570   format %{ "call,static " %}
12571   opcode(0xE8); /* E8 cd */
12572   ins_encode(Java_Static_Call(meth), call_epilog);
12573   ins_pipe(pipe_slow);
12574   ins_pc_relative(1);
12575   ins_alignment(4);        // alignment attribute; see the compute_padding() note on this rule
12576 %}
12577 
12578 // Call Java Static Instruction (method handle version)
12579 // Note: If this code changes, the corresponding ret_addr_offset() and
12580 //       compute_padding() functions will have to be adjusted.
12581 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp) %{
        // Static Java call for CallStaticJava nodes that ARE method-handle
        // invokes (see the predicate).  The call is bracketed by the
        // preserve_SP and restore_SP encodings and followed by call_epilog.
12582   match(CallStaticJava);
12583   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12584   effect(USE meth);
12585   // RBP is saved by all callees (for interpreter stack correction).
12586   // We use it here for a similar purpose, in {preserve,restore}_SP.
12587 
12588   ins_cost(300);
12589   format %{ "call,static/MethodHandle " %}
12590   opcode(0xE8); /* E8 cd */
12591   ins_encode(preserve_SP,
12592              Java_Static_Call(meth),
12593              restore_SP,
12594              call_epilog);
12595   ins_pipe(pipe_slow);
12596   ins_pc_relative(1);
12597   ins_alignment(4);        // alignment attribute; see the compute_padding() note on this rule
12598 %}
12599 
12600 // Call Java Dynamic Instruction
12601 // Note: If this code changes, the corresponding ret_addr_offset() and
12602 //       compute_padding() functions will have to be adjusted.
12603 instruct CallDynamicJavaDirect(method meth)
12604 %{
        // Dynamic Java call for CallDynamicJava nodes.  Per the format string
        // the sequence first loads Universe::non_oop_word() into rax and then
        // performs the call; the bytes come from the Java_Dynamic_Call
        // encoding followed by call_epilog.
12605   match(CallDynamicJava);
12606   effect(USE meth);
12607 
12608   ins_cost(300);
12609   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12610             "call,dynamic " %}
12611   opcode(0xE8); /* E8 cd */
12612   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12613   ins_pipe(pipe_slow);
12614   ins_pc_relative(1);
12615   ins_alignment(4);        // alignment attribute; see the compute_padding() note on this rule
12616 %}
12617 
12618 // Call Runtime Instruction
12619 instruct CallRuntimeDirect(method meth)
12620 %{
        // Call into the runtime for CallRuntime nodes, emitted through the
        // Java_To_Runtime encoding.  Unlike the Java call rules in this file,
        // no call_epilog or ins_alignment is attached here.
12621   match(CallRuntime);
12622   effect(USE meth);
12623 
12624   ins_cost(300);
12625   format %{ "call,runtime " %}
12626   opcode(0xE8); /* E8 cd */
12627   ins_encode(Java_To_Runtime(meth));
12628   ins_pipe(pipe_slow);
12629   ins_pc_relative(1);
12630 %}
12631 
12632 // Call runtime without safepoint
12633 instruct CallLeafDirect(method meth)
12634 %{
        // Runtime call for CallLeaf nodes.  It uses the same Java_To_Runtime
        // encoding as CallRuntimeDirect; only the matched ideal node and the
        // format text differ.
12635   match(CallLeaf);
12636   effect(USE meth);
12637 
12638   ins_cost(300);
12639   format %{ "call_leaf,runtime " %}
12640   opcode(0xE8); /* E8 cd */
12641   ins_encode(Java_To_Runtime(meth));
12642   ins_pipe(pipe_slow);
12643   ins_pc_relative(1);
12644 %}
12645 
12646 // Call runtime without safepoint
12647 instruct CallLeafNoFPDirect(method meth)
12648 %{
        // Runtime call for CallLeafNoFP nodes.  The encoding is identical to
        // CallLeafDirect (Java_To_Runtime); only the matched ideal node and
        // the format text differ.
12649   match(CallLeafNoFP);
12650   effect(USE meth);
12651 
12652   ins_cost(300);
12653   format %{ "call_leaf_nofp,runtime " %}
12654   opcode(0xE8); /* E8 cd */
12655   ins_encode(Java_To_Runtime(meth));
12656   ins_pipe(pipe_slow);
12657   ins_pc_relative(1);
12658 %}
12659 
12660 // Return Instruction
12661 // Remove the return address & jump to it.
12662 // Notice: We always emit a nop after a ret to make sure there is room
12663 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with opcode 0xC3; no
      // nop is visible in this rule -- confirm whether the notice above is
      // stale or whether the nop is produced elsewhere.
12664 instruct Ret()
12665 %{
12666   match(Return);
12667 
12668   format %{ "ret" %}
12669   opcode(0xC3);            // primary opcode, emitted by OpcP
12670   ins_encode(OpcP);
12671   ins_pipe(pipe_jmp);
12672 %}
12673 
12674 // Tail Call; Jump from runtime stub to Java code.
12675 // Also known as an 'interprocedural jump'.
12676 // Target of jump will eventually return to caller.
12677 // TailJump below removes the return address.
12678 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12679 %{
        // Matches a TailCall node: an indirect jump through the jump_target
        // register.  method_oop is declared with the rbx_RegP operand type
        // and is not referenced by the encoding; it only has to be in place
        // when the jump executes (see the format string).
12680   match(TailCall jump_target method_oop);
12681 
12682   ins_cost(300);
12683   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12684   opcode(0xFF, 0x4); /* Opcode FF /4 */
12685   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12686   ins_pipe(pipe_jmp);
12687 %}
12688 
12689 // Tail Jump; remove the return address; jump to target.
12690 // TailCall above leaves the return address around.
12691 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12692 %{
        // Matches a TailJump node: pops the return address into rdx, then
        // jumps indirectly through the jump_target register.  ex_oop is
        // declared with the rax_RegP operand type and is not referenced by
        // the encoding.
12693   match(TailJump jump_target ex_oop);
12694 
12695   ins_cost(300);
12696   format %{ "popq    rdx\t# pop return address\n\t"
12697             "jmp     $jump_target" %}
12698   opcode(0xFF, 0x4); /* Opcode FF /4 */
12699   ins_encode(Opcode(0x5a), // popq rdx
12700              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12701   ins_pipe(pipe_jmp);
12702 %}
12703 
12704 // Create exception oop: created by stack-crawling runtime code.
12705 // Created exception is now available to this handler, and is setup
12706 // just prior to jumping to this handler.  No code emitted.
12707 instruct CreateException(rax_RegP ex_oop)
12708 %{
        // Matches a CreateEx node and binds its result to an operand of type
        // rax_RegP.  The rule is a pure placeholder: size is 0 and the
        // encoding is empty, so no bytes are emitted.
12709   match(Set ex_oop (CreateEx));
12710 
12711   size(0);
12712   // use the following format syntax
12713   format %{ "# exception oop is in rax; no code emitted" %}
12714   ins_encode();
12715   ins_pipe(empty);
12716 %}
12717 
12718 // Rethrow exception:
12719 // The exception oop will come in the first argument position.
12720 // Then JUMP (not call) to the rethrow stub code.
12721 instruct RethrowException()
12722 %{
        // Matches a Rethrow node.  The rule takes no operands; the jump to
        // the rethrow stub is produced entirely by the enc_rethrow encoding.
12723   match(Rethrow);
12724 
12725   // use the following format syntax
12726   format %{ "jmp     rethrow_stub" %}
12727   ins_encode(enc_rethrow);
12728   ins_pipe(pipe_jmp);
12729 %}
12730 
12731 
12732 //----------PEEPHOLE RULES-----------------------------------------------------
12733 // These must follow all instruction definitions as they use the names
12734 // defined in the instructions definitions.
12735 //
12736 // peepmatch ( root_instr_name [preceding_instruction]* );
12737 //
12738 // peepconstraint %{
12739 // (instruction_number.operand_name relational_op instruction_number.operand_name
12740 //  [, ...] );
12741 // // instruction numbers are zero-based using left to right order in peepmatch
12742 //
12743 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12744 // // provide an instruction_number.operand_name for each operand that appears
12745 // // in the replacement instruction's match rule
12746 //
12747 // ---------VM FLAGS---------------------------------------------------------
12748 //
12749 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12750 //
12751 // Each peephole rule is given an identifying number starting with zero and
12752 // increasing by one in the order seen by the parser.  An individual peephole
12753 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12754 // on the command-line.
12755 //
12756 // ---------CURRENT LIMITATIONS----------------------------------------------
12757 //
12758 // Only match adjacent instructions in same basic block
12759 // Only equality constraints
12760 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12761 // Only one replacement instruction
12762 //
12763 // ---------EXAMPLE----------------------------------------------------------
12764 //
12765 // // pertinent parts of existing instructions in architecture description
12766 // instruct movI(rRegI dst, rRegI src)
12767 // %{
12768 //   match(Set dst (CopyI src));
12769 // %}
12770 //
12771 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12772 // %{
12773 //   match(Set dst (AddI dst src));
12774 //   effect(KILL cr);
12775 // %}
12776 //
12777 // // Change (inc mov) to lea
12778 // peephole %{
12779 //   // increment preceded by register-register move
12780 //   peepmatch ( incI_rReg movI );
12781 //   // require that the destination register of the increment
12782 //   // match the destination register of the move
12783 //   peepconstraint ( 0.dst == 1.dst );
12784 //   // construct a replacement instruction that sets
12785 //   // the destination to ( move's source register + one )
12786 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12787 // %}
12788 //
12789 
12790 // Implementation no longer uses movX instructions since
12791 // machine-independent system no longer uses CopyX nodes.
12792 //
12793 // peephole
12794 // %{
12795 //   peepmatch (incI_rReg movI);
12796 //   peepconstraint (0.dst == 1.dst);
12797 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12798 // %}
12799 
12800 // peephole
12801 // %{
12802 //   peepmatch (decI_rReg movI);
12803 //   peepconstraint (0.dst == 1.dst);
12804 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12805 // %}
12806 
12807 // peephole
12808 // %{
12809 //   peepmatch (addI_rReg_imm movI);
12810 //   peepconstraint (0.dst == 1.dst);
12811 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12812 // %}
12813 
12814 // peephole
12815 // %{
12816 //   peepmatch (incL_rReg movL);
12817 //   peepconstraint (0.dst == 1.dst);
12818 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12819 // %}
12820 
12821 // peephole
12822 // %{
12823 //   peepmatch (decL_rReg movL);
12824 //   peepconstraint (0.dst == 1.dst);
12825 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12826 // %}
12827 
12828 // peephole
12829 // %{
12830 //   peepmatch (addL_rReg_imm movL);
12831 //   peepconstraint (0.dst == 1.dst);
12832 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12833 // %}
12834 
12835 // peephole
12836 // %{
12837 //   peepmatch (addP_rReg_imm movP);
12838 //   peepconstraint (0.dst == 1.dst);
12839 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12840 // %}
12841 
12842 // // Change load of spilled value to only a spill
12843 // instruct storeI(memory mem, rRegI src)
12844 // %{
12845 //   match(Set mem (StoreI mem src));
12846 // %}
12847 //
12848 // instruct loadI(rRegI dst, memory mem)
12849 // %{
12850 //   match(Set dst (LoadI mem));
12851 // %}
12852 //
12853 
12854 peephole
12855 %{
        // Removes a 32-bit load that immediately follows a store of the same
        // value to the same memory.  Instruction 0 is the root (loadI) and
        // instruction 1 is the preceding storeI.
12856   peepmatch (loadI storeI);
        // The stored register must be the load's destination, and both
        // instructions must use the same memory operand.
12857   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
12858   peepreplace (storeI(1.mem 1.mem 1.src));
12859 %}
12860 
12861 peephole
12862 %{
        // Removes a 64-bit load that immediately follows a store of the same
        // value to the same memory.  Instruction 0 is the root (loadL) and
        // instruction 1 is the preceding storeL.
12863   peepmatch (loadL storeL);
        // The stored register must be the load's destination, and both
        // instructions must use the same memory operand.
12864   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
12865   peepreplace (storeL(1.mem 1.mem 1.src));
12866 %}
12867 
12868 //----------SMARTSPILL RULES---------------------------------------------------
12869 // These must follow all instruction definitions as they use the names
12870 // defined in the instructions definitions.