1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP (R12 and R15 are also left out)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers except RSP (R12 and R15 are also left out)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers except RSP (R12 and R15 are also left out)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 // !!!!! Special hack to get all types of calls to specify the byte offset
 555 //       from the start of the call to the point where the return address
 556 //       will point.
 557 int MachCallStaticJavaNode::ret_addr_offset()
 558 {
 559   return 5; // 5 bytes from start of call to where return address points
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset()
 563 {
 564   return 15; // 15 bytes from start of call to where return address points
 565 }
 566 
 567 // In os_cpu .ad file
 568 // int MachCallRuntimeNode::ret_addr_offset()
 569 
 570 // Indicate if the safepoint node needs the polling page as an input.
 571 // Since amd64 does not have absolute addressing but RIP-relative
 572 // addressing and the polling page is within 2G, it doesn't.
 573 bool SafePointNode::needs_polling_address_input()
 574 {
 575   return false;
 576 }
 577 
 578 //
 579 // Compute padding required for nodes which need alignment
 580 //
 581 
 582 // The address of the call instruction needs to be 4-byte aligned to
 583 // ensure that it does not span a cache line so that it can be patched.
 584 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 585 {
 586   current_offset += 1; // skip call opcode byte
 587   return round_to(current_offset, alignment_required()) - current_offset;
 588 }
 589 
 590 // The address of the call instruction needs to be 4-byte aligned to
 591 // ensure that it does not span a cache line so that it can be patched.
 592 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 593 {
 594   current_offset += 11; // skip movq instruction + call opcode byte
 595   return round_to(current_offset, alignment_required()) - current_offset;
 596 }
 597 
 598 #ifndef PRODUCT
 599 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 600 {
 601   st->print("INT3");
 602 }
 603 #endif
 604 
 605 // EMIT_RM()
 606 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 607 {
 608   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 609   *(cbuf.code_end()) = c;
 610   cbuf.set_code_end(cbuf.code_end() + 1);
 611 }
 612 
 613 // EMIT_CC()
 614 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 615 {
 616   unsigned char c = (unsigned char) (f1 | f2);
 617   *(cbuf.code_end()) = c;
 618   cbuf.set_code_end(cbuf.code_end() + 1);
 619 }
 620 
 621 // EMIT_OPCODE()
 622 void emit_opcode(CodeBuffer &cbuf, int code)
 623 {
 624   *(cbuf.code_end()) = (unsigned char) code;
 625   cbuf.set_code_end(cbuf.code_end() + 1);
 626 }
 627 
 628 // EMIT_OPCODE() w/ relocation information
 629 void emit_opcode(CodeBuffer &cbuf,
 630                  int code, relocInfo::relocType reloc, int offset, int format)
 631 {
 632   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8()
 637 void emit_d8(CodeBuffer &cbuf, int d8)
 638 {
 639   *(cbuf.code_end()) = (unsigned char) d8;
 640   cbuf.set_code_end(cbuf.code_end() + 1);
 641 }
 642 
 643 // EMIT_D16()
 644 void emit_d16(CodeBuffer &cbuf, int d16)
 645 {
 646   *((short *)(cbuf.code_end())) = d16;
 647   cbuf.set_code_end(cbuf.code_end() + 2);
 648 }
 649 
 650 // EMIT_D32()
 651 void emit_d32(CodeBuffer &cbuf, int d32)
 652 {
 653   *((int *)(cbuf.code_end())) = d32;
 654   cbuf.set_code_end(cbuf.code_end() + 4);
 655 }
 656 
 657 // EMIT_D64()
 658 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 659 {
 660   *((int64_t*) (cbuf.code_end())) = d64;
 661   cbuf.set_code_end(cbuf.code_end() + 8);
 662 }
 663 
 664 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 665 void emit_d32_reloc(CodeBuffer& cbuf,
 666                     int d32,
 667                     relocInfo::relocType reloc,
 668                     int format)
 669 {
 670   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 671   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 672 
 673   *((int*) (cbuf.code_end())) = d32;
 674   cbuf.set_code_end(cbuf.code_end() + 4);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf,
 679                     int d32,
 680                     RelocationHolder const& rspec,
 681                     int format)
 682 {
 683 #ifdef ASSERT
 684   if (rspec.reloc()->type() == relocInfo::oop_type &&
 685       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 686     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 687   }
 688 #endif
 689   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 690 
 691   *((int* )(cbuf.code_end())) = d32;
 692   cbuf.set_code_end(cbuf.code_end() + 4);
 693 }
 694 
 695 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 696   address next_ip = cbuf.code_end() + 4;
 697   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 698                  external_word_Relocation::spec(addr),
 699                  RELOC_DISP32);
 700 }
 701 
 702 
 703 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 704 void emit_d64_reloc(CodeBuffer& cbuf,
 705                     int64_t d64,
 706                     relocInfo::relocType reloc,
 707                     int format)
 708 {
 709   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 710 
 711   *((int64_t*) (cbuf.code_end())) = d64;
 712   cbuf.set_code_end(cbuf.code_end() + 8);
 713 }
 714 
 715 // emit 64 bit value and construct relocation entry from RelocationHolder
 716 void emit_d64_reloc(CodeBuffer& cbuf,
 717                     int64_t d64,
 718                     RelocationHolder const& rspec,
 719                     int format)
 720 {
 721 #ifdef ASSERT
 722   if (rspec.reloc()->type() == relocInfo::oop_type &&
 723       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 724     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 725            "cannot embed scavengable oops in code");
 726   }
 727 #endif
 728   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 729 
 730   *((int64_t*) (cbuf.code_end())) = d64;
 731   cbuf.set_code_end(cbuf.code_end() + 8);
 732 }
 733 
 734 // Access stack slot for load or store
 735 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 736 {
 737   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 738   if (-0x80 <= disp && disp < 0x80) {
 739     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 740     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 741     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 742   } else {
 743     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 744     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 745     emit_d32(cbuf, disp);     // Displacement // R/M byte
 746   }
 747 }
 748 
 749    // rRegI ereg, memory mem) %{    // emit_reg_mem
 750 void encode_RegMem(CodeBuffer &cbuf,
 751                    int reg,
 752                    int base, int index, int scale, int disp, bool disp_is_oop)
 753 {
 754   assert(!disp_is_oop, "cannot have disp");
 755   int regenc = reg & 7;
 756   int baseenc = base & 7;
 757   int indexenc = index & 7;
 758 
 759   // There is no index & no scale, use form without SIB byte
 760   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 761     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 762     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 763       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 764     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 765       // If 8-bit displacement, mode 0x1
 766       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 767       emit_d8(cbuf, disp);
 768     } else {
 769       // If 32-bit displacement
 770       if (base == -1) { // Special flag for absolute address
 771         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 772         if (disp_is_oop) {
 773           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 774         } else {
 775           emit_d32(cbuf, disp);
 776         }
 777       } else {
 778         // Normal base + offset
 779         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 780         if (disp_is_oop) {
 781           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 782         } else {
 783           emit_d32(cbuf, disp);
 784         }
 785       }
 786     }
 787   } else {
 788     // Else, encode with the SIB byte
 789     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 790     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 791       // If no displacement
 792       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 793       emit_rm(cbuf, scale, indexenc, baseenc);
 794     } else {
 795       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 796         // If 8-bit displacement, mode 0x1
 797         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 798         emit_rm(cbuf, scale, indexenc, baseenc);
 799         emit_d8(cbuf, disp);
 800       } else {
 801         // If 32-bit displacement
 802         if (base == 0x04 ) {
 803           emit_rm(cbuf, 0x2, regenc, 0x4);
 804           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 805         } else {
 806           emit_rm(cbuf, 0x2, regenc, 0x4);
 807           emit_rm(cbuf, scale, indexenc, baseenc); // *
 808         }
 809         if (disp_is_oop) {
 810           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 811         } else {
 812           emit_d32(cbuf, disp);
 813         }
 814       }
 815     }
 816   }
 817 }
 818 
 819 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 820 {
 821   if (dstenc != srcenc) {
 822     if (dstenc < 8) {
 823       if (srcenc >= 8) {
 824         emit_opcode(cbuf, Assembler::REX_B);
 825         srcenc -= 8;
 826       }
 827     } else {
 828       if (srcenc < 8) {
 829         emit_opcode(cbuf, Assembler::REX_R);
 830       } else {
 831         emit_opcode(cbuf, Assembler::REX_RB);
 832         srcenc -= 8;
 833       }
 834       dstenc -= 8;
 835     }
 836 
 837     emit_opcode(cbuf, 0x8B);
 838     emit_rm(cbuf, 0x3, dstenc, srcenc);
 839   }
 840 }
 841 
 842 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 843   if( dst_encoding == src_encoding ) {
 844     // reg-reg copy, use an empty encoding
 845   } else {
 846     MacroAssembler _masm(&cbuf);
 847 
 848     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 849   }
 850 }
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq\trbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                   "# Safepoint: poll for GC");
1002     st->print("\t");
1003   }
1004 }
1005 #endif
1006 
1007 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008 {
1009   Compile* C = ra_->C;
1010   int framesize = C->frame_slots() << LogBytesPerInt;
1011   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012   // Remove word for return adr already pushed
1013   // and RBP
1014   framesize -= 2*wordSize;
1015 
1016   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017 
1018   if (framesize) {
1019     emit_opcode(cbuf, Assembler::REX_W);
1020     if (framesize < 0x80) {
1021       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023       emit_d8(cbuf, framesize);
1024     } else {
1025       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027       emit_d32(cbuf, framesize);
1028     }
1029   }
1030 
1031   // popq rbp
1032   emit_opcode(cbuf, 0x58 | RBP_enc);
1033 
1034   if (do_polling() && C->is_method_compilation()) {
1035     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036     // XXX reg_mem doesn't support RIP-relative addressing yet
1037     cbuf.set_inst_mark();
1038     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039     emit_opcode(cbuf, 0x85); // testl
1040     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041     // cbuf.inst_mark() is beginning of instruction
1042     emit_d32_reloc(cbuf, os::get_polling_page());
1043 //                    relocInfo::poll_return_type,
1044   }
1045 }
1046 
1047 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048 {
1049   Compile* C = ra_->C;
1050   int framesize = C->frame_slots() << LogBytesPerInt;
1051   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052   // Remove word for return adr already pushed
1053   // and RBP
1054   framesize -= 2*wordSize;
1055 
1056   uint size = 0;
1057 
1058   if (do_polling() && C->is_method_compilation()) {
1059     size += 6;
1060   }
1061 
1062   // count popq rbp
1063   size++;
1064 
1065   if (framesize) {
1066     if (framesize < 0x80) {
1067       size += 4;
1068     } else if (framesize) {
1069       size += 7;
1070     }
1071   }
1072 
1073   return size;
1074 }
1075 
1076 int MachEpilogNode::reloc() const
1077 {
1078   return 2; // a large enough number
1079 }
1080 
1081 const Pipeline* MachEpilogNode::pipeline() const
1082 {
1083   return MachNode::pipeline_class();
1084 }
1085 
1086 int MachEpilogNode::safepoint_offset() const
1087 {
1088   return 0;
1089 }
1090 
1091 //=============================================================================
1092 
// Register classes distinguished by rc_class() when deciding how to move a
// value between locations.
enum RC {
  rc_bad   = 0, // not a valid register
  rc_int   = 1, // general register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack location
};
1099 
1100 static enum RC rc_class(OptoReg::Name reg)
1101 {
1102   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103 
1104   if (OptoReg::is_stack(reg)) return rc_stack;
1105 
1106   VMReg r = OptoReg::as_VMReg(reg);
1107 
1108   if (r->is_Register()) return rc_int;
1109 
1110   assert(r->is_XMMRegister(), "must be");
1111   return rc_float;
1112 }
1113 
1114 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                        PhaseRegAlloc* ra_,
1116                                        bool do_size,
1117                                        outputStream* st) const
1118 {
1119 
1120   // Get registers to move
1121   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123   OptoReg::Name dst_second = ra_->get_reg_second(this);
1124   OptoReg::Name dst_first = ra_->get_reg_first(this);
1125 
1126   enum RC src_second_rc = rc_class(src_second);
1127   enum RC src_first_rc = rc_class(src_first);
1128   enum RC dst_second_rc = rc_class(dst_second);
1129   enum RC dst_first_rc = rc_class(dst_first);
1130 
1131   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132          "must move at least 1 register" );
1133 
1134   if (src_first == dst_first && src_second == dst_second) {
1135     // Self copy, no move
1136     return 0;
1137   } else if (src_first_rc == rc_stack) {
1138     // mem ->
1139     if (dst_first_rc == rc_stack) {
1140       // mem -> mem
1141       assert(src_second != dst_first, "overlap");
1142       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144         // 64-bit
1145         int src_offset = ra_->reg2offset(src_first);
1146         int dst_offset = ra_->reg2offset(dst_first);
1147         if (cbuf) {
1148           emit_opcode(*cbuf, 0xFF);
1149           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150 
1151           emit_opcode(*cbuf, 0x8F);
1152           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153 
1154 #ifndef PRODUCT
1155         } else if (!do_size) {
1156           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                      "popq    [rsp + #%d]",
1158                      src_offset,
1159                      dst_offset);
1160 #endif
1161         }
1162         return
1163           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165       } else {
1166         // 32-bit
1167         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169         // No pushl/popl, so:
1170         int src_offset = ra_->reg2offset(src_first);
1171         int dst_offset = ra_->reg2offset(dst_first);
1172         if (cbuf) {
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x89);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179           emit_opcode(*cbuf, 0x8B);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, src_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, 0x89);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, dst_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, Assembler::REX_W);
1192           emit_opcode(*cbuf, 0x8B);
1193           emit_opcode(*cbuf, 0x44);
1194           emit_opcode(*cbuf, 0x24);
1195           emit_opcode(*cbuf, 0xF8);
1196 
1197 #ifndef PRODUCT
1198         } else if (!do_size) {
1199           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                      "movl    rax, [rsp + #%d]\n\t"
1201                      "movl    [rsp + #%d], rax\n\t"
1202                      "movq    rax, [rsp - #8]",
1203                      src_offset,
1204                      dst_offset);
1205 #endif
1206         }
1207         return
1208           5 + // movq
1209           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211           5; // movq
1212       }
1213     } else if (dst_first_rc == rc_int) {
1214       // mem -> gpr
1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217         // 64-bit
1218         int offset = ra_->reg2offset(src_first);
1219         if (cbuf) {
1220           if (Matcher::_regEncode[dst_first] < 8) {
1221             emit_opcode(*cbuf, Assembler::REX_W);
1222           } else {
1223             emit_opcode(*cbuf, Assembler::REX_WR);
1224           }
1225           emit_opcode(*cbuf, 0x8B);
1226           encode_RegMem(*cbuf,
1227                         Matcher::_regEncode[dst_first],
1228                         RSP_enc, 0x4, 0, offset,
1229                         false);
1230 #ifndef PRODUCT
1231         } else if (!do_size) {
1232           st->print("movq    %s, [rsp + #%d]\t# spill",
1233                      Matcher::regName[dst_first],
1234                      offset);
1235 #endif
1236         }
1237         return
1238           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239       } else {
1240         // 32-bit
1241         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243         int offset = ra_->reg2offset(src_first);
1244         if (cbuf) {
1245           if (Matcher::_regEncode[dst_first] >= 8) {
1246             emit_opcode(*cbuf, Assembler::REX_R);
1247           }
1248           emit_opcode(*cbuf, 0x8B);
1249           encode_RegMem(*cbuf,
1250                         Matcher::_regEncode[dst_first],
1251                         RSP_enc, 0x4, 0, offset,
1252                         false);
1253 #ifndef PRODUCT
1254         } else if (!do_size) {
1255           st->print("movl    %s, [rsp + #%d]\t# spill",
1256                      Matcher::regName[dst_first],
1257                      offset);
1258 #endif
1259         }
1260         return
1261           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262           ((Matcher::_regEncode[dst_first] < 8)
1263            ? 3
1264            : 4); // REX
1265       }
1266     } else if (dst_first_rc == rc_float) {
1267       // mem-> xmm
1268       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270         // 64-bit
1271         int offset = ra_->reg2offset(src_first);
1272         if (cbuf) {
1273           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274           if (Matcher::_regEncode[dst_first] >= 8) {
1275             emit_opcode(*cbuf, Assembler::REX_R);
1276           }
1277           emit_opcode(*cbuf, 0x0F);
1278           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279           encode_RegMem(*cbuf,
1280                         Matcher::_regEncode[dst_first],
1281                         RSP_enc, 0x4, 0, offset,
1282                         false);
1283 #ifndef PRODUCT
1284         } else if (!do_size) {
1285           st->print("%s  %s, [rsp + #%d]\t# spill",
1286                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                      Matcher::regName[dst_first],
1288                      offset);
1289 #endif
1290         }
1291         return
1292           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293           ((Matcher::_regEncode[dst_first] < 8)
1294            ? 5
1295            : 6); // REX
1296       } else {
1297         // 32-bit
1298         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300         int offset = ra_->reg2offset(src_first);
1301         if (cbuf) {
1302           emit_opcode(*cbuf, 0xF3);
1303           if (Matcher::_regEncode[dst_first] >= 8) {
1304             emit_opcode(*cbuf, Assembler::REX_R);
1305           }
1306           emit_opcode(*cbuf, 0x0F);
1307           emit_opcode(*cbuf, 0x10);
1308           encode_RegMem(*cbuf,
1309                         Matcher::_regEncode[dst_first],
1310                         RSP_enc, 0x4, 0, offset,
1311                         false);
1312 #ifndef PRODUCT
1313         } else if (!do_size) {
1314           st->print("movss   %s, [rsp + #%d]\t# spill",
1315                      Matcher::regName[dst_first],
1316                      offset);
1317 #endif
1318         }
1319         return
1320           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321           ((Matcher::_regEncode[dst_first] < 8)
1322            ? 5
1323            : 6); // REX
1324       }
1325     }
1326   } else if (src_first_rc == rc_int) {
1327     // gpr ->
1328     if (dst_first_rc == rc_stack) {
1329       // gpr -> mem
1330       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332         // 64-bit
1333         int offset = ra_->reg2offset(dst_first);
1334         if (cbuf) {
1335           if (Matcher::_regEncode[src_first] < 8) {
1336             emit_opcode(*cbuf, Assembler::REX_W);
1337           } else {
1338             emit_opcode(*cbuf, Assembler::REX_WR);
1339           }
1340           emit_opcode(*cbuf, 0x89);
1341           encode_RegMem(*cbuf,
1342                         Matcher::_regEncode[src_first],
1343                         RSP_enc, 0x4, 0, offset,
1344                         false);
1345 #ifndef PRODUCT
1346         } else if (!do_size) {
1347           st->print("movq    [rsp + #%d], %s\t# spill",
1348                      offset,
1349                      Matcher::regName[src_first]);
1350 #endif
1351         }
1352         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353       } else {
1354         // 32-bit
1355         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357         int offset = ra_->reg2offset(dst_first);
1358         if (cbuf) {
1359           if (Matcher::_regEncode[src_first] >= 8) {
1360             emit_opcode(*cbuf, Assembler::REX_R);
1361           }
1362           emit_opcode(*cbuf, 0x89);
1363           encode_RegMem(*cbuf,
1364                         Matcher::_regEncode[src_first],
1365                         RSP_enc, 0x4, 0, offset,
1366                         false);
1367 #ifndef PRODUCT
1368         } else if (!do_size) {
1369           st->print("movl    [rsp + #%d], %s\t# spill",
1370                      offset,
1371                      Matcher::regName[src_first]);
1372 #endif
1373         }
1374         return
1375           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376           ((Matcher::_regEncode[src_first] < 8)
1377            ? 3
1378            : 4); // REX
1379       }
1380     } else if (dst_first_rc == rc_int) {
1381       // gpr -> gpr
1382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384         // 64-bit
1385         if (cbuf) {
1386           if (Matcher::_regEncode[dst_first] < 8) {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_W);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WB);
1391             }
1392           } else {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_WR);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WRB);
1397             }
1398           }
1399           emit_opcode(*cbuf, 0x8B);
1400           emit_rm(*cbuf, 0x3,
1401                   Matcher::_regEncode[dst_first] & 7,
1402                   Matcher::_regEncode[src_first] & 7);
1403 #ifndef PRODUCT
1404         } else if (!do_size) {
1405           st->print("movq    %s, %s\t# spill",
1406                      Matcher::regName[dst_first],
1407                      Matcher::regName[src_first]);
1408 #endif
1409         }
1410         return 3; // REX
1411       } else {
1412         // 32-bit
1413         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415         if (cbuf) {
1416           if (Matcher::_regEncode[dst_first] < 8) {
1417             if (Matcher::_regEncode[src_first] >= 8) {
1418               emit_opcode(*cbuf, Assembler::REX_B);
1419             }
1420           } else {
1421             if (Matcher::_regEncode[src_first] < 8) {
1422               emit_opcode(*cbuf, Assembler::REX_R);
1423             } else {
1424               emit_opcode(*cbuf, Assembler::REX_RB);
1425             }
1426           }
1427           emit_opcode(*cbuf, 0x8B);
1428           emit_rm(*cbuf, 0x3,
1429                   Matcher::_regEncode[dst_first] & 7,
1430                   Matcher::_regEncode[src_first] & 7);
1431 #ifndef PRODUCT
1432         } else if (!do_size) {
1433           st->print("movl    %s, %s\t# spill",
1434                      Matcher::regName[dst_first],
1435                      Matcher::regName[src_first]);
1436 #endif
1437         }
1438         return
1439           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440           ? 2
1441           : 3; // REX
1442       }
1443     } else if (dst_first_rc == rc_float) {
1444       // gpr -> xmm
1445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447         // 64-bit
1448         if (cbuf) {
1449           emit_opcode(*cbuf, 0x66);
1450           if (Matcher::_regEncode[dst_first] < 8) {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_W);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WB);
1455             }
1456           } else {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_WR);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WRB);
1461             }
1462           }
1463           emit_opcode(*cbuf, 0x0F);
1464           emit_opcode(*cbuf, 0x6E);
1465           emit_rm(*cbuf, 0x3,
1466                   Matcher::_regEncode[dst_first] & 7,
1467                   Matcher::_regEncode[src_first] & 7);
1468 #ifndef PRODUCT
1469         } else if (!do_size) {
1470           st->print("movdq   %s, %s\t# spill",
1471                      Matcher::regName[dst_first],
1472                      Matcher::regName[src_first]);
1473 #endif
1474         }
1475         return 5; // REX
1476       } else {
1477         // 32-bit
1478         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480         if (cbuf) {
1481           emit_opcode(*cbuf, 0x66);
1482           if (Matcher::_regEncode[dst_first] < 8) {
1483             if (Matcher::_regEncode[src_first] >= 8) {
1484               emit_opcode(*cbuf, Assembler::REX_B);
1485             }
1486           } else {
1487             if (Matcher::_regEncode[src_first] < 8) {
1488               emit_opcode(*cbuf, Assembler::REX_R);
1489             } else {
1490               emit_opcode(*cbuf, Assembler::REX_RB);
1491             }
1492           }
1493           emit_opcode(*cbuf, 0x0F);
1494           emit_opcode(*cbuf, 0x6E);
1495           emit_rm(*cbuf, 0x3,
1496                   Matcher::_regEncode[dst_first] & 7,
1497                   Matcher::_regEncode[src_first] & 7);
1498 #ifndef PRODUCT
1499         } else if (!do_size) {
1500           st->print("movdl   %s, %s\t# spill",
1501                      Matcher::regName[dst_first],
1502                      Matcher::regName[src_first]);
1503 #endif
1504         }
1505         return
1506           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507           ? 4
1508           : 5; // REX
1509       }
1510     }
1511   } else if (src_first_rc == rc_float) {
1512     // xmm ->
1513     if (dst_first_rc == rc_stack) {
1514       // xmm -> mem
1515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517         // 64-bit
1518         int offset = ra_->reg2offset(dst_first);
1519         if (cbuf) {
1520           emit_opcode(*cbuf, 0xF2);
1521           if (Matcher::_regEncode[src_first] >= 8) {
1522               emit_opcode(*cbuf, Assembler::REX_R);
1523           }
1524           emit_opcode(*cbuf, 0x0F);
1525           emit_opcode(*cbuf, 0x11);
1526           encode_RegMem(*cbuf,
1527                         Matcher::_regEncode[src_first],
1528                         RSP_enc, 0x4, 0, offset,
1529                         false);
1530 #ifndef PRODUCT
1531         } else if (!do_size) {
1532           st->print("movsd   [rsp + #%d], %s\t# spill",
1533                      offset,
1534                      Matcher::regName[src_first]);
1535 #endif
1536         }
1537         return
1538           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539           ((Matcher::_regEncode[src_first] < 8)
1540            ? 5
1541            : 6); // REX
1542       } else {
1543         // 32-bit
1544         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546         int offset = ra_->reg2offset(dst_first);
1547         if (cbuf) {
1548           emit_opcode(*cbuf, 0xF3);
1549           if (Matcher::_regEncode[src_first] >= 8) {
1550               emit_opcode(*cbuf, Assembler::REX_R);
1551           }
1552           emit_opcode(*cbuf, 0x0F);
1553           emit_opcode(*cbuf, 0x11);
1554           encode_RegMem(*cbuf,
1555                         Matcher::_regEncode[src_first],
1556                         RSP_enc, 0x4, 0, offset,
1557                         false);
1558 #ifndef PRODUCT
1559         } else if (!do_size) {
1560           st->print("movss   [rsp + #%d], %s\t# spill",
1561                      offset,
1562                      Matcher::regName[src_first]);
1563 #endif
1564         }
1565         return
1566           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567           ((Matcher::_regEncode[src_first] < 8)
1568            ? 5
1569            : 6); // REX
1570       }
1571     } else if (dst_first_rc == rc_int) {
1572       // xmm -> gpr
1573       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575         // 64-bit
1576         if (cbuf) {
1577           emit_opcode(*cbuf, 0x66);
1578           if (Matcher::_regEncode[dst_first] < 8) {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_W);
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583             }
1584           } else {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WRB);
1589             }
1590           }
1591           emit_opcode(*cbuf, 0x0F);
1592           emit_opcode(*cbuf, 0x7E);
1593           emit_rm(*cbuf, 0x3,
1594                   Matcher::_regEncode[dst_first] & 7,
1595                   Matcher::_regEncode[src_first] & 7);
1596 #ifndef PRODUCT
1597         } else if (!do_size) {
1598           st->print("movdq   %s, %s\t# spill",
1599                      Matcher::regName[dst_first],
1600                      Matcher::regName[src_first]);
1601 #endif
1602         }
1603         return 5; // REX
1604       } else {
1605         // 32-bit
1606         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608         if (cbuf) {
1609           emit_opcode(*cbuf, 0x66);
1610           if (Matcher::_regEncode[dst_first] < 8) {
1611             if (Matcher::_regEncode[src_first] >= 8) {
1612               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613             }
1614           } else {
1615             if (Matcher::_regEncode[src_first] < 8) {
1616               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617             } else {
1618               emit_opcode(*cbuf, Assembler::REX_RB);
1619             }
1620           }
1621           emit_opcode(*cbuf, 0x0F);
1622           emit_opcode(*cbuf, 0x7E);
1623           emit_rm(*cbuf, 0x3,
1624                   Matcher::_regEncode[dst_first] & 7,
1625                   Matcher::_regEncode[src_first] & 7);
1626 #ifndef PRODUCT
1627         } else if (!do_size) {
1628           st->print("movdl   %s, %s\t# spill",
1629                      Matcher::regName[dst_first],
1630                      Matcher::regName[src_first]);
1631 #endif
1632         }
1633         return
1634           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635           ? 4
1636           : 5; // REX
1637       }
1638     } else if (dst_first_rc == rc_float) {
1639       // xmm -> xmm
1640       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642         // 64-bit
1643         if (cbuf) {
1644           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645           if (Matcher::_regEncode[dst_first] < 8) {
1646             if (Matcher::_regEncode[src_first] >= 8) {
1647               emit_opcode(*cbuf, Assembler::REX_B);
1648             }
1649           } else {
1650             if (Matcher::_regEncode[src_first] < 8) {
1651               emit_opcode(*cbuf, Assembler::REX_R);
1652             } else {
1653               emit_opcode(*cbuf, Assembler::REX_RB);
1654             }
1655           }
1656           emit_opcode(*cbuf, 0x0F);
1657           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658           emit_rm(*cbuf, 0x3,
1659                   Matcher::_regEncode[dst_first] & 7,
1660                   Matcher::_regEncode[src_first] & 7);
1661 #ifndef PRODUCT
1662         } else if (!do_size) {
1663           st->print("%s  %s, %s\t# spill",
1664                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                      Matcher::regName[dst_first],
1666                      Matcher::regName[src_first]);
1667 #endif
1668         }
1669         return
1670           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671           ? 4
1672           : 5; // REX
1673       } else {
1674         // 32-bit
1675         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677         if (cbuf) {
1678           if (!UseXmmRegToRegMoveAll)
1679             emit_opcode(*cbuf, 0xF3);
1680           if (Matcher::_regEncode[dst_first] < 8) {
1681             if (Matcher::_regEncode[src_first] >= 8) {
1682               emit_opcode(*cbuf, Assembler::REX_B);
1683             }
1684           } else {
1685             if (Matcher::_regEncode[src_first] < 8) {
1686               emit_opcode(*cbuf, Assembler::REX_R);
1687             } else {
1688               emit_opcode(*cbuf, Assembler::REX_RB);
1689             }
1690           }
1691           emit_opcode(*cbuf, 0x0F);
1692           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693           emit_rm(*cbuf, 0x3,
1694                   Matcher::_regEncode[dst_first] & 7,
1695                   Matcher::_regEncode[src_first] & 7);
1696 #ifndef PRODUCT
1697         } else if (!do_size) {
1698           st->print("%s  %s, %s\t# spill",
1699                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                      Matcher::regName[dst_first],
1701                      Matcher::regName[src_first]);
1702 #endif
1703         }
1704         return
1705           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708       }
1709     }
1710   }
1711 
1712   assert(0," foo ");
1713   Unimplemented();
1714 
1715   return 0;
1716 }
1717 
1718 #ifndef PRODUCT
1719 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720 {
1721   implementation(NULL, ra_, false, st);
1722 }
1723 #endif
1724 
1725 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726 {
1727   implementation(&cbuf, ra_, false, NULL);
1728 }
1729 
1730 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731 {
1732   return implementation(NULL, ra_, true, NULL);
1733 }
1734 
1735 //=============================================================================
1736 #ifndef PRODUCT
1737 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738 {
1739   st->print("nop \t# %d bytes pad for loops and calls", _count);
1740 }
1741 #endif
1742 
1743 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744 {
1745   MacroAssembler _masm(&cbuf);
1746   __ nop(_count);
1747 }
1748 
1749 uint MachNopNode::size(PhaseRegAlloc*) const
1750 {
1751   return _count;
1752 }
1753 
1754 
1755 //=============================================================================
1756 #ifndef PRODUCT
1757 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758 {
1759   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1760   int reg = ra_->get_reg_first(this);
1761   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1762             Matcher::regName[reg], offset);
1763 }
1764 #endif
1765 
1766 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767 {
1768   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1769   int reg = ra_->get_encode(this);
1770   if (offset >= 0x80) {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d32(cbuf, offset);
1776   } else {
1777     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1780     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781     emit_d8(cbuf, offset);
1782   }
1783 }
1784 
1785 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786 {
1787   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788   return (offset < 0x80) ? 5 : 8; // REX
1789 }
1790 
1791 //=============================================================================
1792 
1793 // emit call stub, compiled java to interpreter
1794 void emit_java_to_interp(CodeBuffer& cbuf)
1795 {
1796   // Stub is fixed up when the corresponding call is converted from
1797   // calling compiled code to calling interpreted code.
1798   // movq rbx, 0
1799   // jmp -5 # to self
1800 
1801   address mark = cbuf.inst_mark();  // get mark within main instrs section
1802 
1803   // Note that the code buffer's inst_mark is always relative to insts.
1804   // That's why we must use the macroassembler to generate a stub.
1805   MacroAssembler _masm(&cbuf);
1806 
1807   address base =
1808   __ start_a_stub(Compile::MAX_stubs_size);
1809   if (base == NULL)  return;  // CodeBuffer::expand failed
1810   // static stub relocation stores the instruction address of the call
1811   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812   // static stub relocation also tags the methodOop in the code-stream.
1813   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814   // This is recognized as unresolved by relocs/nativeinst/ic code
1815   __ jump(RuntimeAddress(__ pc()));
1816 
1817   // Update current stubs pointer and restore code_end.
1818   __ end_a_stub();
1819 }
1820 
1821 // size of call stub, compiled java to interpretor
1822 uint size_java_to_interp()
1823 {
1824   return 15;  // movq (1+1+8); jmp (1+4)
1825 }
1826 
1827 // relocation entries for call stub, compiled java to interpretor
1828 uint reloc_java_to_interp()
1829 {
1830   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831 }
1832 
1833 //=============================================================================
1834 #ifndef PRODUCT
1835 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836 {
1837   if (UseCompressedOops) {
1838     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839     if (Universe::narrow_oop_shift() != 0) {
1840       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1841     }
1842     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1843   } else {
1844     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846   }
1847   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848   st->print_cr("\tnop");
1849   if (!OptoBreakpoint) {
1850     st->print_cr("\tnop");
1851   }
1852 }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 {
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   uint code_size = cbuf.code_size();
1860 #endif
1861   if (UseCompressedOops) {
1862     masm.load_klass(rscratch1, j_rarg0);
1863     masm.cmpptr(rax, rscratch1);
1864   } else {
1865     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   }
1867 
1868   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1869 
1870   /* WARNING these NOPs are critical so that verified entry point is properly
1871      aligned for patching by NativeJump::patch_verified_entry() */
1872   int nops_cnt = 1;
1873   if (!OptoBreakpoint) {
1874     // Leave space for int3
1875      nops_cnt += 1;
1876   }
1877   if (UseCompressedOops) {
1878     // ??? divisible by 4 is aligned?
1879     nops_cnt += 1;
1880   }
1881   masm.nop(nops_cnt);
1882 
1883   assert(cbuf.code_size() - code_size == size(ra_),
1884          "checking code size of inline cache node");
1885 }
1886 
1887 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888 {
1889   if (UseCompressedOops) {
1890     if (Universe::narrow_oop_shift() == 0) {
1891       return OptoBreakpoint ? 15 : 16;
1892     } else {
1893       return OptoBreakpoint ? 19 : 20;
1894     }
1895   } else {
1896     return OptoBreakpoint ? 11 : 12;
1897   }
1898 }
1899 
1900 
1901 //=============================================================================
1902 uint size_exception_handler()
1903 {
1904   // NativeCall instruction size is the same as NativeJump.
1905   // Note that this value is also credited (in output.cpp) to
1906   // the size of the code section.
1907   return NativeJump::instruction_size;
1908 }
1909 
1910 // Emit exception handler code.
1911 int emit_exception_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's inst_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_exception_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1923   __ end_a_stub();
1924   return offset;
1925 }
1926 
1927 uint size_deopt_handler()
1928 {
1929   // three 5 byte instructions
1930   return 15;
1931 }
1932 
1933 // Emit deopt handler code.
1934 int emit_deopt_handler(CodeBuffer& cbuf)
1935 {
1936 
1937   // Note that the code buffer's inst_mark is always relative to insts.
1938   // That's why we must use the macroassembler to generate a handler.
1939   MacroAssembler _masm(&cbuf);
1940   address base =
1941   __ start_a_stub(size_deopt_handler());
1942   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943   int offset = __ offset();
1944   address the_pc = (address) __ pc();
1945   Label next;
1946   // push a "the_pc" on the stack without destroying any registers
1947   // as they all may be live.
1948 
1949   // push address of "next"
1950   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951   __ bind(next);
1952   // adjust it so it matches "the_pc"
1953   __ subptr(Address(rsp, 0), __ offset() - offset);
1954   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1956   __ end_a_stub();
1957   return offset;
1958 }
1959 
1960 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1961   int mark = cbuf.insts()->mark_off();
1962   MacroAssembler _masm(&cbuf);
1963   address double_address = __ double_constant(x);
1964   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1965   emit_d32_reloc(cbuf,
1966                  (int) (double_address - cbuf.code_end() - 4),
1967                  internal_word_Relocation::spec(double_address),
1968                  RELOC_DISP32);
1969 }
1970 
1971 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1972   int mark = cbuf.insts()->mark_off();
1973   MacroAssembler _masm(&cbuf);
1974   address float_address = __ float_constant(x);
1975   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1976   emit_d32_reloc(cbuf,
1977                  (int) (float_address - cbuf.code_end() - 4),
1978                  internal_word_Relocation::spec(float_address),
1979                  RELOC_DISP32);
1980 }
1981 
1982 
1983 const bool Matcher::match_rule_supported(int opcode) {
1984   if (!has_match_rule(opcode))
1985     return false;
1986 
1987   return true;  // Per default match rules are supported.
1988 }
1989 
1990 int Matcher::regnum_to_fpu_offset(int regnum)
1991 {
1992   return regnum - 32; // The FP registers are in the second chunk
1993 }
1994 
1995 // This is UltraSparc specific, true just means we have fast l2f conversion
1996 const bool Matcher::convL2FSupported(void) {
1997   return true;
1998 }
1999 
2000 // Vector width in bytes
2001 const uint Matcher::vector_width_in_bytes(void) {
2002   return 8;
2003 }
2004 
2005 // Vector ideal reg
2006 const uint Matcher::vector_ideal_reg(void) {
2007   return Op_RegD;
2008 }
2009 
2010 // Is this branch offset short enough that a short branch can be used?
2011 //
2012 // NOTE: If the platform does not provide any short branch variants, then
2013 //       this method should return false for offset 0.
2014 bool Matcher::is_short_branch_offset(int rule, int offset) {
2015   // the short version of jmpConUCF2 contains multiple branches,
2016   // making the reach slightly less
2017   if (rule == jmpConUCF2_rule)
2018     return (-126 <= offset && offset <= 125);
2019   return (-128 <= offset && offset <= 127);
2020 }
2021 
2022 const bool Matcher::isSimpleConstant64(jlong value) {
2023   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2024   //return value == (int) value;  // Cf. storeImmL and immL32.
2025 
2026   // Probably always true, even if a temp register is required.
2027   return true;
2028 }
2029 
2030 // The ecx parameter to rep stosq for the ClearArray node is in words.
2031 const bool Matcher::init_array_count_is_in_bytes = false;
2032 
2033 // Threshold size for cleararray.
2034 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2035 
2036 // Should the Matcher clone shifts on addressing modes, expecting them
2037 // to be subsumed into complex addressing expressions or compute them
2038 // into registers?  True for Intel but false for most RISCs
2039 const bool Matcher::clone_shift_expressions = true;
2040 
2041 // Is it better to copy float constants, or load them directly from
2042 // memory?  Intel can load a float constant from a direct address,
2043 // requiring no extra registers.  Most RISCs will have to materialize
2044 // an address into a register first, so they would do better to copy
2045 // the constant from stack.
2046 const bool Matcher::rematerialize_float_constants = true; // XXX
2047 
2048 // If CPU can load and store mis-aligned doubles directly then no
2049 // fixup is needed.  Else we split the double into 2 integer pieces
2050 // and move it piece-by-piece.  Only happens when passing doubles into
2051 // C code as the Java calling convention forces doubles to be aligned.
2052 const bool Matcher::misaligned_doubles_ok = true;
2053 
2054 // No-op on amd64
2055 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2056 
2057 // Advertise here if the CPU requires explicit rounding operations to
2058 // implement the UseStrictFP mode.
2059 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2060 
2061 // Do floats take an entire double register or just half?
2062 const bool Matcher::float_in_double = true;
2063 // Do ints take an entire long register or just half?
2064 const bool Matcher::int_in_long = true;
2065 
2066 // Return whether or not this register is ever used as an argument.
2067 // This function is used on startup to build the trampoline stubs in
2068 // generateOptoStub.  Registers not mentioned will be killed by the VM
2069 // call in the trampoline, and arguments in those registers not be
2070 // available to the callee.
2071 bool Matcher::can_be_java_arg(int reg)
2072 {
2073   return
2074     reg ==  RDI_num || reg ==  RDI_H_num ||
2075     reg ==  RSI_num || reg ==  RSI_H_num ||
2076     reg ==  RDX_num || reg ==  RDX_H_num ||
2077     reg ==  RCX_num || reg ==  RCX_H_num ||
2078     reg ==   R8_num || reg ==   R8_H_num ||
2079     reg ==   R9_num || reg ==   R9_H_num ||
2080     reg ==  R12_num || reg ==  R12_H_num ||
2081     reg == XMM0_num || reg == XMM0_H_num ||
2082     reg == XMM1_num || reg == XMM1_H_num ||
2083     reg == XMM2_num || reg == XMM2_H_num ||
2084     reg == XMM3_num || reg == XMM3_H_num ||
2085     reg == XMM4_num || reg == XMM4_H_num ||
2086     reg == XMM5_num || reg == XMM5_H_num ||
2087     reg == XMM6_num || reg == XMM6_H_num ||
2088     reg == XMM7_num || reg == XMM7_H_num;
2089 }
2090 
2091 bool Matcher::is_spillable_arg(int reg)
2092 {
2093   return can_be_java_arg(reg);
2094 }
2095 
2096 // Register for DIVI projection of divmodI
2097 RegMask Matcher::divI_proj_mask() {
2098   return INT_RAX_REG_mask;
2099 }
2100 
2101 // Register for MODI projection of divmodI
2102 RegMask Matcher::modI_proj_mask() {
2103   return INT_RDX_REG_mask;
2104 }
2105 
2106 // Register for DIVL projection of divmodL
2107 RegMask Matcher::divL_proj_mask() {
2108   return LONG_RAX_REG_mask;
2109 }
2110 
2111 // Register for MODL projection of divmodL
2112 RegMask Matcher::modL_proj_mask() {
2113   return LONG_RDX_REG_mask;
2114 }
2115 
2116 static Address build_address(int b, int i, int s, int d) {
2117   Register index = as_Register(i);
2118   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2119   if (index == rsp) {
2120     index = noreg;
2121     scale = Address::no_scale;
2122   }
2123   Address addr(as_Register(b), index, scale, d);
2124   return addr;
2125 }
2126 
2127 %}
2128 
2129 //----------ENCODING BLOCK-----------------------------------------------------
2130 // This block specifies the encoding classes used by the compiler to
2131 // output byte streams.  Encoding classes are parameterized macros
2132 // used by Machine Instruction Nodes in order to generate the bit
2133 // encoding of the instruction.  Operands specify their base encoding
2134 // interface with the interface keyword.  There are currently
2135 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2136 // COND_INTER.  REG_INTER causes an operand to generate a function
2137 // which returns its register number when queried.  CONST_INTER causes
2138 // an operand to generate a function which returns the value of the
2139 // constant when queried.  MEMORY_INTER causes an operand to generate
2140 // four functions which return the Base Register, the Index Register,
2141 // the Scale Value, and the Offset Value of the operand when queried.
2142 // COND_INTER causes an operand to generate six functions which return
2143 // the encoding code (ie - encoding bits for the instruction)
2144 // associated with each basic boolean condition for a conditional
2145 // instruction.
2146 //
2147 // Instructions specify two basic values for encoding.  Again, a
2148 // function is available to check if the constant displacement is an
2149 // oop. They use the ins_encode keyword to specify their encoding
2150 // classes (which must be a sequence of enc_class names, and their
2151 // parameters, specified in the encoding block), and they use the
2152 // opcode keyword to specify, in order, their primary, secondary, and
2153 // tertiary opcode.  Only the opcode sections which a particular
2154 // instruction needs for encoding need to be specified.
2155 encode %{
2156   // Build emit functions for each basic byte or larger field in the
2157   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2158   // from C++ code in the enc_class source block.  Emit functions will
2159   // live in the main source block for now.  In future, we can
2160   // generalize this by adding a syntax that specifies the sizes of
2161   // fields in an order, so that the adlc can build the emit functions
2162   // automagically
2163 
2164   // Emit primary opcode
2165   enc_class OpcP
2166   %{
2167     emit_opcode(cbuf, $primary);
2168   %}
2169 
2170   // Emit secondary opcode
2171   enc_class OpcS
2172   %{
2173     emit_opcode(cbuf, $secondary);
2174   %}
2175 
2176   // Emit tertiary opcode
2177   enc_class OpcT
2178   %{
2179     emit_opcode(cbuf, $tertiary);
2180   %}
2181 
2182   // Emit opcode directly
2183   enc_class Opcode(immI d8)
2184   %{
2185     emit_opcode(cbuf, $d8$$constant);
2186   %}
2187 
2188   // Emit size prefix
2189   enc_class SizePrefix
2190   %{
2191     emit_opcode(cbuf, 0x66);
2192   %}
2193 
2194   enc_class reg(rRegI reg)
2195   %{
2196     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2197   %}
2198 
2199   enc_class reg_reg(rRegI dst, rRegI src)
2200   %{
2201     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2202   %}
2203 
2204   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2205   %{
2206     emit_opcode(cbuf, $opcode$$constant);
2207     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2208   %}
2209 
2210   enc_class cmpfp_fixup()
2211   %{
2212     // jnp,s exit
2213     emit_opcode(cbuf, 0x7B);
2214     emit_d8(cbuf, 0x0A);
2215 
2216     // pushfq
2217     emit_opcode(cbuf, 0x9C);
2218 
2219     // andq $0xffffff2b, (%rsp)
2220     emit_opcode(cbuf, Assembler::REX_W);
2221     emit_opcode(cbuf, 0x81);
2222     emit_opcode(cbuf, 0x24);
2223     emit_opcode(cbuf, 0x24);
2224     emit_d32(cbuf, 0xffffff2b);
2225 
2226     // popfq
2227     emit_opcode(cbuf, 0x9D);
2228 
2229     // nop (target for branch to avoid branch to branch)
2230     emit_opcode(cbuf, 0x90);
2231   %}
2232 
2233   enc_class cmpfp3(rRegI dst)
2234   %{
2235     int dstenc = $dst$$reg;
2236 
2237     // movl $dst, -1
2238     if (dstenc >= 8) {
2239       emit_opcode(cbuf, Assembler::REX_B);
2240     }
2241     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2242     emit_d32(cbuf, -1);
2243 
2244     // jp,s done
2245     emit_opcode(cbuf, 0x7A);
2246     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2247 
2248     // jb,s done
2249     emit_opcode(cbuf, 0x72);
2250     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2251 
2252     // setne $dst
2253     if (dstenc >= 4) {
2254       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2255     }
2256     emit_opcode(cbuf, 0x0F);
2257     emit_opcode(cbuf, 0x95);
2258     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2259 
2260     // movzbl $dst, $dst
2261     if (dstenc >= 4) {
2262       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2263     }
2264     emit_opcode(cbuf, 0x0F);
2265     emit_opcode(cbuf, 0xB6);
2266     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2267   %}
2268 
2269   enc_class cdql_enc(no_rax_rdx_RegI div)
2270   %{
2271     // Full implementation of Java idiv and irem; checks for
2272     // special case as described in JVM spec., p.243 & p.271.
2273     //
2274     //         normal case                           special case
2275     //
2276     // input : rax: dividend                         min_int
2277     //         reg: divisor                          -1
2278     //
2279     // output: rax: quotient  (= rax idiv reg)       min_int
2280     //         rdx: remainder (= rax irem reg)       0
2281     //
2282     //  Code sequnce:
2283     //
2284     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2285     //    5:   75 07/08                jne    e <normal>
2286     //    7:   33 d2                   xor    %edx,%edx
2287     //  [div >= 8 -> offset + 1]
2288     //  [REX_B]
2289     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2290     //    c:   74 03/04                je     11 <done>
2291     // 000000000000000e <normal>:
2292     //    e:   99                      cltd
2293     //  [div >= 8 -> offset + 1]
2294     //  [REX_B]
2295     //    f:   f7 f9                   idiv   $div
2296     // 0000000000000011 <done>:
2297 
2298     // cmp    $0x80000000,%eax
2299     emit_opcode(cbuf, 0x3d);
2300     emit_d8(cbuf, 0x00);
2301     emit_d8(cbuf, 0x00);
2302     emit_d8(cbuf, 0x00);
2303     emit_d8(cbuf, 0x80);
2304 
2305     // jne    e <normal>
2306     emit_opcode(cbuf, 0x75);
2307     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2308 
2309     // xor    %edx,%edx
2310     emit_opcode(cbuf, 0x33);
2311     emit_d8(cbuf, 0xD2);
2312 
2313     // cmp    $0xffffffffffffffff,%ecx
2314     if ($div$$reg >= 8) {
2315       emit_opcode(cbuf, Assembler::REX_B);
2316     }
2317     emit_opcode(cbuf, 0x83);
2318     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2319     emit_d8(cbuf, 0xFF);
2320 
2321     // je     11 <done>
2322     emit_opcode(cbuf, 0x74);
2323     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2324 
2325     // <normal>
2326     // cltd
2327     emit_opcode(cbuf, 0x99);
2328 
2329     // idivl (note: must be emitted by the user of this rule)
2330     // <done>
2331   %}
2332 
2333   enc_class cdqq_enc(no_rax_rdx_RegL div)
2334   %{
2335     // Full implementation of Java ldiv and lrem; checks for
2336     // special case as described in JVM spec., p.243 & p.271.
2337     //
2338     //         normal case                           special case
2339     //
2340     // input : rax: dividend                         min_long
2341     //         reg: divisor                          -1
2342     //
2343     // output: rax: quotient  (= rax idiv reg)       min_long
2344     //         rdx: remainder (= rax irem reg)       0
2345     //
2346     //  Code sequnce:
2347     //
2348     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2349     //    7:   00 00 80
2350     //    a:   48 39 d0                cmp    %rdx,%rax
2351     //    d:   75 08                   jne    17 <normal>
2352     //    f:   33 d2                   xor    %edx,%edx
2353     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2354     //   15:   74 05                   je     1c <done>
2355     // 0000000000000017 <normal>:
2356     //   17:   48 99                   cqto
2357     //   19:   48 f7 f9                idiv   $div
2358     // 000000000000001c <done>:
2359 
2360     // mov    $0x8000000000000000,%rdx
2361     emit_opcode(cbuf, Assembler::REX_W);
2362     emit_opcode(cbuf, 0xBA);
2363     emit_d8(cbuf, 0x00);
2364     emit_d8(cbuf, 0x00);
2365     emit_d8(cbuf, 0x00);
2366     emit_d8(cbuf, 0x00);
2367     emit_d8(cbuf, 0x00);
2368     emit_d8(cbuf, 0x00);
2369     emit_d8(cbuf, 0x00);
2370     emit_d8(cbuf, 0x80);
2371 
2372     // cmp    %rdx,%rax
2373     emit_opcode(cbuf, Assembler::REX_W);
2374     emit_opcode(cbuf, 0x39);
2375     emit_d8(cbuf, 0xD0);
2376 
2377     // jne    17 <normal>
2378     emit_opcode(cbuf, 0x75);
2379     emit_d8(cbuf, 0x08);
2380 
2381     // xor    %edx,%edx
2382     emit_opcode(cbuf, 0x33);
2383     emit_d8(cbuf, 0xD2);
2384 
2385     // cmp    $0xffffffffffffffff,$div
2386     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2387     emit_opcode(cbuf, 0x83);
2388     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2389     emit_d8(cbuf, 0xFF);
2390 
2391     // je     1e <done>
2392     emit_opcode(cbuf, 0x74);
2393     emit_d8(cbuf, 0x05);
2394 
2395     // <normal>
2396     // cqto
2397     emit_opcode(cbuf, Assembler::REX_W);
2398     emit_opcode(cbuf, 0x99);
2399 
2400     // idivq (note: must be emitted by the user of this rule)
2401     // <done>
2402   %}
2403 
2404   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2405   enc_class OpcSE(immI imm)
2406   %{
2407     // Emit primary opcode and set sign-extend bit
2408     // Check for 8-bit immediate, and set sign extend bit in opcode
2409     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2410       emit_opcode(cbuf, $primary | 0x02);
2411     } else {
2412       // 32-bit immediate
2413       emit_opcode(cbuf, $primary);
2414     }
2415   %}
2416 
2417   enc_class OpcSErm(rRegI dst, immI imm)
2418   %{
2419     // OpcSEr/m
2420     int dstenc = $dst$$reg;
2421     if (dstenc >= 8) {
2422       emit_opcode(cbuf, Assembler::REX_B);
2423       dstenc -= 8;
2424     }
2425     // Emit primary opcode and set sign-extend bit
2426     // Check for 8-bit immediate, and set sign extend bit in opcode
2427     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2428       emit_opcode(cbuf, $primary | 0x02);
2429     } else {
2430       // 32-bit immediate
2431       emit_opcode(cbuf, $primary);
2432     }
2433     // Emit r/m byte with secondary opcode, after primary opcode.
2434     emit_rm(cbuf, 0x3, $secondary, dstenc);
2435   %}
2436 
2437   enc_class OpcSErm_wide(rRegL dst, immI imm)
2438   %{
2439     // OpcSEr/m
2440     int dstenc = $dst$$reg;
2441     if (dstenc < 8) {
2442       emit_opcode(cbuf, Assembler::REX_W);
2443     } else {
2444       emit_opcode(cbuf, Assembler::REX_WB);
2445       dstenc -= 8;
2446     }
2447     // Emit primary opcode and set sign-extend bit
2448     // Check for 8-bit immediate, and set sign extend bit in opcode
2449     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2450       emit_opcode(cbuf, $primary | 0x02);
2451     } else {
2452       // 32-bit immediate
2453       emit_opcode(cbuf, $primary);
2454     }
2455     // Emit r/m byte with secondary opcode, after primary opcode.
2456     emit_rm(cbuf, 0x3, $secondary, dstenc);
2457   %}
2458 
2459   enc_class Con8or32(immI imm)
2460   %{
2461     // Check for 8-bit immediate, and set sign extend bit in opcode
2462     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2463       $$$emit8$imm$$constant;
2464     } else {
2465       // 32-bit immediate
2466       $$$emit32$imm$$constant;
2467     }
2468   %}
2469 
2470   enc_class Lbl(label labl)
2471   %{
2472     // JMP, CALL
2473     Label* l = $labl$$label;
2474     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2475   %}
2476 
2477   enc_class LblShort(label labl)
2478   %{
2479     // JMP, CALL
2480     Label* l = $labl$$label;
2481     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2482     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2483     emit_d8(cbuf, disp);
2484   %}
2485 
2486   enc_class opc2_reg(rRegI dst)
2487   %{
2488     // BSWAP
2489     emit_cc(cbuf, $secondary, $dst$$reg);
2490   %}
2491 
2492   enc_class opc3_reg(rRegI dst)
2493   %{
2494     // BSWAP
2495     emit_cc(cbuf, $tertiary, $dst$$reg);
2496   %}
2497 
2498   enc_class reg_opc(rRegI div)
2499   %{
2500     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2501     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2502   %}
2503 
2504   enc_class Jcc(cmpOp cop, label labl)
2505   %{
2506     // JCC
2507     Label* l = $labl$$label;
2508     $$$emit8$primary;
2509     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2510     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2511   %}
2512 
2513   enc_class JccShort (cmpOp cop, label labl)
2514   %{
2515   // JCC
2516     Label *l = $labl$$label;
2517     emit_cc(cbuf, $primary, $cop$$cmpcode);
2518     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2519     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2520     emit_d8(cbuf, disp);
2521   %}
2522 
2523   enc_class enc_cmov(cmpOp cop)
2524   %{
2525     // CMOV
2526     $$$emit8$primary;
2527     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2528   %}
2529 
2530   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2531   %{
2532     // Invert sense of branch from sense of cmov
2533     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2534     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2535                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2536                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2537     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2538     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2539     if ($dst$$reg < 8) {
2540       if ($src$$reg >= 8) {
2541         emit_opcode(cbuf, Assembler::REX_B);
2542       }
2543     } else {
2544       if ($src$$reg < 8) {
2545         emit_opcode(cbuf, Assembler::REX_R);
2546       } else {
2547         emit_opcode(cbuf, Assembler::REX_RB);
2548       }
2549     }
2550     emit_opcode(cbuf, 0x0F);
2551     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2552     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2553   %}
2554 
2555   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2556   %{
2557     // Invert sense of branch from sense of cmov
2558     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2559     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2560 
2561     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2562     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2563     if ($dst$$reg < 8) {
2564       if ($src$$reg >= 8) {
2565         emit_opcode(cbuf, Assembler::REX_B);
2566       }
2567     } else {
2568       if ($src$$reg < 8) {
2569         emit_opcode(cbuf, Assembler::REX_R);
2570       } else {
2571         emit_opcode(cbuf, Assembler::REX_RB);
2572       }
2573     }
2574     emit_opcode(cbuf, 0x0F);
2575     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2576     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2577   %}
2578 
2579   enc_class enc_PartialSubtypeCheck()
2580   %{
2581     Register Rrdi = as_Register(RDI_enc); // result register
2582     Register Rrax = as_Register(RAX_enc); // super class
2583     Register Rrcx = as_Register(RCX_enc); // killed
2584     Register Rrsi = as_Register(RSI_enc); // sub class
2585     Label miss;
2586     const bool set_cond_codes = true;
2587 
2588     MacroAssembler _masm(&cbuf);
2589     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2590                                      NULL, &miss,
2591                                      /*set_cond_codes:*/ true);
2592     if ($primary) {
2593       __ xorptr(Rrdi, Rrdi);
2594     }
2595     __ bind(miss);
2596   %}
2597 
2598   enc_class Java_To_Interpreter(method meth)
2599   %{
2600     // CALL Java_To_Interpreter
2601     // This is the instruction starting address for relocation info.
2602     cbuf.set_inst_mark();
2603     $$$emit8$primary;
2604     // CALL directly to the runtime
2605     emit_d32_reloc(cbuf,
2606                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2607                    runtime_call_Relocation::spec(),
2608                    RELOC_DISP32);
2609   %}
2610 
2611   enc_class Java_Static_Call(method meth)
2612   %{
2613     // JAVA STATIC CALL
2614     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2615     // determine who we intended to call.
2616     cbuf.set_inst_mark();
2617     $$$emit8$primary;
2618 
2619     if (!_method) {
2620       emit_d32_reloc(cbuf,
2621                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2622                      runtime_call_Relocation::spec(),
2623                      RELOC_DISP32);
2624     } else if (_optimized_virtual) {
2625       emit_d32_reloc(cbuf,
2626                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2627                      opt_virtual_call_Relocation::spec(),
2628                      RELOC_DISP32);
2629     } else {
2630       emit_d32_reloc(cbuf,
2631                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2632                      static_call_Relocation::spec(),
2633                      RELOC_DISP32);
2634     }
2635     if (_method) {
2636       // Emit stub for static call
2637       emit_java_to_interp(cbuf);
2638     }
2639   %}
2640 
2641   enc_class Java_Dynamic_Call(method meth)
2642   %{
2643     // JAVA DYNAMIC CALL
2644     // !!!!!
2645     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2646     // emit_call_dynamic_prologue( cbuf );
2647     cbuf.set_inst_mark();
2648 
2649     // movq rax, -1
2650     emit_opcode(cbuf, Assembler::REX_W);
2651     emit_opcode(cbuf, 0xB8 | RAX_enc);
2652     emit_d64_reloc(cbuf,
2653                    (int64_t) Universe::non_oop_word(),
2654                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2655     address virtual_call_oop_addr = cbuf.inst_mark();
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2657     // who we intended to call.
2658     cbuf.set_inst_mark();
2659     $$$emit8$primary;
2660     emit_d32_reloc(cbuf,
2661                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2662                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2663                    RELOC_DISP32);
2664   %}
2665 
2666   enc_class Java_Compiled_Call(method meth)
2667   %{
2668     // JAVA COMPILED CALL
2669     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2670 
2671     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2672     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2673 
2674     // callq *disp(%rax)
2675     cbuf.set_inst_mark();
2676     $$$emit8$primary;
2677     if (disp < 0x80) {
2678       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2679       emit_d8(cbuf, disp); // Displacement
2680     } else {
2681       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2682       emit_d32(cbuf, disp); // Displacement
2683     }
2684   %}
2685 
2686   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2687   %{
2688     // SAL, SAR, SHR
2689     int dstenc = $dst$$reg;
2690     if (dstenc >= 8) {
2691       emit_opcode(cbuf, Assembler::REX_B);
2692       dstenc -= 8;
2693     }
2694     $$$emit8$primary;
2695     emit_rm(cbuf, 0x3, $secondary, dstenc);
2696     $$$emit8$shift$$constant;
2697   %}
2698 
2699   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2700   %{
2701     // SAL, SAR, SHR
2702     int dstenc = $dst$$reg;
2703     if (dstenc < 8) {
2704       emit_opcode(cbuf, Assembler::REX_W);
2705     } else {
2706       emit_opcode(cbuf, Assembler::REX_WB);
2707       dstenc -= 8;
2708     }
2709     $$$emit8$primary;
2710     emit_rm(cbuf, 0x3, $secondary, dstenc);
2711     $$$emit8$shift$$constant;
2712   %}
2713 
2714   enc_class load_immI(rRegI dst, immI src)
2715   %{
2716     int dstenc = $dst$$reg;
2717     if (dstenc >= 8) {
2718       emit_opcode(cbuf, Assembler::REX_B);
2719       dstenc -= 8;
2720     }
2721     emit_opcode(cbuf, 0xB8 | dstenc);
2722     $$$emit32$src$$constant;
2723   %}
2724 
2725   enc_class load_immL(rRegL dst, immL src)
2726   %{
2727     int dstenc = $dst$$reg;
2728     if (dstenc < 8) {
2729       emit_opcode(cbuf, Assembler::REX_W);
2730     } else {
2731       emit_opcode(cbuf, Assembler::REX_WB);
2732       dstenc -= 8;
2733     }
2734     emit_opcode(cbuf, 0xB8 | dstenc);
2735     emit_d64(cbuf, $src$$constant);
2736   %}
2737 
2738   enc_class load_immUL32(rRegL dst, immUL32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     int dstenc = $dst$$reg;
2742     if (dstenc >= 8) {
2743       emit_opcode(cbuf, Assembler::REX_B);
2744       dstenc -= 8;
2745     }
2746     emit_opcode(cbuf, 0xB8 | dstenc);
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immL32(rRegL dst, immL32 src)
2751   %{
2752     int dstenc = $dst$$reg;
2753     if (dstenc < 8) {
2754       emit_opcode(cbuf, Assembler::REX_W);
2755     } else {
2756       emit_opcode(cbuf, Assembler::REX_WB);
2757       dstenc -= 8;
2758     }
2759     emit_opcode(cbuf, 0xC7);
2760     emit_rm(cbuf, 0x03, 0x00, dstenc);
2761     $$$emit32$src$$constant;
2762   %}
2763 
2764   enc_class load_immP31(rRegP dst, immP32 src)
2765   %{
2766     // same as load_immI, but this time we care about zeroes in the high word
2767     int dstenc = $dst$$reg;
2768     if (dstenc >= 8) {
2769       emit_opcode(cbuf, Assembler::REX_B);
2770       dstenc -= 8;
2771     }
2772     emit_opcode(cbuf, 0xB8 | dstenc);
2773     $$$emit32$src$$constant;
2774   %}
2775 
2776   enc_class load_immP(rRegP dst, immP src)
2777   %{
2778     int dstenc = $dst$$reg;
2779     if (dstenc < 8) {
2780       emit_opcode(cbuf, Assembler::REX_W);
2781     } else {
2782       emit_opcode(cbuf, Assembler::REX_WB);
2783       dstenc -= 8;
2784     }
2785     emit_opcode(cbuf, 0xB8 | dstenc);
2786     // This next line should be generated from ADLC
2787     if ($src->constant_is_oop()) {
2788       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2789     } else {
2790       emit_d64(cbuf, $src$$constant);
2791     }
2792   %}
2793 
2794   enc_class load_immF(regF dst, immF con)
2795   %{
2796     // XXX reg_mem doesn't support RIP-relative addressing yet
2797     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798     emit_float_constant(cbuf, $con$$constant);
2799   %}
2800 
2801   enc_class load_immD(regD dst, immD con)
2802   %{
2803     // XXX reg_mem doesn't support RIP-relative addressing yet
2804     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2805     emit_double_constant(cbuf, $con$$constant);
2806   %}
2807 
2808   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2809     emit_opcode(cbuf, 0xF3);
2810     if ($dst$$reg >= 8) {
2811       emit_opcode(cbuf, Assembler::REX_R);
2812     }
2813     emit_opcode(cbuf, 0x0F);
2814     emit_opcode(cbuf, 0x10);
2815     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2816     emit_float_constant(cbuf, $con$$constant);
2817   %}
2818 
2819   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2820     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2821     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2822     if ($dst$$reg >= 8) {
2823       emit_opcode(cbuf, Assembler::REX_R);
2824     }
2825     emit_opcode(cbuf, 0x0F);
2826     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2827     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2828     emit_double_constant(cbuf, $con$$constant);
2829   %}
2830 
2831   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2832   enc_class enc_copy(rRegI dst, rRegI src)
2833   %{
2834     encode_copy(cbuf, $dst$$reg, $src$$reg);
2835   %}
2836 
2837   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2838   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2839     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2840   %}
2841 
2842   enc_class enc_copy_always(rRegI dst, rRegI src)
2843   %{
2844     int srcenc = $src$$reg;
2845     int dstenc = $dst$$reg;
2846 
2847     if (dstenc < 8) {
2848       if (srcenc >= 8) {
2849         emit_opcode(cbuf, Assembler::REX_B);
2850         srcenc -= 8;
2851       }
2852     } else {
2853       if (srcenc < 8) {
2854         emit_opcode(cbuf, Assembler::REX_R);
2855       } else {
2856         emit_opcode(cbuf, Assembler::REX_RB);
2857         srcenc -= 8;
2858       }
2859       dstenc -= 8;
2860     }
2861 
2862     emit_opcode(cbuf, 0x8B);
2863     emit_rm(cbuf, 0x3, dstenc, srcenc);
2864   %}
2865 
2866   enc_class enc_copy_wide(rRegL dst, rRegL src)
2867   %{
2868     int srcenc = $src$$reg;
2869     int dstenc = $dst$$reg;
2870 
2871     if (dstenc != srcenc) {
2872       if (dstenc < 8) {
2873         if (srcenc < 8) {
2874           emit_opcode(cbuf, Assembler::REX_W);
2875         } else {
2876           emit_opcode(cbuf, Assembler::REX_WB);
2877           srcenc -= 8;
2878         }
2879       } else {
2880         if (srcenc < 8) {
2881           emit_opcode(cbuf, Assembler::REX_WR);
2882         } else {
2883           emit_opcode(cbuf, Assembler::REX_WRB);
2884           srcenc -= 8;
2885         }
2886         dstenc -= 8;
2887       }
2888       emit_opcode(cbuf, 0x8B);
2889       emit_rm(cbuf, 0x3, dstenc, srcenc);
2890     }
2891   %}
2892 
2893   enc_class Con32(immI src)
2894   %{
2895     // Output immediate
2896     $$$emit32$src$$constant;
2897   %}
2898 
2899   enc_class Con64(immL src)
2900   %{
2901     // Output immediate
2902     emit_d64($src$$constant);
2903   %}
2904 
2905   enc_class Con32F_as_bits(immF src)
2906   %{
2907     // Output Float immediate bits
2908     jfloat jf = $src$$constant;
2909     jint jf_as_bits = jint_cast(jf);
2910     emit_d32(cbuf, jf_as_bits);
2911   %}
2912 
2913   enc_class Con16(immI src)
2914   %{
2915     // Output immediate
2916     $$$emit16$src$$constant;
2917   %}
2918 
2919   // How is this different from Con32??? XXX
2920   enc_class Con_d32(immI src)
2921   %{
2922     emit_d32(cbuf,$src$$constant);
2923   %}
2924 
2925   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2926     // Output immediate memory reference
2927     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2928     emit_d32(cbuf, 0x00);
2929   %}
2930 
2931   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2932     MacroAssembler masm(&cbuf);
2933 
2934     Register switch_reg = as_Register($switch_val$$reg);
2935     Register dest_reg   = as_Register($dest$$reg);
2936     address table_base  = masm.address_table_constant(_index2label);
2937 
2938     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2939     // to do that and the compiler is using that register as one it can allocate.
2940     // So we build it all by hand.
2941     // Address index(noreg, switch_reg, Address::times_1);
2942     // ArrayAddress dispatch(table, index);
2943 
2944     Address dispatch(dest_reg, switch_reg, Address::times_1);
2945 
2946     masm.lea(dest_reg, InternalAddress(table_base));
2947     masm.jmp(dispatch);
2948   %}
2949 
2950   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2951     MacroAssembler masm(&cbuf);
2952 
2953     Register switch_reg = as_Register($switch_val$$reg);
2954     Register dest_reg   = as_Register($dest$$reg);
2955     address table_base  = masm.address_table_constant(_index2label);
2956 
2957     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2958     // to do that and the compiler is using that register as one it can allocate.
2959     // So we build it all by hand.
2960     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2961     // ArrayAddress dispatch(table, index);
2962 
2963     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2964 
2965     masm.lea(dest_reg, InternalAddress(table_base));
2966     masm.jmp(dispatch);
2967   %}
2968 
2969   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2970     MacroAssembler masm(&cbuf);
2971 
2972     Register switch_reg = as_Register($switch_val$$reg);
2973     Register dest_reg   = as_Register($dest$$reg);
2974     address table_base  = masm.address_table_constant(_index2label);
2975 
2976     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2977     // to do that and the compiler is using that register as one it can allocate.
2978     // So we build it all by hand.
2979     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2980     // ArrayAddress dispatch(table, index);
2981 
2982     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2983     masm.lea(dest_reg, InternalAddress(table_base));
2984     masm.jmp(dispatch);
2985 
2986   %}
2987 
2988   enc_class lock_prefix()
2989   %{
2990     if (os::is_MP()) {
2991       emit_opcode(cbuf, 0xF0); // lock
2992     }
2993   %}
2994 
2995   enc_class REX_mem(memory mem)
2996   %{
2997     if ($mem$$base >= 8) {
2998       if ($mem$$index < 8) {
2999         emit_opcode(cbuf, Assembler::REX_B);
3000       } else {
3001         emit_opcode(cbuf, Assembler::REX_XB);
3002       }
3003     } else {
3004       if ($mem$$index >= 8) {
3005         emit_opcode(cbuf, Assembler::REX_X);
3006       }
3007     }
3008   %}
3009 
3010   enc_class REX_mem_wide(memory mem)
3011   %{
3012     if ($mem$$base >= 8) {
3013       if ($mem$$index < 8) {
3014         emit_opcode(cbuf, Assembler::REX_WB);
3015       } else {
3016         emit_opcode(cbuf, Assembler::REX_WXB);
3017       }
3018     } else {
3019       if ($mem$$index < 8) {
3020         emit_opcode(cbuf, Assembler::REX_W);
3021       } else {
3022         emit_opcode(cbuf, Assembler::REX_WX);
3023       }
3024     }
3025   %}
3026 
3027   // for byte regs
3028   enc_class REX_breg(rRegI reg)
3029   %{
3030     if ($reg$$reg >= 4) {
3031       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3032     }
3033   %}
3034 
3035   // for byte regs
3036   enc_class REX_reg_breg(rRegI dst, rRegI src)
3037   %{
3038     if ($dst$$reg < 8) {
3039       if ($src$$reg >= 4) {
3040         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3041       }
3042     } else {
3043       if ($src$$reg < 8) {
3044         emit_opcode(cbuf, Assembler::REX_R);
3045       } else {
3046         emit_opcode(cbuf, Assembler::REX_RB);
3047       }
3048     }
3049   %}
3050 
3051   // for byte regs
3052   enc_class REX_breg_mem(rRegI reg, memory mem)
3053   %{
3054     if ($reg$$reg < 8) {
3055       if ($mem$$base < 8) {
3056         if ($mem$$index >= 8) {
3057           emit_opcode(cbuf, Assembler::REX_X);
3058         } else if ($reg$$reg >= 4) {
3059           emit_opcode(cbuf, Assembler::REX);
3060         }
3061       } else {
3062         if ($mem$$index < 8) {
3063           emit_opcode(cbuf, Assembler::REX_B);
3064         } else {
3065           emit_opcode(cbuf, Assembler::REX_XB);
3066         }
3067       }
3068     } else {
3069       if ($mem$$base < 8) {
3070         if ($mem$$index < 8) {
3071           emit_opcode(cbuf, Assembler::REX_R);
3072         } else {
3073           emit_opcode(cbuf, Assembler::REX_RX);
3074         }
3075       } else {
3076         if ($mem$$index < 8) {
3077           emit_opcode(cbuf, Assembler::REX_RB);
3078         } else {
3079           emit_opcode(cbuf, Assembler::REX_RXB);
3080         }
3081       }
3082     }
3083   %}
3084 
3085   enc_class REX_reg(rRegI reg)
3086   %{
3087     if ($reg$$reg >= 8) {
3088       emit_opcode(cbuf, Assembler::REX_B);
3089     }
3090   %}
3091 
3092   enc_class REX_reg_wide(rRegI reg)
3093   %{
3094     if ($reg$$reg < 8) {
3095       emit_opcode(cbuf, Assembler::REX_W);
3096     } else {
3097       emit_opcode(cbuf, Assembler::REX_WB);
3098     }
3099   %}
3100 
3101   enc_class REX_reg_reg(rRegI dst, rRegI src)
3102   %{
3103     if ($dst$$reg < 8) {
3104       if ($src$$reg >= 8) {
3105         emit_opcode(cbuf, Assembler::REX_B);
3106       }
3107     } else {
3108       if ($src$$reg < 8) {
3109         emit_opcode(cbuf, Assembler::REX_R);
3110       } else {
3111         emit_opcode(cbuf, Assembler::REX_RB);
3112       }
3113     }
3114   %}
3115 
3116   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3117   %{
3118     if ($dst$$reg < 8) {
3119       if ($src$$reg < 8) {
3120         emit_opcode(cbuf, Assembler::REX_W);
3121       } else {
3122         emit_opcode(cbuf, Assembler::REX_WB);
3123       }
3124     } else {
3125       if ($src$$reg < 8) {
3126         emit_opcode(cbuf, Assembler::REX_WR);
3127       } else {
3128         emit_opcode(cbuf, Assembler::REX_WRB);
3129       }
3130     }
3131   %}
3132 
3133   enc_class REX_reg_mem(rRegI reg, memory mem)
3134   %{
3135     if ($reg$$reg < 8) {
3136       if ($mem$$base < 8) {
3137         if ($mem$$index >= 8) {
3138           emit_opcode(cbuf, Assembler::REX_X);
3139         }
3140       } else {
3141         if ($mem$$index < 8) {
3142           emit_opcode(cbuf, Assembler::REX_B);
3143         } else {
3144           emit_opcode(cbuf, Assembler::REX_XB);
3145         }
3146       }
3147     } else {
3148       if ($mem$$base < 8) {
3149         if ($mem$$index < 8) {
3150           emit_opcode(cbuf, Assembler::REX_R);
3151         } else {
3152           emit_opcode(cbuf, Assembler::REX_RX);
3153         }
3154       } else {
3155         if ($mem$$index < 8) {
3156           emit_opcode(cbuf, Assembler::REX_RB);
3157         } else {
3158           emit_opcode(cbuf, Assembler::REX_RXB);
3159         }
3160       }
3161     }
3162   %}
3163 
3164   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3165   %{
3166     if ($reg$$reg < 8) {
3167       if ($mem$$base < 8) {
3168         if ($mem$$index < 8) {
3169           emit_opcode(cbuf, Assembler::REX_W);
3170         } else {
3171           emit_opcode(cbuf, Assembler::REX_WX);
3172         }
3173       } else {
3174         if ($mem$$index < 8) {
3175           emit_opcode(cbuf, Assembler::REX_WB);
3176         } else {
3177           emit_opcode(cbuf, Assembler::REX_WXB);
3178         }
3179       }
3180     } else {
3181       if ($mem$$base < 8) {
3182         if ($mem$$index < 8) {
3183           emit_opcode(cbuf, Assembler::REX_WR);
3184         } else {
3185           emit_opcode(cbuf, Assembler::REX_WRX);
3186         }
3187       } else {
3188         if ($mem$$index < 8) {
3189           emit_opcode(cbuf, Assembler::REX_WRB);
3190         } else {
3191           emit_opcode(cbuf, Assembler::REX_WRXB);
3192         }
3193       }
3194     }
3195   %}
3196 
3197   enc_class reg_mem(rRegI ereg, memory mem)
3198   %{
3199     // High registers handle in encode_RegMem
3200     int reg = $ereg$$reg;
3201     int base = $mem$$base;
3202     int index = $mem$$index;
3203     int scale = $mem$$scale;
3204     int disp = $mem$$disp;
3205     bool disp_is_oop = $mem->disp_is_oop();
3206 
3207     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3208   %}
3209 
3210   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3211   %{
3212     int rm_byte_opcode = $rm_opcode$$constant;
3213 
3214     // High registers handle in encode_RegMem
3215     int base = $mem$$base;
3216     int index = $mem$$index;
3217     int scale = $mem$$scale;
3218     int displace = $mem$$disp;
3219 
3220     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3221                                             // working with static
3222                                             // globals
3223     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3224                   disp_is_oop);
3225   %}
3226 
3227   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3228   %{
3229     int reg_encoding = $dst$$reg;
3230     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3231     int index        = 0x04;            // 0x04 indicates no index
3232     int scale        = 0x00;            // 0x00 indicates no scale
3233     int displace     = $src1$$constant; // 0x00 indicates no displacement
3234     bool disp_is_oop = false;
3235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3236                   disp_is_oop);
3237   %}
3238 
3239   enc_class neg_reg(rRegI dst)
3240   %{
3241     int dstenc = $dst$$reg;
3242     if (dstenc >= 8) {
3243       emit_opcode(cbuf, Assembler::REX_B);
3244       dstenc -= 8;
3245     }
3246     // NEG $dst
3247     emit_opcode(cbuf, 0xF7);
3248     emit_rm(cbuf, 0x3, 0x03, dstenc);
3249   %}
3250 
3251   enc_class neg_reg_wide(rRegI dst)
3252   %{
3253     int dstenc = $dst$$reg;
3254     if (dstenc < 8) {
3255       emit_opcode(cbuf, Assembler::REX_W);
3256     } else {
3257       emit_opcode(cbuf, Assembler::REX_WB);
3258       dstenc -= 8;
3259     }
3260     // NEG $dst
3261     emit_opcode(cbuf, 0xF7);
3262     emit_rm(cbuf, 0x3, 0x03, dstenc);
3263   %}
3264 
3265   enc_class setLT_reg(rRegI dst)
3266   %{
3267     int dstenc = $dst$$reg;
3268     if (dstenc >= 8) {
3269       emit_opcode(cbuf, Assembler::REX_B);
3270       dstenc -= 8;
3271     } else if (dstenc >= 4) {
3272       emit_opcode(cbuf, Assembler::REX);
3273     }
3274     // SETLT $dst
3275     emit_opcode(cbuf, 0x0F);
3276     emit_opcode(cbuf, 0x9C);
3277     emit_rm(cbuf, 0x3, 0x0, dstenc);
3278   %}
3279 
3280   enc_class setNZ_reg(rRegI dst)
3281   %{
3282     int dstenc = $dst$$reg;
3283     if (dstenc >= 8) {
3284       emit_opcode(cbuf, Assembler::REX_B);
3285       dstenc -= 8;
3286     } else if (dstenc >= 4) {
3287       emit_opcode(cbuf, Assembler::REX);
3288     }
3289     // SETNZ $dst
3290     emit_opcode(cbuf, 0x0F);
3291     emit_opcode(cbuf, 0x95);
3292     emit_rm(cbuf, 0x3, 0x0, dstenc);
3293   %}
3294 
3295   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3296                        rcx_RegI tmp)
3297   %{
3298     // cadd_cmpLT
3299 
3300     int tmpReg = $tmp$$reg;
3301 
3302     int penc = $p$$reg;
3303     int qenc = $q$$reg;
3304     int yenc = $y$$reg;
3305 
3306     // subl $p,$q
3307     if (penc < 8) {
3308       if (qenc >= 8) {
3309         emit_opcode(cbuf, Assembler::REX_B);
3310       }
3311     } else {
3312       if (qenc < 8) {
3313         emit_opcode(cbuf, Assembler::REX_R);
3314       } else {
3315         emit_opcode(cbuf, Assembler::REX_RB);
3316       }
3317     }
3318     emit_opcode(cbuf, 0x2B);
3319     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3320 
3321     // sbbl $tmp, $tmp
3322     emit_opcode(cbuf, 0x1B);
3323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3324 
3325     // andl $tmp, $y
3326     if (yenc >= 8) {
3327       emit_opcode(cbuf, Assembler::REX_B);
3328     }
3329     emit_opcode(cbuf, 0x23);
3330     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3331 
3332     // addl $p,$tmp
3333     if (penc >= 8) {
3334         emit_opcode(cbuf, Assembler::REX_R);
3335     }
3336     emit_opcode(cbuf, 0x03);
3337     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3338   %}
3339 
3340   // Compare the lonogs and set -1, 0, or 1 into dst
3341   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3342   %{
3343     int src1enc = $src1$$reg;
3344     int src2enc = $src2$$reg;
3345     int dstenc = $dst$$reg;
3346 
3347     // cmpq $src1, $src2
3348     if (src1enc < 8) {
3349       if (src2enc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_W);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_WB);
3353       }
3354     } else {
3355       if (src2enc < 8) {
3356         emit_opcode(cbuf, Assembler::REX_WR);
3357       } else {
3358         emit_opcode(cbuf, Assembler::REX_WRB);
3359       }
3360     }
3361     emit_opcode(cbuf, 0x3B);
3362     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3363 
3364     // movl $dst, -1
3365     if (dstenc >= 8) {
3366       emit_opcode(cbuf, Assembler::REX_B);
3367     }
3368     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3369     emit_d32(cbuf, -1);
3370 
3371     // jl,s done
3372     emit_opcode(cbuf, 0x7C);
3373     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3374 
3375     // setne $dst
3376     if (dstenc >= 4) {
3377       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3378     }
3379     emit_opcode(cbuf, 0x0F);
3380     emit_opcode(cbuf, 0x95);
3381     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3382 
3383     // movzbl $dst, $dst
3384     if (dstenc >= 4) {
3385       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3386     }
3387     emit_opcode(cbuf, 0x0F);
3388     emit_opcode(cbuf, 0xB6);
3389     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3390   %}
3391 
3392   enc_class Push_ResultXD(regD dst) %{
3393     int dstenc = $dst$$reg;
3394 
3395     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3396 
3397     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3398     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3399     if (dstenc >= 8) {
3400       emit_opcode(cbuf, Assembler::REX_R);
3401     }
3402     emit_opcode  (cbuf, 0x0F );
3403     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3404     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3405 
3406     // add rsp,8
3407     emit_opcode(cbuf, Assembler::REX_W);
3408     emit_opcode(cbuf,0x83);
3409     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3410     emit_d8(cbuf,0x08);
3411   %}
3412 
3413   enc_class Push_SrcXD(regD src) %{
3414     int srcenc = $src$$reg;
3415 
3416     // subq rsp,#8
3417     emit_opcode(cbuf, Assembler::REX_W);
3418     emit_opcode(cbuf, 0x83);
3419     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3420     emit_d8(cbuf, 0x8);
3421 
3422     // movsd [rsp],src
3423     emit_opcode(cbuf, 0xF2);
3424     if (srcenc >= 8) {
3425       emit_opcode(cbuf, Assembler::REX_R);
3426     }
3427     emit_opcode(cbuf, 0x0F);
3428     emit_opcode(cbuf, 0x11);
3429     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3430 
3431     // fldd [rsp]
3432     emit_opcode(cbuf, 0x66);
3433     emit_opcode(cbuf, 0xDD);
3434     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3435   %}
3436 
3437 
3438   enc_class movq_ld(regD dst, memory mem) %{
3439     MacroAssembler _masm(&cbuf);
3440     __ movq($dst$$XMMRegister, $mem$$Address);
3441   %}
3442 
3443   enc_class movq_st(memory mem, regD src) %{
3444     MacroAssembler _masm(&cbuf);
3445     __ movq($mem$$Address, $src$$XMMRegister);
3446   %}
3447 
3448   enc_class pshufd_8x8(regF dst, regF src) %{
3449     MacroAssembler _masm(&cbuf);
3450 
3451     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3452     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3453     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3454   %}
3455 
3456   enc_class pshufd_4x16(regF dst, regF src) %{
3457     MacroAssembler _masm(&cbuf);
3458 
3459     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3460   %}
3461 
3462   enc_class pshufd(regD dst, regD src, int mode) %{
3463     MacroAssembler _masm(&cbuf);
3464 
3465     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3466   %}
3467 
3468   enc_class pxor(regD dst, regD src) %{
3469     MacroAssembler _masm(&cbuf);
3470 
3471     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3472   %}
3473 
3474   enc_class mov_i2x(regD dst, rRegI src) %{
3475     MacroAssembler _masm(&cbuf);
3476 
3477     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3478   %}
3479 
3480   // obj: object to lock
3481   // box: box address (header location) -- killed
3482   // tmp: rax -- killed
3483   // scr: rbx -- killed
3484   //
3485   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3486   // from i486.ad.  See that file for comments.
3487   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3488   // use the shorter encoding.  (Movl clears the high-order 32-bits).
       //
       // Three code shapes are emitted, selected by the EmitSync flag:
       //   EmitSync & 1 : store unused_mark into the box and compare rsp with 0.
       //   EmitSync & 2 : stack-lock only (optional biased locking, CAS of the
       //                  box address into the mark word, recursive-lock check).
       //   otherwise    : same stack-lock sequence plus a separate path for
       //                  inflated monitors (mark word has bit 0x02 set).
       // NOTE(review): the result is presumably communicated through ZF (ZF=1
       // meaning the fast path succeeded), matching the ZF comments in
       // Fast_Unlock -- confirm against the instruct that uses this encoding.
3489 
3490 
3491   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3492   %{
3493     Register objReg = as_Register((int)$obj$$reg);
3494     Register boxReg = as_Register((int)$box$$reg);
3495     Register tmpReg = as_Register($tmp$$reg);
3496     Register scrReg = as_Register($scr$$reg);
3497     MacroAssembler masm(&cbuf);
3498 
3499     // Verify uniqueness of register assignments -- necessary but not sufficient
3500     assert (objReg != boxReg && objReg != tmpReg &&
3501             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3502 
         // Optional statistics: count every entry into this sequence.
3503     if (_counters != NULL) {
3504       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3505     }
3506     if (EmitSync & 1) {
3507         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3508         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
             // NOTE(review): comparing rsp with 0 presumably always yields ZF=0,
             // i.e. "fast path failed" -- confirm.
3509         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3510     } else
3511     if (EmitSync & 2) {
3512         Label DONE_LABEL;
3513         if (UseBiasedLocking) {
3514            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3515           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3516         }
3517         // QQQ was movl...
             // tmpReg = mark word | 1; saved in the box as the displaced header.
3518         masm.movptr(tmpReg, 0x1);
3519         masm.orptr(tmpReg, Address(objReg, 0));
3520         masm.movptr(Address(boxReg, 0), tmpReg);
3521         if (os::is_MP()) {
3522           masm.lock();
3523         }
             // CAS the box address into the object's mark word, expecting tmpReg (rax).
3524         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3525         masm.jcc(Assembler::equal, DONE_LABEL);
3526 
3527         // Recursive locking
             // (observed mark - rsp) & (7 - page_size) is stored into the box; the
             // andptr leaves ZF set only when that value is zero.
3528         masm.subptr(tmpReg, rsp);
3529         masm.andptr(tmpReg, 7 - os::vm_page_size());
3530         masm.movptr(Address(boxReg, 0), tmpReg);
3531 
3532         masm.bind(DONE_LABEL);
3533         masm.nop(); // avoid branch to branch
3534     } else {
3535         Label DONE_LABEL, IsInflated, Egress;
3536 
             // Load the mark word and branch on the 0x02 bit.
3537         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3538         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3539         masm.jcc   (Assembler::notZero, IsInflated) ; 
3540          
3541         // it's stack-locked, biased or neutral
3542         // TODO: optimize markword triage order to reduce the number of
3543         // conditional branches in the most common cases.
3544         // Beware -- there's a subtle invariant that fetch of the markword
3545         // at [FETCH], below, will never observe a biased encoding (*101b).
3546         // If this invariant is not held we'll suffer exclusion (safety) failure.
3547 
3548         if (UseBiasedLocking && !UseOptoBiasInlining) {
3549           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3550           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3551         }
3552 
3553         // was q will it destroy high?
             // tmpReg = mark | 1, saved in the box as the displaced header, then
             // CAS the box address into the mark word (expected value in tmpReg/rax).
3554         masm.orl   (tmpReg, 1) ; 
3555         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3556         if (os::is_MP()) { masm.lock(); } 
3557         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3558         if (_counters != NULL) {
3559            masm.cond_inc32(Assembler::equal,
3560                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3561         }
3562         masm.jcc   (Assembler::equal, DONE_LABEL);
3563 
3564         // Recursive locking
             // Same (mark - rsp) & (7 - page_size) test as in the EmitSync & 2
             // variant; the masked value is written to the box and sets ZF.
3565         masm.subptr(tmpReg, rsp);
3566         masm.andptr(tmpReg, 7 - os::vm_page_size());
3567         masm.movptr(Address(boxReg, 0), tmpReg);
3568         if (_counters != NULL) {
3569            masm.cond_inc32(Assembler::equal,
3570                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3571         }
3572         masm.jmp   (DONE_LABEL) ;
3573 
3574         masm.bind  (IsInflated) ;
3575         // It's inflated
3576 
3577         // TODO: someday avoid the ST-before-CAS penalty by
3578         // relocating (deferring) the following ST.
3579         // We should also think about trying a CAS without having
3580         // fetched _owner.  If the CAS is successful we may
3581         // avoid an RTO->RTS upgrade on the $line.
3582         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3583         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3584 
             // boxReg takes over the mark value loaded above (bit 0x02 still set,
             // hence the "-2" on every ObjectMonitor field offset); tmpReg is
             // reloaded with the monitor's owner field.
3585         masm.mov    (boxReg, tmpReg) ; 
3586         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3587         masm.testptr(tmpReg, tmpReg) ;   
             // Owner is non-NULL: leave with ZF=0.
3588         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3589 
3590         // It's inflated and appears unlocked
             // CAS r15_thread into the owner field, expecting tmpReg (rax) == NULL.
3591         if (os::is_MP()) { masm.lock(); } 
3592         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3593         // Intentional fall-through into DONE_LABEL ...
3594 
3595         masm.bind  (DONE_LABEL) ;
3596         masm.nop   () ;                 // avoid jmp to jmp
3597     }
3598   %}
3599 
3600   // obj: object to unlock
3601   // box: box address (displaced header location), killed
3602   // RBX: killed tmp; cannot be obj nor box
       //
       // Three code shapes are emitted, selected by the EmitSync flag:
       //   EmitSync & 4 : only compare rsp with 0.
       //   EmitSync & 8 : stack-lock release only (optional biased exit, then
       //                  CAS of the displaced header back into the mark word).
       //   otherwise    : as above plus a path for inflated monitors (mark word
       //                  bit 0x02 set).
       // On exit ZF=1 indicates success and ZF=0 failure (see LGoSlowPath and
       // LSuccess below).
3603   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3604   %{
3605 
3606     Register objReg = as_Register($obj$$reg);
3607     Register boxReg = as_Register($box$$reg);
3608     Register tmpReg = as_Register($tmp$$reg);
3609     MacroAssembler masm(&cbuf);
3610 
3611     if (EmitSync & 4) { 
3612        masm.cmpptr(rsp, 0) ; 
3613     } else
3614     if (EmitSync & 8) {
3615        Label DONE_LABEL;
3616        if (UseBiasedLocking) {
3617          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3618        }
3619 
3620        // Check whether the displaced header is 0
3621        //(=> recursive unlock)
3622        masm.movptr(tmpReg, Address(boxReg, 0));
3623        masm.testptr(tmpReg, tmpReg);
3624        masm.jcc(Assembler::zero, DONE_LABEL);
3625 
3626        // If not recursive lock, reset the header to displaced header
3627        if (os::is_MP()) {
3628          masm.lock();
3629        }
3630        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3631        masm.bind(DONE_LABEL);
3632        masm.nop(); // avoid branch to branch
3633     } else {
3634        Label DONE_LABEL, Stacked, CheckSucc ;
3635 
3636        if (UseBiasedLocking && !UseOptoBiasInlining) {
3637          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3638        }
3639         
            // tmpReg = mark word.  A zero displaced header in the box means a
            // recursive unlock: done, with ZF=1 from the compare.
3640        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3641        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3642        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
            // Bit 0x02 clear in the mark word: take the stack-lock path.
3643        masm.testl (tmpReg, 0x02) ; 
3644        masm.jcc   (Assembler::zero, Stacked) ; 
3645         
3646        // It's inflated
            // tmpReg carries the 0x02 bit, hence "-2" on each ObjectMonitor offset.
            // boxReg = (owner ^ r15_thread) | recursions; a non-zero result exits
            // with ZF=0.
3647        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3648        masm.xorptr(boxReg, r15_thread) ; 
3649        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3650        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // boxReg = cxq | EntryList; if both are NULL, clear the owner field and
            // finish (ZF is still 1 from the orptr, the store does not touch flags).
3651        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3652        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3653        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3654        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3655        masm.jmp   (DONE_LABEL) ; 
3656         
            // When EmitSync bit 65536 is set this block is omitted and CheckSucc is
            // bound just before DONE_LABEL instead (see below).
3657        if ((EmitSync & 65536) == 0) { 
3658          Label LSuccess, LGoSlowPath ;
3659          masm.bind  (CheckSucc) ;
              // succ field is NULL: go to the slow path.
3660          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3662 
3663          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3664          // explicit ST;MEMBAR combination, but masm doesn't currently support
3665          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3666          // are all faster when the write buffer is populated.
3667          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3668          if (os::is_MP()) {
3669             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3670          }
              // Re-check succ after the store and fence; non-NULL means success.
3671          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3672          masm.jcc   (Assembler::notZero, LSuccess) ;
3673 
              // CAS r15_thread back into the owner field, expecting NULL (rax = 0).
              // If the CAS fails (notEqual) control goes to LSuccess.
3674          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX
3675          if (os::is_MP()) { masm.lock(); }
3676          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3677          masm.jcc   (Assembler::notEqual, LSuccess) ;
3678          // Intentional fall-through into slow-path
3679 
3680          masm.bind  (LGoSlowPath) ;
3681          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3682          masm.jmp   (DONE_LABEL) ;
3683 
3684          masm.bind  (LSuccess) ;
3685          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3686          masm.jmp   (DONE_LABEL) ;
3687        }
3688 
3689        masm.bind  (Stacked) ; 
            // Stack-locked: CAS the displaced header (tmpReg) back into the mark
            // word, expecting the box address held in RAX.
3690        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3691        if (os::is_MP()) { masm.lock(); } 
3692        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3693 
3694        if (EmitSync & 65536) {
3695           masm.bind (CheckSucc) ;
3696        }
3697        masm.bind(DONE_LABEL);
3698        if (EmitSync & 32768) {
3699           masm.nop();                      // avoid branch to branch
3700        }
3701     }
3702   %}
3703 
3704 
3705   enc_class enc_rethrow()
3706   %{
         // Emits "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub().
         // The displacement is taken relative to the end of the 4-byte operand:
         // cbuf.code_end() is read after the opcode byte has been emitted, and
         // the extra "- 4" accounts for the disp32 itself.
3707     cbuf.set_inst_mark();
3708     emit_opcode(cbuf, 0xE9); // jmp entry
3709     emit_d32_reloc(cbuf,
3710                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3711                    runtime_call_Relocation::spec(),
3712                    RELOC_DISP32);
3713   %}
3714 
3715   enc_class absF_encoding(regF dst)
3716   %{
         // Hand-encodes opcode 0F 54 (ANDPS) with $dst as the register operand and
         // a RIP-relative memory operand (ModRM 00 reg 101) referring to
         // StubRoutines::x86::float_sign_mask().
3717     int dstenc = $dst$$reg;
3718     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3719 
3720     cbuf.set_inst_mark();
         // Registers 8..15 need a REX.R prefix; the ModRM field only holds 3 bits.
3721     if (dstenc >= 8) {
3722       emit_opcode(cbuf, Assembler::REX_R);
3723       dstenc -= 8;
3724     }
3725     // XXX reg_mem doesn't support RIP-relative addressing yet
3726     emit_opcode(cbuf, 0x0F);
3727     emit_opcode(cbuf, 0x54);
3728     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3729     emit_d32_reloc(cbuf, signmask_address);
3730   %}
3731 
3732   enc_class absD_encoding(regD dst)
3733   %{
         // Hand-encodes 66 0F 54 (ANDPD) with $dst as the register operand and a
         // RIP-relative memory operand (ModRM 00 reg 101) referring to
         // StubRoutines::x86::double_sign_mask().
3734     int dstenc = $dst$$reg;
3735     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3736 
3737     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
3738     emit_opcode(cbuf, 0x66);
3739     if (dstenc >= 8) {
3740       emit_opcode(cbuf, Assembler::REX_R);
3741       dstenc -= 8;
3742     }
3743     // XXX reg_mem doesn't support RIP-relative addressing yet
3744     emit_opcode(cbuf, 0x0F);
3745     emit_opcode(cbuf, 0x54);
3746     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3747     emit_d32_reloc(cbuf, signmask_address);
3748   %}
3749 
3750   enc_class negF_encoding(regF dst)
3751   %{
         // Hand-encodes opcode 0F 57 (XORPS) with $dst as the register operand and
         // a RIP-relative memory operand (ModRM 00 reg 101) referring to
         // StubRoutines::x86::float_sign_flip().
3752     int dstenc = $dst$$reg;
3753     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3754 
3755     cbuf.set_inst_mark();
         // Registers 8..15 need a REX.R prefix; the ModRM field only holds 3 bits.
3756     if (dstenc >= 8) {
3757       emit_opcode(cbuf, Assembler::REX_R);
3758       dstenc -= 8;
3759     }
3760     // XXX reg_mem doesn't support RIP-relative addressing yet
3761     emit_opcode(cbuf, 0x0F);
3762     emit_opcode(cbuf, 0x57);
3763     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3764     emit_d32_reloc(cbuf, signflip_address);
3765   %}
3766 
3767   enc_class negD_encoding(regD dst)
3768   %{
         // Hand-encodes 66 0F 57 (XORPD) with $dst as the register operand and a
         // RIP-relative memory operand (ModRM 00 reg 101) referring to
         // StubRoutines::x86::double_sign_flip().
3769     int dstenc = $dst$$reg;
3770     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3771 
3772     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
3773     emit_opcode(cbuf, 0x66);
3774     if (dstenc >= 8) {
3775       emit_opcode(cbuf, Assembler::REX_R);
3776       dstenc -= 8;
3777     }
3778     // XXX reg_mem doesn't support RIP-relative addressing yet
3779     emit_opcode(cbuf, 0x0F);
3780     emit_opcode(cbuf, 0x57);
3781     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3782     emit_d32_reloc(cbuf, signflip_address);
3783   %}
3784 
3785   enc_class f2i_fixup(rRegI dst, regF src)
3786   %{
         // Emits a fix-up sequence that runs only when $dst equals 0x80000000:
         //   cmpl $dst, 0x80000000 ; jne done
         //   subq rsp, 8 ; movss [rsp], $src ; call f2i_fixup stub ; popq $dst
         // NOTE(review): 0x80000000 is presumably the value a preceding
         // float->int conversion leaves on overflow/NaN -- confirm in the
         // instruct that uses this encoding.
3787     int dstenc = $dst$$reg;
3788     int srcenc = $src$$reg;
3789 
3790     // cmpl $dst, #0x80000000
3791     if (dstenc >= 8) {
3792       emit_opcode(cbuf, Assembler::REX_B);
3793     }
3794     emit_opcode(cbuf, 0x81);
3795     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3796     emit_d32(cbuf, 0x80000000);
3797 
3798     // jne,s done
         // The 8-bit displacement is the size of everything emitted below:
         // subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or 2 with REX)
         // = 15, 16 or 17 bytes.
3799     emit_opcode(cbuf, 0x75);
3800     if (srcenc < 8 && dstenc < 8) {
3801       emit_d8(cbuf, 0xF);
3802     } else if (srcenc >= 8 && dstenc >= 8) {
3803       emit_d8(cbuf, 0x11);
3804     } else {
3805       emit_d8(cbuf, 0x10);
3806     }
3807 
3808     // subq rsp, #8
3809     emit_opcode(cbuf, Assembler::REX_W);
3810     emit_opcode(cbuf, 0x83);
3811     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3812     emit_d8(cbuf, 8);
3813 
3814     // movss [rsp], $src
3815     emit_opcode(cbuf, 0xF3);
3816     if (srcenc >= 8) {
3817       emit_opcode(cbuf, Assembler::REX_R);
3818     }
3819     emit_opcode(cbuf, 0x0F);
3820     emit_opcode(cbuf, 0x11);
3821     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3822 
3823     // call f2i_fixup
         // rel32 is relative to the end of the call: code_end() is read after the
         // opcode byte, and "- 4" covers the displacement itself.
3824     cbuf.set_inst_mark();
3825     emit_opcode(cbuf, 0xE8);
3826     emit_d32_reloc(cbuf,
3827                    (int)
3828                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
3829                    runtime_call_Relocation::spec(),
3830                    RELOC_DISP32);
3831 
3832     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3833     if (dstenc >= 8) {
3834       emit_opcode(cbuf, Assembler::REX_B);
3835     }
3836     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3837 
3838     // done:
3839   %}
3840 
3841   enc_class f2l_fixup(rRegL dst, regF src)
3842   %{
         // Emits a fix-up sequence that runs only when $dst equals the 64-bit
         // constant stored at StubRoutines::x86::double_sign_flip():
         //   cmpq $dst, [rip + const] ; jne done
         //   subq rsp, 8 ; movss [rsp], $src ; call f2l_fixup stub ; popq $dst
         // NOTE(review): that constant is presumably the value a preceding
         // float->long conversion leaves on overflow/NaN -- confirm.
3843     int dstenc = $dst$$reg;
3844     int srcenc = $src$$reg;
3845     address const_address = (address) StubRoutines::x86::double_sign_flip();
3846 
3847     // cmpq $dst, [0x8000000000000000]
3848     cbuf.set_inst_mark();
3849     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3850     emit_opcode(cbuf, 0x39);
3851     // XXX reg_mem doesn't support RIP-relative addressing yet
3852     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3853     emit_d32_reloc(cbuf, const_address);
3854 
3855 
3856     // jne,s done
         // The 8-bit displacement is the size of everything emitted below:
         // subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or 2 with REX)
         // = 15, 16 or 17 bytes.
3857     emit_opcode(cbuf, 0x75);
3858     if (srcenc < 8 && dstenc < 8) {
3859       emit_d8(cbuf, 0xF);
3860     } else if (srcenc >= 8 && dstenc >= 8) {
3861       emit_d8(cbuf, 0x11);
3862     } else {
3863       emit_d8(cbuf, 0x10);
3864     }
3865 
3866     // subq rsp, #8
3867     emit_opcode(cbuf, Assembler::REX_W);
3868     emit_opcode(cbuf, 0x83);
3869     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3870     emit_d8(cbuf, 8);
3871 
3872     // movss [rsp], $src
3873     emit_opcode(cbuf, 0xF3);
3874     if (srcenc >= 8) {
3875       emit_opcode(cbuf, Assembler::REX_R);
3876     }
3877     emit_opcode(cbuf, 0x0F);
3878     emit_opcode(cbuf, 0x11);
3879     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3880 
3881     // call f2l_fixup
         // rel32 is relative to the end of the call: code_end() is read after the
         // opcode byte, and "- 4" covers the displacement itself.
3882     cbuf.set_inst_mark();
3883     emit_opcode(cbuf, 0xE8);
3884     emit_d32_reloc(cbuf,
3885                    (int)
3886                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
3887                    runtime_call_Relocation::spec(),
3888                    RELOC_DISP32);
3889 
3890     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3891     if (dstenc >= 8) {
3892       emit_opcode(cbuf, Assembler::REX_B);
3893     }
3894     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3895 
3896     // done:
3897   %}
3898 
3899   enc_class d2i_fixup(rRegI dst, regD src)
3900   %{
         // Emits a fix-up sequence that runs only when $dst equals 0x80000000:
         //   cmpl $dst, 0x80000000 ; jne done
         //   subq rsp, 8 ; movsd [rsp], $src ; call d2i_fixup stub ; popq $dst
         // NOTE(review): 0x80000000 is presumably the value a preceding
         // double->int conversion leaves on overflow/NaN -- confirm in the
         // instruct that uses this encoding.
3901     int dstenc = $dst$$reg;
3902     int srcenc = $src$$reg;
3903 
3904     // cmpl $dst, #0x80000000
3905     if (dstenc >= 8) {
3906       emit_opcode(cbuf, Assembler::REX_B);
3907     }
3908     emit_opcode(cbuf, 0x81);
3909     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3910     emit_d32(cbuf, 0x80000000);
3911 
3912     // jne,s done
         // The 8-bit displacement is the size of everything emitted below:
         // subq (4) + movsd (5, or 6 with REX) + call (5) + popq (1, or 2 with REX)
         // = 15, 16 or 17 bytes.
3913     emit_opcode(cbuf, 0x75);
3914     if (srcenc < 8 && dstenc < 8) {
3915       emit_d8(cbuf, 0xF);
3916     } else if (srcenc >= 8 && dstenc >= 8) {
3917       emit_d8(cbuf, 0x11);
3918     } else {
3919       emit_d8(cbuf, 0x10);
3920     }
3921 
3922     // subq rsp, #8
3923     emit_opcode(cbuf, Assembler::REX_W);
3924     emit_opcode(cbuf, 0x83);
3925     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3926     emit_d8(cbuf, 8);
3927 
3928     // movsd [rsp], $src
3929     emit_opcode(cbuf, 0xF2);
3930     if (srcenc >= 8) {
3931       emit_opcode(cbuf, Assembler::REX_R);
3932     }
3933     emit_opcode(cbuf, 0x0F);
3934     emit_opcode(cbuf, 0x11);
3935     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3936 
3937     // call d2i_fixup
         // rel32 is relative to the end of the call: code_end() is read after the
         // opcode byte, and "- 4" covers the displacement itself.
3938     cbuf.set_inst_mark();
3939     emit_opcode(cbuf, 0xE8);
3940     emit_d32_reloc(cbuf,
3941                    (int)
3942                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
3943                    runtime_call_Relocation::spec(),
3944                    RELOC_DISP32);
3945 
3946     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3947     if (dstenc >= 8) {
3948       emit_opcode(cbuf, Assembler::REX_B);
3949     }
3950     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3951 
3952     // done:
3953   %}
3954 
3955   enc_class d2l_fixup(rRegL dst, regD src)
3956   %{
         // Emits a fix-up sequence that runs only when $dst equals the 64-bit
         // constant stored at StubRoutines::x86::double_sign_flip():
         //   cmpq $dst, [rip + const] ; jne done
         //   subq rsp, 8 ; movsd [rsp], $src ; call d2l_fixup stub ; popq $dst
         // NOTE(review): that constant is presumably the value a preceding
         // double->long conversion leaves on overflow/NaN -- confirm.
3957     int dstenc = $dst$$reg;
3958     int srcenc = $src$$reg;
3959     address const_address = (address) StubRoutines::x86::double_sign_flip();
3960 
3961     // cmpq $dst, [0x8000000000000000]
3962     cbuf.set_inst_mark();
3963     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3964     emit_opcode(cbuf, 0x39);
3965     // XXX reg_mem doesn't support RIP-relative addressing yet
3966     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3967     emit_d32_reloc(cbuf, const_address);
3968 
3969 
3970     // jne,s done
         // The 8-bit displacement is the size of everything emitted below:
         // subq (4) + movsd (5, or 6 with REX) + call (5) + popq (1, or 2 with REX)
         // = 15, 16 or 17 bytes.
3971     emit_opcode(cbuf, 0x75);
3972     if (srcenc < 8 && dstenc < 8) {
3973       emit_d8(cbuf, 0xF);
3974     } else if (srcenc >= 8 && dstenc >= 8) {
3975       emit_d8(cbuf, 0x11);
3976     } else {
3977       emit_d8(cbuf, 0x10);
3978     }
3979 
3980     // subq rsp, #8
3981     emit_opcode(cbuf, Assembler::REX_W);
3982     emit_opcode(cbuf, 0x83);
3983     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3984     emit_d8(cbuf, 8);
3985 
3986     // movsd [rsp], $src
3987     emit_opcode(cbuf, 0xF2);
3988     if (srcenc >= 8) {
3989       emit_opcode(cbuf, Assembler::REX_R);
3990     }
3991     emit_opcode(cbuf, 0x0F);
3992     emit_opcode(cbuf, 0x11);
3993     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3994 
3995     // call d2l_fixup
         // rel32 is relative to the end of the call: code_end() is read after the
         // opcode byte, and "- 4" covers the displacement itself.
3996     cbuf.set_inst_mark();
3997     emit_opcode(cbuf, 0xE8);
3998     emit_d32_reloc(cbuf,
3999                    (int)
4000                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4001                    runtime_call_Relocation::spec(),
4002                    RELOC_DISP32);
4003 
4004     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
4005     if (dstenc >= 8) {
4006       emit_opcode(cbuf, Assembler::REX_B);
4007     }
4008     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4009 
4010     // done:
4011   %}
4012 
4013   // Safepoint Poll.  This polls the safepoint page, and causes an
4014   // exception if it is not readable. Unfortunately, it kills
4015   // RFLAGS in the process.
       // The instruction is hand-encoded as opcode 0x85 with a RIP-relative
       // memory operand pointing at os::get_polling_page(); a poll_type
       // relocation is recorded at the start of the instruction.
4016   enc_class enc_safepoint_poll
4017   %{
4018     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4019     // XXX reg_mem doesn't support RIP-relative addressing yet
4020     cbuf.set_inst_mark();
4021     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4022     emit_opcode(cbuf, 0x85); // testl
4023     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4024     // cbuf.inst_mark() is beginning of instruction
4025     emit_d32_reloc(cbuf, os::get_polling_page());
4026 //                    relocInfo::poll_type,
4027   %}
4028 %}
4029 
4030 
4031 
4032 //----------FRAME--------------------------------------------------------------
4033 // Definition of frame structure and management information.
4034 //
4035 //  S T A C K   L A Y O U T    Allocators stack-slot number
4036 //                             |   (to get allocators register number
4037 //  G  Owned by    |        |  v    add OptoReg::stack0())
4038 //  r   CALLER     |        |
4039 //  o     |        +--------+      pad to even-align allocators stack-slot
4040 //  w     V        |  pad0  |        numbers; owned by CALLER
4041 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4042 //  h     ^        |   in   |  5
4043 //        |        |  args  |  4   Holes in incoming args owned by SELF
4044 //  |     |        |        |  3
4045 //  |     |        +--------+
4046 //  V     |        | old out|      Empty on Intel, window on Sparc
4047 //        |    old |preserve|      Must be even aligned.
4048 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4049 //        |        |   in   |  3   area for Intel ret address
4050 //     Owned by    |preserve|      Empty on Sparc.
4051 //       SELF      +--------+
4052 //        |        |  pad2  |  2   pad to align old SP
4053 //        |        +--------+  1
4054 //        |        | locks  |  0
4055 //        |        +--------+----> OptoReg::stack0(), even aligned
4056 //        |        |  pad1  | 11   pad to align new SP
4057 //        |        +--------+
4058 //        |        |        | 10
4059 //        |        | spills |  9   spills
4060 //        V        |        |  8   (pad0 slot for callee)
4061 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4062 //        ^        |  out   |  7
4063 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4064 //     Owned by    +--------+
4065 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4066 //        |    new |preserve|      Must be even-aligned.
4067 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4068 //        |        |        |
4069 //
4070 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4071 //         known from SELF's arguments and the Java calling convention.
4072 //         Region 6-7 is determined per call site.
4073 // Note 2: If the calling convention leaves holes in the incoming argument
4074 //         area, those holes are owned by SELF.  Holes in the outgoing area
4075 //         are owned by the CALLEE.  Holes should not be necessary in the
4076 //         incoming area, as the Java calling convention is completely under
4077 //         the control of the AD file.  Doubles can be sorted and packed to
4078 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4079 //         varargs C calling conventions.
4080 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4081 //         even aligned with pad0 as needed.
4082 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4083 //         region 6-11 is even aligned; it may be padded out more so that
4084 //         the region from SP to FP meets the minimum stack alignment.
4085 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4086 //         alignment.  Region 11, pad1, may be dynamically extended so that
4087 //         SP meets the minimum alignment.
4088 
4089 frame
4090 %{
       // Describes the compiled-code frame for this platform: stack direction,
       // dedicated registers, stack-slot accounting, and the Java / C calling
       // conventions and return-value locations.

4091   // What direction does stack grow in (assumed to be same for C & Java)
4092   stack_direction(TOWARDS_LOW);
4093 
4094   // These three registers define part of the calling convention
4095   // between compiled code and the interpreter.
4096   inline_cache_reg(RAX);                // Inline Cache Register
4097   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4098                                         // calling interpreter
4099 
4100   // Optional: name the operand used by cisc-spilling to access
4101   // [stack_pointer + offset]
4102   cisc_spilling_operand_name(indOffset32);
4103 
4104   // Number of stack slots consumed by locking an object
4105   sync_stack_slots(2);
4106 
4107   // Compiled code's Frame Pointer
4108   frame_pointer(RSP);
4109 
4110   // Interpreter stores its frame pointer in a register which is
4111   // stored to the stack by I2CAdaptors.
4112   // I2CAdaptors convert from interpreted java to compiled java.
4113   interpreter_frame_pointer(RBP);
4114 
4115   // Stack alignment requirement
4116   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4117 
4118   // Number of stack slots between incoming argument block and the start of
4119   // a new frame.  The PROLOG must add this many slots to the stack.  The
4120   // EPILOG must remove this many slots.  amd64 needs two slots for
4121   // return address.
       // NOTE(review): the value below is 4 slots (plus 2 when VerifyStackAtCalls
       // is set), not 2; the extra two slots are presumably for the saved frame
       // pointer -- confirm and update the sentence above.
4122   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4123 
4124   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4125   // for calls to C.  Supports the var-args backing area for register parms.
4126   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4127 
4128   // The after-PROLOG location of the return address.  Location of
4129   // return address specifies a type (REG or STACK) and a number
4130   // representing the register number (i.e. - use a register name) or
4131   // stack slot.
4132   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4133   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + fixed slots) is rounded up to a
       // multiple of WordsPerLong * 2 before the 2 is subtracted again.
4134   return_addr(STACK - 2 +
4135               round_to(2 + 2 * VerifyStackAtCalls +
4136                        Compile::current()->fixed_slots(),
4137                        WordsPerLong * 2));
4138 
4139   // Body of function which returns an integer array locating
4140   // arguments either in registers or in stack slots.  Passed an array
4141   // of ideal registers called "sig" and a "length" count.  Stack-slot
4142   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4143   // arguments for a CALLEE.  Incoming stack arguments are
4144   // automatically biased by the preserve_stack_slots field above.
4145 
4146   calling_convention
4147   %{
4148     // No difference between ingoing/outgoing just pass false
4149     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4150   %}
4151 
4152   c_calling_convention
4153   %{
4154     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
4155     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4156   %}
4157 
4158   // Location of compiled Java return values.  Same as C for now.
4159   return_value
4160   %{
4161     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4162            "only return normal values");
4163 
         // Both tables are indexed by ideal_reg: lo[] is the register holding the
         // low half of the value, hi[] the high half (OptoReg::Bad when the value
         // occupies a single 32-bit slot).  The first two entries are placeholders.
         // NOTE(review): the tables carry an Op_RegN entry, but per the entry
         // order Op_RegN sorts before Op_RegI and so fails the assert above --
         // confirm this is intended.
4164     static const int lo[Op_RegL + 1] = {
4165       0,
4166       0,
4167       RAX_num,  // Op_RegN
4168       RAX_num,  // Op_RegI
4169       RAX_num,  // Op_RegP
4170       XMM0_num, // Op_RegF
4171       XMM0_num, // Op_RegD
4172       RAX_num   // Op_RegL
4173     };
4174     static const int hi[Op_RegL + 1] = {
4175       0,
4176       0,
4177       OptoReg::Bad, // Op_RegN
4178       OptoReg::Bad, // Op_RegI
4179       RAX_H_num,    // Op_RegP
4180       OptoReg::Bad, // Op_RegF
4181       XMM0_H_num,   // Op_RegD
4182       RAX_H_num     // Op_RegL
4183     };
         // Guards against a new ideal register type being added without
         // extending the tables above.
4184     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4185     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4186   %}
4187 %}
4188 
4189 //----------ATTRIBUTES---------------------------------------------------------
4190 //----------Operand Attributes-------------------------------------------------
     // The value in parentheses is the attribute's default; individual
     // operands override it where needed (e.g. op_cost(10) in immI below).
4191 op_attrib op_cost(0);        // Required cost attribute
4192 
4193 //----------Instruction Attributes---------------------------------------------
4194 ins_attrib ins_cost(100);       // Required cost attribute
4195 ins_attrib ins_size(8);         // Required size attribute (in bits)
4196 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4197 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4198                                 // a non-matching short branch variant
4199                                 // of some long branch?
4200 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4201                                 // be a power of 2) specifies the
4202                                 // alignment that some part of the
4203                                 // instruction (not necessarily the
4204                                 // start) requires.  If > 1, a
4205                                 // compute_padding() function must be
4206                                 // provided for the instruction
4207 
4208 //----------OPERANDS-----------------------------------------------------------
4209 // Operand definitions must precede instruction definitions for correct parsing
4210 // in the ADLC because operands constitute user defined types which are used in
4211 // instruction definitions.
4212 
4213 //----------Simple Operands----------------------------------------------------
4214 // Immediate Operands
4215 // Integer Immediate: matches any ConI node (no predicate).
4216 operand immI()
4217 %{
4218   match(ConI);
4219 
4220   op_cost(10);
4221   format %{ %}
4222   interface(CONST_INTER);
4223 %}
4224 
4225 // Constant for test vs zero: matches a ConI whose value is 0.
4226 operand immI0()
4227 %{
4228   predicate(n->get_int() == 0);
4229   match(ConI);
4230 
4231   op_cost(0);
4232   format %{ %}
4233   interface(CONST_INTER);
4234 %}
4235 
4236 // Constant for increment: matches a ConI whose value is 1.
4237 operand immI1()
4238 %{
4239   predicate(n->get_int() == 1);
4240   match(ConI);
4241 
4242   op_cost(0);
4243   format %{ %}
4244   interface(CONST_INTER);
4245 %}
4246 
4247 // Constant for decrement: matches a ConI whose value is -1.
4248 operand immI_M1()
4249 %{
4250   predicate(n->get_int() == -1);
4251   match(ConI);
4252 
4253   op_cost(0);
4254   format %{ %}
4255   interface(CONST_INTER);
4256 %}
4257 
4258 // Valid scale values for addressing modes: matches a ConI in the range 0..3.
4259 operand immI2()
4260 %{
4261   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4262   match(ConI);
4263 
4264   format %{ %}
4265   interface(CONST_INTER);
4266 %}
4267 
4268 operand immI8()
4269 %{
       // Integer immediate that fits in a signed 8-bit value (-128..127).
4270   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4271   match(ConI);
4272 
4273   op_cost(5);
4274   format %{ %}
4275   interface(CONST_INTER);
4276 %}
4277 
4278 operand immI16()
4279 %{
       // Integer immediate that fits in a signed 16-bit value (-32768..32767).
4280   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4281   match(ConI);
4282 
4283   op_cost(10);
4284   format %{ %}
4285   interface(CONST_INTER);
4286 %}
4287 
4288 // Constant for long shifts: matches a ConI whose value is 32.
4289 operand immI_32()
4290 %{
4291   predicate( n->get_int() == 32 );
4292   match(ConI);
4293 
4294   op_cost(0);
4295   format %{ %}
4296   interface(CONST_INTER);
4297 %}
4298 
4299 // Constant for long shifts: matches a ConI whose value is 64.
4300 operand immI_64()
4301 %{
4302   predicate( n->get_int() == 64 );
4303   match(ConI);
4304 
4305   op_cost(0);
4306   format %{ %}
4307   interface(CONST_INTER);
4308 %}
4309 
4310 // Pointer Immediate: matches any ConP node (no predicate).
4311 operand immP()
4312 %{
4313   match(ConP);
4314 
4315   op_cost(10);
4316   format %{ %}
4317   interface(CONST_INTER);
4318 %}
4319 
4320 // NULL Pointer Immediate: matches a ConP whose pointer value is 0.
4321 operand immP0()
4322 %{
4323   predicate(n->get_ptr() == 0);
4324   match(ConP);
4325 
4326   op_cost(5);
4327   format %{ %}
4328   interface(CONST_INTER);
4329 %}
4330 
4331 // Narrow Pointer Immediate: matches any ConN node (no predicate).
4332 operand immN() %{
4333   match(ConN);
4334 
4335   op_cost(10);
4336   format %{ %}
4337   interface(CONST_INTER);
4338 %}
4339 
4340 // Narrow NULL Pointer Immediate: matches a ConN whose narrow constant is 0.
4341 operand immN0() %{
4342   predicate(n->get_narrowcon() == 0);
4343   match(ConN);
4344 
4345   op_cost(5);
4346   format %{ %}
4347   interface(CONST_INTER);
4348 %}
4349 
4350 operand immP31()
4351 %{
       // Pointer immediate whose type is not an oop pointer and whose value
       // shifted right by 31 is zero, i.e. it has no bits set at or above bit 31.
4352   predicate(!n->as_Type()->type()->isa_oopptr()
4353             && (n->get_ptr() >> 31) == 0);
4354   match(ConP);
4355 
4356   op_cost(5);
4357   format %{ %}
4358   interface(CONST_INTER);
4359 %}
4360 
4361 
4362 // Long Immediate: matches any ConL node (no predicate).
4363 operand immL()
4364 %{
4365   match(ConL);
4366 
4367   op_cost(20);
4368   format %{ %}
4369   interface(CONST_INTER);
4370 %}
4371 
4372 // Long Immediate 8-bit: matches a ConL in the signed 8-bit range (-128..127).
4373 operand immL8()
4374 %{
4375   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4376   match(ConL);
4377 
4378   op_cost(5);
4379   format %{ %}
4380   interface(CONST_INTER);
4381 %}
4382 
4383 // Long Immediate 32-bit unsigned: matches a ConL that survives a round trip
     // through unsigned int, i.e. whose upper 32 bits are zero.
4384 operand immUL32()
4385 %{
4386   predicate(n->get_long() == (unsigned int) (n->get_long()));
4387   match(ConL);
4388 
4389   op_cost(10);
4390   format %{ %}
4391   interface(CONST_INTER);
4392 %}
4393 
4394 // Long Immediate 32-bit signed: matches a ConL that survives a round trip
     // through int, i.e. that is representable as a sign-extended 32-bit value.
4395 operand immL32()
4396 %{
4397   predicate(n->get_long() == (int) (n->get_long()));
4398   match(ConL);
4399 
4400   op_cost(15);
4401   format %{ %}
4402   interface(CONST_INTER);
4403 %}
4404 
4405 // Long Immediate zero
4406 operand immL0()
4407 %{
4408   predicate(n->get_long() == 0L);
4409   match(ConL);
4410 
4411   op_cost(10);
4412   format %{ %}
4413   interface(CONST_INTER);
4414 %}
4415 
4416 // Constant for increment
4417 operand immL1()
4418 %{
4419   predicate(n->get_long() == 1);
4420   match(ConL);
4421 
4422   format %{ %}
4423   interface(CONST_INTER);
4424 %}
4425 
4426 // Constant for decrement
4427 operand immL_M1()
4428 %{
4429   predicate(n->get_long() == -1);
4430   match(ConL);
4431 
4432   format %{ %}
4433   interface(CONST_INTER);
4434 %}
4435 
4436 // Long Immediate: the value 10
4437 operand immL10()
4438 %{
4439   predicate(n->get_long() == 10);
4440   match(ConL);
4441 
4442   format %{ %}
4443   interface(CONST_INTER);
4444 %}
4445 
4446 // Long immediate from 0 to 127.
4447 // Used for a shorter form of long mul by 10.
4448 operand immL_127()
4449 %{
4450   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4451   match(ConL);
4452 
4453   op_cost(10);
4454   format %{ %}
4455   interface(CONST_INTER);
4456 %}
4457 
4458 // Long Immediate: low 32-bit mask
4459 operand immL_32bits()
4460 %{
4461   predicate(n->get_long() == 0xFFFFFFFFL);
4462   match(ConL);
4463   op_cost(20);
4464 
4465   format %{ %}
4466   interface(CONST_INTER);
4467 %}
4468 
4469 // Float Immediate zero
4470 operand immF0()
4471 %{
4472   predicate(jint_cast(n->getf()) == 0);  // tests jint_cast() of the value, not the float itself; presumably a raw-bit test that excludes -0.0f -- TODO confirm
4473   match(ConF);
4474 
4475   op_cost(5);
4476   format %{ %}
4477   interface(CONST_INTER);
4478 %}
4479 
4480 // Float Immediate
4481 operand immF()
4482 %{
4483   match(ConF);
4484 
4485   op_cost(15);
4486   format %{ %}
4487   interface(CONST_INTER);
4488 %}
4489 
4490 // Double Immediate zero
4491 operand immD0()
4492 %{
4493   predicate(jlong_cast(n->getd()) == 0);  // tests jlong_cast() of the value, not the double itself; presumably a raw-bit test that excludes -0.0 -- TODO confirm
4494   match(ConD);
4495 
4496   op_cost(5);
4497   format %{ %}
4498   interface(CONST_INTER);
4499 %}
4500 
4501 // Double Immediate
4502 operand immD()
4503 %{
4504   match(ConD);
4505 
4506   op_cost(15);
4507   format %{ %}
4508   interface(CONST_INTER);
4509 %}
4510 
4511 // Immediates for special shifts (sign extend)
4512 
4513 // Constants for the sign-extending shift counts (16 and 24 bits)
4514 operand immI_16()
4515 %{
4516   predicate(n->get_int() == 16);
4517   match(ConI);
4518 
4519   format %{ %}
4520   interface(CONST_INTER);
4521 %}
4522 
4523 operand immI_24()
4524 %{
4525   predicate(n->get_int() == 24);
4526   match(ConI);
4527 
4528   format %{ %}
4529   interface(CONST_INTER);
4530 %}
4531 
4532 // Constant for byte-wide masking
4533 operand immI_255()
4534 %{
4535   predicate(n->get_int() == 255);
4536   match(ConI);
4537 
4538   format %{ %}
4539   interface(CONST_INTER);
4540 %}
4541 
4542 // Constant for short-wide masking
4543 operand immI_65535()
4544 %{
4545   predicate(n->get_int() == 65535);
4546   match(ConI);
4547 
4548   format %{ %}
4549   interface(CONST_INTER);
4550 %}
4551 
4552 // Constant for byte-wide masking
4553 operand immL_255()
4554 %{
4555   predicate(n->get_long() == 255);
4556   match(ConL);
4557 
4558   format %{ %}
4559   interface(CONST_INTER);
4560 %}
4561 
4562 // Constant for short-wide masking
4563 operand immL_65535()
4564 %{
4565   predicate(n->get_long() == 65535);
4566   match(ConL);
4567 
4568   format %{ %}
4569   interface(CONST_INTER);
4570 %}
4571 
4572 // Register Operands
4573 // Integer Register: general int operand, allocated from the int_reg class
4574 operand rRegI()
4575 %{
4576   constraint(ALLOC_IN_RC(int_reg));
4577   match(RegI);
4578 
4579   match(rax_RegI);  // the single-register int operands defined below are accepted as well
4580   match(rbx_RegI);
4581   match(rcx_RegI);
4582   match(rdx_RegI);
4583   match(rdi_RegI);
4584 
4585   format %{ %}
4586   interface(REG_INTER);
4587 %}
4588 
4589 // Special Registers
4590 operand rax_RegI()
4591 %{
4592   constraint(ALLOC_IN_RC(int_rax_reg));
4593   match(RegI);
4594   match(rRegI);
4595 
4596   format %{ "RAX" %}
4597   interface(REG_INTER);
4598 %}
4599 
4600 // Special Registers
4601 operand rbx_RegI()
4602 %{
4603   constraint(ALLOC_IN_RC(int_rbx_reg));
4604   match(RegI);
4605   match(rRegI);
4606 
4607   format %{ "RBX" %}
4608   interface(REG_INTER);
4609 %}
4610 
4611 operand rcx_RegI()
4612 %{
4613   constraint(ALLOC_IN_RC(int_rcx_reg));
4614   match(RegI);
4615   match(rRegI);
4616 
4617   format %{ "RCX" %}
4618   interface(REG_INTER);
4619 %}
4620 
4621 operand rdx_RegI()
4622 %{
4623   constraint(ALLOC_IN_RC(int_rdx_reg));
4624   match(RegI);
4625   match(rRegI);
4626 
4627   format %{ "RDX" %}
4628   interface(REG_INTER);
4629 %}
4630 
4631 operand rdi_RegI()
4632 %{
4633   constraint(ALLOC_IN_RC(int_rdi_reg));
4634   match(RegI);
4635   match(rRegI);
4636 
4637   format %{ "RDI" %}
4638   interface(REG_INTER);
4639 %}
4640 
4641 operand no_rcx_RegI()
4642 %{
4643   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4644   match(RegI);
4645   match(rax_RegI);
4646   match(rbx_RegI);
4647   match(rdx_RegI);
4648   match(rdi_RegI);
4649 
4650   format %{ %}
4651   interface(REG_INTER);
4652 %}
4653 
4654 operand no_rax_rdx_RegI()
4655 %{
4656   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4657   match(RegI);
4658   match(rbx_RegI);
4659   match(rcx_RegI);
4660   match(rdi_RegI);
4661 
4662   format %{ %}
4663   interface(REG_INTER);
4664 %}
4665 
4666 // Pointer Register (allocates from any_reg rather than ptr_reg; compare rRegP)
4667 operand any_RegP()
4668 %{
4669   constraint(ALLOC_IN_RC(any_reg));
4670   match(RegP);
4671   match(rax_RegP);
4672   match(rbx_RegP);
4673   match(rdi_RegP);
4674   match(rsi_RegP);
4675   match(rbp_RegP);
4676   match(r15_RegP);
4677   match(rRegP);  // anything that is an rRegP is accepted here too
4678 
4679   format %{ %}
4680   interface(REG_INTER);
4681 %}
4682 
4683 operand rRegP()
4684 %{
4685   constraint(ALLOC_IN_RC(ptr_reg));
4686   match(RegP);
4687   match(rax_RegP);
4688   match(rbx_RegP);
4689   match(rdi_RegP);
4690   match(rsi_RegP);
4691   match(rbp_RegP);
4692   match(r15_RegP);  // See Q&A below about r15_RegP.
4693 
4694   format %{ %}
4695   interface(REG_INTER);
4696 %}
4697 
4698 operand rRegN() %{
4699   constraint(ALLOC_IN_RC(int_reg));  // same int_reg class that rRegI allocates from
4700   match(RegN);
4701 
4702   format %{ %}
4703   interface(REG_INTER);
4704 %}
4705 
4706 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4707 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4708 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4709 // The output of an instruction is controlled by the allocator, which respects
4710 // register class masks, not match rules.  Unless an instruction mentions
4711 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4712 // by the allocator as an input.
4713 
4714 operand no_rax_RegP()
4715 %{
4716   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4717   match(RegP);
4718   match(rbx_RegP);
4719   match(rsi_RegP);
4720   match(rdi_RegP);
4721 
4722   format %{ %}
4723   interface(REG_INTER);
4724 %}
4725 
4726 operand no_rbp_RegP()
4727 %{
4728   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4729   match(RegP);
4730   match(rbx_RegP);
4731   match(rsi_RegP);
4732   match(rdi_RegP);
4733 
4734   format %{ %}
4735   interface(REG_INTER);
4736 %}
4737 
4738 operand no_rax_rbx_RegP()
4739 %{
4740   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4741   match(RegP);
4742   match(rsi_RegP);
4743   match(rdi_RegP);
4744 
4745   format %{ %}
4746   interface(REG_INTER);
4747 %}
4748 
4749 // Special Registers
4750 // Return a pointer value
4751 operand rax_RegP()
4752 %{
4753   constraint(ALLOC_IN_RC(ptr_rax_reg));
4754   match(RegP);
4755   match(rRegP);
4756 
4757   format %{ %}
4758   interface(REG_INTER);
4759 %}
4760 
4761 // Special Registers
4762 // Return a compressed pointer value
4763 operand rax_RegN()
4764 %{
4765   constraint(ALLOC_IN_RC(int_rax_reg));
4766   match(RegN);
4767   match(rRegN);
4768 
4769   format %{ %}
4770   interface(REG_INTER);
4771 %}
4772 
4773 // Used in AtomicAdd
4774 operand rbx_RegP()
4775 %{
4776   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4777   match(RegP);
4778   match(rRegP);
4779 
4780   format %{ %}
4781   interface(REG_INTER);
4782 %}
4783 
4784 operand rsi_RegP()
4785 %{
4786   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4787   match(RegP);
4788   match(rRegP);
4789 
4790   format %{ %}
4791   interface(REG_INTER);
4792 %}
4793 
4794 // Used in rep stosq
4795 operand rdi_RegP()
4796 %{
4797   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4798   match(RegP);
4799   match(rRegP);
4800 
4801   format %{ %}
4802   interface(REG_INTER);
4803 %}
4804 
4805 operand rbp_RegP()
4806 %{
4807   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4808   match(RegP);
4809   match(rRegP);
4810 
4811   format %{ %}
4812   interface(REG_INTER);
4813 %}
4814 
4815 operand r15_RegP()
4816 %{
4817   constraint(ALLOC_IN_RC(ptr_r15_reg));
4818   match(RegP);
4819   match(rRegP);
4820 
4821   format %{ %}
4822   interface(REG_INTER);
4823 %}
4824 
4825 operand rRegL()
4826 %{
4827   constraint(ALLOC_IN_RC(long_reg));
4828   match(RegL);
4829   match(rax_RegL);
4830   match(rdx_RegL);
4831 
4832   format %{ %}
4833   interface(REG_INTER);
4834 %}
4835 
4836 // Special Registers
4837 operand no_rax_rdx_RegL()
4838 %{
4839   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4840   match(RegL);
4841   match(rRegL);
4842 
4843   format %{ %}
4844   interface(REG_INTER);
4845 %}
4846 
4847 operand no_rax_RegL()
4848 %{
4849   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although the operand name only excludes rax -- confirm intent
4850   match(RegL);
4851   match(rRegL);
4852   match(rdx_RegL);  // rdx is accepted as a match here even though the class above is named no_rax_rdx
4853 
4854   format %{ %}
4855   interface(REG_INTER);
4856 %}
4857 
4858 operand no_rcx_RegL()
4859 %{
4860   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4861   match(RegL);
4862   match(rRegL);
4863 
4864   format %{ %}
4865   interface(REG_INTER);
4866 %}
4867 
4868 operand rax_RegL()
4869 %{
4870   constraint(ALLOC_IN_RC(long_rax_reg));
4871   match(RegL);
4872   match(rRegL);
4873 
4874   format %{ "RAX" %}
4875   interface(REG_INTER);
4876 %}
4877 
4878 operand rcx_RegL()
4879 %{
4880   constraint(ALLOC_IN_RC(long_rcx_reg));
4881   match(RegL);
4882   match(rRegL);
4883 
4884   format %{ %}
4885   interface(REG_INTER);
4886 %}
4887 
4888 operand rdx_RegL()
4889 %{
4890   constraint(ALLOC_IN_RC(long_rdx_reg));
4891   match(RegL);
4892   match(rRegL);
4893 
4894   format %{ %}
4895   interface(REG_INTER);
4896 %}
4897 
4898 // Flags register, used as output of compare instructions
4899 operand rFlagsReg()
4900 %{
4901   constraint(ALLOC_IN_RC(int_flags));
4902   match(RegFlags);
4903 
4904   format %{ "RFLAGS" %}
4905   interface(REG_INTER);
4906 %}
4907 
4908 // Flags register, used as output of FLOATING POINT compare instructions
4909 operand rFlagsRegU()
4910 %{
4911   constraint(ALLOC_IN_RC(int_flags));
4912   match(RegFlags);
4913 
4914   format %{ "RFLAGS_U" %}
4915   interface(REG_INTER);
4916 %}
4917 
4918 operand rFlagsRegUCF() %{
4919   constraint(ALLOC_IN_RC(int_flags));
4920   match(RegFlags);
4921   predicate(false);  // constant-false predicate; presumably so that only instructions naming this operand explicitly use it -- TODO confirm
4922 
4923   format %{ "RFLAGS_U_CF" %}
4924   interface(REG_INTER);
4925 %}
4926 
4927 // Float register operands
4928 operand regF()
4929 %{
4930   constraint(ALLOC_IN_RC(float_reg));
4931   match(RegF);
4932 
4933   format %{ %}
4934   interface(REG_INTER);
4935 %}
4936 
4937 // Double register operands
4938 operand regD() 
4939 %{
4940   constraint(ALLOC_IN_RC(double_reg));
4941   match(RegD);
4942 
4943   format %{ %}
4944   interface(REG_INTER);
4945 %}
4946 
4947 
4948 //----------Memory Operands----------------------------------------------------
4949 // Direct Memory Operand
4950 // operand direct(immP addr)
4951 // %{
4952 //   match(addr);
4953 
4954 //   format %{ "[$addr]" %}
4955 //   interface(MEMORY_INTER) %{
4956 //     base(0xFFFFFFFF);
4957 //     index(0x4);
4958 //     scale(0x0);
4959 //     disp($addr);
4960 //   %}
4961 // %}
4962 
4963 // Indirect Memory Operand: address is the base register alone
4964 operand indirect(any_RegP reg)
4965 %{
4966   constraint(ALLOC_IN_RC(ptr_reg));
4967   match(reg);
4968 
4969   format %{ "[$reg]" %}
4970   interface(MEMORY_INTER) %{
4971     base($reg);
4972     index(0x4);  // No Index
4973     scale(0x0);  // No Scale
4974     disp(0x0);   // No Displacement
4975   %}
4976 %}
4977 
4978 // Indirect Memory Plus Short Offset Operand (offset is an immL8 constant)
4979 operand indOffset8(any_RegP reg, immL8 off)
4980 %{
4981   constraint(ALLOC_IN_RC(ptr_reg));
4982   match(AddP reg off);
4983 
4984   format %{ "[$reg + $off (8-bit)]" %}
4985   interface(MEMORY_INTER) %{
4986     base($reg);
4987     index(0x4);  // No Index
4988     scale(0x0);  // No Scale
4989     disp($off);
4990   %}
4991 %}
4992 
4993 // Indirect Memory Plus Long Offset Operand (offset is an immL32 constant)
4994 operand indOffset32(any_RegP reg, immL32 off)
4995 %{
4996   constraint(ALLOC_IN_RC(ptr_reg));
4997   match(AddP reg off);
4998 
4999   format %{ "[$reg + $off (32-bit)]" %}
5000   interface(MEMORY_INTER) %{
5001     base($reg);
5002     index(0x4);  // No Index
5003     scale(0x0);  // No Scale
5004     disp($off);
5005   %}
5006 %}
5007 
5008 // Indirect Memory Plus Index Register Plus Offset Operand
5009 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5010 %{
5011   constraint(ALLOC_IN_RC(ptr_reg));
5012   match(AddP (AddP reg lreg) off);
5013 
5014   op_cost(10);
5015   format %{"[$reg + $off + $lreg]" %}
5016   interface(MEMORY_INTER) %{
5017     base($reg);
5018     index($lreg);
5019     scale(0x0);  // No Scale: the index register is used unshifted
5020     disp($off);
5021   %}
5022 %}
5023 
5024 // Indirect Memory Plus Index Register Operand (no offset)
5025 operand indIndex(any_RegP reg, rRegL lreg)
5026 %{
5027   constraint(ALLOC_IN_RC(ptr_reg));
5028   match(AddP reg lreg);
5029 
5030   op_cost(10);
5031   format %{"[$reg + $lreg]" %}
5032   interface(MEMORY_INTER) %{
5033     base($reg);
5034     index($lreg);
5035     scale(0x0);  // No Scale
5036     disp(0x0);   // No Displacement
5037   %}
5038 %}
5039 
5040 // Indirect Memory Times Scale Plus Index Register
5041 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5042 %{
5043   constraint(ALLOC_IN_RC(ptr_reg));
5044   match(AddP reg (LShiftL lreg scale));
5045 
5046   op_cost(10);
5047   format %{"[$reg + $lreg << $scale]" %}
5048   interface(MEMORY_INTER) %{
5049     base($reg);
5050     index($lreg);
5051     scale($scale);  // shift count of the matched LShiftL becomes the scale
5052     disp(0x0);      // No Displacement
5053   %}
5054 %}
5055 
5056 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5057 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5058 %{
5059   constraint(ALLOC_IN_RC(ptr_reg));
5060   match(AddP (AddP reg (LShiftL lreg scale)) off);
5061 
5062   op_cost(10);
5063   format %{"[$reg + $off + $lreg << $scale]" %}
5064   interface(MEMORY_INTER) %{
5065     base($reg);
5066     index($lreg);
5067     scale($scale);  // shift count of the matched LShiftL becomes the scale
5068     disp($off);
5069   %}
5070 %}
5071 
5072 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5073 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5074 %{
5075   constraint(ALLOC_IN_RC(ptr_reg));
5076   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound must be >= 0; presumably the node reached is the ConvI2L of idx -- TODO confirm
5077   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5078 
5079   op_cost(10);
5080   format %{"[$reg + $off + $idx << $scale]" %}
5081   interface(MEMORY_INTER) %{
5082     base($reg);
5083     index($idx);  // the int register itself is the index; the ConvI2L is absorbed by the match
5084     scale($scale);
5085     disp($off);
5086   %}
5087 %}
5088 
5089 // Indirect Narrow Oop Plus Offset Operand
5090 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5091 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
5092 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5093   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // only when narrow oops are shifted
5094   constraint(ALLOC_IN_RC(ptr_reg));
5095   match(AddP (DecodeN reg) off);
5096 
5097   op_cost(10);
5098   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5099   interface(MEMORY_INTER) %{
5100     base(0xc); // R12
5101     index($reg);  // the narrow oop register is used as the index, folding the DecodeN into the address
5102     scale(0x3);  // fixed shift of 3; NOTE(review): the predicate only requires a nonzero narrow_oop_shift -- confirm it is always 3
5103     disp($off);
5104   %}
5105 %}
5106 
5107 // Indirect Memory Operand through a narrow oop register (only when narrow_oop_shift is 0)
5108 operand indirectNarrow(rRegN reg)
5109 %{
5110   predicate(Universe::narrow_oop_shift() == 0);
5111   constraint(ALLOC_IN_RC(ptr_reg));
5112   match(DecodeN reg);
5113 
5114   format %{ "[$reg]" %}
5115   interface(MEMORY_INTER) %{
5116     base($reg);  // the narrow oop register is used directly as the base; the DecodeN is absorbed by the match
5117     index(0x4);  // No Index
5118     scale(0x0);  // No Scale
5119     disp(0x0);   // No Displacement
5120   %}
5121 %}
5122 
5123 // Indirect Memory Plus Short Offset Operand
5124 operand indOffset8Narrow(rRegN reg, immL8 off)
5125 %{
5126   predicate(Universe::narrow_oop_shift() == 0);
5127   constraint(ALLOC_IN_RC(ptr_reg));
5128   match(AddP (DecodeN reg) off);
5129 
5130   format %{ "[$reg + $off (8-bit)]" %}
5131   interface(MEMORY_INTER) %{
5132     base($reg);
5133     index(0x4);
5134     scale(0x0);
5135     disp($off);
5136   %}
5137 %}
5138 
5139 // Indirect Memory Plus Long Offset Operand
5140 operand indOffset32Narrow(rRegN reg, immL32 off)
5141 %{
5142   predicate(Universe::narrow_oop_shift() == 0);
5143   constraint(ALLOC_IN_RC(ptr_reg));
5144   match(AddP (DecodeN reg) off);
5145 
5146   format %{ "[$reg + $off (32-bit)]" %}
5147   interface(MEMORY_INTER) %{
5148     base($reg);
5149     index(0x4);
5150     scale(0x0);
5151     disp($off);
5152   %}
5153 %}
5154 
5155 // Indirect Memory Plus Index Register Plus Offset Operand
5156 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5157 %{
5158   predicate(Universe::narrow_oop_shift() == 0);
5159   constraint(ALLOC_IN_RC(ptr_reg));
5160   match(AddP (AddP (DecodeN reg) lreg) off);
5161 
5162   op_cost(10);
5163   format %{"[$reg + $off + $lreg]" %}
5164   interface(MEMORY_INTER) %{
5165     base($reg);
5166     index($lreg);
5167     scale(0x0);
5168     disp($off);
5169   %}
5170 %}
5171 
5172 // Indirect Memory Plus Index Register Operand (no offset), narrow oop base
5173 operand indIndexNarrow(rRegN reg, rRegL lreg)
5174 %{
5175   predicate(Universe::narrow_oop_shift() == 0);
5176   constraint(ALLOC_IN_RC(ptr_reg));
5177   match(AddP (DecodeN reg) lreg);
5178 
5179   op_cost(10);
5180   format %{"[$reg + $lreg]" %}
5181   interface(MEMORY_INTER) %{
5182     base($reg);
5183     index($lreg);
5184     scale(0x0);  // No Scale
5185     disp(0x0);   // No Displacement
5186   %}
5187 %}
5188 
5189 // Indirect Memory Times Scale Plus Index Register
5190 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5191 %{
5192   predicate(Universe::narrow_oop_shift() == 0);
5193   constraint(ALLOC_IN_RC(ptr_reg));
5194   match(AddP (DecodeN reg) (LShiftL lreg scale));
5195 
5196   op_cost(10);
5197   format %{"[$reg + $lreg << $scale]" %}
5198   interface(MEMORY_INTER) %{
5199     base($reg);
5200     index($lreg);
5201     scale($scale);
5202     disp(0x0);
5203   %}
5204 %}
5205 
5206 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5207 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5208 %{
5209   predicate(Universe::narrow_oop_shift() == 0);
5210   constraint(ALLOC_IN_RC(ptr_reg));
5211   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5212 
5213   op_cost(10);
5214   format %{"[$reg + $off + $lreg << $scale]" %}
5215   interface(MEMORY_INTER) %{
5216     base($reg);
5217     index($lreg);
5218     scale($scale);
5219     disp($off);
5220   %}
5221 %}
5222 
5223 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow oop base)
5224 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5225 %{
5226   constraint(ALLOC_IN_RC(ptr_reg));
5227   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // unshifted narrow oops and a long type with lower bound >= 0; presumably that of the ConvI2L of idx -- TODO confirm
5228   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5229 
5230   op_cost(10);
5231   format %{"[$reg + $off + $idx << $scale]" %}
5232   interface(MEMORY_INTER) %{
5233     base($reg);
5234     index($idx);  // the int register itself is the index; the ConvI2L is absorbed by the match
5235     scale($scale);
5236     disp($off);
5237   %}
5238 %}
5239 
5240 
5241 //----------Special Memory Operands--------------------------------------------
5242 // Stack Slot Operand - This operand is used for loading and storing temporary
5243 //                      values on the stack where a match requires a value to
5244 //                      flow through memory.
5245 operand stackSlotP(sRegP reg)
5246 %{
5247   constraint(ALLOC_IN_RC(stack_slots));
5248   // No match rule because this operand is only generated in matching
5249 
5250   format %{ "[$reg]" %}
5251   interface(MEMORY_INTER) %{
5252     base(0x4);   // RSP
5253     index(0x4);  // No Index
5254     scale(0x0);  // No Scale
5255     disp($reg);  // Stack Offset
5256   %}
5257 %}
5258 
5259 operand stackSlotI(sRegI reg)
5260 %{
5261   constraint(ALLOC_IN_RC(stack_slots));
5262   // No match rule because this operand is only generated in matching
5263 
5264   format %{ "[$reg]" %}
5265   interface(MEMORY_INTER) %{
5266     base(0x4);   // RSP
5267     index(0x4);  // No Index
5268     scale(0x0);  // No Scale
5269     disp($reg);  // Stack Offset
5270   %}
5271 %}
5272 
5273 operand stackSlotF(sRegF reg)
5274 %{
5275   constraint(ALLOC_IN_RC(stack_slots));
5276   // No match rule because this operand is only generated in matching
5277 
5278   format %{ "[$reg]" %}
5279   interface(MEMORY_INTER) %{
5280     base(0x4);   // RSP
5281     index(0x4);  // No Index
5282     scale(0x0);  // No Scale
5283     disp($reg);  // Stack Offset
5284   %}
5285 %}
5286 
5287 operand stackSlotD(sRegD reg)
5288 %{
5289   constraint(ALLOC_IN_RC(stack_slots));
5290   // No match rule because this operand is only generated in matching
5291 
5292   format %{ "[$reg]" %}
5293   interface(MEMORY_INTER) %{
5294     base(0x4);   // RSP
5295     index(0x4);  // No Index
5296     scale(0x0);  // No Scale
5297     disp($reg);  // Stack Offset
5298   %}
5299 %}
5300 operand stackSlotL(sRegL reg)
5301 %{
5302   constraint(ALLOC_IN_RC(stack_slots));
5303   // No match rule because this operand is only generated in matching
5304 
5305   format %{ "[$reg]" %}
5306   interface(MEMORY_INTER) %{
5307     base(0x4);   // RSP
5308     index(0x4);  // No Index
5309     scale(0x0);  // No Scale
5310     disp($reg);  // Stack Offset
5311   %}
5312 %}
5313 
5314 //----------Conditional Branch Operands----------------------------------------
5315 // Comparison Op  - This is the operation of the comparison, and is limited to
5316 //                  the following set of codes:
5317 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5318 //
5319 // Other attributes of the comparison, such as unsignedness, are specified
5320 // by the comparison instruction that sets a condition code flags register.
5321 // That result is represented by a flags operand whose subtype is appropriate
5322 // to the unsignedness (etc.) of the comparison.
5323 //
5324 // Later, the instruction which matches both the Comparison Op (a Bool) and
5325 // the flags (produced by the Cmp) specifies the coding of the comparison op
5326 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5327 
5328 // Comparison Code, signed compare (l, ge, le, g)
5329 operand cmpOp()
5330 %{
5331   match(Bool);
5332 
5333   format %{ "" %}
5334   interface(COND_INTER) %{
5335     equal(0x4, "e");
5336     not_equal(0x5, "ne");
5337     less(0xC, "l");
5338     greater_equal(0xD, "ge");
5339     less_equal(0xE, "le");
5340     greater(0xF, "g");
5341   %}
5342 %}
5343 
5344 // Comparison Code, unsigned compare.  Used by FP also, with
5345 // C2 (unordered) turned into GT or LT already.  The other bits
5346 // C0 and C3 are turned into Carry & Zero flags.
5347 operand cmpOpU()
5348 %{
5349   match(Bool);
5350 
5351   format %{ "" %}
5352   interface(COND_INTER) %{
5353     equal(0x4, "e");
5354     not_equal(0x5, "ne");
5355     less(0x2, "b");
5356     greater_equal(0x3, "nb");
5357     less_equal(0x6, "be");
5358     greater(0x7, "nbe");
5359   %}
5360 %}
5361 
5362 
5363 // Floating comparisons that don't require any fixup for the unordered case (same encodings as cmpOpU)
5364 operand cmpOpUCF() %{
5365   match(Bool);
5366   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5367             n->as_Bool()->_test._test == BoolTest::ge ||
5368             n->as_Bool()->_test._test == BoolTest::le ||
5369             n->as_Bool()->_test._test == BoolTest::gt);  // only lt, ge, le and gt tests match
5370   format %{ "" %}
5371   interface(COND_INTER) %{
5372     equal(0x4, "e");
5373     not_equal(0x5, "ne");
5374     less(0x2, "b");
5375     greater_equal(0x3, "nb");
5376     less_equal(0x6, "be");
5377     greater(0x7, "nbe");
5378   %}
5379 %}
5380 
5381 
5382 // Floating comparisons that can be fixed up with extra conditional jumps (same encodings as cmpOpU)
5383 operand cmpOpUCF2() %{
5384   match(Bool);
5385   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5386             n->as_Bool()->_test._test == BoolTest::eq);  // only ne and eq tests match
5387   format %{ "" %}
5388   interface(COND_INTER) %{
5389     equal(0x4, "e");
5390     not_equal(0x5, "ne");
5391     less(0x2, "b");
5392     greater_equal(0x3, "nb");
5393     less_equal(0x6, "be");
5394     greater(0x7, "nbe");
5395   %}
5396 %}
5397 
5398 
5399 //----------OPERAND CLASSES----------------------------------------------------
5400 // Operand Classes are groups of operands that are used to simplify
5401 // instruction definitions by not requiring the AD writer to specify separate
5402 // instructions for every form of operand when the instruction accepts
5403 // multiple operand types with the same basic encoding and format.  The classic
5404 // case of this is memory operands.
5405 
5406 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5407                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5408                indCompressedOopOffset,
5409                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5410                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5411                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5412 
5413 //----------PIPELINE-----------------------------------------------------------
5414 // Rules which define the behavior of the target architectures pipeline.
5415 pipeline %{
5416 
5417 //----------ATTRIBUTES---------------------------------------------------------
5418 attributes %{
5419   variable_size_instructions;        // Variable size instructions
5420   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5421   instruction_unit_size = 1;         // Instruction sizes are counted in units of 1 byte
5422   instruction_fetch_unit_size = 16;  // The processor fetches one line
5423   instruction_fetch_units = 1;       // of 16 bytes
5424 
5425   // List of nop instructions
5426   nops( MachNop );
5427 %}
5428 
5429 //----------RESOURCES----------------------------------------------------------
5430 // Resources are the functional units available to the machine
5431 
5432 // Generic P2/P3 pipeline
5433 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5434 // 3 instructions decoded per cycle.
5435 // 2 load/store ops per cycle (NOTE(review): three units MS0-MS2 are declared below -- confirm), 1 branch, 1 FPU,
5436 // 3 ALU ops, only ALU0 handles mul instructions.
5437 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5438            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5439            BR, FPU,
5440            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5441 
5442 //----------PIPELINE DESCRIPTION-----------------------------------------------
5443 // Pipeline Description specifies the stages in the machine's pipeline
5444 
5445 // Generic P2/P3 pipeline
5446 pipe_desc(S0, S1, S2, S3, S4, S5);
5447 
5448 //----------PIPELINE CLASSES---------------------------------------------------
5449 // Pipeline Classes describe the stages in which input and output are
5450 // referenced by the hardware pipeline.
5451 
5452 // Naming convention: ialu or fpu
5453 // Then: _reg
5454 // Then: _reg if there is a 2nd register
5455 // Then: _long if it's a pair of instructions implementing a long
5456 // Then: _fat if it requires the big decoder
5457 //   Or: _mem if it requires the big decoder and a memory unit.
5458 
5459 // Integer ALU reg operation
5460 pipe_class ialu_reg(rRegI dst)
5461 %{
5462     single_instruction;
5463     dst    : S4(write);
5464     dst    : S3(read);
5465     DECODE : S0;        // any decoder
5466     ALU    : S3;        // any alu
5467 %}
5468 
5469 // Long ALU reg operation (counted as 2 instructions)
5470 pipe_class ialu_reg_long(rRegL dst)
5471 %{
5472     instruction_count(2);
5473     dst    : S4(write);
5474     dst    : S3(read);
5475     DECODE : S0(2);     // any 2 decoders
5476     ALU    : S3(2);     // any 2 alus
5477 %}
5478 
5479 // Integer ALU reg operation using big decoder
5480 pipe_class ialu_reg_fat(rRegI dst)
5481 %{
5482     single_instruction;
5483     dst    : S4(write);
5484     dst    : S3(read);
5485     D0     : S0;        // big decoder only
5486     ALU    : S3;        // any alu
5487 %}
5488 
5489 // Long ALU reg operation using big decoder
5490 pipe_class ialu_reg_long_fat(rRegL dst)
5491 %{
5492     instruction_count(2);
5493     dst    : S4(write);
5494     dst    : S3(read);
5495     D0     : S0(2);     // big decoder only; twice
5496     ALU    : S3(2);     // any 2 alus
5497 %}
5498 
5499 // Integer ALU reg-reg operation
5500 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5501 %{
5502     single_instruction;
5503     dst    : S4(write);
5504     src    : S3(read);
5505     DECODE : S0;        // any decoder
5506     ALU    : S3;        // any alu
5507 %}
5508 
5509 // Long ALU reg-reg operation (counted as 2 instructions)
5510 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5511 %{
5512     instruction_count(2);
5513     dst    : S4(write);
5514     src    : S3(read);
5515     DECODE : S0(2);     // any 2 decoders
5516     ALU    : S3(2);     // any 2 alus
5517 %}
5518 
5519 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand despite the reg_reg name -- confirm)
5520 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5521 %{
5522     single_instruction;
5523     dst    : S4(write);
5524     src    : S3(read);
5525     D0     : S0;        // big decoder only
5526     ALU    : S3;        // any alu
5527 %}
5528 
5529 // Long ALU reg-reg operation using big decoder (counted as 2 instructions)
5530 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5531 %{
5532     instruction_count(2);
5533     dst    : S4(write);
5534     src    : S3(read);
5535     D0     : S0(2);     // big decoder only; twice
5536     ALU    : S3(2);     // any 2 alus
5537 %}
5538 
5539 // Integer ALU reg-mem operation
5540 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5541 %{
5542     single_instruction;
5543     dst    : S5(write);
5544     mem    : S3(read);
5545     D0     : S0;        // big decoder only
5546     ALU    : S4;        // any alu
5547     MEM    : S3;        // any mem
5548 %}
5549 
5550 // Integer mem operation (prefetch)
5551 pipe_class ialu_mem(memory mem)
5552 %{
5553     single_instruction;
5554     mem    : S3(read);
5555     D0     : S0;        // big decoder only
5556     MEM    : S3;        // any mem
5557 %}
5558 
5559 // Integer Store to Memory
5560 pipe_class ialu_mem_reg(memory mem, rRegI src)
5561 %{
5562     single_instruction;
5563     mem    : S3(read);
5564     src    : S5(read);
5565     D0     : S0;        // big decoder only
5566     ALU    : S4;        // any alu
5567     MEM    : S3;
5568 %}
5569 
5570 // // Long Store to Memory
5571 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5572 // %{
5573 //     instruction_count(2);
5574 //     mem    : S3(read);
5575 //     src    : S5(read);
5576 //     D0     : S0(2);          // big decoder only; twice
5577 //     ALU    : S4(2);     // any 2 alus
5578 //     MEM    : S3(2);  // Both mems
5579 // %}
5580 
5581 // Integer Store to Memory
5582 pipe_class ialu_mem_imm(memory mem)
5583 %{
5584     single_instruction;
5585     mem    : S3(read);
5586     D0     : S0;        // big decoder only
5587     ALU    : S4;        // any alu
5588     MEM    : S3;
5589 %}
5590 
5591 // Integer ALU0 reg-reg operation
5592 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5593 %{
5594     single_instruction;
5595     dst    : S4(write);
5596     src    : S3(read);
5597     D0     : S0;        // Big decoder only
5598     ALU0   : S3;        // only alu0
5599 %}
5600 
5601 // Integer ALU0 reg-mem operation: memory operand, execution restricted to ALU0
5602 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5603 %{
5604     single_instruction;
5605     dst    : S5(write); // result written in stage S5
5606     mem    : S3(read);  // memory operand read in stage S3
5607     D0     : S0;        // big decoder only
5608     ALU0   : S4;        // ALU0 only
5609     MEM    : S3;        // any mem
5610 %}
5611 
5612 // Integer ALU reg-reg operation that produces condition flags (cr) only
5613 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5614 %{
5615     single_instruction;
5616     cr     : S4(write); // flags written in stage S4
5617     src1   : S3(read);
5618     src2   : S3(read);
5619     DECODE : S0;        // any decoder
5620     ALU    : S3;        // any alu
5621 %}
5622 
5623 // Integer ALU reg-imm operation that produces condition flags (cr) only
5624 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5625 %{
5626     single_instruction;
5627     cr     : S4(write); // flags written in stage S4
5628     src1   : S3(read);  // the only register input; no second operand is declared
5629     DECODE : S0;        // any decoder
5630     ALU    : S3;        // any alu
5631 %}
5632 
5633 // Integer ALU reg-mem operation that produces condition flags (cr) only
5634 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5635 %{
5636     single_instruction;
5637     cr     : S4(write); // flags written in stage S4
5638     src1   : S3(read);
5639     src2   : S3(read);  // memory operand
5640     D0     : S0;        // big decoder only
5641     ALU    : S4;        // any alu
5642     MEM    : S3;        // any mem
5643 %}
5644 
5645 // Four-instruction sequence over three registers (name suggests a compare-less-than idiom; confirm at use sites)
5646 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5647 %{
5648     instruction_count(4);
5649     y      : S4(read);  // read one stage later than p and q
5650     q      : S3(read);
5651     p      : S3(read);
5652     DECODE : S0(4);     // any decoder
5653 %}
5654 
5655 // Conditional move reg-reg: reads the source and the flags, writes dst
5656 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5657 %{
5658     single_instruction;
5659     dst    : S4(write);
5660     src    : S3(read);
5661     cr     : S3(read);  // flags select whether the move happens
5662     DECODE : S0;        // any decoder
5663 %}
5664 
5665 // Conditional move reg-mem: source is a memory operand (note cr is the first parameter here)
5666 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5667 %{
5668     single_instruction;
5669     dst    : S4(write);
5670     src    : S3(read);  // memory operand
5671     cr     : S3(read);
5672     DECODE : S0;        // any decoder
5673     MEM    : S3;        // any mem
5674 %}
5675 
5676 // Conditional move reg-reg long: as pipe_cmov_reg but with rRegL operands and two decode slots
5677 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5678 %{
5679     single_instruction;
5680     dst    : S4(write);
5681     src    : S3(read);
5682     cr     : S3(read);
5683     DECODE : S0(2);     // any 2 decoders
5684 %}
5685 
5686 // XXX
5687 // // Conditional move double reg-reg
5688 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5689 // %{
5690 //     single_instruction;
5691 //     dst    : S4(write);
5692 //     src    : S3(read);
5693 //     cr     : S3(read);
5694 //     DECODE : S0;     // any decoder
5695 // %}
5696 
5697 // Float single-register operation (two instructions; dst is declared as read only)
5698 pipe_class fpu_reg(regD dst)
5699 %{
5700     instruction_count(2);
5701     dst    : S3(read);
5702     DECODE : S0(2);     // any 2 decoders
5703     FPU    : S3;
5704 %}
5705 
5706 // Float reg-reg operation (two instructions, one source register)
5707 pipe_class fpu_reg_reg(regD dst, regD src)
5708 %{
5709     instruction_count(2);
5710     dst    : S4(write);
5711     src    : S3(read);
5712     DECODE : S0(2);     // any 2 decoders
5713     FPU    : S3;
5714 %}
5715 
5716 // Float reg-reg-reg operation (three instructions, two source registers)
5717 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5718 %{
5719     instruction_count(3);
5720     dst    : S4(write);
5721     src1   : S3(read);
5722     src2   : S3(read);
5723     DECODE : S0(3);     // any 3 decoders
5724     FPU    : S3(2);
5725 %}
5726 
5727 // Float operation with three source registers (four instructions)
5728 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5729 %{
5730     instruction_count(4);
5731     dst    : S4(write);
5732     src1   : S3(read);
5733     src2   : S3(read);
5734     src3   : S3(read);
5735     DECODE : S0(4);     // any 4 decoders
5736     FPU    : S3(2);
5737 %}
5738 
5739 // Float operation with one memory source and two register sources (four instructions)
5740 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5741 %{
5742     instruction_count(4);
5743     dst    : S4(write);
5744     src1   : S3(read);  // memory operand
5745     src2   : S3(read);
5746     src3   : S3(read);
5747     DECODE : S1(3);     // any 3 decoders
5748     D0     : S0;        // Big decoder only
5749     FPU    : S3(2);
5750     MEM    : S3;        // any mem
5751 %}
5752 
5753 // Float reg-mem operation: register result from a memory operand (two instructions)
5754 pipe_class fpu_reg_mem(regD dst, memory mem)
5755 %{
5756     instruction_count(2);
5757     dst    : S5(write);
5758     mem    : S3(read);
5759     D0     : S0;        // big decoder only
5760     DECODE : S1;        // any decoder for FPU POP
5761     FPU    : S4;
5762     MEM    : S3;        // any mem
5763 %}
5764 
5765 // Float reg-reg-mem operation: one register source plus a memory operand (three instructions)
5766 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5767 %{
5768     instruction_count(3);
5769     dst    : S5(write);
5770     src1   : S3(read);
5771     mem    : S3(read);
5772     D0     : S0;        // big decoder only
5773     DECODE : S1(2);     // any decoder for FPU POP
5774     FPU    : S4;
5775     MEM    : S3;        // any mem
5776 %}
5777 
5778 // Float mem-reg operation: register source going to a memory operand (two instructions)
5779 pipe_class fpu_mem_reg(memory mem, regD src)
5780 %{
5781     instruction_count(2);
5782     src    : S5(read);
5783     mem    : S3(read);
5784     DECODE : S0;        // any decoder for FPU PUSH
5785     D0     : S1;        // big decoder only
5786     FPU    : S4;
5787     MEM    : S3;        // any mem
5788 %}
5789 
5790 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) // float mem <- two register sources
5791 %{
5792     instruction_count(3);
5793     src1   : S3(read);
5794     src2   : S3(read);
5795     mem    : S3(read);
5796     DECODE : S0(2);     // any decoder for FPU PUSH
5797     D0     : S1;        // big decoder only
5798     FPU    : S4;
5799     MEM    : S3;        // any mem
5800 %}
5801 
5802 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) // float mem <- register and memory sources
5803 %{
5804     instruction_count(3);
5805     src1   : S3(read);
5806     src2   : S3(read);  // memory operand
5807     mem    : S4(read);
5808     DECODE : S0;        // any decoder for FPU PUSH
5809     D0     : S0(2);     // big decoder only
5810     FPU    : S4;
5811     MEM    : S3(2);     // any mem
5812 %}
5813 
5814 pipe_class fpu_mem_mem(memory dst, memory src1) // float mem <- mem; no FPU resource is claimed
5815 %{
5816     instruction_count(2);
5817     src1   : S3(read);
5818     dst    : S4(read);  // declared as a read even though it is named dst
5819     D0     : S0(2);     // big decoder only
5820     MEM    : S3(2);     // any mem
5821 %}
5822 
5823 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) // float mem <- two memory sources
5824 %{
5825     instruction_count(3);
5826     src1   : S3(read);
5827     src2   : S3(read);
5828     dst    : S4(read);  // declared as a read even though it is named dst
5829     D0     : S0(3);     // big decoder only
5830     FPU    : S4;
5831     MEM    : S3(3);     // any mem
5832 %}
5833 
5834 pipe_class fpu_mem_reg_con(memory mem, regD src1) // float mem <- register source (name suggests a constant as well)
5835 %{
5836     instruction_count(3);
5837     src1   : S4(read);
5838     mem    : S4(read);
5839     DECODE : S0;        // any decoder for FPU PUSH
5840     D0     : S0(2);     // big decoder only
5841     FPU    : S4;
5842     MEM    : S3(2);     // any mem
5843 %}
5844 
5845 // Float load constant into a register (two instructions; uses a memory unit for the load)
5846 pipe_class fpu_reg_con(regD dst)
5847 %{
5848     instruction_count(2);
5849     dst    : S5(write);
5850     D0     : S0;        // big decoder only for the load
5851     DECODE : S1;        // any decoder for FPU POP
5852     FPU    : S4;
5853     MEM    : S3;        // any mem
5854 %}
5855 
5856 // Float load constant combined with a register source (three instructions)
5857 pipe_class fpu_reg_reg_con(regD dst, regD src)
5858 %{
5859     instruction_count(3);
5860     dst    : S5(write);
5861     src    : S3(read);
5862     D0     : S0;        // big decoder only for the load
5863     DECODE : S1(2);     // any decoder for FPU POP
5864     FPU    : S4;
5865     MEM    : S3;        // any mem
5866 %}
5867 
5868 // Unconditional branch: claims only the branch unit
5869 pipe_class pipe_jmp(label labl)
5870 %{
5871     single_instruction;
5872     BR   : S3;          // branch unit in stage S3
5873 %}
5874 
5875 // Conditional branch: reads the flags, then claims the branch unit
5876 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5877 %{
5878     single_instruction;
5879     cr    : S1(read);   // flags read in stage S1
5880     BR    : S3;         // branch unit in stage S3
5881 %}
5882 
5883 // Allocation idiom (compare-and-exchange): serializing, fixed latency of 6
5884 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5885 %{
5886     instruction_count(1); force_serialization;
5887     fixed_latency(6);
5888     heap_ptr : S3(read);
5889     DECODE   : S0(3);   // three decode slots
5890     D0       : S2;      // big decoder, in stage S2
5891     MEM      : S3;      // any mem
5892     ALU      : S3(2);   // two ALU slots
5893     dst      : S5(write);
5894     BR       : S5;      // branch unit in stage S5
5895 %}
5896 
5897 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, serializing, latency 100
5898 pipe_class pipe_slow()
5899 %{
5900     instruction_count(10); multiple_bundles; force_serialization;
5901     fixed_latency(100);
5902     D0  : S0(2);        // big decoder, two slots
5903     MEM : S3(2);        // two memory slots
5904 %}
5905 
5906 // The real do-nothing guy: zero instructions and no resources (used for MachNop below)
5907 pipe_class empty()
5908 %{
5909     instruction_count(0);
5910 %}
5911 
5912 // Define the class for the Nop node: MachNop is scheduled with the "empty" pipe class
5913 define
5914 %{
5915    MachNop = empty;
5916 %}
5917 
5918 %}
5919 
5920 //----------INSTRUCTIONS-------------------------------------------------------
5921 //
5922 // match      -- States which machine-independent subtree may be replaced
5923 //               by this instruction.
5924 // ins_cost   -- The estimated cost of this instruction is used by instruction
5925 //               selection to identify a minimum cost tree of machine
5926 //               instructions that matches a tree of machine-independent
5927 //               instructions.
5928 // format     -- A string providing the disassembly for this instruction.
5929 //               The value of an instruction's operand may be inserted
5930 //               by referring to it with a '$' prefix.
5931 // opcode     -- Three instruction opcodes may be provided.  These are referred
5932 //               to within an encode class as $primary, $secondary, and $tertiary
5933 //               respectively.  The primary opcode is commonly used to
5934 //               indicate the type of machine instruction, while secondary
5935 //               and tertiary are often used for prefix options or addressing
5936 //               modes.
5937 // ins_encode -- A list of encode classes with parameters. The encode class
5938 //               name must have been defined in an 'enc_class' specification
5939 //               in the encode section of the architecture description.
5940 
5941 
5942 //----------Load/Store/Move Instructions---------------------------------------
5943 //----------Load Instructions--------------------------------------------------
5944 
5945 // Load Byte (8 bit signed): matches LoadB and sign-extends into an int register
5946 instruct loadB(rRegI dst, memory mem)
5947 %{
5948   match(Set dst (LoadB mem));
5949 
5950   ins_cost(125);
5951   format %{ "movsbl  $dst, $mem\t# byte" %}
5952 
5953   ins_encode %{
5954     __ movsbl($dst$$Register, $mem$$Address);
5955   %}
5956 
5957   ins_pipe(ialu_reg_mem); // single reg-mem integer instruction
5958 %}
5959 
5960 // Load Byte (8 bit signed) into Long Register: folds ConvI2L into one sign-extending load
5961 instruct loadB2L(rRegL dst, memory mem)
5962 %{
5963   match(Set dst (ConvI2L (LoadB mem)));
5964 
5965   ins_cost(125);
5966   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5967 
5968   ins_encode %{
5969     __ movsbq($dst$$Register, $mem$$Address);
5970   %}
5971 
5972   ins_pipe(ialu_reg_mem);
5973 %}
5974 
5975 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and zero-extends into an int register
5976 instruct loadUB(rRegI dst, memory mem)
5977 %{
5978   match(Set dst (LoadUB mem));
5979 
5980   ins_cost(125);
5981   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5982 
5983   ins_encode %{
5984     __ movzbl($dst$$Register, $mem$$Address);
5985   %}
5986 
5987   ins_pipe(ialu_reg_mem);
5988 %}
5989 
5990 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L into one zero-extending load
5991 instruct loadUB2L(rRegL dst, memory mem)
5992 %{
5993   match(Set dst (ConvI2L (LoadUB mem)));
5994 
5995   ins_cost(125);
5996   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5997 
5998   ins_encode %{
5999     __ movzbq($dst$$Register, $mem$$Address);
6000   %}
6001 
6002   ins_pipe(ialu_reg_mem);
6003 %}
6004 
6005 // Load Unsigned Byte, AND it with an 8-bit immediate mask, and widen to long (two instructions)
6006 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6007   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6008   effect(KILL cr); // the AND modifies the flags
6009 
6010   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6011             "andl    $dst, $mask" %}
6012   ins_encode %{
6013     Register result = $dst$$Register;
6014     __ movzbq(result, $mem$$Address);
6015     __ andl(result, $mask$$constant);
6016   %}
6017   ins_pipe(ialu_reg_mem);
6018 %}
6019 
6020 // Load Short (16 bit signed): matches LoadS and sign-extends into an int register
6021 instruct loadS(rRegI dst, memory mem)
6022 %{
6023   match(Set dst (LoadS mem));
6024 
6025   ins_cost(125);
6026   format %{ "movswl $dst, $mem\t# short" %}
6027 
6028   ins_encode %{
6029     __ movswl($dst$$Register, $mem$$Address);
6030   %}
6031 
6032   ins_pipe(ialu_reg_mem);
6033 %}
6034 
6035 // Load Short (16 bit signed) to Byte (8 bit signed): (x << 24) >> 24 of a short load becomes one byte load
6036 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6037   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6038 
6039   ins_cost(125);
6040   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6041   ins_encode %{
6042     __ movsbl($dst$$Register, $mem$$Address);
6043   %}
6044   ins_pipe(ialu_reg_mem);
6045 %}
6046 
6047 // Load Short (16 bit signed) into Long Register: folds ConvI2L into one sign-extending load
6048 instruct loadS2L(rRegL dst, memory mem)
6049 %{
6050   match(Set dst (ConvI2L (LoadS mem)));
6051 
6052   ins_cost(125);
6053   format %{ "movswq $dst, $mem\t# short -> long" %}
6054 
6055   ins_encode %{
6056     __ movswq($dst$$Register, $mem$$Address);
6057   %}
6058 
6059   ins_pipe(ialu_reg_mem);
6060 %}
6061 
6062 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and zero-extends into an int register
6063 instruct loadUS(rRegI dst, memory mem)
6064 %{
6065   match(Set dst (LoadUS mem));
6066 
6067   ins_cost(125);
6068   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6069 
6070   ins_encode %{
6071     __ movzwl($dst$$Register, $mem$$Address);
6072   %}
6073 
6074   ins_pipe(ialu_reg_mem);
6075 %}
6076 
6077 // Load Unsigned Short/Char to Byte (8 bit signed): (x << 24) >> 24 of a char load becomes one byte load
6078 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6079   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6080 
6081   ins_cost(125);
6082   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6083   ins_encode %{
6084     __ movsbl($dst$$Register, $mem$$Address);
6085   %}
6086   ins_pipe(ialu_reg_mem);
6087 %}
6088 
6089 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: one zero-extending load
6090 instruct loadUS2L(rRegL dst, memory mem)
6091 %{
6092   match(Set dst (ConvI2L (LoadUS mem)));
6093 
6094   ins_cost(125);
6095   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6096 
6097   ins_encode %{
6098     __ movzwq($dst$$Register, $mem$$Address);
6099   %}
6100 
6101   ins_pipe(ialu_reg_mem);
6102 %}
6103 
6104 // Load Unsigned Short/Char with mask 0xFF into Long Register: the mask is folded into a byte-wide load
6105 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6106   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6107 
6108   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6109   ins_encode %{
6110     __ movzbq($dst$$Register, $mem$$Address);
6111   %}
6112   ins_pipe(ialu_reg_mem);
6113 %}
6114 
6115 // Load Unsigned Short/Char, AND it with a 16-bit immediate mask, and widen to long (two instructions)
6116 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6117   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6118   effect(KILL cr); // the AND modifies the flags
6119 
6120   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6121             "andl    $dst, $mask" %}
6122   ins_encode %{
6123     Register result = $dst$$Register;
6124     __ movzwq(result, $mem$$Address);
6125     __ andl(result, $mask$$constant);
6126   %}
6127   ins_pipe(ialu_reg_mem);
6128 %}
6129 
6130 // Load Integer: matches LoadI with a plain 32-bit move from memory
6131 instruct loadI(rRegI dst, memory mem)
6132 %{
6133   match(Set dst (LoadI mem));
6134 
6135   ins_cost(125);
6136   format %{ "movl    $dst, $mem\t# int" %}
6137 
6138   ins_encode %{
6139     __ movl($dst$$Register, $mem$$Address);
6140   %}
6141 
6142   ins_pipe(ialu_reg_mem);
6143 %}
6144 
6145 // Load Integer (32 bit signed) to Byte (8 bit signed): (x << 24) >> 24 of an int load becomes one byte load
6146 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6147   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6148 
6149   ins_cost(125);
6150   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6151   ins_encode %{
6152     __ movsbl($dst$$Register, $mem$$Address);
6153   %}
6154   ins_pipe(ialu_reg_mem);
6155 %}
6156 
6157 // Load Integer (32 bit signed) to Unsigned Byte: int load ANDed with 0xFF becomes one zero-extending byte load
6158 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6159   match(Set dst (AndI (LoadI mem) mask));
6160 
6161   ins_cost(125);
6162   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6163   ins_encode %{
6164     __ movzbl($dst$$Register, $mem$$Address);
6165   %}
6166   ins_pipe(ialu_reg_mem);
6167 %}
6168 
6169 // Load Integer (32 bit signed) to Short (16 bit signed): (x << 16) >> 16 of an int load becomes one word load
6170 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6171   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6172 
6173   ins_cost(125);
6174   format %{ "movswl  $dst, $mem\t# int -> short" %}
6175   ins_encode %{
6176     __ movswl($dst$$Register, $mem$$Address);
6177   %}
6178   ins_pipe(ialu_reg_mem);
6179 %}
6180 
6181 // Load Integer (32 bit signed) to Unsigned Short/Char: int load ANDed with 0xFFFF becomes one zero-extending word load
6182 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6183   match(Set dst (AndI (LoadI mem) mask));
6184 
6185   ins_cost(125);
6186   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6187   ins_encode %{
6188     __ movzwl($dst$$Register, $mem$$Address);
6189   %}
6190   ins_pipe(ialu_reg_mem);
6191 %}
6192 
6193 // Load Integer into Long Register: folds ConvI2L into one sign-extending load
6194 instruct loadI2L(rRegL dst, memory mem)
6195 %{
6196   match(Set dst (ConvI2L (LoadI mem)));
6197 
6198   ins_cost(125);
6199   format %{ "movslq  $dst, $mem\t# int -> long" %}
6200 
6201   ins_encode %{
6202     __ movslq($dst$$Register, $mem$$Address);
6203   %}
6204 
6205   ins_pipe(ialu_reg_mem);
6206 %}
6207 
6208 // Load Integer with mask 0xFF into Long Register: the mask is folded into a byte-wide zero-extending load
6209 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6210   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6211 
6212   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6213   ins_encode %{
6214     __ movzbq($dst$$Register, $mem$$Address);
6215   %}
6216   ins_pipe(ialu_reg_mem);
6217 %}
6218 
6219 // Load Integer with mask 0xFFFF into Long Register: the mask is folded into a word-wide zero-extending load
6220 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6221   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6222 
6223   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6224   ins_encode %{
6225     __ movzwq($dst$$Register, $mem$$Address);
6226   %}
6227   ins_pipe(ialu_reg_mem);
6228 %}
6229 
6230 // Load Integer with a 32-bit mask into Long Register.  andl zero-extends its result, so when the mask has
6231 // bit 31 set (masked int may be negative) an extra movslq restores the sign extension ConvI2L requires.
6232 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6233   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6234   effect(KILL cr); // the AND modifies the flags
6235   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6236             "andl    $dst, $mask" %}
6237   ins_encode %{
6238     Register Rdst = $dst$$Register;
6239     __ movl(Rdst, $mem$$Address);
6240     __ andl(Rdst, $mask$$constant);
6241     if ($mask$$constant < 0) __ movslq(Rdst, Rdst); // negative mask only; not shown in format
6242   %}
6243   ins_pipe(ialu_reg_mem); %}
6244 
6245 // Load Unsigned Integer into Long Register: matches LoadUI2L with a 32-bit move (upper half becomes zero)
6246 instruct loadUI2L(rRegL dst, memory mem)
6247 %{
6248   match(Set dst (LoadUI2L mem));
6249 
6250   ins_cost(125);
6251   format %{ "movl    $dst, $mem\t# uint -> long" %}
6252 
6253   ins_encode %{
6254     __ movl($dst$$Register, $mem$$Address);
6255   %}
6256 
6257   ins_pipe(ialu_reg_mem);
6258 %}
6259 
6260 // Load Long: matches LoadL with a 64-bit move from memory
6261 instruct loadL(rRegL dst, memory mem)
6262 %{
6263   match(Set dst (LoadL mem));
6264 
6265   ins_cost(125);
6266   format %{ "movq    $dst, $mem\t# long" %}
6267 
6268   ins_encode %{
6269     __ movq($dst$$Register, $mem$$Address);
6270   %}
6271 
6272   ins_pipe(ialu_reg_mem); // XXX
6273 %}
6274 
6275 // Load Range: matches LoadRange with a 32-bit move, encoded by hand via opcode 0x8B
6276 instruct loadRange(rRegI dst, memory mem)
6277 %{
6278   match(Set dst (LoadRange mem));
6279 
6280   ins_cost(125); // XXX
6281   format %{ "movl    $dst, $mem\t# range" %}
6282   opcode(0x8B); // MOV r, r/m
6283   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6284   ins_pipe(ialu_reg_mem);
6285 %}
6286 
6287 // Load Pointer: matches LoadP with a 64-bit move, encoded by hand via opcode 0x8B
6288 instruct loadP(rRegP dst, memory mem)
6289 %{
6290   match(Set dst (LoadP mem));
6291 
6292   ins_cost(125); // XXX
6293   format %{ "movq    $dst, $mem\t# ptr" %}
6294   opcode(0x8B); // MOV r, r/m
6295   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // _wide enc class: presumably the REX.W (64-bit) prefix
6296   ins_pipe(ialu_reg_mem); // XXX
6297 %}
6298 
6299 // Load Compressed Pointer: matches LoadN with a 32-bit move into a narrow-oop register
6300 instruct loadN(rRegN dst, memory mem)
6301 %{
6302    match(Set dst (LoadN mem));
6303 
6304    ins_cost(125); // XXX
6305    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6306    ins_encode %{
6307      __ movl($dst$$Register, $mem$$Address);
6308    %}
6309    ins_pipe(ialu_reg_mem); // XXX
6310 %}
6311 
6312 
6313 // Load Klass Pointer: matches LoadKlass with a 64-bit move, same encoding as loadP
6314 instruct loadKlass(rRegP dst, memory mem)
6315 %{
6316   match(Set dst (LoadKlass mem));
6317 
6318   ins_cost(125); // XXX
6319   format %{ "movq    $dst, $mem\t# class" %}
6320   opcode(0x8B); // MOV r, r/m
6321   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6322   ins_pipe(ialu_reg_mem); // XXX
6323 %}
6324 
6325 // Load narrow Klass Pointer: matches LoadNKlass with a 32-bit move into a narrow register
6326 instruct loadNKlass(rRegN dst, memory mem)
6327 %{
6328   match(Set dst (LoadNKlass mem));
6329 
6330   ins_cost(125); // XXX
6331   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6332   ins_encode %{
6333     __ movl($dst$$Register, $mem$$Address);
6334   %}
6335   ins_pipe(ialu_reg_mem); // XXX
6336 %}
6337 
6338 // Load Float: matches LoadF with movss into an XMM register (regF)
6339 instruct loadF(regF dst, memory mem)
6340 %{
6341   match(Set dst (LoadF mem));
6342 
6343   ins_cost(145); // XXX
6344   format %{ "movss   $dst, $mem\t# float" %}
6345   opcode(0xF3, 0x0F, 0x10); // primary is the F3 prefix; it is emitted before the REX byte below
6346   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6347   ins_pipe(pipe_slow); // XXX
6348 %}
6349 
6350 // Load Double via movlpd: selected only when UseXmmLoadAndClearUpper is off (loadD covers the other case)
6351 instruct loadD_partial(regD dst, memory mem)
6352 %{
6353   predicate(!UseXmmLoadAndClearUpper);
6354   match(Set dst (LoadD mem));
6355 
6356   ins_cost(145); // XXX
6357   format %{ "movlpd  $dst, $mem\t# double" %}
6358   opcode(0x66, 0x0F, 0x12); // primary is the 66 prefix; it is emitted before the REX byte below
6359   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6360   ins_pipe(pipe_slow); // XXX
6361 %}
6362 
6363 instruct loadD(regD dst, memory mem) // Load Double via movsd; counterpart of loadD_partial
6364 %{
6365   predicate(UseXmmLoadAndClearUpper); // complementary to the predicate on loadD_partial
6366   match(Set dst (LoadD mem));
6367 
6368   ins_cost(145); // XXX
6369   format %{ "movsd   $dst, $mem\t# double" %}
6370   opcode(0xF2, 0x0F, 0x10); // primary is the F2 prefix; it is emitted before the REX byte below
6371   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6372   ins_pipe(pipe_slow); // XXX
6373 %}
6374 
6375 // Load Aligned Packed Byte to XMM register: matches Load8B, encoded by the movq_ld enc class
6376 instruct loadA8B(regD dst, memory mem) %{
6377   match(Set dst (Load8B mem));
6378   ins_cost(125);
6379   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6380   ins_encode( movq_ld(dst, mem));
6381   ins_pipe( pipe_slow );
6382 %}
6383 
6384 // Load Aligned Packed Short to XMM register: matches Load4S, encoded by the movq_ld enc class
6385 instruct loadA4S(regD dst, memory mem) %{
6386   match(Set dst (Load4S mem));
6387   ins_cost(125);
6388   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6389   ins_encode( movq_ld(dst, mem));
6390   ins_pipe( pipe_slow );
6391 %}
6392 
6393 // Load Aligned Packed Char to XMM register: matches Load4C, encoded by the movq_ld enc class
6394 instruct loadA4C(regD dst, memory mem) %{
6395   match(Set dst (Load4C mem));
6396   ins_cost(125);
6397   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6398   ins_encode( movq_ld(dst, mem));
6399   ins_pipe( pipe_slow );
6400 %}
6401 
6402 // Load Aligned Packed Integer to XMM register: matches Load2I, encoded by the movq_ld enc class
6403 instruct load2IU(regD dst, memory mem) %{
6404   match(Set dst (Load2I mem));
6405   ins_cost(125);
6406   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6407   ins_encode( movq_ld(dst, mem));
6408   ins_pipe( pipe_slow );
6409 %}
6410 
6411 // Load Aligned Packed Single to XMM: matches Load2F; costed at 145, unlike the 125 of the integer packed loads
6412 instruct loadA2F(regD dst, memory mem) %{
6413   match(Set dst (Load2F mem));
6414   ins_cost(145);
6415   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6416   ins_encode( movq_ld(dst, mem));
6417   ins_pipe( pipe_slow );
6418 %}
6419 
6420 // Load Effective Address: matches the indOffset8 address operand itself and materializes it with leaq
6421 instruct leaP8(rRegP dst, indOffset8 mem)
6422 %{
6423   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6424 
6425   ins_cost(110); // XXX
6426   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6427   opcode(0x8D); // LEA r, m
6428   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6429   ins_pipe(ialu_reg_reg_fat);
6430 %}
6431 
6432 instruct leaP32(rRegP dst, indOffset32 mem) // leaq for the indOffset32 address operand
6433 %{
6434   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6435 
6436   ins_cost(110);
6437   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6438   opcode(0x8D); // LEA r, m
6439   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6440   ins_pipe(ialu_reg_reg_fat);
6441 %}
6442 
6443 // instruct leaPIdx(rRegP dst, indIndex mem)
6444 // %{
6445 //   match(Set dst mem);
6446 
6447 //   ins_cost(110);
6448 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6449 //   opcode(0x8D);
6450 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6451 //   ins_pipe(ialu_reg_reg_fat);
6452 // %}
6453 
6454 instruct leaPIdxOff(rRegP dst, indIndexOffset mem) // leaq for the indIndexOffset address operand
6455 %{
6456   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6457 
6458   ins_cost(110);
6459   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6460   opcode(0x8D); // LEA r, m
6461   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6462   ins_pipe(ialu_reg_reg_fat);
6463 %}
6464 
6465 instruct leaPIdxScale(rRegP dst, indIndexScale mem) // leaq for the indIndexScale address operand
6466 %{
6467   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6468 
6469   ins_cost(110);
6470   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6471   opcode(0x8D); // LEA r, m
6472   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6473   ins_pipe(ialu_reg_reg_fat);
6474 %}
6475 
6476 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) // leaq for the indIndexScaleOffset address operand
6477 %{
6478   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6479 
6480   ins_cost(110);
6481   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6482   opcode(0x8D); // LEA r, m
6483   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6484   ins_pipe(ialu_reg_reg_fat);
6485 %}
6486 
6487 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) // leaq for the indPosIndexScaleOffset address operand
6488 %{
6489   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6490 
6491   ins_cost(110);
6492   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6493   opcode(0x8D); // LEA r, m
6494   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6495   ins_pipe(ialu_reg_reg_fat);
6496 %}
6497 
6498 // Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero shift
6499 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6500 %{
6501   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6502   match(Set dst mem); // the operand (defined elsewhere) carries the address expression
6503 
6504   ins_cost(110);
6505   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6506   opcode(0x8D); // LEA r, m
6507   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6508   ins_pipe(ialu_reg_reg_fat);
6509 %}
6510 
6511 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) // leaq for the indOffset8Narrow address operand
6512 %{
6513   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6514   match(Set dst mem);
6515 
6516   ins_cost(110); // XXX
6517   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6518   opcode(0x8D); // LEA r, m
6519   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6520   ins_pipe(ialu_reg_reg_fat);
6521 %}
6522 
6523 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) // leaq for the indOffset32Narrow address operand
6524 %{
6525   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6526   match(Set dst mem);
6527 
6528   ins_cost(110);
6529   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6530   opcode(0x8D); // LEA r, m
6531   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6532   ins_pipe(ialu_reg_reg_fat);
6533 %}
6534 
6535 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) // leaq for the indIndexOffsetNarrow address operand
6536 %{
6537   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6538   match(Set dst mem);
6539 
6540   ins_cost(110);
6541   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6542   opcode(0x8D); // LEA r, m
6543   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6544   ins_pipe(ialu_reg_reg_fat);
6545 %}
6546 
6547 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) // leaq for the indIndexScaleNarrow address operand
6548 %{
6549   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6550   match(Set dst mem);
6551 
6552   ins_cost(110);
6553   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6554   opcode(0x8D); // LEA r, m
6555   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6556   ins_pipe(ialu_reg_reg_fat);
6557 %}
6558 
6559 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) // leaq for the indIndexScaleOffsetNarrow operand
6560 %{
6561   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6562   match(Set dst mem);
6563 
6564   ins_cost(110);
6565   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6566   opcode(0x8D); // LEA r, m
6567   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6568   ins_pipe(ialu_reg_reg_fat);
6569 %}
6570 
6571 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) // leaq for the indPosIndexScaleOffsetNarrow operand
6572 %{
6573   predicate(Universe::narrow_oop_shift() == 0); // only when the narrow-oop shift is zero
6574   match(Set dst mem);
6575 
6576   ins_cost(110);
6577   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6578   opcode(0x8D); // LEA r, m
6579   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6580   ins_pipe(ialu_reg_reg_fat);
6581 %}
6582 
6583 instruct loadConI(rRegI dst, immI src) // load an arbitrary int constant (no explicit ins_cost)
6584 %{
6585   match(Set dst src);
6586 
6587   format %{ "movl    $dst, $src\t# int" %}
6588   ins_encode(load_immI(dst, src)); // enc class defined elsewhere in this file
6589   ins_pipe(ialu_reg_fat); // XXX
6590 %}
6591 
6592 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr) // int zero: xor the register with itself
6593 %{
6594   match(Set dst src);
6595   effect(KILL cr); // XOR modifies the flags
6596 
6597   ins_cost(50);
6598   format %{ "xorl    $dst, $dst\t# int" %}
6599   opcode(0x33); /* + rd */
6600   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6601   ins_pipe(ialu_reg);
6602 %}
6603 
6604 instruct loadConL(rRegL dst, immL src) // arbitrary long constant; costlier (150) than the 0/UL32/L32 special cases
6605 %{
6606   match(Set dst src);
6607 
6608   ins_cost(150);
6609   format %{ "movq    $dst, $src\t# long" %}
6610   ins_encode(load_immL(dst, src)); // enc class defined elsewhere in this file
6611   ins_pipe(ialu_reg);
6612 %}
6613 
6614 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr) // long zero via a 32-bit xor, which also clears the upper half
6615 %{
6616   match(Set dst src);
6617   effect(KILL cr); // XOR modifies the flags
6618 
6619   ins_cost(50);
6620   format %{ "xorl    $dst, $dst\t# long" %}
6621   opcode(0x33); /* + rd */
6622   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX enc class, matching the xorl in the format
6623   ins_pipe(ialu_reg); // XXX
6624 %}
6625 
6626 instruct loadConUL32(rRegL dst, immUL32 src) // long constant that fits an unsigned 32-bit value: 32-bit move
6627 %{
6628   match(Set dst src);
6629 
6630   ins_cost(60);
6631   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6632   ins_encode(load_immUL32(dst, src)); // enc class defined elsewhere in this file
6633   ins_pipe(ialu_reg);
6634 %}
6635 
6636 instruct loadConL32(rRegL dst, immL32 src) // long constant that fits a 32-bit immediate (cost 70, above UL32's 60)
6637 %{
6638   match(Set dst src);
6639 
6640   ins_cost(70);
6641   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6642   ins_encode(load_immL32(dst, src)); // enc class defined elsewhere in this file
6643   ins_pipe(ialu_reg);
6644 %}
6645 
6646 instruct loadConP(rRegP dst, immP src) // load an arbitrary pointer constant (no explicit ins_cost)
6647 %{
6648   match(Set dst src);
6649 
6650   format %{ "movq    $dst, $src\t# ptr" %}
6651   ins_encode(load_immP(dst, src)); // enc class defined elsewhere in this file
6652   ins_pipe(ialu_reg_fat); // XXX
6653 %}
6654 
6655 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr) // NULL pointer via a 32-bit xor, which also clears the upper half
6656 %{
6657   match(Set dst src);
6658   effect(KILL cr); // XOR modifies the flags
6659 
6660   ins_cost(50);
6661   format %{ "xorl    $dst, $dst\t# ptr" %}
6662   opcode(0x33); /* + rd */
6663   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6664   ins_pipe(ialu_reg);
6665 %}
6666 
6667 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr) // pointer constant that fits a positive 32-bit value: 32-bit move
6668 %{
6669   match(Set dst src);
6670   effect(KILL cr); // NOTE(review): a plain movl does not touch flags; confirm against load_immP31 whether this kill is needed
6671 
6672   ins_cost(60);
6673   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6674   ins_encode(load_immP31(dst, src)); // enc class defined elsewhere in this file
6675   ins_pipe(ialu_reg);
6676 %}
6677 
6678 instruct loadConF(regF dst, immF src)
6679 %{
       // General float-constant load into an XMM float register. The format shows
       // the constant as a memory operand ([$src]); encoding is done by load_conF.
6680   match(Set dst src);
6681   ins_cost(125);
6682 
6683   format %{ "movss   $dst, [$src]" %}
6684   ins_encode(load_conF(dst, src));
6685   ins_pipe(pipe_slow);
6686 %}
6687 
6688 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Compressed NULL pointer constant: dst is cleared by xor-ing it with
       // itself, so the flags register is declared killed. The format string
       // names $dst twice to mirror the instruction actually emitted below.
6689   match(Set dst src);
6690   effect(KILL cr);
6691   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6692   ins_encode %{
6693     __ xorq($dst$$Register, $dst$$Register);
6694   %}
6695   ins_pipe(ialu_reg);
6696 %}
6697 
6698 instruct loadConN(rRegN dst, immN src) %{
       // Compressed-pointer constant load. A NULL constant is treated as a fatal
       // error here (presumably it is expected to be matched by loadConN0
       // instead); any other constant is emitted via set_narrow_oop.
6699   match(Set dst src);
6700 
6701   ins_cost(125);
6702   format %{ "movl    $dst, $src\t# compressed ptr" %}
6703   ins_encode %{
6704     address con = (address)$src$$constant;
6705     if (con == NULL) {
6706       ShouldNotReachHere();
6707     } else {
6708       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6709     }
6710   %}
6711   ins_pipe(ialu_reg_fat); // XXX
6712 %}
6713 
6714 instruct loadConF0(regF dst, immF0 src)
6715 %{
       // Float 0.0 constant: dst is cleared with xorps dst,dst (opcode 0F 57).
       // Cost 100 is lower than the general loadConF (125).
6716   match(Set dst src);
6717   ins_cost(100);
6718 
6719   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6720   opcode(0x0F, 0x57);
6721   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6722   ins_pipe(pipe_slow);
6723 %}
6724 
6725 // Use the same format since predicate() can not be used here.
6726 instruct loadConD(regD dst, immD src)
6727 %{
       // General double-constant load into an XMM double register. The format
       // shows the constant as a memory operand ([$src]); encoding is done by
       // load_conD.
6728   match(Set dst src);
6729   ins_cost(125);
6730 
6731   format %{ "movsd   $dst, [$src]" %}
6732   ins_encode(load_conD(dst, src));
6733   ins_pipe(pipe_slow);
6734 %}
6735 
6736 instruct loadConD0(regD dst, immD0 src)
6737 %{
       // Double 0.0 constant: dst is cleared with xorpd dst,dst (66 0F 57). The
       // first opcode byte (0x66) is emitted before the REX prefix encoder runs.
6738   match(Set dst src);
6739   ins_cost(100);
6740 
6741   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6742   opcode(0x66, 0x0F, 0x57);
6743   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6744   ins_pipe(pipe_slow);
6745 %}
6746 
6747 instruct loadSSI(rRegI dst, stackSlotI src)
6748 %{
       // Moves an int from a stack slot into an int register (opcode 0x8B,
       // register <- memory form).
6749   match(Set dst src);
6750 
6751   ins_cost(125);
6752   format %{ "movl    $dst, $src\t# int stk" %}
6753   opcode(0x8B);
6754   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6755   ins_pipe(ialu_reg_mem);
6756 %}
6757 
6758 instruct loadSSL(rRegL dst, stackSlotL src)
6759 %{
       // Moves a long from a stack slot into a long register; same opcode as
       // loadSSI but using the "wide" REX encoder for the 64-bit form.
6760   match(Set dst src);
6761 
6762   ins_cost(125);
6763   format %{ "movq    $dst, $src\t# long stk" %}
6764   opcode(0x8B);
6765   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6766   ins_pipe(ialu_reg_mem);
6767 %}
6768 
6769 instruct loadSSP(rRegP dst, stackSlotP src)
6770 %{
       // Moves a pointer from a stack slot into a pointer register; encoded
       // identically to loadSSL (opcode 0x8B with the wide REX encoder).
6771   match(Set dst src);
6772 
6773   ins_cost(125);
6774   format %{ "movq    $dst, $src\t# ptr stk" %}
6775   opcode(0x8B);
6776   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6777   ins_pipe(ialu_reg_mem);
6778 %}
6779 
6780 instruct loadSSF(regF dst, stackSlotF src)
6781 %{
       // Moves a float from a stack slot into an XMM float register using movss
       // (F3 0F 10); the F3 prefix byte is emitted ahead of the REX prefix.
6782   match(Set dst src);
6783 
6784   ins_cost(125);
6785   format %{ "movss   $dst, $src\t# float stk" %}
6786   opcode(0xF3, 0x0F, 0x10);
6787   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6788   ins_pipe(pipe_slow); // XXX
6789 %}
6790 
6791 // Use the same format since predicate() can not be used here.
6792 instruct loadSSD(regD dst, stackSlotD src)
6793 %{
       // Moves a double from a stack slot into an XMM double register. The
       // encoding delegates to the assembler's movdbl with an rsp-relative
       // address built from the slot displacement; the format always prints movsd.
6794   match(Set dst src);
6795 
6796   ins_cost(125);
6797   format %{ "movsd   $dst, $src\t# double stk" %}
6798   ins_encode  %{
6799     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6800   %}
6801   ins_pipe(pipe_slow); // XXX
6802 %}
6803 
6804 // Prefetch instructions.
6805 // Must be safe to execute with invalid address (cannot fault).
6806 
6807 instruct prefetchr( memory mem ) %{
       // Read prefetch variant chosen when ReadPrefetchInstr == 3: PREFETCHR,
       // opcode 0F 0D with ModRM reg field /0.
6808   predicate(ReadPrefetchInstr==3);
6809   match(PrefetchRead mem);
6810   ins_cost(125);
6811 
6812   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6813   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6814   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6815   ins_pipe(ialu_mem);
6816 %}
6817 
6818 instruct prefetchrNTA( memory mem ) %{
       // Read prefetch variant chosen when ReadPrefetchInstr == 0: PREFETCHNTA,
       // opcode 0F 18 with ModRM reg field /0.
6819   predicate(ReadPrefetchInstr==0);
6820   match(PrefetchRead mem);
6821   ins_cost(125);
6822 
6823   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6824   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6825   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6826   ins_pipe(ialu_mem);
6827 %}
6828 
6829 instruct prefetchrT0( memory mem ) %{
       // Read prefetch variant chosen when ReadPrefetchInstr == 1: PREFETCHT0,
       // opcode 0F 18 with ModRM reg field /1.
6830   predicate(ReadPrefetchInstr==1);
6831   match(PrefetchRead mem);
6832   ins_cost(125);
6833 
6834   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6835   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6836   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6837   ins_pipe(ialu_mem);
6838 %}
6839 
6840 instruct prefetchrT2( memory mem ) %{
       // Read prefetch variant chosen when ReadPrefetchInstr == 2: PREFETCHT2,
       // opcode 0F 18 with ModRM reg field /3.
6841   predicate(ReadPrefetchInstr==2);
6842   match(PrefetchRead mem);
6843   ins_cost(125);
6844 
6845   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6846   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6847   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6848   ins_pipe(ialu_mem);
6849 %}
6850 
6851 instruct prefetchw( memory mem ) %{
       // Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 3:
       // PREFETCHW, opcode 0F 0D with ModRM reg field /1.
6852   predicate(AllocatePrefetchInstr==3);
6853   match(PrefetchWrite mem);
6854   ins_cost(125);
6855 
6856   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6857   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6858   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6859   ins_pipe(ialu_mem);
6860 %}
6861 
6862 instruct prefetchwNTA( memory mem ) %{
       // Write prefetch variant chosen when AllocatePrefetchInstr == 0:
       // PREFETCHNTA, opcode 0F 18 with ModRM reg field /0.
6863   predicate(AllocatePrefetchInstr==0);
6864   match(PrefetchWrite mem);
6865   ins_cost(125);
6866 
6867   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6868   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6869   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6870   ins_pipe(ialu_mem);
6871 %}
6872 
6873 instruct prefetchwT0( memory mem ) %{
       // Write prefetch variant chosen when AllocatePrefetchInstr == 1:
       // PREFETCHT0, opcode 0F 18 with ModRM reg field /1.
6874   predicate(AllocatePrefetchInstr==1);
6875   match(PrefetchWrite mem);
6876   ins_cost(125);
6877 
6878   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6879   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6880   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6881   ins_pipe(ialu_mem);
6882 %}
6883 
6884 instruct prefetchwT2( memory mem ) %{
       // Write prefetch variant chosen when AllocatePrefetchInstr == 2:
       // PREFETCHT2, opcode 0F 18 with ModRM reg field /3.
6885   predicate(AllocatePrefetchInstr==2);
6886   match(PrefetchWrite mem);
6887   ins_cost(125);
6888 
6889   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6890   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6891   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6892   ins_pipe(ialu_mem);
6893 %}
6894 
6895 //----------Store Instructions-------------------------------------------------
6896 
6897 // Store Byte
6898 instruct storeB(memory mem, rRegI src)
6899 %{
       // Stores the low byte of an int register to memory (opcode 0x88). Uses the
       // byte-register REX encoder (REX_breg_mem) rather than the plain one.
6900   match(Set mem (StoreB mem src));
6901 
6902   ins_cost(125); // XXX
6903   format %{ "movb    $mem, $src\t# byte" %}
6904   opcode(0x88);
6905   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6906   ins_pipe(ialu_mem_reg);
6907 %}
6908 
6909 // Store Char/Short
6910 instruct storeC(memory mem, rRegI src)
6911 %{
       // Stores a 16-bit char/short from an int register to memory: opcode 0x89
       // preceded by the operand-size prefix (SizePrefix) to select the 16-bit form.
6912   match(Set mem (StoreC mem src));
6913 
6914   ins_cost(125); // XXX
6915   format %{ "movw    $mem, $src\t# char/short" %}
6916   opcode(0x89);
6917   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6918   ins_pipe(ialu_mem_reg);
6919 %}
6920 
6921 // Store Integer
6922 instruct storeI(memory mem, rRegI src)
6923 %{
       // Stores a 32-bit int register to memory (opcode 0x89, memory <- register).
6924   match(Set mem (StoreI mem src));
6925 
6926   ins_cost(125); // XXX
6927   format %{ "movl    $mem, $src\t# int" %}
6928   opcode(0x89);
6929   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6930   ins_pipe(ialu_mem_reg);
6931 %}
6932 
6933 // Store Long
6934 instruct storeL(memory mem, rRegL src)
6935 %{
       // Stores a 64-bit long register to memory: same opcode as storeI (0x89)
       // but with the wide REX encoder.
6936   match(Set mem (StoreL mem src));
6937 
6938   ins_cost(125); // XXX
6939   format %{ "movq    $mem, $src\t# long" %}
6940   opcode(0x89);
6941   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6942   ins_pipe(ialu_mem_reg); // XXX
6943 %}
6944 
6945 // Store Pointer
6946 instruct storeP(memory mem, any_RegP src)
6947 %{
       // Stores a 64-bit pointer register to memory (opcode 0x89, wide REX). The
       // source operand class is any_RegP rather than rRegP.
6948   match(Set mem (StoreP mem src));
6949 
6950   ins_cost(125); // XXX
6951   format %{ "movq    $mem, $src\t# ptr" %}
6952   opcode(0x89);
6953   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6954   ins_pipe(ialu_mem_reg);
6955 %}
6956 
6957 instruct storeImmP0(memory mem, immP0 zero)
6958 %{
       // NULL pointer store that writes R12 instead of an immediate. Only enabled
       // when compressed oops are on and the narrow-oop base is NULL; per the
       // format string R12 (heap base) is zero in that configuration.
6959   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6960   match(Set mem (StoreP mem zero));
6961 
6962   ins_cost(125); // XXX
6963   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6964   ins_encode %{
6965     __ movq($mem$$Address, r12);
6966   %}
6967   ins_pipe(ialu_mem_reg);
6968 %}
6969 
6970 // Store NULL Pointer, mark word, or other simple pointer constant.
6971 instruct storeImmP(memory mem, immP31 src)
6972 %{
       // Stores a pointer constant that satisfies immP31 as a 32-bit immediate
       // with a 64-bit store (opcode C7 /0, wide REX, Con32 immediate).
6973   match(Set mem (StoreP mem src));
6974 
6975   ins_cost(150); // XXX
6976   format %{ "movq    $mem, $src\t# ptr" %}
6977   opcode(0xC7); /* C7 /0 */
6978   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6979   ins_pipe(ialu_mem_imm);
6980 %}
6981 
6982 // Store Compressed Pointer
6983 instruct storeN(memory mem, rRegN src)
6984 %{
       // Stores a compressed (narrow) pointer register to memory with a 32-bit
       // movl emitted through the macro assembler.
6985   match(Set mem (StoreN mem src));
6986 
6987   ins_cost(125); // XXX
6988   format %{ "movl    $mem, $src\t# compressed ptr" %}
6989   ins_encode %{
6990     __ movl($mem$$Address, $src$$Register);
6991   %}
6992   ins_pipe(ialu_mem_reg);
6993 %}
6994 
6995 instruct storeImmN0(memory mem, immN0 zero)
6996 %{
       // Compressed NULL store that writes the low 32 bits of R12 instead of an
       // immediate. Enabled when the narrow-oop base is NULL; unlike its sibling
       // *0 store rules, the predicate does not also test UseCompressedOops.
6997   predicate(Universe::narrow_oop_base() == NULL);
6998   match(Set mem (StoreN mem zero));
6999 
7000   ins_cost(125); // XXX
7001   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7002   ins_encode %{
7003     __ movl($mem$$Address, r12);
7004   %}
7005   ins_pipe(ialu_mem_reg);
7006 %}
7007 
7008 instruct storeImmN(memory mem, immN src)
7009 %{
       // Stores a compressed-pointer constant to memory. A NULL constant is
       // written as a literal 32-bit zero; any other constant goes through
       // set_narrow_oop on the memory address.
7010   match(Set mem (StoreN mem src));
7011 
7012   ins_cost(150); // XXX
7013   format %{ "movl    $mem, $src\t# compressed ptr" %}
7014   ins_encode %{
7015     address con = (address)$src$$constant;
7016     if (con == NULL) {
7017       __ movl($mem$$Address, (int32_t)0);
7018     } else {
7019       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7020     }
7021   %}
7022   ins_pipe(ialu_mem_imm);
7023 %}
7024 
7025 // Store Integer Immediate
7026 instruct storeImmI0(memory mem, immI0 zero)
7027 %{
       // Int zero store that writes the low 32 bits of R12 instead of an
       // immediate. Only enabled when compressed oops are on and the narrow-oop
       // base is NULL (format string: R12_heapbase==0). Cost 125 vs 150 for storeImmI.
7028   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7029   match(Set mem (StoreI mem zero));
7030 
7031   ins_cost(125); // XXX
7032   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7033   ins_encode %{
7034     __ movl($mem$$Address, r12);
7035   %}
7036   ins_pipe(ialu_mem_reg);
7037 %}
7038 
7039 instruct storeImmI(memory mem, immI src)
7040 %{
       // Stores a 32-bit int immediate to memory (opcode C7 /0 followed by a
       // 32-bit constant).
7041   match(Set mem (StoreI mem src));
7042 
7043   ins_cost(150);
7044   format %{ "movl    $mem, $src\t# int" %}
7045   opcode(0xC7); /* C7 /0 */
7046   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7047   ins_pipe(ialu_mem_imm);
7048 %}
7049 
7050 // Store Long Immediate
7051 instruct storeImmL0(memory mem, immL0 zero)
7052 %{
       // Long zero store that writes R12 (64-bit) instead of an immediate. Only
       // enabled when compressed oops are on and the narrow-oop base is NULL
       // (format string: R12_heapbase==0).
7053   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7054   match(Set mem (StoreL mem zero));
7055 
7056   ins_cost(125); // XXX
7057   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7058   ins_encode %{
7059     __ movq($mem$$Address, r12);
7060   %}
7061   ins_pipe(ialu_mem_reg);
7062 %}
7063 
7064 instruct storeImmL(memory mem, immL32 src)
7065 %{
       // Stores a long constant that satisfies immL32 using a 64-bit store with a
       // 32-bit immediate (opcode C7 /0, wide REX, Con32).
7066   match(Set mem (StoreL mem src));
7067 
7068   ins_cost(150);
7069   format %{ "movq    $mem, $src\t# long" %}
7070   opcode(0xC7); /* C7 /0 */
7071   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7072   ins_pipe(ialu_mem_imm);
7073 %}
7074 
7075 // Store Short/Char Immediate
7076 instruct storeImmC0(memory mem, immI0 zero)
7077 %{
       // Short/char zero store that writes the low 16 bits of R12 instead of an
       // immediate. Only enabled when compressed oops are on and the narrow-oop
       // base is NULL (format string: R12_heapbase==0).
7078   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7079   match(Set mem (StoreC mem zero));
7080 
7081   ins_cost(125); // XXX
7082   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7083   ins_encode %{
7084     __ movw($mem$$Address, r12);
7085   %}
7086   ins_pipe(ialu_mem_reg);
7087 %}
7088 
7089 instruct storeImmI16(memory mem, immI16 src)
7090 %{
       // Stores a 16-bit immediate to a short/char location; gated by the
       // UseStoreImmI16 flag. Encoded as the 32-bit C7 /0 store with an
       // operand-size prefix and a 16-bit constant.
7091   predicate(UseStoreImmI16);
7092   match(Set mem (StoreC mem src));
7093 
7094   ins_cost(150);
7095   format %{ "movw    $mem, $src\t# short/char" %}
7096   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7097   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7098   ins_pipe(ialu_mem_imm);
7099 %}
7100 
7101 // Store Byte Immediate
7102 instruct storeImmB0(memory mem, immI0 zero)
7103 %{
       // Byte zero store that writes the low 8 bits of R12 instead of an
       // immediate. Only enabled when compressed oops are on and the narrow-oop
       // base is NULL (format string: R12_heapbase==0). The format annotation
       // says "byte" to match the StoreB node and the movb that is emitted.
7104   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7105   match(Set mem (StoreB mem zero));
7106 
7107   ins_cost(125); // XXX
7108   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7109   ins_encode %{
7110     __ movb($mem$$Address, r12);
7111   %}
7112   ins_pipe(ialu_mem_reg);
7113 %}
7114 
7115 instruct storeImmB(memory mem, immI8 src)
7116 %{
       // Stores an 8-bit immediate to a byte location (opcode C6 /0); the
       // constant is emitted by the Con8or32 encoder.
7117   match(Set mem (StoreB mem src));
7118 
7119   ins_cost(150); // XXX
7120   format %{ "movb    $mem, $src\t# byte" %}
7121   opcode(0xC6); /* C6 /0 */
7122   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7123   ins_pipe(ialu_mem_imm);
7124 %}
7125 
7126 // Store Aligned Packed Byte XMM register to memory
7127 instruct storeA8B(memory mem, regD src) %{
       // Packed 8-byte vector store (Store8B) from an XMM register; shares the
       // movq_st encoding with the other packed-store rules in this group.
7128   match(Set mem (Store8B mem src));
7129   ins_cost(145);
7130   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7131   ins_encode( movq_st(mem, src));
7132   ins_pipe( pipe_slow );
7133 %}
7134 
7135 // Store Aligned Packed Char/Short XMM register to memory
7136 instruct storeA4C(memory mem, regD src) %{
       // Packed 4 x char/short vector store (Store4C) from an XMM register;
       // encoded with movq_st like the other packed-store rules.
7137   match(Set mem (Store4C mem src));
7138   ins_cost(145);
7139   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7140   ins_encode( movq_st(mem, src));
7141   ins_pipe( pipe_slow );
7142 %}
7143 
7144 // Store Aligned Packed Integer XMM register to memory
7145 instruct storeA2I(memory mem, regD src) %{
       // Packed 2 x int vector store (Store2I) from an XMM register; encoded
       // with movq_st like the other packed-store rules.
7146   match(Set mem (Store2I mem src));
7147   ins_cost(145);
7148   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7149   ins_encode( movq_st(mem, src));
7150   ins_pipe( pipe_slow );
7151 %}
7152 
7153 // Store CMS card-mark Immediate
7154 instruct storeImmCM0_reg(memory mem, immI0 zero)
7155 %{
       // Card-mark (StoreCM) zero-byte store that writes the low byte of R12
       // instead of an immediate. Only enabled when compressed oops are on and the
       // narrow-oop base is NULL (format string: R12_heapbase==0).
7156   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7157   match(Set mem (StoreCM mem zero));
7158 
7159   ins_cost(125); // XXX
7160   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7161   ins_encode %{
7162     __ movb($mem$$Address, r12);
7163   %}
7164   ins_pipe(ialu_mem_reg);
7165 %}
7166 
7167 instruct storeImmCM0(memory mem, immI0 src)
7168 %{
       // Card-mark (StoreCM) zero-byte store using an immediate (opcode C6 /0).
       // No predicate, so it is always available; cost 150 vs 125 for the R12 form.
7169   match(Set mem (StoreCM mem src));
7170 
7171   ins_cost(150); // XXX
7172   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7173   opcode(0xC6); /* C6 /0 */
7174   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7175   ins_pipe(ialu_mem_imm);
7176 %}
7177 
7178 // Store Aligned Packed Single Float XMM register to memory
7179 instruct storeA2F(memory mem, regD src) %{
       // Packed 2 x float vector store (Store2F) from an XMM register; encoded
       // with movq_st like the other packed-store rules.
7180   match(Set mem (Store2F mem src));
7181   ins_cost(145);
7182   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7183   ins_encode( movq_st(mem, src));
7184   ins_pipe( pipe_slow );
7185 %}
7186 
7187 // Store Float
7188 instruct storeF(memory mem, regF src)
7189 %{
       // Stores a float from an XMM register to memory with movss (F3 0F 11);
       // the F3 prefix byte is emitted ahead of the REX prefix.
7190   match(Set mem (StoreF mem src));
7191 
7192   ins_cost(95); // XXX
7193   format %{ "movss   $mem, $src\t# float" %}
7194   opcode(0xF3, 0x0F, 0x11);
7195   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7196   ins_pipe(pipe_slow); // XXX
7197 %}
7198 
7199 // Store immediate Float value (it is faster than store from XMM register)
7200 instruct storeF0(memory mem, immF0 zero)
7201 %{
       // Float 0.0 store that writes the low 32 bits of R12 instead of using an
       // XMM register or immediate. Only enabled when compressed oops are on and
       // the narrow-oop base is NULL. Lowest cost (25) of the float store rules.
7202   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7203   match(Set mem (StoreF mem zero));
7204 
7205   ins_cost(25); // XXX
7206   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7207   ins_encode %{
7208     __ movl($mem$$Address, r12);
7209   %}
7210   ins_pipe(ialu_mem_reg);
7211 %}
7212 
7213 instruct storeF_imm(memory mem, immF src)
7214 %{
       // Stores a float constant as an integer immediate (opcode C7 /0); the
       // Con32F_as_bits encoder name indicates the float's bit pattern is emitted.
7215   match(Set mem (StoreF mem src));
7216 
7217   ins_cost(50);
7218   format %{ "movl    $mem, $src\t# float" %}
7219   opcode(0xC7); /* C7 /0 */
7220   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7221   ins_pipe(ialu_mem_imm);
7222 %}
7223 
7224 // Store Double
7225 instruct storeD(memory mem, regD src)
7226 %{
       // Stores a double from an XMM register to memory with movsd (F2 0F 11);
       // the F2 prefix byte is emitted ahead of the REX prefix.
7227   match(Set mem (StoreD mem src));
7228 
7229   ins_cost(95); // XXX
7230   format %{ "movsd   $mem, $src\t# double" %}
7231   opcode(0xF2, 0x0F, 0x11);
7232   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7233   ins_pipe(pipe_slow); // XXX
7234 %}
7235 
7236 // Store immediate double 0.0 (it is faster than store from XMM register)
7237 instruct storeD0_imm(memory mem, immD0 src)
7238 %{
       // Double 0.0 store using a 64-bit store of a 32-bit immediate (C7 /0, wide
       // REX). Its predicate is the exact negation of storeD0's, so this form is
       // used whenever the R12-based form is not enabled.
7239   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7240   match(Set mem (StoreD mem src));
7241 
7242   ins_cost(50);
7243   format %{ "movq    $mem, $src\t# double 0." %}
7244   opcode(0xC7); /* C7 /0 */
7245   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7246   ins_pipe(ialu_mem_imm);
7247 %}
7248 
7249 instruct storeD0(memory mem, immD0 zero)
7250 %{
       // Double 0.0 store that writes R12 (64-bit) instead of an immediate. Only
       // enabled when compressed oops are on and the narrow-oop base is NULL
       // (format string: R12_heapbase==0). Cost 25 vs 50 for storeD0_imm.
7251   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7252   match(Set mem (StoreD mem zero));
7253 
7254   ins_cost(25); // XXX
7255   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7256   ins_encode %{
7257     __ movq($mem$$Address, r12);
7258   %}
7259   ins_pipe(ialu_mem_reg);
7260 %}
7261 
7262 instruct storeSSI(stackSlotI dst, rRegI src)
7263 %{
       // Moves an int register into an int stack slot (opcode 0x89, memory <-
       // register form).
7264   match(Set dst src);
7265 
7266   ins_cost(100);
7267   format %{ "movl    $dst, $src\t# int stk" %}
7268   opcode(0x89);
7269   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7270   ins_pipe( ialu_mem_reg );
7271 %}
7272 
7273 instruct storeSSL(stackSlotL dst, rRegL src)
7274 %{
       // Moves a long register into a long stack slot (opcode 0x89 with the wide
       // REX encoder).
7275   match(Set dst src);
7276 
7277   ins_cost(100);
7278   format %{ "movq    $dst, $src\t# long stk" %}
7279   opcode(0x89);
7280   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7281   ins_pipe(ialu_mem_reg);
7282 %}
7283 
7284 instruct storeSSP(stackSlotP dst, rRegP src)
7285 %{
       // Moves a pointer register into a pointer stack slot; encoded identically
       // to storeSSL (opcode 0x89 with the wide REX encoder).
7286   match(Set dst src);
7287 
7288   ins_cost(100);
7289   format %{ "movq    $dst, $src\t# ptr stk" %}
7290   opcode(0x89);
7291   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7292   ins_pipe(ialu_mem_reg);
7293 %}
7294 
7295 instruct storeSSF(stackSlotF dst, regF src)
7296 %{
       // Moves an XMM float register into a float stack slot with movss
       // (F3 0F 11).
7297   match(Set dst src);
7298 
7299   ins_cost(95); // XXX
7300   format %{ "movss   $dst, $src\t# float stk" %}
7301   opcode(0xF3, 0x0F, 0x11);
7302   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7303   ins_pipe(pipe_slow); // XXX
7304 %}
7305 
7306 instruct storeSSD(stackSlotD dst, regD src)
7307 %{
       // Moves an XMM double register into a double stack slot with movsd
       // (F2 0F 11).
7308   match(Set dst src);
7309 
7310   ins_cost(95); // XXX
7311   format %{ "movsd   $dst, $src\t# double stk" %}
7312   opcode(0xF2, 0x0F, 0x11);
7313   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7314   ins_pipe(pipe_slow); // XXX
7315 %}
7316 
7317 //----------BSWAP Instructions-------------------------------------------------
7318 instruct bytes_reverse_int(rRegI dst) %{
       // In-place byte reversal of a 32-bit register (ReverseBytesI with dst as
       // both input and output), encoded as bswap: 0F followed by C8 + register.
7319   match(Set dst (ReverseBytesI dst));
7320 
7321   format %{ "bswapl  $dst" %}
7322   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7323   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7324   ins_pipe( ialu_reg );
7325 %}
7326 
7327 instruct bytes_reverse_long(rRegL dst) %{
       // In-place byte reversal of a 64-bit register (ReverseBytesL); same bswap
       // opcode as the int form but with the wide REX encoder.
7328   match(Set dst (ReverseBytesL dst));
7329 
7330   format %{ "bswapq  $dst" %}
7331 
7332   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7333   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7334   ins_pipe( ialu_reg);
7335 %}
7336 
7337 instruct loadI_reversed(rRegI dst, memory src) %{
       // Fuses LoadI + ReverseBytesI into two machine instructions: a movl load
       // into dst (opcode 8B) followed by a bswap of dst (0F, C8 + register).
7338   match(Set dst (ReverseBytesI (LoadI src)));
7339 
7340   format %{ "bswap_movl $dst, $src" %}
7341   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7342   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7343   ins_pipe( ialu_reg_mem );
7344 %}
7345 
7346 instruct loadL_reversed(rRegL dst, memory src) %{
       // Fuses LoadL + ReverseBytesL: a 64-bit load into dst (opcode 8B, wide
       // REX) followed by a 64-bit bswap of dst (0F, C8 + register, wide REX).
7347   match(Set dst (ReverseBytesL (LoadL src)));
7348 
7349   format %{ "bswap_movq $dst, $src" %}
7350   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7351   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7352   ins_pipe( ialu_reg_mem );
7353 %}
7354 
7355 instruct storeI_reversed(memory dst, rRegI src) %{
       // Fuses ReverseBytesI + StoreI: bswap is applied to the src register
       // (0F, C8 + register) and src is then stored to memory (opcode 89).
       // NOTE(review): the bswap modifies src in place, yet no effect() clause
       // declares src as changed; confirm the register allocator cannot assume
       // src still holds the un-swapped value after this instruction.
7356   match(Set dst (StoreI dst (ReverseBytesI  src)));
7357 
7358   format %{ "movl_bswap $dst, $src" %}
7359   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7360   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7361   ins_pipe( ialu_mem_reg );
7362 %}
7363 
7364 instruct storeL_reversed(memory dst, rRegL src) %{
       // Fuses ReverseBytesL + StoreL: a 64-bit bswap is applied to the src
       // register and src is then stored to memory (opcode 89, wide REX).
       // NOTE(review): the bswap modifies src in place, yet no effect() clause
       // declares src as changed; confirm the register allocator cannot assume
       // src still holds the un-swapped value after this instruction.
7365   match(Set dst (StoreL dst (ReverseBytesL  src)));
7366 
7367   format %{ "movq_bswap $dst, $src" %}
7368   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7369   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7370   ins_pipe( ialu_mem_reg );
7371 %}
7372 
7373 
7374 //---------- Zeros Count Instructions ------------------------------------------
7375 
7376 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Int leading-zero count using a single lzcntl; selected only when
       // UseCountLeadingZerosInstruction is set. Flags are declared killed.
7377   predicate(UseCountLeadingZerosInstruction);
7378   match(Set dst (CountLeadingZerosI src));
7379   effect(KILL cr);
7380 
7381   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7382   ins_encode %{
7383     __ lzcntl($dst$$Register, $src$$Register);
7384   %}
7385   ins_pipe(ialu_reg);
7386 %}
7387 
7388 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Int leading-zero count fallback used when the lzcnt rule is disabled.
       // bsrl yields the index of the highest set bit; if src was zero (branch
       // not taken) dst is forced to -1. The result is then (BitsPerInt - 1) - dst,
       // computed as negate-then-add, so a zero input produces BitsPerInt.
7389   predicate(!UseCountLeadingZerosInstruction);
7390   match(Set dst (CountLeadingZerosI src));
7391   effect(KILL cr);
7392 
7393   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7394             "jnz     skip\n\t"
7395             "movl    $dst, -1\n"
7396       "skip:\n\t"
7397             "negl    $dst\n\t"
7398             "addl    $dst, 31" %}
7399   ins_encode %{
7400     Register Rdst = $dst$$Register;
7401     Register Rsrc = $src$$Register;
7402     Label skip;
7403     __ bsrl(Rdst, Rsrc);
7404     __ jccb(Assembler::notZero, skip);
7405     __ movl(Rdst, -1);
7406     __ bind(skip);
7407     __ negl(Rdst);
7408     __ addl(Rdst, BitsPerInt - 1);
7409   %}
7410   ins_pipe(ialu_reg);
7411 %}
7412 
7413 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Long leading-zero count using a single lzcntq; selected only when
       // UseCountLeadingZerosInstruction is set. The result operand is an int
       // register even though the source is a long.
7414   predicate(UseCountLeadingZerosInstruction);
7415   match(Set dst (CountLeadingZerosL src));
7416   effect(KILL cr);
7417 
7418   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7419   ins_encode %{
7420     __ lzcntq($dst$$Register, $src$$Register);
7421   %}
7422   ins_pipe(ialu_reg);
7423 %}
7424 
7425 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Long leading-zero count fallback used when the lzcnt rule is disabled.
       // bsrq yields the index of the highest set bit; if src was zero (branch
       // not taken) dst is forced to -1. The result is then (BitsPerLong - 1) - dst,
       // computed with 32-bit negl/addl, so a zero input produces BitsPerLong.
7426   predicate(!UseCountLeadingZerosInstruction);
7427   match(Set dst (CountLeadingZerosL src));
7428   effect(KILL cr);
7429 
7430   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7431             "jnz     skip\n\t"
7432             "movl    $dst, -1\n"
7433       "skip:\n\t"
7434             "negl    $dst\n\t"
7435             "addl    $dst, 63" %}
7436   ins_encode %{
7437     Register Rdst = $dst$$Register;
7438     Register Rsrc = $src$$Register;
7439     Label skip;
7440     __ bsrq(Rdst, Rsrc);
7441     __ jccb(Assembler::notZero, skip);
7442     __ movl(Rdst, -1);
7443     __ bind(skip);
7444     __ negl(Rdst);
7445     __ addl(Rdst, BitsPerLong - 1);
7446   %}
7447   ins_pipe(ialu_reg);
7448 %}
7449 
7450 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Int trailing-zero count: bsfl yields the index of the lowest set bit;
       // when src is zero (branch not taken) dst is set to BitsPerInt instead.
       // No predicate, so this rule is unconditional.
7451   match(Set dst (CountTrailingZerosI src));
7452   effect(KILL cr);
7453 
7454   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7455             "jnz     done\n\t"
7456             "movl    $dst, 32\n"
7457       "done:" %}
7458   ins_encode %{
7459     Register Rdst = $dst$$Register;
7460     Label done;
7461     __ bsfl(Rdst, $src$$Register);
7462     __ jccb(Assembler::notZero, done);
7463     __ movl(Rdst, BitsPerInt);
7464     __ bind(done);
7465   %}
7466   ins_pipe(ialu_reg);
7467 %}
7468 
7469 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Long trailing-zero count: bsfq yields the index of the lowest set bit;
       // when src is zero (branch not taken) dst is set to BitsPerLong instead.
       // The result operand is an int register.
7470   match(Set dst (CountTrailingZerosL src));
7471   effect(KILL cr);
7472 
7473   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7474             "jnz     done\n\t"
7475             "movl    $dst, 64\n"
7476       "done:" %}
7477   ins_encode %{
7478     Register Rdst = $dst$$Register;
7479     Label done;
7480     __ bsfq(Rdst, $src$$Register);
7481     __ jccb(Assembler::notZero, done);
7482     __ movl(Rdst, BitsPerLong);
7483     __ bind(done);
7484   %}
7485   ins_pipe(ialu_reg);
7486 %}
7487 
7488 
7489 //---------- Population Count Instructions -------------------------------------
7490 
7491 instruct popCountI(rRegI dst, rRegI src) %{
       // Int population count from a register via popcntl; selected only when
       // UsePopCountInstruction is set.
7492   predicate(UsePopCountInstruction);
7493   match(Set dst (PopCountI src));
7494 
7495   format %{ "popcnt  $dst, $src" %}
7496   ins_encode %{
7497     __ popcntl($dst$$Register, $src$$Register);
7498   %}
7499   ins_pipe(ialu_reg);
7500 %}
7501 
7502 instruct popCountI_mem(rRegI dst, memory mem) %{
       // Int population count with the LoadI folded in: popcntl reads its source
       // directly from memory. Selected only when UsePopCountInstruction is set.
7503   predicate(UsePopCountInstruction);
7504   match(Set dst (PopCountI (LoadI mem)));
7505 
7506   format %{ "popcnt  $dst, $mem" %}
7507   ins_encode %{
7508     __ popcntl($dst$$Register, $mem$$Address);
7509   %}
7510   ins_pipe(ialu_reg);
7511 %}
7512 
7513 // Note: Long.bitCount(long) returns an int.
7514 instruct popCountL(rRegI dst, rRegL src) %{
       // Long population count from a register via popcntq; the result goes to
       // an int register. Selected only when UsePopCountInstruction is set.
7515   predicate(UsePopCountInstruction);
7516   match(Set dst (PopCountL src));
7517 
7518   format %{ "popcnt  $dst, $src" %}
7519   ins_encode %{
7520     __ popcntq($dst$$Register, $src$$Register);
7521   %}
7522   ins_pipe(ialu_reg);
7523 %}
7524 
7525 // Note: Long.bitCount(long) returns an int.
7526 instruct popCountL_mem(rRegI dst, memory mem) %{
       // Long population count with the LoadL folded in: popcntq reads its source
       // directly from memory. Selected only when UsePopCountInstruction is set.
7527   predicate(UsePopCountInstruction);
7528   match(Set dst (PopCountL (LoadL mem)));
7529 
7530   format %{ "popcnt  $dst, $mem" %}
7531   ins_encode %{
7532     __ popcntq($dst$$Register, $mem$$Address);
7533   %}
7534   ins_pipe(ialu_reg);
7535 %}
7536 
7537 
7538 //----------MemBar Instructions-----------------------------------------------
7539 // Memory barrier flavors
7540 
7541 instruct membar_acquire()
7542 %{
       // Acquire barrier: emits no code (size 0, empty encoding, zero cost).
7543   match(MemBarAcquire);
7544   ins_cost(0);
7545 
7546   size(0);
7547   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7548   ins_encode();
7549   ins_pipe(empty);
7550 %}
7551 
7552 instruct membar_acquire_lock()
7553 %{
       // Acquire barrier variant chosen when Matcher::prior_fast_lock(n) holds;
       // also emits no code. Per the format string, the CMPXCHG in the preceding
       // FastLock already provides the ordering.
7554   match(MemBarAcquire);
7555   predicate(Matcher::prior_fast_lock(n));
7556   ins_cost(0);
7557 
7558   size(0);
7559   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7560   ins_encode();
7561   ins_pipe(empty);
7562 %}
7563 
7564 instruct membar_release()
7565 %{
       // Release barrier: emits no code (size 0, empty encoding, zero cost).
7566   match(MemBarRelease);
7567   ins_cost(0);
7568 
7569   size(0);
7570   format %{ "MEMBAR-release ! (empty encoding)" %}
7571   ins_encode();
7572   ins_pipe(empty);
7573 %}
7574 
7575 instruct membar_release_lock()
7576 %{
       // Release barrier variant chosen when Matcher::post_fast_unlock(n) holds;
       // also emits no code. Per the format string, a FastUnlock follows.
7577   match(MemBarRelease);
7578   predicate(Matcher::post_fast_unlock(n));
7579   ins_cost(0);
7580 
7581   size(0);
7582   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7583   ins_encode();
7584   ins_pipe(empty);
7585 %}
7586 
7587 instruct membar_volatile(rFlagsReg cr) %{
       // Volatile (StoreLoad) barrier: the only barrier rule in this group that
       // emits code, via the assembler's membar(StoreLoad). Flags are declared
       // killed. The format template prints a locked addl on multiprocessor
       // systems and an "empty encoding" note otherwise. High cost (400) compared
       // with the zero-cost unnecessary_membar_volatile rule.
7588   match(MemBarVolatile);
7589   effect(KILL cr);
7590   ins_cost(400);
7591 
7592   format %{ 
7593     $$template
7594     if (os::is_MP()) {
7595       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7596     } else {
7597       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7598     }
7599   %}
7600   ins_encode %{
7601     __ membar(Assembler::StoreLoad);
7602   %}
7603   ins_pipe(pipe_slow);
7604 %}
7605 
7606 instruct unnecessary_membar_volatile()
7607 %{
       // Volatile barrier elided when Matcher::post_store_load_barrier(n) holds:
       // zero cost, size 0, empty encoding.
7608   match(MemBarVolatile);
7609   predicate(Matcher::post_store_load_barrier(n));
7610   ins_cost(0);
7611 
7612   size(0);
7613   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7614   ins_encode();
7615   ins_pipe(empty);
7616 %}
7617 
7618 //----------Move Instructions--------------------------------------------------
7619 
7620 instruct castX2P(rRegP dst, rRegL src)
7621 %{
       // Reinterprets a long as a pointer (CastX2P); implemented as a 64-bit
       // register copy via enc_copy_wide.
7622   match(Set dst (CastX2P src));
7623 
7624   format %{ "movq    $dst, $src\t# long->ptr" %}
7625   ins_encode(enc_copy_wide(dst, src));
7626   ins_pipe(ialu_reg_reg); // XXX
7627 %}
7628 
7629 instruct castP2X(rRegL dst, rRegP src)
7630 %{
       // Reinterprets a pointer as a long (CastP2X); implemented as a 64-bit
       // register copy via enc_copy_wide.
7631   match(Set dst (CastP2X src));
7632 
7633   format %{ "movq    $dst, $src\t# ptr -> long" %}
7634   ins_encode(enc_copy_wide(dst, src));
7635   ins_pipe(ialu_reg_reg); // XXX
7636 %}
7637 
7638 
7639 // Convert oop pointer into compressed form
7640 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compresses an oop whose type is not known to be non-null (predicate:
       // ptr() != NotNull). The source is first copied into dst when they are
       // different registers, then encoded in place; flags are declared killed.
7641   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7642   match(Set dst (EncodeP src));
7643   effect(KILL cr);
7644   format %{ "encode_heap_oop $dst,$src" %}
7645   ins_encode %{
7646     Register s = $src$$Register;
7647     Register d = $dst$$Register;
7648     if (s != d) {
7649       __ movq(d, s);
7650     }
7651     __ encode_heap_oop(d);
7652   %}
7653   ins_pipe(ialu_reg_long);
7654 %}
7655 
7656 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compresses an oop whose type is known to be non-null (predicate:
       // ptr() == NotNull), using the two-register encode_heap_oop_not_null form.
       // Flags are declared killed.
7657   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7658   match(Set dst (EncodeP src));
7659   effect(KILL cr);
7660   format %{ "encode_heap_oop_not_null $dst,$src" %}
7661   ins_encode %{
7662     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7663   %}
7664   ins_pipe(ialu_reg_long);
7665 %}
7666 
7667 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompresses a narrow oop whose result type is neither NotNull nor
       // Constant. The source is first copied into dst when they are different
       // registers, then decoded in place; flags are declared killed.
7668   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7669             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7670   match(Set dst (DecodeN src));
7671   effect(KILL cr);
7672   format %{ "decode_heap_oop $dst,$src" %}
7673   ins_encode %{
7674     Register s = $src$$Register;
7675     Register d = $dst$$Register;
7676     if (s != d) {
7677       __ movq(d, s);
7678     }
7679     __ decode_heap_oop(d);
7680   %}
7681   ins_pipe(ialu_reg_long);
7682 %}
7683 
7684 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
       // Decompresses a narrow oop whose result type is NotNull or Constant (the
       // complement of decodeHeapOop's predicate). Uses the two-register decode
       // when src and dst differ, otherwise the in-place form. Unlike the other
       // encode/decode rules here, no flags operand or KILL effect is declared.
7685   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7686             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7687   match(Set dst (DecodeN src));
7688   format %{ "decode_heap_oop_not_null $dst,$src" %}
7689   ins_encode %{
7690     Register s = $src$$Register;
7691     Register d = $dst$$Register;
7692     if (s != d) {
7693       __ decode_heap_oop_not_null(d, s);
7694     } else {
7695       __ decode_heap_oop_not_null(d);
7696     }
7697   %}
7698   ins_pipe(ialu_reg_long);
7699 %}
7700 
7701 
7702 //----------Conditional Move---------------------------------------------------
7703 // Jump
7704 // dummy instruction for generating temp registers
7705 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Table jump on (switch_val << shift). The predicate is the constant
       // false, so this rule is never selected by matching. dest is a scratch
       // register (TEMP) that the format shows holding the table base address.
7706   match(Jump (LShiftL switch_val shift));
7707   ins_cost(350);
7708   predicate(false);
7709   effect(TEMP dest);
7710 
7711   format %{ "leaq    $dest, table_base\n\t"
7712             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7713   ins_encode(jump_enc_offset(switch_val, shift, dest));
7714   ins_pipe(pipe_jmp);
7715   ins_pc_relative(1);
7716 %}
7717 
// Jump-table dispatch for an index of the form (switch_val << shift) + offset.
// dest is a TEMP register; per the format string it receives the table base.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, table_base\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
%}
7729 
// Jump-table dispatch indexed directly by switch_val (no shift or offset).
// dest is a TEMP register; per the format string it receives the table base.
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, table_base\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode(jump_enc(switch_val, dest));
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
%}
7741 
7742 // Conditional move
// CMoveI, register source, signed-condition flavor (cmpOp / rFlagsReg).
// dst is both an input of the match rule and its result; formatted as
// "cmovl<cop> dst, src".
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7753 
// CMoveI, register source, unsigned-condition flavor (cmpOpU / rFlagsRegU).
// Same match shape and encoding list as cmovI_reg; only the operand
// classes of cop/cr differ.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7763 
// CMoveI, register source, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovI_regU with the same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
7771 
7772 // Conditional move
// CMoveI whose second value is a LoadI folded into the instruction as a
// memory operand; signed-condition flavor.  Costed at 250 versus 200 for
// the register form.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
7782 
7783 // Conditional move
// CMoveI with a folded LoadI memory operand; unsigned-condition flavor
// (cmpOpU / rFlagsRegU).  Otherwise identical in shape to cmovI_mem.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
7794 
// CMoveI with a folded LoadI memory operand; cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovI_memU with the same operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7802 
7803 // Conditional move
// CMoveN (compressed pointer), register source, signed-condition flavor.
// Uses the same non-wide encoding list as the int form (REX_reg_reg,
// formatted as cmovl).
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7814 
7815 // Conditional move
// CMoveN (compressed pointer), register source, unsigned-condition flavor
// (cmpOpU / rFlagsRegU).  Same encoding list as cmovN_reg.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7826 
// CMoveN, register source, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovN_regU with the same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
7834 
7835 // Conditional move
// CMoveP (full-width pointer), register source, signed-condition flavor.
// Uses the wide prefix encoding (REX_reg_reg_wide) and is formatted as cmovq.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7846 
7847 // Conditional move
// CMoveP (full-width pointer), register source, unsigned-condition flavor
// (cmpOpU / rFlagsRegU).  Same wide encoding list as cmovP_reg.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7858 
// CMoveP, register source, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7866 
7867 // DISABLED: Requires the ADLC to emit a bottom_type call that
7868 // correctly meets the two pointer arguments; one is an incoming
7869 // register but the other is a memory operand.  ALSO appears to
7870 // be buggy with implicit null checks.
7871 //
7872 //// Conditional move
7873 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7874 //%{
7875 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7876 //  ins_cost(250);
7877 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7878 //  opcode(0x0F,0x40);
7879 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7880 //  ins_pipe( pipe_cmov_mem );
7881 //%}
7882 //
7883 //// Conditional move
7884 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7885 //%{
7886 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7887 //  ins_cost(250);
7888 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7889 //  opcode(0x0F,0x40);
7890 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7891 //  ins_pipe( pipe_cmov_mem );
7892 //%}
7893 
// CMoveL, register source, signed-condition flavor.
// Wide prefix encoding (REX_reg_reg_wide); formatted as cmovq.
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7904 
// CMoveL whose second value is a LoadL folded in as a memory operand;
// signed-condition flavor.
// NOTE(review): the cost is 200, the same as cmovL_reg, whereas the int
// memory form cmovI_mem is costed at 250 -- confirm this is intended.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);  // XXX
%}
7915 
// CMoveL, register source, unsigned-condition flavor (cmpOpU / rFlagsRegU).
// Same wide encoding list as cmovL_reg.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7926 
// CMoveL, register source, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7934 
// CMoveL with a folded LoadL memory operand; unsigned-condition flavor
// (cmpOpU / rFlagsRegU).
// NOTE(review): the cost is 200, the same as the register form, whereas
// cmovI_memU is costed at 250 -- confirm this is intended.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem); // XXX
%}
7945 
// CMoveL with a folded LoadL memory operand; cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovL_memU with the same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
7953 
// CMoveF, signed-condition flavor.  Per the format string this is not a
// single cmov: it is a branch on the negated condition around a movss
// (encoding class enc_cmovf_branch), scheduled with pipe_slow.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7965 
7966 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7967 // %{
//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7969 
7970 //   ins_cost(200); // XXX
7971 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7972 //             "movss     $dst, $src\n"
7973 //     "skip:" %}
7974 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7975 //   ins_pipe(pipe_slow);
7976 // %}
7977 
// CMoveF, unsigned-condition flavor (cmpOpU / rFlagsRegU).  Same
// branch-around-movss encoding class (enc_cmovf_branch) as cmovF_reg.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7989 
// CMoveF, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
7997 
// CMoveD, signed-condition flavor.  Per the format string this is a branch
// on the negated condition around a movsd (encoding class
// enc_cmovd_branch), scheduled with pipe_slow.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
8009 
// CMoveD, unsigned-condition flavor (cmpOpU / rFlagsRegU).  Same
// branch-around-movsd encoding class (enc_cmovd_branch) as cmovD_reg.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
8021 
// CMoveD, cmpOpUCF / rFlagsRegUCF flavor.
// Has no encoding of its own: it expands to cmovD_regU with the same operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
8029 
8030 //----------Arithmetic Instructions--------------------------------------------
8031 //----------Addition Instructions----------------------------------------------
8032 
// Int add, register += register (two-address: dst is also the first input).
// Formatted as addl; the flags register is declared killed.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8043 
// Int add, register += immediate.  The immediate is emitted by Con8or32,
// i.e. as either an 8-bit or a 32-bit constant.  Flags are killed.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
8054 
// Int add, register += memory: the LoadI of the second input is folded
// into the add as a memory operand.  Flags are killed.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8066 
// Int read-modify-write add: matches a StoreI to dst of (LoadI dst) + src
// and emits a single memory-destination addl.  Note the encoding helpers
// take (src, dst): the register operand goes in the reg field, the memory
// operand in the r/m field.  Flags are killed.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8078 
// Int read-modify-write add of an immediate: matches a StoreI to dst of
// (LoadI dst) + src.  The /0 opcode extension is passed explicitly to
// RM_opc_mem; the constant is emitted by Con8or32.  Flags are killed.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8090 
// Int increment of a register: matches AddI with an immI1 constant and is
// formatted as incl.  Only eligible when the UseIncDec flag is set.
// The src operand is matched but not referenced by the encoding.
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8102 
// Int increment of a memory location: matches a StoreI to dst of
// (LoadI dst) + immI1 and is formatted as incl.  Only eligible when
// UseIncDec is set.  Flags are killed.
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
8115 
8116 // XXX why does that use AddI
// Int decrement of a register, formatted as decl.  Matches AddI with an
// immI_M1 operand (presumably the constant -1, which would explain why the
// rule is written against AddI -- verify against the operand definition).
// Only eligible when UseIncDec is set.  Flags are killed.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8128 
8129 // XXX why does that use AddI
// Int decrement of a memory location, formatted as decl.  Matches a StoreI
// to dst of (LoadI dst) + immI_M1 (presumably the constant -1 -- verify
// against the operand definition).  Only eligible when UseIncDec is set.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
8142 
// Three-operand int add via lea: dst = src0 + src1, where dst need not be
// one of the inputs.  Declares no flags operand or KILL effect, unlike the
// addl forms.  The encoding starts with a 0x67 prefix byte, shown as
// "addr32" in the format string.
instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
%{
  match(Set dst (AddI src0 src1));

  ins_cost(110);
  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
8153 
// Long add, register += register.  Wide prefix encoding
// (REX_reg_reg_wide); formatted as addq.  Flags are killed.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8164 
// Long add, register += immediate.  The constant operand class is immL32
// and is emitted by Con8or32 as an 8-bit or 32-bit immediate.
// Flags are killed.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
8175 
// Long add, register += memory: the LoadL of the second input is folded
// into the addq as a memory operand.  Flags are killed.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8187 
// Long read-modify-write add: matches a StoreL to dst of (LoadL dst) + src
// and emits a single memory-destination addq.  The encoding helpers take
// (src, dst): register in the reg field, memory in the r/m field.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8199 
// Long read-modify-write add of an immL32 constant: matches a StoreL to
// dst of (LoadL dst) + src.  The /0 opcode extension is passed explicitly
// to RM_opc_mem; the constant is emitted by Con8or32.  Flags are killed.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8212 
// Long increment of a register: matches AddL with an immL1 constant and is
// formatted as incq.  Only eligible when the UseIncDec flag is set.
// dst is declared rRegL: the rule matches a long AddL and encodes with the
// wide prefix (REX_reg_wide), mirroring decL_rReg.  It was previously
// declared rRegI, an int register class that does not fit a long AddL input.
// The src operand is matched but not referenced by the encoding.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8224 
// Long increment of a memory location: matches a StoreL to dst of
// (LoadL dst) + immL1 and is formatted as incq.  Only eligible when
// UseIncDec is set.  Flags are killed.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
8237 
8238 // XXX why does that use AddL
// Long decrement of a register, formatted as decq.  Matches AddL with an
// immL_M1 operand (presumably the constant -1, which would explain why the
// rule is written against AddL -- verify against the operand definition).
// Only eligible when UseIncDec is set.  Flags are killed.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8250 
8251 // XXX why does that use AddL
// Long decrement of a memory location, formatted as decq.  Matches a
// StoreL to dst of (LoadL dst) + immL_M1 (presumably the constant -1 --
// verify against the operand definition).  Only eligible when UseIncDec
// is set.  Flags are killed.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
8264 
// Three-operand long add via lea: dst = src0 + src1 (immL32), where dst
// need not be one of the inputs.  Declares no flags operand or KILL
// effect, unlike the addq forms.
instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
%{
  match(Set dst (AddL src0 src1));

  ins_cost(110);
  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
8275 
// Pointer add, pointer register += long register (AddP).  Same opcode and
// wide encoding list as addL_rReg; formatted as addq.  Flags are killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8286 
// Pointer add, pointer register += immL32 constant (AddP).  Same encoding
// list as addL_rReg_imm; formatted as addq.  Flags are killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
8297 
8298 // XXX addP mem ops ????
8299 
// Three-operand pointer add via lea: dst = src0 + src1 (immL32), where dst
// need not be one of the inputs.  Declares no flags operand or KILL
// effect, unlike the addq forms.
instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
%{
  match(Set dst (AddP src0 src1));

  ins_cost(110);
  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
  ins_pipe(ialu_reg_reg);
%}
8310 
// CheckCastPP produces no machine code: size(0) with an empty encoding.
// The rule exists so the node is matched in place on dst (input and
// output share the register).
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8320 
// CastPP produces no machine code: size(0) with an empty encoding.
// The node is matched in place on dst.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8330 
// CastII produces no machine code: size(0) with an empty encoding.
// Unlike checkCastPP and castPP, this rule also sets an explicit
// ins_cost(0).
instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
8341 
8342 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked is emitted as a plain 64-bit load (formatted as movq, wide
// prefix encoding); no lock prefix appears in the encoding list.
instruct loadPLocked(rRegP dst, memory mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# ptr locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
8353 
8354 // LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked is emitted as a plain 64-bit load (formatted as movq, wide
// prefix encoding); no lock prefix appears in the encoding list.
instruct loadLLocked(rRegL dst, memory mem)
%{
  match(Set dst (LoadLLocked mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# long locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
8365 
8366 // Conditional-store of the updated heap-top.
8367 // Used during allocation of the shared heap.
8368 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8369 
// StorePConditional: the result of the rule is the flags register itself
// (Set cr ...).  Encoded as a lock-prefixed 64-bit cmpxchg of newval
// against heap_top_ptr, with oldval constrained to the rax_RegP class.
// NOTE(review): storeIConditional and storeLConditional declare
// effect(KILL oldval) but this rule does not, although it uses the same
// cmpxchg encoding -- confirm that omitting the kill is intentional.
instruct storePConditional(memory heap_top_ptr,
                           rax_RegP oldval, rRegP newval,
                           rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, heap_top_ptr),
             OpcP, OpcS,
             reg_mem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}
8385 
8386 // Conditional-store of an int value.
8387 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: the result of the rule is the flags register
// (Set cr ...).  Encoded as a lock-prefixed 32-bit cmpxchg (non-wide
// prefix encoding); oldval is constrained to the rax_RegI class and is
// declared killed.
instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem),
             OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8401 
8402 // Conditional-store of a long value.
8403 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: the result of the rule is the flags register
// (Set cr ...).  Encoded as a lock-prefixed 64-bit cmpxchg (wide prefix
// encoding); oldval is constrained to the rax_RegL class and is declared
// killed.
instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem),
             OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8417 
8418 
8419 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Three instructions are
// emitted back to back: a lock-prefixed 64-bit cmpxchg, then sete on res,
// then movzbl res,res to widen the byte result.  oldval is constrained to
// the rax_RegP class; both it and the flags register are declared killed.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8442 
// CompareAndSwapL producing an int result in res.  Emits a lock-prefixed
// 64-bit cmpxchg, then sete on res, then movzbl res,res to widen the byte
// result.  oldval is constrained to the rax_RegL class; both it and the
// flags register are declared killed.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8465 
// CompareAndSwapI producing an int result in res.  Emits a lock-prefixed
// 32-bit cmpxchg (non-wide prefix encoding), then sete on res, then
// movzbl res,res to widen the byte result.  oldval is constrained to the
// rax_RegI class; both it and the flags register are declared killed.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8488 
8489 
// CompareAndSwapN (compressed pointer) producing an int result in res.
// Uses the same 32-bit cmpxchg + sete + movzbl encoding list as
// compareAndSwapI; oldval is constrained to the rax_RegN class, and both
// it and the flags register are declared killed.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8511 
8512 //----------Subtraction Instructions-------------------------------------------
8513 
8514 // Integer Subtraction Instructions
// Int subtract, register -= register (two-address: dst is also the first
// input).  Formatted as subl; the flags register is declared killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8525 
// Int subtract, register -= immediate.  Opcode 81 with extension /5; the
// constant is emitted by Con8or32 as an 8-bit or 32-bit immediate.
// Flags are killed.
instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
8536 
// Int subtract, register -= memory: the LoadI of the second input is
// folded into the subl as a memory operand.  Flags are killed.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8548 
// Int read-modify-write subtract: matches a StoreI to dst of
// (LoadI dst) - src and emits a single memory-destination subl.  The
// encoding helpers take (src, dst): register in the reg field, memory in
// the r/m field.  Flags are killed.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8560 
// Int read-modify-write subtract of an immediate: matches a StoreI to dst
// of (LoadI dst) - src.  The /5 opcode extension is passed explicitly to
// RM_opc_mem; the constant is emitted by Con8or32.  Flags are killed.
instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /5 id */
  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8572 
// Long subtract, register -= register.  Wide prefix encoding
// (REX_reg_reg_wide); formatted as subq.  Flags are killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8583 
// Long subtract, register -= immL32 constant.  Opcode 81 with extension
// /5, wide encoding (OpcSErm_wide); the constant is emitted by Con8or32.
// dst is declared rRegL: the rule matches a long SubL and encodes a 64-bit
// subq, mirroring addL_rReg_imm.  It was previously declared rRegI, an int
// register class that does not fit a long SubL input.  Flags are killed.
instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
8594 
// Long subtract, register -= memory: the LoadL of the second input is
// folded into the subq as a memory operand.  Flags are killed.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8606 
// Long read-modify-write subtract: matches a StoreL to dst of
// (LoadL dst) - src and emits a single memory-destination subq.  The
// encoding helpers take (src, dst): register in the reg field, memory in
// the r/m field.  Flags are killed.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8618 
// Long read-modify-write subtract of an immL32 constant: matches a StoreL
// to dst of (LoadL dst) - src.  The /5 opcode extension is passed
// explicitly to RM_opc_mem; the constant is emitted by Con8or32.
// Flags are killed.
instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /5 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8631 
8632 // Subtract from a pointer
8633 // XXX hmpf???
// Matches AddP of dst with a negated int, (SubI zero src), and emits a
// single subq instead of a negate followed by an add.  Flags are killed.
// NOTE(review): src is an int register class (rRegI) but the encoding is
// a 64-bit subtract (REX_reg_reg_wide, formatted as subq), so the full
// 64-bit register is consumed -- confirm that the upper half of src is
// well-defined wherever this rule can match.
instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8644 
// Int negate of a register: matches (SubI zero dst) with an immI0 operand
// and is formatted as negl.  The zero operand is matched but not
// referenced by the encoding.  Flags are killed.
instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8655 
// Int negate of a memory location: matches a StoreI to dst of
// (SubI zero (LoadI dst)) and is formatted as negl.  The /3 opcode
// extension is supplied to RM_opc_mem through the opcode's secondary
// field.  Flags are killed.
// NOTE(review): this memory form uses ins_pipe(ialu_reg) and sets no
// ins_cost, whereas other read-modify-write memory rules in this file
// (e.g. incI_mem) use ialu_mem_imm with ins_cost(125) -- confirm intended.
instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_reg);
%}
8666 
8667 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8668 %{
       // Long negate in place: matches (0L - dst) and emits "negq" (F7 /3).
       // The zero operand only anchors the match; the encoding uses dst alone.
8669   match(Set dst (SubL zero dst));
8670   effect(KILL cr);
8671 
8672   format %{ "negq    $dst\t# long" %}
8673   opcode(0xF7, 0x03);  // Opcode F7 /3
8674   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8675   ins_pipe(ialu_reg);
8676 %}
8677 
8678 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8679 %{
       // Long negate of a memory operand: the load/(0L - x)/store on the same
       // address is matched as one "negq" (F7 /3); flags are killed.
8680   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8681   effect(KILL cr);
8682 
8683   format %{ "negq    $dst\t# long" %}
8684   opcode(0xF7, 0x03);  // Opcode F7 /3
8685   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
       // NOTE(review): pipe class is ialu_reg although dst is a memory operand;
       // the memory-destination shift rules in this file use ialu_mem_imm or
       // ialu_mem_reg -- confirm this is intended.
8686   ins_pipe(ialu_reg);
8687 %}
8688 
8689 
8690 //----------Multiplication/Division Instructions-------------------------------
8691 // Integer Multiplication Instructions
8692 // Multiply Register
8693 
8694 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8695 %{
       // Int multiply, two-operand form: dst = dst * src via "imull" (0F AF).
8696   match(Set dst (MulI dst src));
8697   effect(KILL cr);
8698 
8699   ins_cost(300);
8700   format %{ "imull   $dst, $src\t# int" %}
8701   opcode(0x0F, 0xAF);
       // OpcP/OpcS emit the primary and secondary opcode bytes given above.
8702   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8703   ins_pipe(ialu_reg_reg_alu0);
8704 %}
8705 
8706 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8707 %{
       // Int multiply by constant, three-operand form: dst = src * imm.
       // dst is a separate operand here, so it need not alias src.
8708   match(Set dst (MulI src imm));
8709   effect(KILL cr);
8710 
8711   ins_cost(300);
8712   format %{ "imull   $dst, $src, $imm\t# int" %}
8713   opcode(0x69); /* 69 /r id */
       // OpcSE/Con8or32 presumably choose between the 8-bit sign-extended and
       // 32-bit immediate forms -- confirm against their definitions.
8714   ins_encode(REX_reg_reg(dst, src),
8715              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8716   ins_pipe(ialu_reg_reg_alu0);
8717 %}
8718 
8719 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8720 %{
       // Int multiply with the second factor loaded from memory:
       // dst = dst * [src], folding the LoadI into "imull" (0F AF).
       // Costed slightly higher (350) than the register form (300).
8721   match(Set dst (MulI dst (LoadI src)));
8722   effect(KILL cr);
8723 
8724   ins_cost(350);
8725   format %{ "imull   $dst, $src\t# int" %}
8726   opcode(0x0F, 0xAF);
8727   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8728   ins_pipe(ialu_reg_mem_alu0);
8729 %}
8730 
8731 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8732 %{
       // Int multiply of a memory operand by a constant, three-operand form:
       // dst = [src] * imm, folding the LoadI into "imull" (69 /r).
8733   match(Set dst (MulI (LoadI src) imm));
8734   effect(KILL cr);
8735 
8736   ins_cost(300);
8737   format %{ "imull   $dst, $src, $imm\t# int" %}
8738   opcode(0x69); /* 69 /r id */
8739   ins_encode(REX_reg_mem(dst, src),
8740              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8741   ins_pipe(ialu_reg_mem_alu0);
8742 %}
8743 
8744 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8745 %{
       // Long multiply, two-operand form: dst = dst * src via "imulq" (0F AF).
       // Differs from mulI_rReg only in operand classes and the _wide prefix
       // helper.
8746   match(Set dst (MulL dst src));
8747   effect(KILL cr);
8748 
8749   ins_cost(300);
8750   format %{ "imulq   $dst, $src\t# long" %}
8751   opcode(0x0F, 0xAF);
8752   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8753   ins_pipe(ialu_reg_reg_alu0);
8754 %}
8755 
8756 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8757 %{
       // Long multiply by an immL32 constant, three-operand form:
       // dst = src * imm via "imulq" (69 /r). dst need not alias src.
8758   match(Set dst (MulL src imm));
8759   effect(KILL cr);
8760 
8761   ins_cost(300);
8762   format %{ "imulq   $dst, $src, $imm\t# long" %}
8763   opcode(0x69); /* 69 /r id */
8764   ins_encode(REX_reg_reg_wide(dst, src),
8765              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8766   ins_pipe(ialu_reg_reg_alu0);
8767 %}
8768 
8769 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8770 %{
       // Long multiply with the second factor loaded from memory:
       // dst = dst * [src], folding the LoadL into "imulq" (0F AF).
8771   match(Set dst (MulL dst (LoadL src)));
8772   effect(KILL cr);
8773 
8774   ins_cost(350);
8775   format %{ "imulq   $dst, $src\t# long" %}
8776   opcode(0x0F, 0xAF);
8777   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8778   ins_pipe(ialu_reg_mem_alu0);
8779 %}
8780 
8781 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8782 %{
       // Long multiply of a memory operand by an immL32 constant:
       // dst = [src] * imm, folding the LoadL into "imulq" (69 /r).
8783   match(Set dst (MulL (LoadL src) imm));
8784   effect(KILL cr);
8785 
8786   ins_cost(300);
8787   format %{ "imulq   $dst, $src, $imm\t# long" %}
8788   opcode(0x69); /* 69 /r id */
8789   ins_encode(REX_reg_mem_wide(dst, src),
8790              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8791   ins_pipe(ialu_reg_mem_alu0);
8792 %}
8793 
8794 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8795 %{
       // MulHiL via the one-operand "imulq" (F7 /5), which per the format
       // writes RDX:RAX. The result operand is therefore pinned to the RDX
       // class, one input to the RAX class (consumed and clobbered: USE_KILL),
       // and src is drawn from a class that excludes RAX.
8796   match(Set dst (MulHiL src rax));
8797   effect(USE_KILL rax, KILL cr);
8798 
8799   ins_cost(300);
8800   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8801   opcode(0xF7, 0x5); /* Opcode F7 /5 */
       // Only src is encoded explicitly; RAX/RDX are implicit operands.
8802   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8803   ins_pipe(ialu_reg_reg_alu0);
8804 %}
8805 
8806 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8807                    rFlagsReg cr)
8808 %{
       // Int division: quotient is produced in rax, rdx is clobbered (KILL).
       // As the format shows, the sequence guards the case
       // rax == 0x80000000 with div == -1: idivl is skipped and rdx is zeroed,
       // leaving rax unchanged. The guard and cdql bytes come from cdql_enc
       // (defined elsewhere); the remaining encoders emit the idivl itself.
8809   match(Set rax (DivI rax div));
8810   effect(KILL rdx, KILL cr);
8811 
8812   ins_cost(30*100+10*100); // XXX
8813   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8814             "jne,s   normal\n\t"
8815             "xorl    rdx, rdx\n\t"
8816             "cmpl    $div, -1\n\t"
8817             "je,s    done\n"
8818     "normal: cdql\n\t"
8819             "idivl   $div\n"
8820     "done:"        %}
8821   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8822   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8823   ins_pipe(ialu_reg_reg_alu0);
8824 %}
8825 
8826 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8827                    rFlagsReg cr)
8828 %{
       // Long division: quotient is produced in rax, rdx is clobbered (KILL).
       // As the format shows, rdx is first loaded with 0x8000000000000000 and
       // compared with rax; when rax equals it and div == -1, idivq is skipped
       // with rdx zeroed. The guard and cdqq bytes come from cdqq_enc (defined
       // elsewhere); the remaining encoders emit the idivq itself.
8829   match(Set rax (DivL rax div));
8830   effect(KILL rdx, KILL cr);
8831 
8832   ins_cost(30*100+10*100); // XXX
8833   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8834             "cmpq    rax, rdx\n\t"
8835             "jne,s   normal\n\t"
8836             "xorl    rdx, rdx\n\t"
8837             "cmpq    $div, -1\n\t"
8838             "je,s    done\n"
8839     "normal: cdqq\n\t"
8840             "idivq   $div\n"
8841     "done:"        %}
8842   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8843   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8844   ins_pipe(ialu_reg_reg_alu0);
8845 %}
8846 
8847 // Integer DIVMOD with Register, both quotient and mod results
     // Same instruction sequence and encoding as divI_rReg, but matched against
     // a DivModI node without a Set: rdx is not declared killed here, since
     // both rax and rdx carry results. Uses the pipe_slow pipeline class.
8848 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8849                              rFlagsReg cr)
8850 %{
8851   match(DivModI rax div);
8852   effect(KILL cr);
8853 
8854   ins_cost(30*100+10*100); // XXX
8855   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8856             "jne,s   normal\n\t"
8857             "xorl    rdx, rdx\n\t"
8858             "cmpl    $div, -1\n\t"
8859             "je,s    done\n"
8860     "normal: cdql\n\t"
8861             "idivl   $div\n"
8862     "done:"        %}
8863   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8864   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8865   ins_pipe(pipe_slow);
8866 %}
8867 
8868 // Long DIVMOD with Register, both quotient and mod results
     // Same instruction sequence and encoding as divL_rReg, but matched against
     // a DivModL node without a Set: rdx is not declared killed here, since
     // both rax and rdx carry results. Uses the pipe_slow pipeline class.
8869 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8870                              rFlagsReg cr)
8871 %{
8872   match(DivModL rax div);
8873   effect(KILL cr);
8874 
8875   ins_cost(30*100+10*100); // XXX
8876   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8877             "cmpq    rax, rdx\n\t"
8878             "jne,s   normal\n\t"
8879             "xorl    rdx, rdx\n\t"
8880             "cmpq    $div, -1\n\t"
8881             "je,s    done\n"
8882     "normal: cdqq\n\t"
8883             "idivq   $div\n"
8884     "done:"        %}
8885   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8886   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8887   ins_pipe(pipe_slow);
8888 %}
8889 
8890 //----------- DivL-By-Constant-Expansions--------------------------------------
8891 // DivI cases are handled by the compiler
8892 
8893 // Magic constant, reciprocal of 10
     // 0x6666666666666667 == ceil(2^66 / 10); divL_10 multiplies by it, keeps
     // the high 64 bits of the product and shifts right by 2 more.
     // No match rule: this instruct is only instantiated from divL_10's expand.
8894 instruct loadConL_0x6666666666666667(rRegL dst)
8895 %{
8896   effect(DEF dst);
8897 
       // Fixed: the listing previously printed 15 hex digits (0x666666666666667),
       // which did not match the constant actually encoded below.
8898   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8899   ins_encode(load_immL(dst, 0x6666666666666667));
8900   ins_pipe(ialu_reg);
8901 %}
8902 
8903 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8904 %{
       // Expand-only helper (no match rule) used by divL_10: one-operand
       // "imulq" (F7 /5) with the same encoding as mulHiL_rReg. dst is pinned
       // to the RDX class and rax is consumed and clobbered (USE_KILL).
8905   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8906 
8907   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8908   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8909   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8910   ins_pipe(ialu_reg_reg_alu0);
8911 %}
8912 
8913 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8914 %{
       // Expand-only helper (no match rule) used by divL_10: arithmetic right
       // shift of dst by the hard-coded count 63 (0x3F), in place.
8915   effect(USE_DEF dst, KILL cr);
8916 
8917   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8918   opcode(0xC1, 0x7); /* C1 /7 ib */
8919   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8920   ins_pipe(ialu_reg);
8921 %}
8922 
8923 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8924 %{
       // Expand-only helper (no match rule) used by divL_10: arithmetic right
       // shift of dst by the hard-coded count 2, in place.
8925   effect(USE_DEF dst, KILL cr);
8926 
8927   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8928   opcode(0xC1, 0x7); /* C1 /7 ib */
8929   ins_encode(reg_opc_imm_wide(dst, 0x2));
8930   ins_pipe(ialu_reg);
8931 %}
8932 
8933 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8934 %{
       // Long division by the immL10 constant, done as a multiply-high by
       // ceil(2^66/10) plus shifts instead of idivq:
       //   dst = (hi64(src * magic) >> 2) - (src >> 63)
       // The last term is -1 for negative src, so subtracting it adds 1.
       // Note that sarL_rReg_63 is USE_DEF on its operand, so src is
       // overwritten with its sign mask by the expansion.
8935   match(Set dst (DivL src div));
8936 
8937   ins_cost((5+8)*100);
8938   expand %{
8939     rax_RegL rax;                     // Killed temp
8940     rFlagsReg cr;                     // Killed
8941     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8942     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8943     sarL_rReg_63(src, cr);            // sarq  src, 63
8944     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8945     subL_rReg(dst, src, cr);          // subq  rdx, src
8946   %}
8947 %}
8948 
8949 //-----------------------------------------------------------------------------
8950 
8951 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8952                    rFlagsReg cr)
8953 %{
       // Int remainder: same guarded cdql/idivl sequence and encoding as
       // divI_rReg, but the result is taken from rdx and rax is the clobbered
       // register. On the guarded path (rax == 0x80000000, div == -1) the
       // format shows rdx zeroed and idivl skipped.
8954   match(Set rdx (ModI rax div));
8955   effect(KILL rax, KILL cr);
8956 
8957   ins_cost(300); // XXX
8958   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8959             "jne,s   normal\n\t"
8960             "xorl    rdx, rdx\n\t"
8961             "cmpl    $div, -1\n\t"
8962             "je,s    done\n"
8963     "normal: cdql\n\t"
8964             "idivl   $div\n"
8965     "done:"        %}
8966   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8967   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8968   ins_pipe(ialu_reg_reg_alu0);
8969 %}
8970 
8971 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8972                    rFlagsReg cr)
8973 %{
       // Long remainder: same guarded cdqq/idivq sequence and encoding as
       // divL_rReg, but the result is taken from rdx and rax is the clobbered
       // register. On the guarded path (rax == 0x8000000000000000, div == -1)
       // the format shows rdx zeroed and idivq skipped.
8974   match(Set rdx (ModL rax div));
8975   effect(KILL rax, KILL cr);
8976 
8977   ins_cost(300); // XXX
8978   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8979             "cmpq    rax, rdx\n\t"
8980             "jne,s   normal\n\t"
8981             "xorl    rdx, rdx\n\t"
8982             "cmpq    $div, -1\n\t"
8983             "je,s    done\n"
8984     "normal: cdqq\n\t"
8985             "idivq   $div\n"
8986     "done:"        %}
8987   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8988   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8989   ins_pipe(ialu_reg_reg_alu0);
8990 %}
8991 
8992 // Integer Shift Instructions
8993 // Shift Left by one
     // Int register form. Uses the shift-by-one opcode (D1 /4), so the count
     // operand only constrains the match and is not encoded.
8994 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8995 %{
8996   match(Set dst (LShiftI dst shift));
8997   effect(KILL cr);
8998 
8999   format %{ "sall    $dst, $shift" %}
9000   opcode(0xD1, 0x4); /* D1 /4 */
9001   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9002   ins_pipe(ialu_reg);
9003 %}
9004 
9005 // Shift Left by one
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sall" (D1 /4). The count is not encoded.
9006 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9007 %{
9008   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9009   effect(KILL cr);
9010 
9011   format %{ "sall    $dst, $shift\t" %}
9012   opcode(0xD1, 0x4); /* D1 /4 */
9013   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9014   ins_pipe(ialu_mem_imm);
9015 %}
9016 
9017 // Shift Left by 8-bit immediate
     // Int register form, "sall" (C1 /4 ib). The whole encoding, including the
     // count byte, is produced by the reg_opc_imm helper.
9018 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9019 %{
9020   match(Set dst (LShiftI dst shift));
9021   effect(KILL cr);
9022 
9023   format %{ "sall    $dst, $shift" %}
9024   opcode(0xC1, 0x4); /* C1 /4 ib */
9025   ins_encode(reg_opc_imm(dst, shift));
9026   ins_pipe(ialu_reg);
9027 %}
9028 
9029 // Shift Left by 8-bit immediate
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sall" (C1 /4 ib); the count follows the
     // memory operand in the encoding.
9030 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9031 %{
9032   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9033   effect(KILL cr);
9034 
9035   format %{ "sall    $dst, $shift" %}
9036   opcode(0xC1, 0x4); /* C1 /4 ib */
9037   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9038   ins_pipe(ialu_mem_imm);
9039 %}
9040 
9041 // Shift Left by variable
     // Int register form, "sall" (D3 /4). The count operand is constrained to
     // the rcx register class and is implicit in the encoding (only dst is
     // encoded).
9042 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9043 %{
9044   match(Set dst (LShiftI dst shift));
9045   effect(KILL cr);
9046 
9047   format %{ "sall    $dst, $shift" %}
9048   opcode(0xD3, 0x4); /* D3 /4 */
9049   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9050   ins_pipe(ialu_reg_reg);
9051 %}
9052 
9053 // Shift Left by variable
     // Int memory form: read-modify-write "sall" (D3 /4) with the count in
     // the rcx register class; only the memory operand is encoded.
9054 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9055 %{
9056   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9057   effect(KILL cr);
9058 
9059   format %{ "sall    $dst, $shift" %}
9060   opcode(0xD3, 0x4); /* D3 /4 */
9061   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9062   ins_pipe(ialu_mem_reg);
9063 %}
9064 
9065 // Arithmetic shift right by one
     // Int register form, "sarl" (D1 /7). The count operand only constrains
     // the match and is not encoded.
9066 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9067 %{
9068   match(Set dst (RShiftI dst shift));
9069   effect(KILL cr);
9070 
9071   format %{ "sarl    $dst, $shift" %}
9072   opcode(0xD1, 0x7); /* D1 /7 */
9073   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9074   ins_pipe(ialu_reg);
9075 %}
9076 
9077 // Arithmetic shift right by one
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sarl" (D1 /7). The count is not encoded.
9078 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9079 %{
9080   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9081   effect(KILL cr);
9082 
9083   format %{ "sarl    $dst, $shift" %}
9084   opcode(0xD1, 0x7); /* D1 /7 */
9085   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9086   ins_pipe(ialu_mem_imm);
9087 %}
9088 
9089 // Arithmetic Shift Right by 8-bit immediate
     // Int register form, "sarl" (C1 /7 ib). The whole encoding, including the
     // count byte, is produced by the reg_opc_imm helper.
9090 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9091 %{
9092   match(Set dst (RShiftI dst shift));
9093   effect(KILL cr);
9094 
9095   format %{ "sarl    $dst, $shift" %}
9096   opcode(0xC1, 0x7); /* C1 /7 ib */
9097   ins_encode(reg_opc_imm(dst, shift));
       // Register-only operation: use the register pipeline class, matching
       // salI_rReg_imm and shrI_rReg_imm (was ialu_mem_imm, a memory class).
9098   ins_pipe(ialu_reg);
9099 %}
9100 
9101 // Arithmetic Shift Right by 8-bit immediate
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sarl" (C1 /7 ib); the count follows the
     // memory operand in the encoding.
9102 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9103 %{
9104   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9105   effect(KILL cr);
9106 
9107   format %{ "sarl    $dst, $shift" %}
9108   opcode(0xC1, 0x7); /* C1 /7 ib */
9109   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9110   ins_pipe(ialu_mem_imm);
9111 %}
9112 
9113 // Arithmetic Shift Right by variable
     // Int register form, "sarl" (D3 /7). The count operand is constrained to
     // the rcx register class and is implicit in the encoding.
9114 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9115 %{
9116   match(Set dst (RShiftI dst shift));
9117   effect(KILL cr);
9118 
9119   format %{ "sarl    $dst, $shift" %}
9120   opcode(0xD3, 0x7); /* D3 /7 */
9121   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9122   ins_pipe(ialu_reg_reg);
9123 %}
9124 
9125 // Arithmetic Shift Right by variable
     // Int memory form: read-modify-write "sarl" (D3 /7) with the count in
     // the rcx register class; only the memory operand is encoded.
9126 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9127 %{
9128   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9129   effect(KILL cr);
9130 
9131   format %{ "sarl    $dst, $shift" %}
9132   opcode(0xD3, 0x7); /* D3 /7 */
9133   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9134   ins_pipe(ialu_mem_reg);
9135 %}
9136 
9137 // Logical shift right by one
     // Int register form, "shrl" (D1 /5). The count operand only constrains
     // the match and is not encoded.
9138 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9139 %{
9140   match(Set dst (URShiftI dst shift));
9141   effect(KILL cr);
9142 
9143   format %{ "shrl    $dst, $shift" %}
9144   opcode(0xD1, 0x5); /* D1 /5 */
9145   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9146   ins_pipe(ialu_reg);
9147 %}
9148 
9149 // Logical shift right by one
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "shrl" (D1 /5). The count is not encoded.
9150 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9151 %{
9152   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9153   effect(KILL cr);
9154 
9155   format %{ "shrl    $dst, $shift" %}
9156   opcode(0xD1, 0x5); /* D1 /5 */
9157   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9158   ins_pipe(ialu_mem_imm);
9159 %}
9160 
9161 // Logical Shift Right by 8-bit immediate
     // Int register form, "shrl" (C1 /5 ib). The whole encoding, including the
     // count byte, is produced by the reg_opc_imm helper.
9162 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9163 %{
9164   match(Set dst (URShiftI dst shift));
9165   effect(KILL cr);
9166 
9167   format %{ "shrl    $dst, $shift" %}
9168   opcode(0xC1, 0x5); /* C1 /5 ib */
9169   ins_encode(reg_opc_imm(dst, shift));
9170   ins_pipe(ialu_reg);
9171 %}
9172 
9173 // Logical Shift Right by 8-bit immediate
     // Int memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "shrl" (C1 /5 ib); the count follows the
     // memory operand in the encoding.
9174 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9175 %{
9176   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9177   effect(KILL cr);
9178 
9179   format %{ "shrl    $dst, $shift" %}
9180   opcode(0xC1, 0x5); /* C1 /5 ib */
9181   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9182   ins_pipe(ialu_mem_imm);
9183 %}
9184 
9185 // Logical Shift Right by variable
     // Int register form, "shrl" (D3 /5). The count operand is constrained to
     // the rcx register class and is implicit in the encoding.
9186 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9187 %{
9188   match(Set dst (URShiftI dst shift));
9189   effect(KILL cr);
9190 
9191   format %{ "shrl    $dst, $shift" %}
9192   opcode(0xD3, 0x5); /* D3 /5 */
9193   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9194   ins_pipe(ialu_reg_reg);
9195 %}
9196 
9197 // Logical Shift Right by variable
     // Int memory form: read-modify-write "shrl" (D3 /5) with the count in
     // the rcx register class; only the memory operand is encoded.
9198 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9199 %{
9200   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9201   effect(KILL cr);
9202 
9203   format %{ "shrl    $dst, $shift" %}
9204   opcode(0xD3, 0x5); /* D3 /5 */
9205   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9206   ins_pipe(ialu_mem_reg);
9207 %}
9208 
9209 // Long Shift Instructions
9210 // Shift Left by one
     // Long register form, "salq" (D1 /4). The count is an int constant
     // (immI1) even though the value is long; it is not encoded.
9211 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9212 %{
9213   match(Set dst (LShiftL dst shift));
9214   effect(KILL cr);
9215 
9216   format %{ "salq    $dst, $shift" %}
9217   opcode(0xD1, 0x4); /* D1 /4 */
9218   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9219   ins_pipe(ialu_reg);
9220 %}
9221 
9222 // Shift Left by one
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "salq" (D1 /4). The count is not encoded.
9223 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9224 %{
9225   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9226   effect(KILL cr);
9227 
9228   format %{ "salq    $dst, $shift" %}
9229   opcode(0xD1, 0x4); /* D1 /4 */
9230   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9231   ins_pipe(ialu_mem_imm);
9232 %}
9233 
9234 // Shift Left by 8-bit immediate
     // Long register form, "salq" (C1 /4 ib). The whole encoding, including
     // the count byte, is produced by the reg_opc_imm_wide helper.
9235 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9236 %{
9237   match(Set dst (LShiftL dst shift));
9238   effect(KILL cr);
9239 
9240   format %{ "salq    $dst, $shift" %}
9241   opcode(0xC1, 0x4); /* C1 /4 ib */
9242   ins_encode(reg_opc_imm_wide(dst, shift));
9243   ins_pipe(ialu_reg);
9244 %}
9245 
9246 // Shift Left by 8-bit immediate
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "salq" (C1 /4 ib); the count follows the
     // memory operand in the encoding.
9247 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9248 %{
9249   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9250   effect(KILL cr);
9251 
9252   format %{ "salq    $dst, $shift" %}
9253   opcode(0xC1, 0x4); /* C1 /4 ib */
9254   ins_encode(REX_mem_wide(dst), OpcP,
9255              RM_opc_mem(secondary, dst), Con8or32(shift));
9256   ins_pipe(ialu_mem_imm);
9257 %}
9258 
9259 // Shift Left by variable
     // Long register form, "salq" (D3 /4). The count is an int operand
     // constrained to the rcx register class and is implicit in the encoding.
9260 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9261 %{
9262   match(Set dst (LShiftL dst shift));
9263   effect(KILL cr);
9264 
9265   format %{ "salq    $dst, $shift" %}
9266   opcode(0xD3, 0x4); /* D3 /4 */
9267   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9268   ins_pipe(ialu_reg_reg);
9269 %}
9270 
9271 // Shift Left by variable
     // Long memory form: read-modify-write "salq" (D3 /4) with the count in
     // the rcx register class; only the memory operand is encoded.
9272 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9273 %{
9274   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9275   effect(KILL cr);
9276 
9277   format %{ "salq    $dst, $shift" %}
9278   opcode(0xD3, 0x4); /* D3 /4 */
9279   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9280   ins_pipe(ialu_mem_reg);
9281 %}
9282 
9283 // Arithmetic shift right by one
     // Long register form, "sarq" (D1 /7). The count operand only constrains
     // the match and is not encoded.
9284 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9285 %{
9286   match(Set dst (RShiftL dst shift));
9287   effect(KILL cr);
9288 
9289   format %{ "sarq    $dst, $shift" %}
9290   opcode(0xD1, 0x7); /* D1 /7 */
9291   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9292   ins_pipe(ialu_reg);
9293 %}
9294 
9295 // Arithmetic shift right by one
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sarq" (D1 /7). The count is not encoded.
9296 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9297 %{
9298   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9299   effect(KILL cr);
9300 
9301   format %{ "sarq    $dst, $shift" %}
9302   opcode(0xD1, 0x7); /* D1 /7 */
9303   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9304   ins_pipe(ialu_mem_imm);
9305 %}
9306 
9307 // Arithmetic Shift Right by 8-bit immediate
     // Long register form, "sarq" (C1 /7 ib). The whole encoding, including
     // the count byte, is produced by the reg_opc_imm_wide helper.
9308 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9309 %{
9310   match(Set dst (RShiftL dst shift));
9311   effect(KILL cr);
9312 
9313   format %{ "sarq    $dst, $shift" %}
9314   opcode(0xC1, 0x7); /* C1 /7 ib */
9315   ins_encode(reg_opc_imm_wide(dst, shift));
       // Register-only operation: use the register pipeline class, matching
       // salL_rReg_imm and shrL_rReg_imm (was ialu_mem_imm, a memory class).
9316   ins_pipe(ialu_reg);
9317 %}
9318 
9319 // Arithmetic Shift Right by 8-bit immediate
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "sarq" (C1 /7 ib); the count follows the
     // memory operand in the encoding.
9320 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9321 %{
9322   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9323   effect(KILL cr);
9324 
9325   format %{ "sarq    $dst, $shift" %}
9326   opcode(0xC1, 0x7); /* C1 /7 ib */
9327   ins_encode(REX_mem_wide(dst), OpcP,
9328              RM_opc_mem(secondary, dst), Con8or32(shift));
9329   ins_pipe(ialu_mem_imm);
9330 %}
9331 
9332 // Arithmetic Shift Right by variable
     // Long register form, "sarq" (D3 /7). The count is an int operand
     // constrained to the rcx register class and is implicit in the encoding.
9333 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9334 %{
9335   match(Set dst (RShiftL dst shift));
9336   effect(KILL cr);
9337 
9338   format %{ "sarq    $dst, $shift" %}
9339   opcode(0xD3, 0x7); /* D3 /7 */
9340   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9341   ins_pipe(ialu_reg_reg);
9342 %}
9343 
9344 // Arithmetic Shift Right by variable
     // Long memory form: read-modify-write "sarq" (D3 /7) with the count in
     // the rcx register class; only the memory operand is encoded.
9345 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9346 %{
9347   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9348   effect(KILL cr);
9349 
9350   format %{ "sarq    $dst, $shift" %}
9351   opcode(0xD3, 0x7); /* D3 /7 */
9352   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9353   ins_pipe(ialu_mem_reg);
9354 %}
9355 
9356 // Logical shift right by one
     // Long register form, "shrq" (D1 /5). The count operand only constrains
     // the match and is not encoded.
9357 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9358 %{
9359   match(Set dst (URShiftL dst shift));
9360   effect(KILL cr);
9361 
9362   format %{ "shrq    $dst, $shift" %}
9363   opcode(0xD1, 0x5); /* D1 /5 */
9364   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9365   ins_pipe(ialu_reg);
9366 %}
9367 
9368 // Logical shift right by one
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "shrq" (D1 /5). The count is not encoded.
9369 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9370 %{
9371   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9372   effect(KILL cr);
9373 
9374   format %{ "shrq    $dst, $shift" %}
9375   opcode(0xD1, 0x5); /* D1 /5 */
9376   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9377   ins_pipe(ialu_mem_imm);
9378 %}
9379 
9380 // Logical Shift Right by 8-bit immediate
     // Long register form, "shrq" (C1 /5 ib). The whole encoding, including
     // the count byte, is produced by the reg_opc_imm_wide helper.
9381 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9382 %{
9383   match(Set dst (URShiftL dst shift));
9384   effect(KILL cr);
9385 
9386   format %{ "shrq    $dst, $shift" %}
9387   opcode(0xC1, 0x5); /* C1 /5 ib */
9388   ins_encode(reg_opc_imm_wide(dst, shift));
9389   ins_pipe(ialu_reg);
9390 %}
9391 
9392 
9393 // Logical Shift Right by 8-bit immediate
     // Long memory form: load/shift/store on the same address is matched as a
     // single read-modify-write "shrq" (C1 /5 ib); the count follows the
     // memory operand in the encoding.
9394 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9395 %{
9396   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9397   effect(KILL cr);
9398 
9399   format %{ "shrq    $dst, $shift" %}
9400   opcode(0xC1, 0x5); /* C1 /5 ib */
9401   ins_encode(REX_mem_wide(dst), OpcP,
9402              RM_opc_mem(secondary, dst), Con8or32(shift));
9403   ins_pipe(ialu_mem_imm);
9404 %}
9405 
9406 // Logical Shift Right by variable
     // Long register form, "shrq" (D3 /5). The count is an int operand
     // constrained to the rcx register class and is implicit in the encoding.
9407 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9408 %{
9409   match(Set dst (URShiftL dst shift));
9410   effect(KILL cr);
9411 
9412   format %{ "shrq    $dst, $shift" %}
9413   opcode(0xD3, 0x5); /* D3 /5 */
9414   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9415   ins_pipe(ialu_reg_reg);
9416 %}
9417 
9418 // Logical Shift Right by variable
     // Long memory form: read-modify-write "shrq" (D3 /5) with the count in
     // the rcx register class; only the memory operand is encoded.
9419 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9420 %{
9421   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9422   effect(KILL cr);
9423 
9424   format %{ "shrq    $dst, $shift" %}
9425   opcode(0xD3, 0x5); /* D3 /5 */
9426   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9427   ins_pipe(ialu_mem_reg);
9428 %}
9429 
9430 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9431 // This idiom is used by the compiler for the i2b bytecode.
     // The pair is replaced by a single "movsbl" (0F BE). No flags operand is
     // declared. The prefix helper is REX_reg_breg (byte-register source)
     // rather than REX_reg_reg.
9432 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9433 %{
9434   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9435 
9436   format %{ "movsbl  $dst, $src\t# i2b" %}
9437   opcode(0x0F, 0xBE);
9438   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9439   ins_pipe(ialu_reg_reg);
9440 %}
9441 
9442 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9443 // This idiom is used by the compiler for the i2s bytecode.
     // The pair is replaced by a single "movswl" (0F BF). No flags operand is
     // declared.
9444 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9445 %{
9446   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9447 
9448   format %{ "movswl  $dst, $src\t# i2s" %}
9449   opcode(0x0F, 0xBF);
9450   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9451   ins_pipe(ialu_reg_reg);
9452 %}
9453 
9454 // ROL/ROR instructions
9455 
9456 // ROL expand
9457 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate-left by one, in place,
       // "roll" (D1 /0). Instantiated from rolI_rReg_i1.
9458   effect(KILL cr, USE_DEF dst);
9459 
9460   format %{ "roll    $dst" %}
9461   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9462   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9463   ins_pipe(ialu_reg);
9464 %}
9465 
9466 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate-left by an 8-bit
       // immediate, in place, "roll" (C1 /0 ib). Instantiated from
       // rolI_rReg_i8.
9467   effect(USE_DEF dst, USE shift, KILL cr);
9468 
9469   format %{ "roll    $dst, $shift" %}
9470   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9471   ins_encode( reg_opc_imm(dst, shift) );
9472   ins_pipe(ialu_reg);
9473 %}
9474 
9475 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9476 %{
       // Expand-only helper (no match rule): int rotate-left by a variable
       // count, "roll" (D3 /0). The count is constrained to the rcx class and
       // dst to a class that excludes rcx; only dst is encoded.
9477   effect(USE_DEF dst, USE shift, KILL cr);
9478 
9479   format %{ "roll    $dst, $shift" %}
9480   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9481   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9482   ins_pipe(ialu_reg_reg);
9483 %}
9484 // end of ROL expand
9485 
9486 // Rotate Left by one
     // Recognizes (dst << 1) | (dst >>> -1) and expands to rolI_rReg_imm1.
     // The -1 right-shift count presumably stands for 31 once the count is
     // masked to 5 bits -- confirm against the ideal-graph shift semantics.
9487 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9488 %{
9489   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9490 
9491   expand %{
9492     rolI_rReg_imm1(dst, cr);
9493   %}
9494 %}
9495 
9496 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) and expands to
     // rolI_rReg_imm8 using the left-shift count.
9497 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9498 %{
       // Only a rotate if the two constant counts sum to a multiple of 32:
       // in(1)->in(2) is the LShiftI count, in(2)->in(2) the URShiftI count.
9499   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9500   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9501 
9502   expand %{
9503     rolI_rReg_imm8(dst, lshift, cr);
9504   %}
9505 %}
9506 
9507 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (0 - shift)) and expands to
     // rolI_rReg_CL. The zero operand only anchors the match.
9508 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9509 %{
9510   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9511 
9512   expand %{
9513     rolI_rReg_CL(dst, shift, cr);
9514   %}
9515 %}
9516 
9517 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (32 - shift)) and expands to
     // rolI_rReg_CL. The c32 operand only anchors the match.
9518 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9519 %{
9520   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9521 
9522   expand %{
9523     rolI_rReg_CL(dst, shift, cr);
9524   %}
9525 %}
9526 
9527 // ROR expand
9528 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9529 %{
       // Expand-only helper (no match rule): int rotate-right by one, in
       // place, "rorl" (D1 /1). Instantiated from rorI_rReg_i1.
9530   effect(USE_DEF dst, KILL cr);
9531 
9532   format %{ "rorl    $dst" %}
9533   opcode(0xD1, 0x1); /* D1 /1 */
9534   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9535   ins_pipe(ialu_reg);
9536 %}
9537 
9538 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9539 %{
       // Expand-only helper (no match rule): int rotate-right by an 8-bit
       // immediate, in place, "rorl" (C1 /1 ib). Instantiated from
       // rorI_rReg_i8.
9540   effect(USE_DEF dst, USE shift, KILL cr);
9541 
9542   format %{ "rorl    $dst, $shift" %}
9543   opcode(0xC1, 0x1); /* C1 /1 ib */
9544   ins_encode(reg_opc_imm(dst, shift));
9545   ins_pipe(ialu_reg);
9546 %}
9547 
9548 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9549 %{
       // Expand-only helper (no match rule): int rotate-right by a variable
       // count, "rorl" (D3 /1). The count is constrained to the rcx class and
       // dst to a class that excludes rcx; only dst is encoded.
9550   effect(USE_DEF dst, USE shift, KILL cr);
9551 
9552   format %{ "rorl    $dst, $shift" %}
9553   opcode(0xD3, 0x1); /* D3 /1 */
9554   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9555   ins_pipe(ialu_reg_reg);
9556 %}
9557 // end of ROR expand
9558 
9559 // Rotate Right by one
     // Recognizes (dst >>> 1) | (dst << -1) and expands to rorI_rReg_imm1.
     // The -1 left-shift count presumably stands for 31 once the count is
     // masked to 5 bits -- confirm against the ideal-graph shift semantics.
9560 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9561 %{
9562   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9563 
9564   expand %{
9565     rorI_rReg_imm1(dst, cr);
9566   %}
9567 %}
9568 
9569 // Rotate Right by 8-bit immediate
     // Recognizes (dst >>> rshift) | (dst << lshift) and expands to
     // rorI_rReg_imm8 using the right-shift count.
9570 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9571 %{
       // Only a rotate if the two constant counts sum to a multiple of 32:
       // in(1)->in(2) is the URShiftI count, in(2)->in(2) the LShiftI count.
9572   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9573   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9574 
9575   expand %{
9576     rorI_rReg_imm8(dst, rshift, cr);
9577   %}
9578 %}
9579 
9580 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (0 - shift)) and expands to
     // rorI_rReg_CL. The zero operand only anchors the match.
9581 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9582 %{
9583   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9584 
9585   expand %{
9586     rorI_rReg_CL(dst, shift, cr);
9587   %}
9588 %}
9589 
9590 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (32 - shift)) and expands to
     // rorI_rReg_CL. The c32 operand only anchors the match.
9591 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9592 %{
9593   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9594 
9595   expand %{
9596     rorI_rReg_CL(dst, shift, cr);
9597   %}
9598 %}
9599 
9600 // for long rotate
9601 // ROL expand
9602 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate-left by one, in
       // place, "rolq" (D1 /0). Instantiated from rolL_rReg_i1.
9603   effect(USE_DEF dst, KILL cr);
9604 
9605   format %{ "rolq    $dst" %}
9606   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9607   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9608   ins_pipe(ialu_reg);
9609 %}
9610 
9611 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate-left by an 8-bit
       // immediate, in place, "rolq" (C1 /0 ib). Instantiated from
       // rolL_rReg_i8.
9612   effect(USE_DEF dst, USE shift, KILL cr);
9613 
9614   format %{ "rolq    $dst, $shift" %}
9615   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9616   ins_encode( reg_opc_imm_wide(dst, shift) );
9617   ins_pipe(ialu_reg);
9618 %}
9619 
9620 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9621 %{
       // Expand-only helper (no match rule): long rotate-left by a variable
       // count, "rolq" (D3 /0). The count is an int in the rcx class and dst
       // is drawn from a class that excludes rcx; only dst is encoded.
9622   effect(USE_DEF dst, USE shift, KILL cr);
9623 
9624   format %{ "rolq    $dst, $shift" %}
9625   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9626   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9627   ins_pipe(ialu_reg_reg);
9628 %}
9629 // end of ROL expand
9630 
9631 // Rotate Left by one
     // Recognizes (dst << 1) | (dst >>> -1) on longs and expands to
     // rolL_rReg_imm1. The -1 right-shift count presumably stands for 63 once
     // the count is masked to 6 bits -- confirm against the ideal-graph shift
     // semantics.
9632 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9633 %{
9634   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9635 
9636   expand %{
9637     rolL_rReg_imm1(dst, cr);
9638   %}
9639 %}
9640 
9641 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) on longs and expands to
     // rolL_rReg_imm8 using the left-shift count.
9642 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9643 %{
       // Only a rotate if the two constant counts sum to a multiple of 64:
       // in(1)->in(2) is the LShiftL count, in(2)->in(2) the URShiftL count.
9644   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9645   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9646 
9647   expand %{
9648     rolL_rReg_imm8(dst, lshift, cr);
9649   %}
9650 %}
9651 
// Rotate Left by variable (64-bit), "0 - shift" spelling.
// Matches (dst << shift) | (dst >>> (zero - shift)) and expands to
// rolL_rReg_CL.  shift is pinned to RCX and dst is kept out of RCX; the zero
// operand only constrains the match and is not forwarded.
instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9661 
// Rotate Left by variable (64-bit), "64 - shift" spelling.
// Matches (dst << shift) | (dst >>> (c64 - shift)), with c64 of class
// immI_64, and expands to rolL_rReg_CL.  shift is pinned to RCX and dst is
// kept out of RCX; c64 only constrains the match and is not forwarded.
instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9671 
9672 // ROR expand
// 64-bit rotate right by one ("rorq $dst", opcode D1 /1).
// Has no match rule: it is only reachable as an expansion target (see
// rorL_rReg_i1).  dst is read and written in place; the flags are clobbered.
instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorq    $dst" %}
  opcode(0xD1, 0x1); /* D1 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9682 
// 64-bit rotate right by an 8-bit immediate count ("rorq $dst, $shift",
// opcode C1 /1 ib).  No match rule: used only as an expansion target (see
// rorL_rReg_i8).  dst is read and written in place, shift is only read, and
// the flags are clobbered.
instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xC1, 0x1); /* C1 /1 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9692 
// 64-bit rotate right by a variable count ("rorq $dst, $shift", opcode D3 /1).
// No match rule: used only as an expansion target (see rorL_rReg_Var_C0 and
// rorL_rReg_Var_C64).  The D3 form takes its count implicitly from CL, which
// is why shift is pinned to RCX (rcx_RegI), dst is excluded from RCX
// (no_rcx_RegL), and shift does not appear in ins_encode.
instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xD3, 0x1); /* D3 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9702 // end of ROR expand
9703 
// Rotate Right by one (64-bit).
// Matches (dst >>> rshift) | (dst << lshift) with rshift of class immI1 and
// lshift of class immI_M1, and expands to rorL_rReg_imm1.  A long shift count
// of -1 is equivalent to 63 because only the low six bits of the count are
// used.  Neither constant is forwarded to the expansion.
instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm1(dst, cr);
  %}
%}
9713 
// Rotate Right by 8-bit immediate (64-bit).
// Matches (dst >>> rshift) | (dst << lshift) for two immI8 constants and
// expands to rorL_rReg_imm8 using the right-shift count as rotate amount.
instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
%{
  // n is the OrL node: in(1)->in(2) is the URShiftL count, in(2)->in(2) the
  // LShiftL count.  The pattern is a rotate only when the two counts add up
  // to a multiple of 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm8(dst, rshift, cr);
  %}
%}
9724 
// Rotate Right by variable (64-bit), "0 - shift" spelling.
// Matches (dst >>> shift) | (dst << (zero - shift)) and expands to
// rorL_rReg_CL.  shift is pinned to RCX and dst is kept out of RCX; the zero
// operand only constrains the match and is not forwarded.
instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9734 
// Rotate Right by variable (64-bit), "64 - shift" spelling.
// Matches (dst >>> shift) | (dst << (c64 - shift)), with c64 of class
// immI_64, and expands to rorL_rReg_CL.  shift is pinned to RCX and dst is
// kept out of RCX; c64 only constrains the match and is not forwarded.
instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9744 
9745 // Logical Instructions
9746 
9747 // Integer Logical Instructions
9748 
9749 // And Instructions
// And Register with Register (32-bit): dst = dst & src.
// Two-address form (dst is both input and result); the flags are clobbered.
// Opcode 0x23 is AND r32, r/m32.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9761 
// And Register with Immediate 255 (32-bit): dst = dst & 0xFF.
// Implemented as a zero-extending byte move (movzbl, 0F B6) of dst onto
// itself.  MOVZX does not modify the flags, so no rFlagsReg operand or KILL
// effect is declared here.
instruct andI_rReg_imm255(rRegI dst, immI_255 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  opcode(0x0F, 0xB6);
  // NOTE(review): REX_reg_breg (rather than REX_reg_reg) presumably forces a
  // REX prefix where the byte-register source needs one -- confirm against
  // the enc_class definition.
  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9772 
// And Register with Immediate 255 and promote to long:
// dst = (long)(src & 0xFF).  The mask and the int-to-long conversion are
// folded into a single movzbl (0F B6) from src into dst.  No flags operand
// is declared (MOVZX does not modify the flags).
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9783 
// And Register with Immediate 65535 (32-bit): dst = dst & 0xFFFF.
// Implemented as a zero-extending 16-bit move (movzwl, 0F B7) of dst onto
// itself.  MOVZX does not modify the flags, so no rFlagsReg operand or KILL
// effect is declared here.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9794 
// And Register with Immediate 65535 and promote to long:
// dst = (long)(src & 0xFFFF).  The mask and the int-to-long conversion are
// folded into a single movzwl (0F B7) from src into dst.  No flags operand
// is declared (MOVZX does not modify the flags).
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9805 
// And Register with Immediate (32-bit): dst = dst & src for an arbitrary int
// constant.  Group-1 immediate form, opcode 81 /4; the flags are clobbered.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  // NOTE(review): OpcSErm/Con8or32 presumably switch to the sign-extended
  // 8-bit immediate form when the constant fits -- confirm against the
  // enc_class definitions.
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9817 
// And Register with Memory (32-bit): dst = dst & [src].
// Folds the LoadI into the AND (opcode 0x23, AND r32, r/m32).  Carries an
// explicit ins_cost(125); the flags are clobbered.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9830 
// And Memory with Register (32-bit): [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand and emits a single
// read-modify-write AND r/m32, r32 (opcode 0x21).  Note the encoder takes the
// register first and the memory operand second.  The flags are clobbered.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9843 
// And Memory with Immediate (32-bit): [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand and emits a single
// read-modify-write group-1 immediate AND (opcode 81 /4).  The /4 extension
// is passed to RM_opc_mem as the secondary opcode.  The flags are clobbered.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9857 
9858 // Or Instructions
// Or Register with Register (32-bit): dst = dst | src.
// Two-address form; the flags are clobbered.  Opcode 0x0B is OR r32, r/m32.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9870 
// Or Register with Immediate (32-bit): dst = dst | src for an int constant.
// Group-1 immediate form, opcode 81 /1; the flags are clobbered.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9882 
// Or Register with Memory (32-bit): dst = dst | [src].
// Folds the LoadI into the OR (opcode 0x0B, OR r32, r/m32).  Carries an
// explicit ins_cost(125); the flags are clobbered.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9895 
// Or Memory with Register (32-bit): [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand and emits a single
// read-modify-write OR r/m32, r32 (opcode 0x09).  The encoder takes the
// register first and the memory operand second.  The flags are clobbered.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9908 
// Or Memory with Immediate (32-bit): [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand and emits a single
// read-modify-write group-1 immediate OR (opcode 81 /1).  The /1 extension
// is passed to RM_opc_mem as the secondary opcode.  The flags are clobbered.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9922 
9923 // Xor Instructions
// Xor Register with Register (32-bit): dst = dst ^ src.
// Two-address form; the flags are clobbered.  Opcode 0x33 is XOR r32, r/m32.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9935 
// Xor Register with Immediate -1 (32-bit): dst = dst ^ -1, i.e. the bitwise
// complement of dst.  Emitted as a single NOT through the macro assembler.
// NOT does not modify the flags, so unlike the other xor forms no rFlagsReg
// operand or KILL effect is declared.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  // The mnemonic is spelled "notl" to match the instruction actually emitted
  // below and the "notq" spelling of the 64-bit sibling xorL_rReg_im1.
  format %{ "notl   $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9946 
// Xor Register with Immediate (32-bit): dst = dst ^ src for an int constant.
// Group-1 immediate form, opcode 81 /6; the flags are clobbered.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9958 
// Xor Register with Memory (32-bit): dst = dst ^ [src].
// Folds the LoadI into the XOR (opcode 0x33, XOR r32, r/m32).  Carries an
// explicit ins_cost(125); the flags are clobbered.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9971 
// Xor Memory with Register (32-bit): [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand and emits a single
// read-modify-write XOR r/m32, r32 (opcode 0x31).  The encoder takes the
// register first and the memory operand second.  The flags are clobbered.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9984 
// Xor Memory with Immediate (32-bit): [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand and emits a single
// read-modify-write group-1 immediate XOR (opcode 81 /6).  The /6 extension
// is passed to RM_opc_mem as the secondary opcode.  The flags are clobbered.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9998 
9999 
10000 // Long Logical Instructions
10001 
10002 // And Instructions
// And Register with Register (64-bit): dst = dst & src.
// Same opcode (0x23) as the int form; the *_wide REX encoder selects the
// 64-bit operand size ("andq").  The flags are clobbered.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10014 
// And Register with Immediate 255 (64-bit): dst = dst & 0xFF.
// Implemented as a zero-extending byte move (movzbq, 0F B6 with a wide REX
// prefix) of dst onto itself.  MOVZX does not modify the flags, so no
// rFlagsReg operand or KILL effect is declared.
instruct andL_rReg_imm255(rRegL dst, immL_255 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
10025 
// And Register with Immediate 65535 (64-bit): dst = dst & 0xFFFF.
// Implemented as a zero-extending 16-bit move (movzwq, 0F B7 with a wide REX
// prefix) of dst onto itself.  MOVZX does not modify the flags, so no
// rFlagsReg operand or KILL effect is declared.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
10036 
// And Register with Immediate (64-bit): dst = dst & src.
// The constant is restricted to operand class immL32 because the group-1
// immediate form (opcode 81 /4) has no 64-bit immediate; the x86 instruction
// sign-extends a 32-bit immediate.  The flags are clobbered.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
10048 
// And Register with Memory (64-bit): dst = dst & [src].
// Folds the LoadL into the AND (opcode 0x23 with a wide REX prefix).
// Carries an explicit ins_cost(125); the flags are clobbered.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10061 
// And Memory with Register (64-bit): [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand and emits a single
// read-modify-write AND r/m64, r64 (opcode 0x21 with a wide REX prefix).
// The encoder takes the register first and the memory operand second.
// The flags are clobbered.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10074 
// And Memory with Immediate (64-bit): [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand and emits a single
// read-modify-write group-1 immediate AND (opcode 81 /4, wide REX prefix).
// The constant is limited to operand class immL32.  The flags are clobbered.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10088 
10089 // Or Instructions
// Or Register with Register (64-bit): dst = dst | src.
// Same opcode (0x0B) as the int form; the *_wide REX encoder selects the
// 64-bit operand size ("orq").  The flags are clobbered.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10101 
// Or Register with a pointer reinterpreted as a long: dst = dst | (long)src.
// The CastP2X is folded away so the pointer register is ORed directly
// (opcode 0x0B with a wide REX prefix).  The flags are clobbered.
// Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10112 
10113 
// Or Register with Immediate (64-bit): dst = dst | src.
// The constant is restricted to operand class immL32 because the group-1
// immediate form (opcode 81 /1) has no 64-bit immediate.  The flags are
// clobbered.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
10125 
// Or Register with Memory (64-bit): dst = dst | [src].
// Folds the LoadL into the OR (opcode 0x0B with a wide REX prefix).
// Carries an explicit ins_cost(125); the flags are clobbered.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10138 
// Or Memory with Register (64-bit): [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand and emits a single
// read-modify-write OR r/m64, r64 (opcode 0x09 with a wide REX prefix).
// The encoder takes the register first and the memory operand second.
// The flags are clobbered.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10151 
// Or Memory with Immediate (64-bit): [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand and emits a single
// read-modify-write group-1 immediate OR (opcode 81 /1, wide REX prefix).
// The constant is limited to operand class immL32.  The flags are clobbered.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10165 
10166 // Xor Instructions
// Xor Register with Register (64-bit): dst = dst ^ src.
// Same opcode (0x33) as the int form; the *_wide REX encoder selects the
// 64-bit operand size ("xorq").  The flags are clobbered.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10178 
// Xor Register with Immediate -1 (64-bit): dst = dst ^ -1L, i.e. the bitwise
// complement of dst.  Emitted as a single notq through the macro assembler.
// NOT does not modify the flags, so unlike the other xor forms no rFlagsReg
// operand or KILL effect is declared.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));  

  format %{ "notq   $dst" %}  
  ins_encode %{
     __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10189 
// Xor Register with Immediate (64-bit): dst = dst ^ src.
// The constant is restricted to operand class immL32 because the group-1
// immediate form (opcode 81 /6) has no 64-bit immediate.  The flags are
// clobbered.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
10201 
// Xor Register with Memory (64-bit): dst = dst ^ [src].
// Folds the LoadL into the XOR (opcode 0x33 with a wide REX prefix).
// Carries an explicit ins_cost(125); the flags are clobbered.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10214 
// Xor Memory with Register (64-bit): [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand and emits a single
// read-modify-write XOR r/m64, r64 (opcode 0x31 with a wide REX prefix).
// The encoder takes the register first and the memory operand second.
// The flags are clobbered.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10227 
// Xor Memory with Immediate (64-bit): [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand and emits a single
// read-modify-write group-1 immediate XOR (opcode 81 /6, wide REX prefix).
// The constant is limited to operand class immL32.  The flags are clobbered.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10241 
// Convert Int to Boolean: dst = (src != 0) ? 1 : 0.
// Three-instruction sequence, as listed in the format text: test src against
// itself, materialize the "not zero" condition into the low byte of dst with
// setnz, then zero-extend that byte to 32 bits.  The flags are clobbered by
// the test.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10257 
// Convert Pointer to Boolean: dst = (src != NULL) ? 1 : 0.
// Same sequence as convI2B, except that the test is 64 bits wide (testq, via
// the wide REX encoder) because src is a pointer register.  The flags are
// clobbered by the test.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10273 
// CmpLTMask: dst = (p < q) ? -1 : 0 (an all-ones / all-zeros mask).
// Sequence per the format text: compare p with q, set the low byte of dst on
// "less than", zero-extend it to 0/1, then negate to turn 1 into -1.
// The flags are clobbered.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400); // XXX
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
             setLT_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
             neg_reg(dst));
  ins_pipe(pipe_slow);
%}
10291 
// CmpLTMask against zero: dst = (dst < 0) ? -1 : 0.
// A single arithmetic right shift by 31 replicates the sign bit across the
// whole register, which is exactly the required mask.  Much cheaper
// (ins_cost 100) than the general cmpLTMask (ins_cost 400).  The flags are
// clobbered.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100); // XXX
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode(reg_opc_imm(dst, 0x1F));
  ins_pipe(ialu_reg);
%}
10303 
10304 
// Conditional add built on CmpLTMask:
//   p = ((p < q ? -1 : 0) & y) + (p - q)
// i.e. p - q, plus y when p was less than q.  Per the format text the
// sequence subtracts q from p, turns the resulting borrow into a 0 / -1 mask
// in tmp with sbbl, ANDs the mask with y and adds it back to p.  tmp is a
// scratch register (TEMP) and the flags are clobbered.
// NOTE(review): the mask comes from the carry flag of subl (an unsigned
// borrow), whereas CmpLTMask is a signed comparison -- verify that
// enc_cmpLTP is correct for operands of differing sign.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
                         rRegI tmp,
                         rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  ins_cost(400); // XXX
  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_encode(enc_cmpLTP(p, q, y, tmp));
  ins_pipe(pipe_cmplt);
%}
10320 
10321 /* If I enable this, I encourage spilling in the inner loop of compress.
10322 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10323 %{
10324   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10325   effect( TEMP tmp, KILL cr );
10326   ins_cost(400);
10327 
10328   format %{ "SUB    $p,$q\n\t"
10329             "SBB    RCX,RCX\n\t"
10330             "AND    RCX,$y\n\t"
10331             "ADD    $p,RCX" %}
10332   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10333 %}
10334 */
10335 
10336 //---------- FP Instructions------------------------------------------------
10337 
// Float compare, register/register, producing condition codes in an
// rFlagsRegU.  Emits ucomiss (0F 2E) followed by the cmpfp_fixup encoding.
// Per the format text the fixup is skipped when the parity flag is clear;
// otherwise (NaN operand) the flags are pushed, masked with 0xffffff2b --
// which clears ZF, PF, AF and SF while keeping CF -- and popped again, so an
// unordered result reads as "below".
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10354 
// Float compare, register/register, producing condition codes in an
// rFlagsRegUCF.  Emits a bare ucomiss with no NaN fixup sequence, so it is
// cheaper than cmpF_cc_reg.
// NOTE(review): presumably only consumers that tolerate the raw ucomiss
// flags accept rFlagsRegUCF -- confirm against the users of that operand.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  // Cost 100 like every other fixup-free *_CF compare in this file; the
  // fixup variants carry 145.
  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10365 
// Float compare, register against memory, producing condition codes in an
// rFlagsRegU.  Folds the LoadF into ucomiss (0F 2E) and appends the
// cmpfp_fixup encoding.  Per the format text the fixup runs only when the
// parity flag is set (NaN operand) and masks the saved flags with
// 0xffffff2b, clearing ZF, PF, AF and SF while keeping CF.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10382 
// Float compare, register against memory, producing condition codes in an
// rFlagsRegUCF.  Folds the LoadF into a bare ucomiss (0F 2E); no NaN fixup
// sequence is emitted, hence the lower cost (100 versus 145 for
// cmpF_cc_mem).
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10392 
// Float compare, register against a float constant, producing condition
// codes in an rFlagsRegU.  Emits ucomiss (0F 2E) with the constant supplied
// through the load_immF encoding, followed by cmpfp_fixup.  Per the format
// text the fixup runs only when the parity flag is set (NaN operand) and
// masks the saved flags with 0xffffff2b, clearing ZF, PF, AF and SF while
// keeping CF.
// NOTE(review): load_immF presumably addresses the constant in memory, which
// would explain the REX_reg_mem prefix helper -- confirm against the
// enc_class definition.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10409 
// Float compare, register against a float constant, producing condition
// codes in an rFlagsRegUCF.  Emits a bare ucomiss (0F 2E) with the constant
// supplied through the load_immF encoding; no NaN fixup sequence is emitted,
// hence the lower cost (100 versus 145 for cmpF_cc_imm).
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
  ins_pipe(pipe_slow);
%}
10419 
// Double compare, register/register, producing condition codes in an
// rFlagsRegU.  Emits ucomisd (66 0F 2E) followed by the cmpfp_fixup
// encoding.  The 0x66 prefix (OpcP) is emitted before the REX prefix, which
// in turn precedes the 0F 2E opcode bytes.  Per the format text the fixup
// runs only when the parity flag is set (NaN operand) and masks the saved
// flags with 0xffffff2b, clearing ZF, PF, AF and SF while keeping CF.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10436 
// Double compare, register/register, producing condition codes in an
// rFlagsRegUCF.  Emits a bare ucomisd with no NaN fixup sequence, hence the
// lower cost (100 versus 145 for cmpD_cc_reg).
// NOTE(review): presumably only consumers that tolerate the raw ucomisd
// flags accept rFlagsRegUCF -- confirm against the users of that operand.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // Disassembly text only; a stray " test" suffix was removed so the listing
  // shows just the instruction that is emitted.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10447 
// Double compare, register against memory, producing condition codes in an
// rFlagsRegU.  Folds the LoadD into ucomisd (66 0F 2E) and appends the
// cmpfp_fixup encoding.  Per the format text the fixup runs only when the
// parity flag is set (NaN operand) and masks the saved flags with
// 0xffffff2b, clearing ZF, PF, AF and SF while keeping CF.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10464 
// Double compare, register against memory, producing condition codes in an
// rFlagsRegUCF.  Folds the LoadD into a bare ucomisd (66 0F 2E); no NaN
// fixup sequence is emitted, hence the lower cost (100 versus 145 for
// cmpD_cc_mem).
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10474 
// Double compare, register against a double constant, producing condition
// codes in an rFlagsRegU.  Emits ucomisd (66 0F 2E) with the constant
// supplied through the load_immD encoding, followed by cmpfp_fixup.  Per the
// format text the fixup runs only when the parity flag is set (NaN operand)
// and masks the saved flags with 0xffffff2b, clearing ZF, PF, AF and SF
// while keeping CF.
instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(145);
  format %{ "ucomisd $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10491 
// Double compare, register against a double constant, producing condition
// codes in an rFlagsRegUCF.  Emits a bare ucomisd (66 0F 2E) with the
// constant supplied through the load_immD encoding; no NaN fixup sequence is
// emitted, hence the lower cost (100 versus 145 for cmpD_cc_imm).
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, [$src2]" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
  ins_pipe(pipe_slow);
%}
10501 
// Three-way float compare (CmpF3), register/register: dst becomes -1, 0 or 1.
// Sequence per the format text: ucomiss, preload dst with -1, keep that value
// when the result is unordered (jp) or below (jb), otherwise setne / movzbl
// yield 0 for equal and 1 for greater.  A NaN operand therefore gives -1.
// The tail after ucomiss is produced by the cmpfp3 encoding; the flags are
// clobbered.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10522 
// Three-way float compare (CmpF3), register against memory: dst becomes
// -1, 0 or 1.  The LoadF is folded into ucomiss; the rest of the sequence
// (per the format text: preload -1, keep it when unordered or below,
// otherwise setne / movzbl) is produced by the cmpfp3 encoding.  A NaN
// operand gives -1.  The flags are clobbered.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10543 
// Three-way float compare (CmpF3), register against a float constant: dst
// becomes -1, 0 or 1.  The constant operand of ucomiss is supplied through
// the load_immF encoding; the rest of the sequence (per the format text:
// preload -1, keep it when unordered or below, otherwise setne / movzbl) is
// produced by the cmpfp3 encoding.  A NaN operand gives -1.  The flags are
// clobbered.
instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10564 
// Three-way double compare (CmpD3), register/register: dst becomes -1, 0
// or 1.  Sequence per the format text: ucomisd (66 0F 2E), preload dst with
// -1, keep that value when the result is unordered (jp) or below (jb),
// otherwise setne / movzbl yield 0 for equal and 1 for greater.  A NaN
// operand therefore gives -1.  The tail is produced by the cmpfp3 encoding;
// the flags are clobbered.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10585 
// Three-way double compare (CmpD3), register against memory: dst becomes
// -1, 0 or 1.  The LoadD is folded into ucomisd (66 0F 2E); the rest of the
// sequence (per the format text: preload -1, keep it when unordered or
// below, otherwise setne / movzbl) is produced by the cmpfp3 encoding.
// A NaN operand gives -1.  The flags are clobbered.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10606 
// Three-way double compare (CmpD3), register against a double constant: dst
// becomes -1, 0 or 1.  The constant operand of ucomisd (66 0F 2E) is
// supplied through the load_immD encoding; the rest of the sequence (per the
// format text: preload -1, keep it when unordered or below, otherwise
// setne / movzbl) is produced by the cmpfp3 encoding.  A NaN operand gives
// -1.  The flags are clobbered.
instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10627 
// Float add, register/register: dst = dst + src.
// Scalar single-precision SSE add (addss, F3 0F 58).  The mandatory F3
// prefix (OpcP) is emitted before the REX prefix.  No flags operand is
// declared (addss does not modify EFLAGS).
instruct addF_reg(regF dst, regF src)
%{
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10638 
// Float add, register with memory: dst = dst + [src].
// Folds the LoadF into a scalar single-precision SSE add (addss, F3 0F 58).
// No flags operand is declared.
instruct addF_mem(regF dst, memory src)
%{
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10649 
10650 instruct addF_imm(regF dst, immF src)
10651 %{
        // Scalar float add of a constant: dst = dst + src, src being an immF.
        // The format prints the constant as a memory operand; load_immF
        // (defined elsewhere) encodes it.
10652   match(Set dst (AddF dst src));
10653 
10654   format %{ "addss   $dst, [$src]" %}
10655   ins_cost(150); // XXX
10656   opcode(0xF3, 0x0F, 0x58);
10657   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10658   ins_pipe(pipe_slow);
10659 %}
10660 
10661 instruct addD_reg(regD dst, regD src)
10662 %{
        // Scalar double add, register form: dst = dst + src (AddD).
        // Opcode bytes F2 0F 58 (addsd per the format).
10663   match(Set dst (AddD dst src));
10664 
10665   format %{ "addsd   $dst, $src" %}
10666   ins_cost(150); // XXX
10667   opcode(0xF2, 0x0F, 0x58);
10668   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10669   ins_pipe(pipe_slow);
10670 %}
10671 
10672 instruct addD_mem(regD dst, memory src)
10673 %{
        // Scalar double add with a memory operand: dst = dst + LoadD(src),
        // so the load is folded into the addsd shown in the format.
10674   match(Set dst (AddD dst (LoadD src)));
10675 
10676   format %{ "addsd   $dst, $src" %}
10677   ins_cost(150); // XXX
10678   opcode(0xF2, 0x0F, 0x58);
10679   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10680   ins_pipe(pipe_slow);
10681 %}
10682 
10683 instruct addD_imm(regD dst, immD src)
10684 %{
        // Scalar double add of a constant: dst = dst + src, src being an immD.
        // The format prints the constant as a memory operand; load_immD
        // (defined elsewhere) encodes it.
10685   match(Set dst (AddD dst src));
10686 
10687   format %{ "addsd   $dst, [$src]" %}
10688   ins_cost(150); // XXX
10689   opcode(0xF2, 0x0F, 0x58);
10690   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10691   ins_pipe(pipe_slow);
10692 %}
10693 
10694 instruct subF_reg(regF dst, regF src)
10695 %{
        // Scalar float subtract, register form: dst = dst - src (SubF).
        // Opcode bytes F3 0F 5C (subss per the format).
10696   match(Set dst (SubF dst src));
10697 
10698   format %{ "subss   $dst, $src" %}
10699   ins_cost(150); // XXX
10700   opcode(0xF3, 0x0F, 0x5C);
10701   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10702   ins_pipe(pipe_slow);
10703 %}
10704 
10705 instruct subF_mem(regF dst, memory src)
10706 %{
        // Scalar float subtract with a memory operand: dst = dst - LoadF(src),
        // so the load is folded into the subss shown in the format.
10707   match(Set dst (SubF dst (LoadF src)));
10708 
10709   format %{ "subss   $dst, $src" %}
10710   ins_cost(150); // XXX
10711   opcode(0xF3, 0x0F, 0x5C);
10712   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10713   ins_pipe(pipe_slow);
10714 %}
10715 
10716 instruct subF_imm(regF dst, immF src)
10717 %{
        // Scalar float subtract of a constant: dst = dst - src (immF).
        // The format prints the constant as a memory operand; load_immF
        // (defined elsewhere) encodes it.
10718   match(Set dst (SubF dst src));
10719 
10720   format %{ "subss   $dst, [$src]" %}
10721   ins_cost(150); // XXX
10722   opcode(0xF3, 0x0F, 0x5C);
10723   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10724   ins_pipe(pipe_slow);
10725 %}
10726 
10727 instruct subD_reg(regD dst, regD src)
10728 %{
        // Scalar double subtract, register form: dst = dst - src (SubD).
        // Opcode bytes F2 0F 5C (subsd per the format).
10729   match(Set dst (SubD dst src));
10730 
10731   format %{ "subsd   $dst, $src" %}
10732   ins_cost(150); // XXX
10733   opcode(0xF2, 0x0F, 0x5C);
10734   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10735   ins_pipe(pipe_slow);
10736 %}
10737 
10738 instruct subD_mem(regD dst, memory src)
10739 %{
        // Scalar double subtract with a memory operand: dst = dst - LoadD(src),
        // so the load is folded into the subsd shown in the format.
10740   match(Set dst (SubD dst (LoadD src)));
10741 
10742   format %{ "subsd   $dst, $src" %}
10743   ins_cost(150); // XXX
10744   opcode(0xF2, 0x0F, 0x5C);
10745   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10746   ins_pipe(pipe_slow);
10747 %}
10748 
10749 instruct subD_imm(regD dst, immD src)
10750 %{
        // Scalar double subtract of a constant: dst = dst - src (immD).
        // The format prints the constant as a memory operand; load_immD
        // (defined elsewhere) encodes it.
10751   match(Set dst (SubD dst src));
10752 
10753   format %{ "subsd   $dst, [$src]" %}
10754   ins_cost(150); // XXX
10755   opcode(0xF2, 0x0F, 0x5C);
10756   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10757   ins_pipe(pipe_slow);
10758 %}
10759 
10760 instruct mulF_reg(regF dst, regF src)
10761 %{
        // Scalar float multiply, register form: dst = dst * src (MulF).
        // Opcode bytes F3 0F 59 (mulss per the format).
10762   match(Set dst (MulF dst src));
10763 
10764   format %{ "mulss   $dst, $src" %}
10765   ins_cost(150); // XXX
10766   opcode(0xF3, 0x0F, 0x59);
10767   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10768   ins_pipe(pipe_slow);
10769 %}
10770 
10771 instruct mulF_mem(regF dst, memory src)
10772 %{
        // Scalar float multiply with a memory operand: dst = dst * LoadF(src),
        // so the load is folded into the mulss shown in the format.
10773   match(Set dst (MulF dst (LoadF src)));
10774 
10775   format %{ "mulss   $dst, $src" %}
10776   ins_cost(150); // XXX
10777   opcode(0xF3, 0x0F, 0x59);
10778   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10779   ins_pipe(pipe_slow);
10780 %}
10781 
10782 instruct mulF_imm(regF dst, immF src)
10783 %{
        // Scalar float multiply by a constant: dst = dst * src (immF).
        // The format prints the constant as a memory operand; load_immF
        // (defined elsewhere) encodes it.
10784   match(Set dst (MulF dst src));
10785 
10786   format %{ "mulss   $dst, [$src]" %}
10787   ins_cost(150); // XXX
10788   opcode(0xF3, 0x0F, 0x59);
10789   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10790   ins_pipe(pipe_slow);
10791 %}
10792 
10793 instruct mulD_reg(regD dst, regD src)
10794 %{
        // Scalar double multiply, register form: dst = dst * src (MulD).
        // Opcode bytes F2 0F 59 (mulsd per the format).
10795   match(Set dst (MulD dst src));
10796 
10797   format %{ "mulsd   $dst, $src" %}
10798   ins_cost(150); // XXX
10799   opcode(0xF2, 0x0F, 0x59);
10800   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10801   ins_pipe(pipe_slow);
10802 %}
10803 
10804 instruct mulD_mem(regD dst, memory src)
10805 %{
        // Scalar double multiply with a memory operand: dst = dst * LoadD(src),
        // so the load is folded into the mulsd shown in the format.
10806   match(Set dst (MulD dst (LoadD src)));
10807 
10808   format %{ "mulsd   $dst, $src" %}
10809   ins_cost(150); // XXX
10810   opcode(0xF2, 0x0F, 0x59);
10811   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10812   ins_pipe(pipe_slow);
10813 %}
10814 
10815 instruct mulD_imm(regD dst, immD src)
10816 %{
        // Scalar double multiply by a constant: dst = dst * src (immD).
        // The format prints the constant as a memory operand; load_immD
        // (defined elsewhere) encodes it.
10817   match(Set dst (MulD dst src));
10818 
10819   format %{ "mulsd   $dst, [$src]" %}
10820   ins_cost(150); // XXX
10821   opcode(0xF2, 0x0F, 0x59);
10822   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10823   ins_pipe(pipe_slow);
10824 %}
10825 
10826 instruct divF_reg(regF dst, regF src)
10827 %{
        // Scalar float divide, register form: dst = dst / src (DivF).
        // Opcode bytes F3 0F 5E (divss per the format).
10828   match(Set dst (DivF dst src));
10829 
10830   format %{ "divss   $dst, $src" %}
10831   ins_cost(150); // XXX
10832   opcode(0xF3, 0x0F, 0x5E);
10833   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10834   ins_pipe(pipe_slow);
10835 %}
10836 
10837 instruct divF_mem(regF dst, memory src)
10838 %{
        // Scalar float divide with a memory operand: dst = dst / LoadF(src),
        // so the load is folded into the divss shown in the format.
10839   match(Set dst (DivF dst (LoadF src)));
10840 
10841   format %{ "divss   $dst, $src" %}
10842   ins_cost(150); // XXX
10843   opcode(0xF3, 0x0F, 0x5E);
10844   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10845   ins_pipe(pipe_slow);
10846 %}
10847 
10848 instruct divF_imm(regF dst, immF src)
10849 %{
        // Scalar float divide by a constant: dst = dst / src (immF).
        // The format prints the constant as a memory operand; load_immF
        // (defined elsewhere) encodes it.
10850   match(Set dst (DivF dst src));
10851 
10852   format %{ "divss   $dst, [$src]" %}
10853   ins_cost(150); // XXX
10854   opcode(0xF3, 0x0F, 0x5E);
10855   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10856   ins_pipe(pipe_slow);
10857 %}
10858 
10859 instruct divD_reg(regD dst, regD src)
10860 %{
        // Scalar double divide, register form: dst = dst / src (DivD).
        // Opcode bytes F2 0F 5E (divsd per the format).
10861   match(Set dst (DivD dst src));
10862 
10863   format %{ "divsd   $dst, $src" %}
10864   ins_cost(150); // XXX
10865   opcode(0xF2, 0x0F, 0x5E);
10866   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10867   ins_pipe(pipe_slow);
10868 %}
10869 
10870 instruct divD_mem(regD dst, memory src)
10871 %{
        // Scalar double divide with a memory operand: dst = dst / LoadD(src),
        // so the load is folded into the divsd shown in the format.
10872   match(Set dst (DivD dst (LoadD src)));
10873 
10874   format %{ "divsd   $dst, $src" %}
10875   ins_cost(150); // XXX
10876   opcode(0xF2, 0x0F, 0x5E);
10877   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10878   ins_pipe(pipe_slow);
10879 %}
10880 
10881 instruct divD_imm(regD dst, immD src)
10882 %{
        // Scalar double divide by a constant: dst = dst / src (immD).
        // The format prints the constant as a memory operand; load_immD
        // (defined elsewhere) encodes it.
10883   match(Set dst (DivD dst src));
10884 
10885   format %{ "divsd   $dst, [$src]" %}
10886   ins_cost(150); // XXX
10887   opcode(0xF2, 0x0F, 0x5E);
10888   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10889   ins_pipe(pipe_slow);
10890 %}
10891 
10892 instruct sqrtF_reg(regF dst, regF src)
10893 %{
        // Float square root, register form. The rule matches the whole chain
        // float -> double (ConvF2D) -> SqrtD -> float (ConvD2F) and replaces
        // it with one sqrtss (per the format), opcode bytes F3 0F 51.
10894   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10895 
10896   format %{ "sqrtss  $dst, $src" %}
10897   ins_cost(150); // XXX
10898   opcode(0xF3, 0x0F, 0x51);
10899   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10900   ins_pipe(pipe_slow);
10901 %}
10902 
10903 instruct sqrtF_mem(regF dst, memory src)
10904 %{
        // Float square root with a memory operand. Matches the chain
        // LoadF -> ConvF2D -> SqrtD -> ConvD2F and replaces it with one
        // sqrtss (per the format) reading src directly from memory.
10905   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10906 
10907   format %{ "sqrtss  $dst, $src" %}
10908   ins_cost(150); // XXX
10909   opcode(0xF3, 0x0F, 0x51);
10910   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10911   ins_pipe(pipe_slow);
10912 %}
10913 
10914 instruct sqrtF_imm(regF dst, immF src)
10915 %{
        // Float square root of an immF constant. Matches the chain
        // ConvF2D -> SqrtD -> ConvD2F over the constant; the format prints
        // the constant as a memory operand and load_immF (defined elsewhere)
        // encodes it.
10916   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10917 
10918   format %{ "sqrtss  $dst, [$src]" %}
10919   ins_cost(150); // XXX
10920   opcode(0xF3, 0x0F, 0x51);
10921   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10922   ins_pipe(pipe_slow);
10923 %}
10924 
10925 instruct sqrtD_reg(regD dst, regD src)
10926 %{
        // Double square root, register form: dst = sqrt(src) (SqrtD).
        // Opcode bytes F2 0F 51 (sqrtsd per the format).
10927   match(Set dst (SqrtD src));
10928 
10929   format %{ "sqrtsd  $dst, $src" %}
10930   ins_cost(150); // XXX
10931   opcode(0xF2, 0x0F, 0x51);
10932   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10933   ins_pipe(pipe_slow);
10934 %}
10935 
10936 instruct sqrtD_mem(regD dst, memory src)
10937 %{
        // Double square root with a memory operand: dst = sqrt(LoadD(src)),
        // so the load is folded into the sqrtsd shown in the format.
10938   match(Set dst (SqrtD (LoadD src)));
10939 
10940   format %{ "sqrtsd  $dst, $src" %}
10941   ins_cost(150); // XXX
10942   opcode(0xF2, 0x0F, 0x51);
10943   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10944   ins_pipe(pipe_slow);
10945 %}
10946 
10947 instruct sqrtD_imm(regD dst, immD src)
10948 %{
        // Double square root of an immD constant: dst = sqrt(src).
        // The format prints the constant as a memory operand; load_immD
        // (defined elsewhere) encodes it.
10949   match(Set dst (SqrtD src));
10950 
10951   format %{ "sqrtsd  $dst, [$src]" %}
10952   ins_cost(150); // XXX
10953   opcode(0xF2, 0x0F, 0x51);
10954   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10955   ins_pipe(pipe_slow);
10956 %}
10957 
10958 instruct absF_reg(regF dst)
10959 %{
        // Float absolute value in place (AbsF on dst). Per the format this is
        // an andps with a 0x7fffffff mask, i.e. the sign bit is cleared.
        // absF_encoding is defined elsewhere in this file.
10960   match(Set dst (AbsF dst));
10961 
10962   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10963   ins_encode(absF_encoding(dst));
10964   ins_pipe(pipe_slow);
10965 %}
10966 
10967 instruct absD_reg(regD dst)
10968 %{
        // Double absolute value in place (AbsD on dst). Per the format this
        // is an andpd with a 0x7fffffffffffffff mask, i.e. the sign bit is
        // cleared. absD_encoding is defined elsewhere in this file.
10969   match(Set dst (AbsD dst));
10970 
10971   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10972             "# abs double by sign masking" %}
10973   ins_encode(absD_encoding(dst));
10974   ins_pipe(pipe_slow);
10975 %}
10976 
10977 instruct negF_reg(regF dst)
10978 %{
        // Float negation in place (NegF on dst). Per the format this is an
        // xorps with a 0x80000000 mask, i.e. the sign bit is flipped.
        // negF_encoding is defined elsewhere in this file.
10979   match(Set dst (NegF dst));
10980 
10981   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10982   ins_encode(negF_encoding(dst));
10983   ins_pipe(pipe_slow);
10984 %}
10985 
10986 instruct negD_reg(regD dst)
10987 %{
        // Double negation in place (NegD on dst). Per the format this is an
        // xorpd with a 0x8000000000000000 mask, i.e. the sign bit is flipped.
        // negD_encoding is defined elsewhere in this file.
10988   match(Set dst (NegD dst));
10989 
10990   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10991             "# neg double by sign flipping" %}
10992   ins_encode(negD_encoding(dst));
10993   ins_pipe(pipe_slow);
10994 %}
10995 
10996 // -----------Trig and Transcendental Instructions-----------------------------
10997 instruct cosD_reg(regD dst) %{
        // Cosine in place (CosD on dst). The two opcode bytes D9 FF are the
        // x87 fcos instruction; Push_SrcXD / Push_ResultXD (defined elsewhere)
        // presumably move dst onto the x87 stack and the result back to dst.
10998   match(Set dst (CosD dst));
10999 
11000   format %{ "dcos   $dst\n\t" %}
11001   opcode(0xD9, 0xFF);
11002   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11003   ins_pipe( pipe_slow );
11004 %}
11005 
11006 instruct sinD_reg(regD dst) %{
        // Sine in place (SinD on dst). The two opcode bytes D9 FE are the
        // x87 fsin instruction; Push_SrcXD / Push_ResultXD (defined elsewhere)
        // presumably move dst onto the x87 stack and the result back to dst.
11007   match(Set dst (SinD dst));
11008 
11009   format %{ "dsin   $dst\n\t" %}
11010   opcode(0xD9, 0xFE);
11011   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11012   ins_pipe( pipe_slow );
11013 %}
11014 
11015 instruct tanD_reg(regD dst) %{
        // Tangent in place (TanD on dst), computed on the x87 unit between
        // Push_SrcXD and Push_ResultXD (both defined elsewhere).
11016   match(Set dst (TanD dst));
11017 
11018   format %{ "dtan   $dst\n\t" %}
        // No opcode() here: the raw bytes are listed inline. fptan is followed
        // by an fstp; on x87, fptan also pushes 1.0, which the fstp presumably
        // discards so that the tangent is on top for Push_ResultXD.
11019   ins_encode( Push_SrcXD(dst),
11020               Opcode(0xD9), Opcode(0xF2),   //fptan
11021               Opcode(0xDD), Opcode(0xD8),   //fstp st
11022               Push_ResultXD(dst) );
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 instruct log10D_reg(regD dst) %{
        // Base-10 logarithm in place (Log10D on dst), computed on the x87 unit.
11027   // The source and result Double operands in XMM registers
11028   match(Set dst (Log10D dst));
11029   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11030   // fyl2x        ; compute log_10(2) * log_2(x)
11031   format %{ "fldlg2\t\t\t#Log10\n\t"
11032             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11033          %}
        // Emission order matters: the constant is pushed first (fldlg2), then
        // the operand (Push_SrcXD, defined elsewhere), then fyl2x combines the
        // two, and Push_ResultXD (defined elsewhere) returns the value to dst.
11034    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11035               Push_SrcXD(dst),
11036               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11037               Push_ResultXD(dst));
11038 
11039   ins_pipe( pipe_slow );
11040 %}
11041 
11042 instruct logD_reg(regD dst) %{
        // Natural logarithm in place (LogD on dst), computed on the x87 unit.
11043   // The source and result Double operands in XMM registers
11044   match(Set dst (LogD dst));
11045   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11046   // fyl2x        ; compute log_e(2) * log_2(x)
11047   format %{ "fldln2\t\t\t#Log_e\n\t"
11048             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11049          %}
        // Emission order matters: the constant is pushed first (fldln2), then
        // the operand (Push_SrcXD, defined elsewhere), then fyl2x combines the
        // two, and Push_ResultXD (defined elsewhere) returns the value to dst.
11050   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11051               Push_SrcXD(dst),
11052               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11053               Push_ResultXD(dst));
11054   ins_pipe( pipe_slow );
11055 %}
11056 
11057 
11058 
11059 //----------Arithmetic Conversion Instructions---------------------------------
11060 
11061 instruct roundFloat_nop(regF dst)
11062 %{
        // RoundFloat on a float register is matched but emits nothing:
        // the encoding list is empty, the cost is 0 and the pipe is `empty`.
11063   match(Set dst (RoundFloat dst));
11064 
11065   ins_cost(0);
11066   ins_encode();
11067   ins_pipe(empty);
11068 %}
11069 
11070 instruct roundDouble_nop(regD dst)
11071 %{
        // RoundDouble on a double register is matched but emits nothing:
        // the encoding list is empty, the cost is 0 and the pipe is `empty`.
11072   match(Set dst (RoundDouble dst));
11073 
11074   ins_cost(0);
11075   ins_encode();
11076   ins_pipe(empty);
11077 %}
11078 
11079 instruct convF2D_reg_reg(regD dst, regF src)
11080 %{
        // Float-to-double conversion, register form (ConvF2D).
        // Opcode bytes F3 0F 5A (cvtss2sd per the format).
11081   match(Set dst (ConvF2D src));
11082 
11083   format %{ "cvtss2sd $dst, $src" %}
11084   opcode(0xF3, 0x0F, 0x5A);
11085   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11086   ins_pipe(pipe_slow); // XXX
11087 %}
11088 
11089 instruct convF2D_reg_mem(regD dst, memory src)
11090 %{
        // Float-to-double conversion with the float read from memory:
        // the LoadF is folded into the cvtss2sd shown in the format.
11091   match(Set dst (ConvF2D (LoadF src)));
11092 
11093   format %{ "cvtss2sd $dst, $src" %}
11094   opcode(0xF3, 0x0F, 0x5A);
11095   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11096   ins_pipe(pipe_slow); // XXX
11097 %}
11098 
11099 instruct convD2F_reg_reg(regF dst, regD src)
11100 %{
        // Double-to-float conversion, register form (ConvD2F).
        // Opcode bytes F2 0F 5A (cvtsd2ss per the format).
11101   match(Set dst (ConvD2F src));
11102 
11103   format %{ "cvtsd2ss $dst, $src" %}
11104   opcode(0xF2, 0x0F, 0x5A);
11105   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11106   ins_pipe(pipe_slow); // XXX
11107 %}
11108 
11109 instruct convD2F_reg_mem(regF dst, memory src)
11110 %{
        // Double-to-float conversion with the double read from memory:
        // the LoadD is folded into the cvtsd2ss shown in the format.
11111   match(Set dst (ConvD2F (LoadD src)));
11112 
11113   format %{ "cvtsd2ss $dst, $src" %}
11114   opcode(0xF2, 0x0F, 0x5A);
11115   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11116   ins_pipe(pipe_slow); // XXX
11117 %}
11118 
11119 // XXX do mem variants
11120 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11121 %{
        // Float-to-int conversion (ConvF2I); clobbers the flags (KILL cr).
11122   match(Set dst (ConvF2I src));
11123   effect(KILL cr);
11124 
        // Listing: a truncating convert, then a compare of the result with
        // 0x80000000 (the value x86 cvtt* returns for NaN/out-of-range
        // input). Only on a match is the slow path taken: src is spilled to
        // the stack, f2i_fixup is called, and the result is popped into dst.
11125   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11126             "cmpl    $dst, #0x80000000\n\t"
11127             "jne,s   done\n\t"
11128             "subq    rsp, #8\n\t"
11129             "movss   [rsp], $src\n\t"
11130             "call    f2i_fixup\n\t"
11131             "popq    $dst\n"
11132     "done:   "%}
        // Opcode bytes F3 0F 2C; the f2i_fixup encoding class is defined
        // elsewhere and presumably emits everything after the convert.
11133   opcode(0xF3, 0x0F, 0x2C);
11134   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11135              f2i_fixup(dst, src));
11136   ins_pipe(pipe_slow);
11137 %}
11138 
11139 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11140 %{
        // Float-to-long conversion (ConvF2L); clobbers the flags (KILL cr).
11141   match(Set dst (ConvF2L src));
11142   effect(KILL cr);
11143 
        // Listing: a truncating 64-bit convert, then a compare of the result
        // with 0x8000000000000000 (the value x86 cvtt* returns for
        // NaN/out-of-range input). Only on a match is the slow path taken:
        // src is spilled, f2l_fixup is called, and the result is popped.
11144   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11145             "cmpq    $dst, [0x8000000000000000]\n\t"
11146             "jne,s   done\n\t"
11147             "subq    rsp, #8\n\t"
11148             "movss   [rsp], $src\n\t"
11149             "call    f2l_fixup\n\t"
11150             "popq    $dst\n"
11151     "done:   "%}
        // Same opcode bytes as the int form (F3 0F 2C) but with the _wide REX
        // class, presumably setting REX.W for the 64-bit destination.
11152   opcode(0xF3, 0x0F, 0x2C);
11153   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11154              f2l_fixup(dst, src));
11155   ins_pipe(pipe_slow);
11156 %}
11157 
11158 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11159 %{
        // Double-to-int conversion (ConvD2I); clobbers the flags (KILL cr).
11160   match(Set dst (ConvD2I src));
11161   effect(KILL cr);
11162 
        // Listing: a truncating convert, then a compare of the result with
        // 0x80000000 (the value x86 cvtt* returns for NaN/out-of-range
        // input). Only on a match is the slow path taken: src is spilled to
        // the stack, d2i_fixup is called, and the result is popped into dst.
11163   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11164             "cmpl    $dst, #0x80000000\n\t"
11165             "jne,s   done\n\t"
11166             "subq    rsp, #8\n\t"
11167             "movsd   [rsp], $src\n\t"
11168             "call    d2i_fixup\n\t"
11169             "popq    $dst\n"
11170     "done:   "%}
        // Opcode bytes F2 0F 2C; the d2i_fixup encoding class is defined
        // elsewhere and presumably emits everything after the convert.
11171   opcode(0xF2, 0x0F, 0x2C);
11172   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11173              d2i_fixup(dst, src));
11174   ins_pipe(pipe_slow);
11175 %}
11176 
11177 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11178 %{
        // Double-to-long conversion (ConvD2L); clobbers the flags (KILL cr).
11179   match(Set dst (ConvD2L src));
11180   effect(KILL cr);
11181 
        // Listing: a truncating 64-bit convert, then a compare of the result
        // with 0x8000000000000000 (the value x86 cvtt* returns for
        // NaN/out-of-range input). Only on a match is the slow path taken:
        // src is spilled, d2l_fixup is called, and the result is popped.
11182   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11183             "cmpq    $dst, [0x8000000000000000]\n\t"
11184             "jne,s   done\n\t"
11185             "subq    rsp, #8\n\t"
11186             "movsd   [rsp], $src\n\t"
11187             "call    d2l_fixup\n\t"
11188             "popq    $dst\n"
11189     "done:   "%}
        // Same opcode bytes as the int form (F2 0F 2C) but with the _wide REX
        // class, presumably setting REX.W for the 64-bit destination.
11190   opcode(0xF2, 0x0F, 0x2C);
11191   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11192              d2l_fixup(dst, src));
11193   ins_pipe(pipe_slow);
11194 %}
11195 
11196 instruct convI2F_reg_reg(regF dst, rRegI src)
11197 %{
        // Int-to-float conversion, register form (ConvI2F). Selected only
        // when UseXmmI2F is off; convXI2F_reg covers the flag-on case.
        // Opcode bytes F3 0F 2A (cvtsi2ssl per the format).
11198   predicate(!UseXmmI2F);
11199   match(Set dst (ConvI2F src));
11200 
11201   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11202   opcode(0xF3, 0x0F, 0x2A);
11203   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11204   ins_pipe(pipe_slow); // XXX
11205 %}
11206 
11207 instruct convI2F_reg_mem(regF dst, memory src)
11208 %{
        // Int-to-float conversion with the int read from memory: the LoadI is
        // folded into the cvtsi2ssl shown in the format. Unlike the register
        // form, this rule carries no predicate.
11209   match(Set dst (ConvI2F (LoadI src)));
11210 
11211   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11212   opcode(0xF3, 0x0F, 0x2A);
11213   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11214   ins_pipe(pipe_slow); // XXX
11215 %}
11216 
11217 instruct convI2D_reg_reg(regD dst, rRegI src)
11218 %{
        // Int-to-double conversion, register form (ConvI2D). Selected only
        // when UseXmmI2D is off; convXI2D_reg covers the flag-on case.
        // Opcode bytes F2 0F 2A (cvtsi2sdl per the format).
11219   predicate(!UseXmmI2D);
11220   match(Set dst (ConvI2D src));
11221 
11222   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11223   opcode(0xF2, 0x0F, 0x2A);
11224   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11225   ins_pipe(pipe_slow); // XXX
11226 %}
11227 
11228 instruct convI2D_reg_mem(regD dst, memory src)
11229 %{
        // Int-to-double conversion with the int read from memory: the LoadI
        // is folded into the cvtsi2sdl shown in the format. Unlike the
        // register form, this rule carries no predicate.
11230   match(Set dst (ConvI2D (LoadI src)));
11231 
11232   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11233   opcode(0xF2, 0x0F, 0x2A);
11234   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11235   ins_pipe(pipe_slow); // XXX
11236 %}
11237 
11238 instruct convXI2F_reg(regF dst, rRegI src)
11239 %{
        // Int-to-float conversion used when UseXmmI2F is on (the flag-off
        // case is convI2F_reg_reg). Two steps: copy the int into the XMM
        // register, then convert it there.
11240   predicate(UseXmmI2F);
11241   match(Set dst (ConvI2F src));
11242 
11243   format %{ "movdl $dst, $src\n\t"
11244             "cvtdq2psl $dst, $dst\t# i2f" %}
        // Emitted through assembler calls rather than raw opcode bytes.
11245   ins_encode %{
11246     __ movdl($dst$$XMMRegister, $src$$Register);
11247     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11248   %}
11249   ins_pipe(pipe_slow); // XXX
11250 %}
11251 
11252 instruct convXI2D_reg(regD dst, rRegI src)
11253 %{
        // Int-to-double conversion used when UseXmmI2D is on (the flag-off
        // case is convI2D_reg_reg). Two steps: copy the int into the XMM
        // register, then convert it there.
11254   predicate(UseXmmI2D);
11255   match(Set dst (ConvI2D src));
11256 
11257   format %{ "movdl $dst, $src\n\t"
11258             "cvtdq2pdl $dst, $dst\t# i2d" %}
        // Emitted through assembler calls rather than raw opcode bytes.
11259   ins_encode %{
11260     __ movdl($dst$$XMMRegister, $src$$Register);
11261     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11262   %}
11263   ins_pipe(pipe_slow); // XXX
11264 %}
11265 
11266 instruct convL2F_reg_reg(regF dst, rRegL src)
11267 %{
        // Long-to-float conversion, register form (ConvL2F).
        // Opcode bytes F3 0F 2A with the _wide REX class (cvtsi2ssq per the
        // format; presumably REX.W selects the 64-bit source).
11268   match(Set dst (ConvL2F src));
11269 
11270   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11271   opcode(0xF3, 0x0F, 0x2A);
11272   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11273   ins_pipe(pipe_slow); // XXX
11274 %}
11275 
11276 instruct convL2F_reg_mem(regF dst, memory src)
11277 %{
        // Long-to-float conversion with the long read from memory: the LoadL
        // is folded into the cvtsi2ssq shown in the format (_wide REX class).
11278   match(Set dst (ConvL2F (LoadL src)));
11279 
11280   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11281   opcode(0xF3, 0x0F, 0x2A);
11282   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11283   ins_pipe(pipe_slow); // XXX
11284 %}
11285 
11286 instruct convL2D_reg_reg(regD dst, rRegL src)
11287 %{
        // Long-to-double conversion, register form (ConvL2D).
        // Opcode bytes F2 0F 2A with the _wide REX class (cvtsi2sdq per the
        // format; presumably REX.W selects the 64-bit source).
11288   match(Set dst (ConvL2D src));
11289 
11290   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11291   opcode(0xF2, 0x0F, 0x2A);
11292   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11293   ins_pipe(pipe_slow); // XXX
11294 %}
11295 
11296 instruct convL2D_reg_mem(regD dst, memory src)
11297 %{
        // Long-to-double conversion with the long read from memory: the LoadL
        // is folded into the cvtsi2sdq shown in the format (_wide REX class).
11298   match(Set dst (ConvL2D (LoadL src)));
11299 
11300   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11301   opcode(0xF2, 0x0F, 0x2A);
11302   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11303   ins_pipe(pipe_slow); // XXX
11304 %}
11305 
11306 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11307 %{
        // Int-to-long conversion (ConvI2L): a sign-extending movslq from the
        // int register into the long register, emitted via the assembler.
11308   match(Set dst (ConvI2L src));
11309 
11310   ins_cost(125);
11311   format %{ "movslq  $dst, $src\t# i2l" %}
11312   ins_encode %{
11313     __ movslq($dst$$Register, $src$$Register);
11314   %}
11315   ins_pipe(ialu_reg_reg);
11316 %}
11317 
11318 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11319 // %{
11320 //   match(Set dst (ConvI2L src));
11321 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11322 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11323 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11324 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11325 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11326 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11327 
11328 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11329 //   ins_encode(enc_copy(dst, src));
11330 // //   opcode(0x63); // needs REX.W
11331 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11332 //   ins_pipe(ialu_reg_reg);
11333 // %}
11334 
11335 // Zero-extend convert int to long
11336 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11337 %{
        // Matches ConvI2L followed by an AndL with an immL_32bits mask
        // (operand class defined elsewhere; presumably 0xFFFFFFFF -- confirm)
        // and replaces both with a single 32-bit movl, which on x86-64 clears
        // the upper half of the destination.
11338   match(Set dst (AndL (ConvI2L src) mask));
11339 
11340   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
        // NOTE(review): this uses enc_copy, whereas zerox_long_reg_reg uses
        // enc_copy_always; both are defined elsewhere -- confirm the difference.
11341   ins_encode(enc_copy(dst, src));
11342   ins_pipe(ialu_reg_reg);
11343 %}
11344 
11345 // Zero-extend convert int to long
11346 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11347 %{
        // Matches LoadI -> ConvI2L -> AndL with an immL_32bits mask (operand
        // class defined elsewhere; presumably 0xFFFFFFFF -- confirm) and
        // replaces the chain with one 32-bit load (opcode 0x8B, movl per the
        // format), which on x86-64 clears the upper half of dst.
11348   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11349 
11350   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11351   opcode(0x8B);
11352   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11353   ins_pipe(ialu_reg_mem);
11354 %}
11355 
11356 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11357 %{
        // Matches AndL of a long register with an immL_32bits mask (operand
        // class defined elsewhere; presumably 0xFFFFFFFF -- confirm) and
        // implements it as a 32-bit movl, which on x86-64 clears the upper
        // half of dst.
11358   match(Set dst (AndL src mask));
11359 
11360   format %{ "movl    $dst, $src\t# zero-extend long" %}
        // enc_copy_always is defined elsewhere; by its name the move is
        // presumably emitted even when dst and src are the same register.
11361   ins_encode(enc_copy_always(dst, src));
11362   ins_pipe(ialu_reg_reg);
11363 %}
11364 
11365 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11366 %{
        // Long-to-int conversion (ConvL2I), shown in the format as a 32-bit
        // movl from the long register into the int register.
        // enc_copy_always is defined elsewhere in this file.
11367   match(Set dst (ConvL2I src));
11368 
11369   format %{ "movl    $dst, $src\t# l2i" %}
11370   ins_encode(enc_copy_always(dst, src));
11371   ins_pipe(ialu_reg_reg);
11372 %}
11373 
11374 
11375 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot to an int register. The format
        // shows a plain 32-bit movl load (opcode 0x8B), so the bits are
        // copied without any numeric conversion.
11376   match(Set dst (MoveF2I src));
11377   effect(DEF dst, USE src);
11378 
11379   ins_cost(125);
11380   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11381   opcode(0x8B);
11382   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11383   ins_pipe(ialu_reg_mem);
11384 %}
11385 
11386 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot to a float register. The format
        // shows a movss load (opcode bytes F3 0F 10), so the bits are copied
        // without any numeric conversion.
11387   match(Set dst (MoveI2F src));
11388   effect(DEF dst, USE src);
11389 
11390   ins_cost(125);
11391   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11392   opcode(0xF3, 0x0F, 0x10);
11393   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11394   ins_pipe(pipe_slow);
11395 %}
11396 
11397 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot to a long register. The format
        // shows a plain 64-bit movq load (opcode 0x8B with the _wide REX
        // class), so the bits are copied without any numeric conversion.
11398   match(Set dst (MoveD2L src));
11399   effect(DEF dst, USE src);
11400 
11401   ins_cost(125);
11402   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11403   opcode(0x8B);
11404   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11405   ins_pipe(ialu_reg_mem);
11406 %}
11407 
11408 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot to a double register, used when
        // UseXmmLoadAndClearUpper is off (MoveL2D_stack_reg covers the
        // flag-on case). Loads with movlpd, opcode bytes 66 0F 12.
11409   predicate(!UseXmmLoadAndClearUpper);
11410   match(Set dst (MoveL2D src));
11411   effect(DEF dst, USE src);
11412 
11413   ins_cost(125);
11414   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11415   opcode(0x66, 0x0F, 0x12);
11416   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11417   ins_pipe(pipe_slow);
11418 %}
11419 
11420 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot to a double register, used when
        // UseXmmLoadAndClearUpper is on (MoveL2D_stack_reg_partial covers the
        // flag-off case). Loads with movsd, opcode bytes F2 0F 10.
11421   predicate(UseXmmLoadAndClearUpper);
11422   match(Set dst (MoveL2D src));
11423   effect(DEF dst, USE src);
11424 
11425   ins_cost(125);
11426   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11427   opcode(0xF2, 0x0F, 0x10);
11428   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11429   ins_pipe(pipe_slow);
11430 %}
11431 
11432 
11433 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I from a float register to an int stack slot: a movss store
        // (opcode bytes F3 0F 11). The encoding classes take (src, dst)
        // because the register operand is src and the memory operand is dst.
11434   match(Set dst (MoveF2I src));
11435   effect(DEF dst, USE src);
11436 
11437   ins_cost(95); // XXX
11438   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11439   opcode(0xF3, 0x0F, 0x11);
11440   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11441   ins_pipe(pipe_slow);
11442 %}
11443 
11444 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register to a float stack slot: a plain 32-bit
        // movl store (opcode 0x89). The encoding classes take (src, dst)
        // because the register operand is src and the memory operand is dst.
11445   match(Set dst (MoveI2F src));
11446   effect(DEF dst, USE src);
11447 
11448   ins_cost(100);
11449   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11450   opcode(0x89);
11451   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11452   ins_pipe( ialu_mem_reg );
11453 %}
11454 
11455 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L from a double register to a long stack slot: a movsd store
        // (opcode bytes F2 0F 11). The encoding classes take (src, dst)
        // because the register operand is src and the memory operand is dst.
11456   match(Set dst (MoveD2L src));
11457   effect(DEF dst, USE src);
11458 
11459   ins_cost(95); // XXX
        // The trailing tag identifies the rule in disassembly listings, so it
        // must name this rule (MoveD2L), like the sibling Move*_reg_stack rules.
11460   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11461   opcode(0xF2, 0x0F, 0x11);
11462   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11463   ins_pipe(pipe_slow);
11464 %}
11465 
11466 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D from a long register to a double stack slot: a plain 64-bit
        // movq store (opcode 0x89 with the _wide REX class). The encoding
        // classes take (src, dst) because the register is src, memory is dst.
11467   match(Set dst (MoveL2D src));
11468   effect(DEF dst, USE src);
11469 
11470   ins_cost(100);
11471   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11472   opcode(0x89);
11473   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11474   ins_pipe(ialu_mem_reg);
11475 %}
11476 
11477 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to a general register via the
        // assembler's movdl. Cost 85 is lower than the stack-based MoveF2I
        // rules in this file (95 and 125).
11478   match(Set dst (MoveF2I src));
11479   effect(DEF dst, USE src);
11480   ins_cost(85);
11481   format %{ "movd    $dst,$src\t# MoveF2I" %}
11482   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11483   ins_pipe( pipe_slow );
11484 %}
11485 
11486 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L directly from an XMM register to a general register via the
        // assembler's movdq (the format prints it as "movd"). Cost 85 is lower
        // than the stack-based MoveD2L rules in this file (95 and 125).
11487   match(Set dst (MoveD2L src));
11488   effect(DEF dst, USE src);
11489   ins_cost(85);
11490   format %{ "movd    $dst,$src\t# MoveD2L" %}
11491   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11492   ins_pipe( pipe_slow );
11493 %}
11494 
11495 // The next instructions have long latency and use Int unit. Set high cost.
11496 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F directly from a general register to an XMM register via the
        // assembler's movdl. The cost is set to 300, well above the
        // stack-based MoveI2F rules in this file (100 and 125).
11497   match(Set dst (MoveI2F src));
11498   effect(DEF dst, USE src);
11499   ins_cost(300);
11500   format %{ "movd    $dst,$src\t# MoveI2F" %}
11501   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11502   ins_pipe( pipe_slow );
11503 %}
11504 
11505 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D directly from a general register to an XMM register via the
        // assembler's movdq (the format prints it as "movd"). The cost is set
        // to 300, well above the stack-based MoveL2D rules (100 and 125).
11506   match(Set dst (MoveL2D src));
11507   effect(DEF dst, USE src);
11508   ins_cost(300);
11509   format %{ "movd    $dst,$src\t# MoveL2D" %}
11510   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11511   ins_pipe( pipe_slow );
11512 %}
11513 
11514 // Replicate scalar to packed byte (1 byte) values in xmm
11515 instruct Repl8B_reg(regD dst, regD src) %{
        // Source already lives in an XMM register. The format lists a copy, a
        // byte unpack and a word shuffle; all of it is emitted by the single
        // pshufd_8x8 encoding class (defined elsewhere).
11516   match(Set dst (Replicate8B src));
11517   format %{ "MOVDQA  $dst,$src\n\t"
11518             "PUNPCKLBW $dst,$dst\n\t"
11519             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11520   ins_encode( pshufd_8x8(dst, src));
11521   ins_pipe( pipe_slow );
11522 %}
11523 
11524 // Replicate scalar to packed byte (1 byte) values in xmm
11525 instruct Repl8B_rRegI(regD dst, rRegI src) %{
        // Source is a general register: mov_i2x first moves it into dst, then
        // pshufd_8x8 is applied with dst as both operands. Both encoding
        // classes are defined elsewhere.
11526   match(Set dst (Replicate8B src));
11527   format %{ "MOVD    $dst,$src\n\t"
11528             "PUNPCKLBW $dst,$dst\n\t"
11529             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11530   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 // Replicate scalar zero to packed byte (1 byte) values in xmm
11535 instruct Repl8B_immI0(regD dst, immI0 zero) %{
        // Replicating the constant zero needs no shuffle: dst is cleared by
        // xor-ing it with itself (PXOR per the format).
11536   match(Set dst (Replicate8B zero));
11537   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11538   ins_encode( pxor(dst, dst));
11539   ins_pipe( fpu_reg_reg );
11540 %}
11541 
11542 // Replicate scalar to packed short (2 byte) values in xmm
11543 instruct Repl4S_reg(regD dst, regD src) %{
        // Source already lives in an XMM register; a single word shuffle with
        // selector 0x00 (per the format) via pshufd_4x16 (defined elsewhere).
11544   match(Set dst (Replicate4S src));
11545   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11546   ins_encode( pshufd_4x16(dst, src));
11547   ins_pipe( fpu_reg_reg );
11548 %}
11549 
11550 // Replicate scalar to packed short (2 byte) values in xmm
11551 instruct Repl4S_rRegI(regD dst, rRegI src) %{
        // Source is a general register: mov_i2x first moves it into dst, then
        // pshufd_4x16 is applied with dst as both operands. Both encoding
        // classes are defined elsewhere.
11552   match(Set dst (Replicate4S src));
11553   format %{ "MOVD    $dst,$src\n\t"
11554             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11555   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11556   ins_pipe( fpu_reg_reg );
11557 %}
11558 
11559 // Replicate scalar zero to packed short (2 byte) values in xmm
11560 instruct Repl4S_immI0(regD dst, immI0 zero) %{
        // Replicating the constant zero needs no shuffle: dst is cleared by
        // xor-ing it with itself (PXOR per the format).
11561   match(Set dst (Replicate4S zero));
11562   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11563   ins_encode( pxor(dst, dst));
11564   ins_pipe( fpu_reg_reg );
11565 %}
11566 
11567 // Replicate scalar to packed char (2 byte) values in xmm
11568 instruct Repl4C_reg(regD dst, regD src) %{
        // Source already lives in an XMM register; same pshufd_4x16 encoding
        // as Repl4S_reg, matched against Replicate4C instead of Replicate4S.
11569   match(Set dst (Replicate4C src));
11570   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11571   ins_encode( pshufd_4x16(dst, src));
11572   ins_pipe( fpu_reg_reg );
11573 %}
11574 
11575 // Replicate scalar to packed char (2 byte) values in xmm
11576 instruct Repl4C_rRegI(regD dst, rRegI src) %{
        // Source is a general register: mov_i2x first moves it into dst, then
        // pshufd_4x16 is applied with dst as both operands. Both encoding
        // classes are defined elsewhere.
11577   match(Set dst (Replicate4C src));
11578   format %{ "MOVD    $dst,$src\n\t"
11579             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11580   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11581   ins_pipe( fpu_reg_reg );
11582 %}
11583 
11584 // Replicate scalar zero to packed char (2 byte) values in xmm
11585 instruct Repl4C_immI0(regD dst, immI0 zero) %{
        // Replicating the constant zero needs no shuffle: dst is cleared by
        // xor-ing it with itself (PXOR per the format).
11586   match(Set dst (Replicate4C zero));
11587   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11588   ins_encode( pxor(dst, dst));
11589   ins_pipe( fpu_reg_reg );
11590 %}
11591 
11592 // Replicate scalar to packed integer (4 byte) values in xmm
11593 instruct Repl2I_reg(regD dst, regD src) %{
        // Source already lives in an XMM register; one pshufd with shuffle
        // immediate 0x00 (encoding class defined elsewhere).
11594   match(Set dst (Replicate2I src));
11595   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11596   ins_encode( pshufd(dst, src, 0x00));
11597   ins_pipe( fpu_reg_reg );
11598 %}
11599 
11600 // Replicate scalar to packed integer (4 byte) values in xmm
11601 instruct Repl2I_rRegI(regD dst, rRegI src) %{
        // Source is a general register: mov_i2x first moves it into dst, then
        // pshufd with immediate 0x00 is applied with dst as both operands.
        // Both encoding classes are defined elsewhere.
11602   match(Set dst (Replicate2I src));
11603   format %{ "MOVD   $dst,$src\n\t"
11604             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11605   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11606   ins_pipe( fpu_reg_reg );
11607 %}
11608 
11609 // Replicate scalar zero to packed integer (4 byte) values in xmm
// Matches Replicate2I of an immI0 operand and encodes a single pxor(dst, dst);
// the 'zero' operand is only used for matching.
11610 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11611   match(Set dst (Replicate2I zero));
11612   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11613   ins_encode( pxor(dst, dst));
11614   ins_pipe( fpu_reg_reg );
11615 %}
11616 
11617 // Replicate scalar to packed single precision floating point values in xmm
// regD-source form: one pshufd with immediate 0xe0. Repl2F_regF is the same
// rule with a regF source operand.
11618 instruct Repl2F_reg(regD dst, regD src) %{
11619   match(Set dst (Replicate2F src));
11620   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11621   ins_encode( pshufd(dst, src, 0xe0));
11622   ins_pipe( fpu_reg_reg );
11623 %}
11624 
11625 // Replicate scalar to packed single precision floating point values in xmm
// regF-source form: identical match, format and encoding to Repl2F_reg; only
// the operand class of src differs.
11626 instruct Repl2F_regF(regD dst, regF src) %{
11627   match(Set dst (Replicate2F src));
11628   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11629   ins_encode( pshufd(dst, src, 0xe0));
11630   ins_pipe( fpu_reg_reg );
11631 %}
11632 
11633 // Replicate scalar zero to packed single precision floating point values in xmm
// Matches Replicate2F of an immF0 operand and encodes a single pxor(dst, dst);
// the 'zero' operand is only used for matching.
11634 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11635   match(Set dst (Replicate2F zero));
11636   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11637   ins_encode( pxor(dst, dst));
11638   ins_pipe( fpu_reg_reg );
11639 %}
11640 
11641 
11642 // =======================================================================
11643 // fast clearing of an array
// Matches ClearArray with the count pinned to RCX, the base pointer to RDI and
// RAX reserved as the zero source. cnt and base are USE_KILL, zero and cr are
// KILL, in line with the emitted "xorl rax,rax" + "rep stosq" sequence, which
// advances RDI and counts RCX down while storing RAX.
11644 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11645                   rFlagsReg cr)
11646 %{
11647   match(Set dummy (ClearArray cnt base));
11648   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11649 
11650   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11651             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11652   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11653              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11654   ins_pipe(pipe_slow);
11655 %}
11656 
// String compare: matches StrComp over two (string, count) pairs and emits
// the string_compare assembler routine. All four inputs are pinned to fixed
// registers (RDI, RCX, RSI, RBX) and declared USE_KILL; the result is pinned
// to RAX and two XMM registers are reserved as TEMP.
// Note that the call passes both strings first and then both counts, which
// differs from the operand declaration order.
11657 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11658                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11659 %{
11660   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11661   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11662 
11663   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11664   ins_encode %{
11665     __ string_compare($str1$$Register, $str2$$Register,
11666                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11667                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11668   %}
11669   ins_pipe( pipe_slow );
11670 %}
11671 
// String indexOf: matches StrIndexOf over two (string, count) pairs and emits
// the string_indexof assembler routine. Only eligible when UseSSE42Intrinsics
// is set (see predicate). Inputs are pinned to RDI, RDX, RSI and RAX and are
// USE_KILL; the result is pinned to RBX. tmp1 is an XMM TEMP, while tmp2 is
// pinned to RCX and declared KILL.
11672 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11673                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11674 %{
11675   predicate(UseSSE42Intrinsics);
11676   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11677   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11678 
11679   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11680   ins_encode %{
11681     __ string_indexof($str1$$Register, $str2$$Register,
11682                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11683                       $tmp1$$XMMRegister, $tmp2$$Register);
11684   %}
11685   ins_pipe( pipe_slow );
11686 %}
11687 
11688 // fast string equals
// Matches StrEquals on two strings plus a shared count and emits
// char_arrays_equals with its first argument set to false (array_equals below
// emits the same routine with true). str1, str2 and cnt are USE_KILL; tmp3
// (RBX) is KILL and the two XMM temporaries are TEMP.
11689 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11690                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11691 %{
11692   match(Set result (StrEquals (Binary str1 str2) cnt));
11693   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11694 
11695   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11696   ins_encode %{
11697     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11698                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11699                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11700   %}
11701   ins_pipe( pipe_slow );
11702 %}
11703 
11704 // fast array equals
// Matches AryEq on two array pointers and emits char_arrays_equals with its
// first argument set to true. No count operand is matched here: tmp3 (RCX) is
// passed in the argument position where string_equals passes its cnt, and is
// declared KILL together with tmp4 (RBX).
11705 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11706                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11707 %{
11708   match(Set result (AryEq ary1 ary2));
11709   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11710   //ins_cost(300);
11711 
11712   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11713   ins_encode %{
11714     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11715                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11716                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11717   %}
11718   ins_pipe( pipe_slow );
11719 %}
11720 
11721 //----------Control Flow Instructions------------------------------------------
11722 // Signed compare Instructions
11723 
11724 // XXX more variants!!
// Signed 32-bit compare of two registers: matches CmpI and emits opcode 3B /r
// (printed as cmpl). Unlike its siblings, this rule spells out its effects:
// cr is DEF and both operands are USE. It is also instantiated by the
// minI_rReg / maxI_rReg expand rules.
11725 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11726 %{
11727   match(Set cr (CmpI op1 op2));
11728   effect(DEF cr, USE op1, USE op2);
11729 
11730   format %{ "cmpl    $op1, $op2" %}
11731   opcode(0x3B);  /* Opcode 3B /r */
11732   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11733   ins_pipe(ialu_cr_reg_reg);
11734 %}
11735 
// Signed 32-bit compare of a register against an immI constant (opcode 81 /7).
// NOTE(review): OpcSErm / Con8or32 presumably select a sign-extended 8-bit
// immediate form when the constant fits -- confirm in the enc_class definitions.
11736 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11737 %{
11738   match(Set cr (CmpI op1 op2));
11739 
11740   format %{ "cmpl    $op1, $op2" %}
11741   opcode(0x81, 0x07); /* Opcode 81 /7 */
11742   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11743   ins_pipe(ialu_cr_reg_imm);
11744 %}
11745 
// Signed 32-bit compare of a register against a memory operand: the LoadI is
// folded into the compare (opcode 3B /r with a memory r/m operand). Carries an
// explicit ins_cost(500), which the original author flagged with XXX.
11746 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11747 %{
11748   match(Set cr (CmpI op1 (LoadI op2)));
11749 
11750   ins_cost(500); // XXX
11751   format %{ "cmpl    $op1, $op2" %}
11752   opcode(0x3B); /* Opcode 3B /r */
11753   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11754   ins_pipe(ialu_cr_reg_mem);
11755 %}
11756 
// 32-bit compare against zero: matches CmpI of a register with the immI0
// operand and emits opcode 85 with src as both operands (printed as
// "testl $src, $src"); the zero operand is not encoded.
11757 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11758 %{
11759   match(Set cr (CmpI src zero));
11760 
11761   format %{ "testl   $src, $src" %}
11762   opcode(0x85);
11763   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11764   ins_pipe(ialu_cr_reg_imm);
11765 %}
11766 
// Folds "(src & con) compared with zero" into one instruction: matches
// CmpI (AndI src con) zero and emits opcode F7 /0 with a 32-bit immediate
// (printed as "testl $src, $con").
11767 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11768 %{
11769   match(Set cr (CmpI (AndI src con) zero));
11770 
11771   format %{ "testl   $src, $con" %}
11772   opcode(0xF7, 0x00);
11773   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11774   ins_pipe(ialu_cr_reg_imm);
11775 %}
11776 
// Folds "(src & load(mem)) compared with zero" into one instruction: matches
// CmpI (AndI src (LoadI mem)) zero and emits opcode 85 with a memory operand
// (printed as "testl $src, $mem").
11777 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11778 %{
11779   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11780 
11781   format %{ "testl   $src, $mem" %}
11782   opcode(0x85);
11783   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11784   ins_pipe(ialu_cr_reg_mem);
11785 %}
11786 
11787 // Unsigned compare Instructions; really, same as signed except they
11788 // produce an rFlagsRegU instead of rFlagsReg.
// Register-register form: matches CmpU and uses the same opcode (3B /r) and
// encoding classes as compI_rReg; only the flags operand class differs.
11789 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11790 %{
11791   match(Set cr (CmpU op1 op2));
11792 
11793   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11794   opcode(0x3B); /* Opcode 3B /r */
11795   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11796   ins_pipe(ialu_cr_reg_reg);
11797 %}
11798 
// Unsigned 32-bit compare of a register against an immI constant; same opcode
// (81 /7) and encoding classes as compI_rReg_imm, but defines rFlagsRegU.
11799 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11800 %{
11801   match(Set cr (CmpU op1 op2));
11802 
11803   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11804   opcode(0x81,0x07); /* Opcode 81 /7 */
11805   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11806   ins_pipe(ialu_cr_reg_imm);
11807 %}
11808 
// Unsigned 32-bit compare of a register against a memory operand (LoadI folded
// into the compare); same encoding as compI_rReg_mem, but defines rFlagsRegU.
// The ins_cost(500) is flagged XXX by the original author.
11809 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11810 %{
11811   match(Set cr (CmpU op1 (LoadI op2)));
11812 
11813   ins_cost(500); // XXX
11814   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11815   opcode(0x3B); /* Opcode 3B /r */
11816   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11817   ins_pipe(ialu_cr_reg_mem);
11818 %}
11819 
11820 // // // Cisc-spilled version of cmpU_rReg
11821 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11822 // //%{
11823 // //  match(Set cr (CmpU (LoadI op1) op2));
11824 // //
11825 // //  format %{ "CMPu   $op1,$op2" %}
11826 // //  ins_cost(500);
11827 // //  opcode(0x39);  /* Opcode 39 /r */
11828 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11829 // //%}
11830 
// Unsigned 32-bit compare against zero: matches CmpU of a register with immI0
// and emits opcode 85 with src as both operands, like testI_reg, but defines
// rFlagsRegU.
11831 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11832 %{
11833   match(Set cr (CmpU src zero));
11834 
11835   format %{ "testl  $src, $src\t# unsigned" %}
11836   opcode(0x85);
11837   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11838   ins_pipe(ialu_cr_reg_imm);
11839 %}
11840 
// Pointer compare of two registers: matches CmpP, defines unsigned flags
// (rFlagsRegU) and uses the _wide REX encoding class (printed as cmpq).
11841 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11842 %{
11843   match(Set cr (CmpP op1 op2));
11844 
11845   format %{ "cmpq    $op1, $op2\t# ptr" %}
11846   opcode(0x3B); /* Opcode 3B /r */
11847   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11848   ins_pipe(ialu_cr_reg_reg);
11849 %}
11850 
// Pointer compare of a register against a pointer loaded from memory (LoadP
// folded into the compare). Carries ins_cost(500), flagged XXX; compP_mem_rReg
// below matches the same tree without a cost but behind a raw-pointer predicate.
11851 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11852 %{
11853   match(Set cr (CmpP op1 (LoadP op2)));
11854 
11855   ins_cost(500); // XXX
11856   format %{ "cmpq    $op1, $op2\t# ptr" %}
11857   opcode(0x3B); /* Opcode 3B /r */
11858   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11859   ins_pipe(ialu_cr_reg_mem);
11860 %}
11861 
11862 // // // Cisc-spilled version of cmpP_rReg
11863 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11864 // //%{
11865 // //  match(Set cr (CmpP (LoadP op1) op2));
11866 // //
11867 // //  format %{ "CMPu   $op1,$op2" %}
11868 // //  ins_cost(500);
11869 // //  opcode(0x39);  /* Opcode 39 /r */
11870 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11871 // //%}
11872 
11873 // XXX this is generalized by compP_rReg_mem???
11874 // Compare raw pointer (used in out-of-heap check).
11875 // Only works because non-oop pointers must be raw pointers
11876 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but without an explicit
// ins_cost and guarded by a predicate that rejects oop-typed inputs.
// NOTE(review): n->in(2)->in(2) is presumably the address input of the LoadP
// feeding the CmpP -- confirm against the ideal-graph node layout.
11877 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11878 %{
11879   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11880   match(Set cr (CmpP op1 (LoadP op2)));
11881 
11882   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11883   opcode(0x3B); /* Opcode 3B /r */
11884   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11885   ins_pipe(ialu_cr_reg_mem);
11886 %}
11887 
11888 // This will generate a signed flags result. This should be OK since
11889 // any compare to a zero should be eq/neq.
// Pointer null check on a register: matches CmpP against immP0 and emits
// opcode 85 with src as both operands, using the _wide REX encoding class
// (printed as testq).
11890 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11891 %{
11892   match(Set cr (CmpP src zero));
11893 
11894   format %{ "testq   $src, $src\t# ptr" %}
11895   opcode(0x85);
11896   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11897   ins_pipe(ialu_cr_reg_imm);
11898 %}
11899 
11900 // This will generate a signed flags result. This should be OK since
11901 // any compare to a zero should be eq/neq.
// Pointer null check directly on memory: matches CmpP (LoadP op) against
// immP0 and emits opcode F7 /0 with a 32-bit immediate of 0xFFFFFFFF.
// The predicate is the exact complement of testP_mem_reg0's, so the two rules
// never apply at the same time.
// NOTE(review): the format prints a 64-bit all-ones mask while Con_d32 emits
// 0xFFFFFFFF; presumably the REX.W form sign-extends the immediate -- confirm.
11902 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11903 %{
11904   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11905   match(Set cr (CmpP (LoadP op) zero));
11906 
11907   ins_cost(500); // XXX
11908   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11909   opcode(0xF7); /* Opcode F7 /0 */
11910   ins_encode(REX_mem_wide(op),
11911              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11912   ins_pipe(ialu_cr_reg_imm);
11913 %}
11914 
// Pointer null check directly on memory for the case where compressed oops
// are in use and the narrow-oop base is NULL (see predicate). Instead of a
// test with an immediate it emits cmpq(r12, mem); the format string notes that
// R12 (the heap-base register) holds zero in this configuration.
11915 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11916 %{
11917   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11918   match(Set cr (CmpP (LoadP mem) zero));
11919 
11920   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11921   ins_encode %{
11922     __ cmpq(r12, $mem$$Address);
11923   %}
11924   ins_pipe(ialu_cr_reg_mem);
11925 %}
11926 
// Compressed-pointer compare of two registers: matches CmpN, defines unsigned
// flags (rFlagsRegU) and emits a 32-bit cmpl through the assembler.
11927 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11928 %{
11929   match(Set cr (CmpN op1 op2));
11930 
11931   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11932   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11933   ins_pipe(ialu_cr_reg_reg);
11934 %}
11935 
// Compressed-pointer compare of a register against a narrow oop loaded from
// memory: the LoadN is folded into a 32-bit cmpl with an address operand.
11936 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11937 %{
11938   match(Set cr (CmpN src (LoadN mem)));
11939 
11940   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11941   ins_encode %{
11942     __ cmpl($src$$Register, $mem$$Address);
11943   %}
11944   ins_pipe(ialu_cr_reg_mem);
11945 %}
11946 
// Compressed-pointer compare of a register against an immN constant; the
// constant is cast to jobject and handed to cmp_narrow_oop.
11947 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11948   match(Set cr (CmpN op1 op2));
11949 
11950   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11951   ins_encode %{
11952     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11953   %}
11954   ins_pipe(ialu_cr_reg_imm);
11955 %}
11956 
// Compressed-pointer compare of an immN constant against a narrow oop in
// memory. The match tree has the constant as the first CmpN input, while the
// emitted cmp_narrow_oop call (and the format) put the memory operand first.
// NOTE(review): the swapped operand order is presumably fine because the
// result is only consumed by eq/ne tests -- confirm against the users.
11957 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11958 %{
11959   match(Set cr (CmpN src (LoadN mem)));
11960 
11961   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11962   ins_encode %{
11963     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11964   %}
11965   ins_pipe(ialu_cr_reg_mem);
11966 %}
11967 
// Compressed-pointer null check on a register: matches CmpN against immN0 and
// emits a 32-bit testl of src with itself.
11968 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11969   match(Set cr (CmpN src zero));
11970 
11971   format %{ "testl   $src, $src\t# compressed ptr" %}
11972   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11973   ins_pipe(ialu_cr_reg_imm);
11974 %}
11975 
// Compressed-pointer null check directly on memory, used when the narrow-oop
// base is non-NULL (see predicate; testN_mem_reg0 covers the NULL-base case).
//
// The format string has always advertised "testl $mem, 0xffffffff", but the
// previous encoding emitted cmpl($mem, -1), which sets ZF when the loaded
// value equals 0xFFFFFFFF rather than when it is zero. The encoding now emits
// the advertised TEST r/m32, imm32 (opcode F7 /0 with a 32-bit immediate),
// mirroring testP_mem but with the non-wide REX class, so ZF is set exactly
// when the 32-bit value at $mem is zero.
// NOTE(review): relies on the REX_mem enc_class (non-wide counterpart of the
// REX_mem_wide class used by testP_mem), defined in the encode block of this file.
11976 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11977 %{
11978   predicate(Universe::narrow_oop_base() != NULL);
11979   match(Set cr (CmpN (LoadN mem) zero));
11980 
11981   ins_cost(500); // XXX
11982   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11983   opcode(0xF7); /* Opcode F7 /0 */
11984   ins_encode(REX_mem(mem),
11985              OpcP, RM_opc_mem(0x00, mem), Con_d32(0xFFFFFFFF));
11986   ins_pipe(ialu_cr_reg_mem);
11987 %}
11988 
// Compressed-pointer null check directly on memory for the case where the
// narrow-oop base is NULL (see predicate). Emits cmpl(r12, mem); the format
// string notes that R12 (the heap-base register) holds zero in this
// configuration.
11989 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11990 %{
11991   predicate(Universe::narrow_oop_base() == NULL);
11992   match(Set cr (CmpN (LoadN mem) zero));
11993 
11994   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11995   ins_encode %{
11996     __ cmpl(r12, $mem$$Address);
11997   %}
11998   ins_pipe(ialu_cr_reg_mem);
11999 %}
12000 
12001 // Yanked all unsigned pointer compare operations.
12002 // Pointer compares are done with CmpP which is already unsigned.
12003 
// Signed 64-bit compare of two registers: matches CmpL and emits opcode 3B /r
// through the _wide REX encoding class (printed as cmpq).
12004 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12005 %{
12006   match(Set cr (CmpL op1 op2));
12007 
12008   format %{ "cmpq    $op1, $op2" %}
12009   opcode(0x3B);  /* Opcode 3B /r */
12010   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12011   ins_pipe(ialu_cr_reg_reg);
12012 %}
12013 
// Signed 64-bit compare of a register against an immL32 constant (opcode
// 81 /7), using the _wide variant of the OpcSErm encoding class.
12014 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12015 %{
12016   match(Set cr (CmpL op1 op2));
12017 
12018   format %{ "cmpq    $op1, $op2" %}
12019   opcode(0x81, 0x07); /* Opcode 81 /7 */
12020   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12021   ins_pipe(ialu_cr_reg_imm);
12022 %}
12023 
// Signed 64-bit compare of a register against a memory operand: the LoadL is
// folded into the compare. Unlike the int and pointer memory forms, no
// explicit ins_cost is given here.
12024 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12025 %{
12026   match(Set cr (CmpL op1 (LoadL op2)));
12027 
12028   format %{ "cmpq    $op1, $op2" %}
12029   opcode(0x3B); /* Opcode 3B /r */
12030   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12031   ins_pipe(ialu_cr_reg_mem);
12032 %}
12033 
// 64-bit compare against zero: matches CmpL of a register with immL0 and emits
// opcode 85 with src as both operands through the _wide REX class (printed as
// testq).
12034 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12035 %{
12036   match(Set cr (CmpL src zero));
12037 
12038   format %{ "testq   $src, $src" %}
12039   opcode(0x85);
12040   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12041   ins_pipe(ialu_cr_reg_imm);
12042 %}
12043 
// Folds "(src & con) compared with zero" for longs into one instruction:
// matches CmpL (AndL src con) zero with an immL32 constant and emits opcode
// F7 /0 with a 32-bit immediate through the _wide REX class.
12044 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12045 %{
12046   match(Set cr (CmpL (AndL src con) zero));
12047 
12048   format %{ "testq   $src, $con\t# long" %}
12049   opcode(0xF7, 0x00);
12050   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12051   ins_pipe(ialu_cr_reg_imm);
12052 %}
12053 
// Folds "(src & load(mem)) compared with zero" for longs into one instruction:
// matches CmpL (AndL src (LoadL mem)) zero and emits opcode 85 with a memory
// operand through the _wide REX class.
12054 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12055 %{
12056   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12057 
12058   format %{ "testq   $src, $mem" %}
12059   opcode(0x85);
12060   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12061   ins_pipe(ialu_cr_reg_mem);
12062 %}
12063 
12064 // Manifest a CmpL result in an integer register.  Very painful.
12065 // This is the test to avoid.
// Matches CmpL3 and kills the flags. Going by the printed sequence, dst is
// preloaded with -1 and kept if src1 < src2; otherwise setne/movzbl leave 0
// for equal and 1 for not-equal. The actual bytes come from the cmpl3_flag
// encoding class, which is defined elsewhere in this file.
12066 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12067 %{
12068   match(Set dst (CmpL3 src1 src2));
12069   effect(KILL flags);
12070 
12071   ins_cost(275); // XXX
12072   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12073             "movl    $dst, -1\n\t"
12074             "jl,s    done\n\t"
12075             "setne   $dst\n\t"
12076             "movzbl  $dst, $dst\n\t"
12077     "done:" %}
12078   ins_encode(cmpl3_flag(src1, src2, dst));
12079   ins_pipe(pipe_slow);
12080 %}
12081 
12082 //----------Max and Min--------------------------------------------------------
12083 // Min Instructions
12084 
// Helper for minI_rReg: has no match rule of its own, only an effect list
// (dst is USE_DEF, src and cr are USE), and is instantiated from the
// minI_rReg expand. Emits opcode 0F 4F (printed as cmovlgt), moving src into
// dst when the preceding compare found dst greater than src.
12085 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12086 %{
12087   effect(USE_DEF dst, USE src, USE cr);
12088 
12089   format %{ "cmovlgt $dst, $src\t# min" %}
12090   opcode(0x0F, 0x4F);
12091   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12092   ins_pipe(pipe_cmov_reg);
12093 %}
12094 
12095 
// Integer minimum, computed in place: matches MinI with dst as both input and
// result and expands into compI_rReg(cr, dst, src) followed by
// cmovI_reg_g(dst, src, cr), using a locally declared flags register.
12096 instruct minI_rReg(rRegI dst, rRegI src)
12097 %{
12098   match(Set dst (MinI dst src));
12099 
12100   ins_cost(200);
12101   expand %{
12102     rFlagsReg cr;
12103     compI_rReg(cr, dst, src);
12104     cmovI_reg_g(dst, src, cr);
12105   %}
12106 %}
12107 
// Helper for maxI_rReg: has no match rule of its own, only an effect list
// (dst is USE_DEF, src and cr are USE), and is instantiated from the
// maxI_rReg expand. Emits opcode 0F 4C (printed as cmovllt), moving src into
// dst when the preceding compare found dst less than src.
12108 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12109 %{
12110   effect(USE_DEF dst, USE src, USE cr);
12111 
12112   format %{ "cmovllt $dst, $src\t# max" %}
12113   opcode(0x0F, 0x4C);
12114   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12115   ins_pipe(pipe_cmov_reg);
12116 %}
12117 
12118 
// Integer maximum, computed in place: matches MaxI with dst as both input and
// result and expands into compI_rReg(cr, dst, src) followed by
// cmovI_reg_l(dst, src, cr), using a locally declared flags register.
12119 instruct maxI_rReg(rRegI dst, rRegI src)
12120 %{
12121   match(Set dst (MaxI dst src));
12122 
12123   ins_cost(200);
12124   expand %{
12125     rFlagsReg cr;
12126     compI_rReg(cr, dst, src);
12127     cmovI_reg_l(dst, src, cr);
12128   %}
12129 %}
12130 
12131 // ============================================================================
12132 // Branch Instructions
12133 
12134 // Jump Direct - Label defines a relative address from JMP+1
// Long-offset unconditional jump: matches Goto and emits opcode E9 followed by
// the label displacement (Lbl); the declared size is 5 bytes. jmpDir_short is
// the short-offset replacement.
12135 instruct jmpDir(label labl)
12136 %{
12137   match(Goto);
12138   effect(USE labl);
12139 
12140   ins_cost(300);
12141   format %{ "jmp     $labl" %}
12142   size(5);
12143   opcode(0xE9);
12144   ins_encode(OpcP, Lbl(labl));
12145   ins_pipe(pipe_jmp);
12146   ins_pc_relative(1);
12147 %}
12148 
12149 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long-offset conditional jump on signed flags: matches If with a cmpOp
// condition, declares a 6-byte size and passes the 0F 80 opcode pair to the
// Jcc encoding class together with the condition and the label.
12150 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12151 %{
12152   match(If cop cr);
12153   effect(USE labl);
12154 
12155   ins_cost(300);
12156   format %{ "j$cop     $labl" %}
12157   size(6);
12158   opcode(0x0F, 0x80);
12159   ins_encode(Jcc(cop, labl));
12160   ins_pipe(pipe_jcc);
12161   ins_pc_relative(1);
12162 %}
12163 
12164 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matches CountedLoopEnd instead of If; the
// format tags the instruction with "# loop end".
12165 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12166 %{
12167   match(CountedLoopEnd cop cr);
12168   effect(USE labl);
12169 
12170   ins_cost(300);
12171   format %{ "j$cop     $labl\t# loop end" %}
12172   size(6);
12173   opcode(0x0F, 0x80);
12174   ins_encode(Jcc(cop, labl));
12175   ins_pipe(pipe_jcc);
12176   ins_pc_relative(1);
12177 %}
12178 
12179 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd: takes a cmpOpU condition and an
// rFlagsRegU input; the encoding (Jcc with opcode pair 0F 80, 6 bytes) is the
// same.
12180 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12181   match(CountedLoopEnd cop cmp);
12182   effect(USE labl);
12183 
12184   ins_cost(300);
12185   format %{ "j$cop,u   $labl\t# loop end" %}
12186   size(6);
12187   opcode(0x0F, 0x80);
12188   ins_encode(Jcc(cop, labl));
12189   ins_pipe(pipe_jcc);
12190   ins_pc_relative(1);
12191 %}
12192 
// CountedLoopEnd jump for the cmpOpUCF / rFlagsRegUCF operand classes. Same
// Jcc encoding and size as jmpLoopEndU, but with a lower ins_cost (200 vs 300).
12193 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12194   match(CountedLoopEnd cop cmp);
12195   effect(USE labl);
12196 
12197   ins_cost(200);
12198   format %{ "j$cop,u   $labl\t# loop end" %}
12199   size(6);
12200   opcode(0x0F, 0x80);
12201   ins_encode(Jcc(cop, labl));
12202   ins_pipe(pipe_jcc);
12203   ins_pc_relative(1);
12204 %}
12205 
12206 // Jump Direct Conditional - using unsigned comparison
// Long-offset form: matches If with a cmpOpU condition on rFlagsRegU and uses
// the same Jcc encoding (opcode pair 0F 80, 6 bytes) as jmpCon.
12207 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12208   match(If cop cmp);
12209   effect(USE labl);
12210 
12211   ins_cost(300);
12212   format %{ "j$cop,u  $labl" %}
12213   size(6);
12214   opcode(0x0F, 0x80);
12215   ins_encode(Jcc(cop, labl));
12216   ins_pipe(pipe_jcc);
12217   ins_pc_relative(1);
12218 %}
12219 
// If-jump for the cmpOpUCF / rFlagsRegUCF operand classes. Same Jcc encoding
// and size as jmpConU, but with a lower ins_cost (200 vs 300).
12220 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12221   match(If cop cmp);
12222   effect(USE labl);
12223 
12224   ins_cost(200);
12225   format %{ "j$cop,u  $labl" %}
12226   size(6);
12227   opcode(0x0F, 0x80);
12228   ins_encode(Jcc(cop, labl));
12229   ins_pipe(pipe_jcc);
12230   ins_pc_relative(1);
12231 %}
12232 
// If-jump for cmpOpUCF2 conditions, emitted as two long conditional jumps
// (12 bytes in total): first a jump on parity, then a jump on $cop.
//  - notEqual: the parity jump targets the same label as the second jump.
//  - equal:    the parity jump uses a fixed displacement of 6, i.e. it skips
//              the second (6-byte) jump and lands on "done".
// Any other condition code reaches ShouldNotReachHere(). When the label
// pointer is NULL both computed displacements are emitted as 0.
12233 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12234   match(If cop cmp);
12235   effect(USE labl);
12236 
12237   ins_cost(200);
12238   format %{ $$template
12239     if ($cop$$cmpcode == Assembler::notEqual) {
12240       $$emit$$"jp,u   $labl\n\t"
12241       $$emit$$"j$cop,u   $labl"
12242     } else {
12243       $$emit$$"jp,u   done\n\t"
12244       $$emit$$"j$cop,u   $labl\n\t"
12245       $$emit$$"done:"
12246     }
12247   %}
12248   size(12);
12249   opcode(0x0F, 0x80);
12250   ins_encode %{
12251     Label* l = $labl$$label;
12252     $$$emit8$primary;
12253     emit_cc(cbuf, $secondary, Assembler::parity);
12254     int parity_disp = -1;
12255     if ($cop$$cmpcode == Assembler::notEqual) {
12256        // the two jumps are 6 bytes apart, so their displacements to the label differ by 6 too
12257        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12258     } else if ($cop$$cmpcode == Assembler::equal) {
12259        parity_disp = 6;
12260     } else {
12261        ShouldNotReachHere();
12262     }
12263     emit_d32(cbuf, parity_disp);
12264     $$$emit8$primary;
12265     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12266     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12267     emit_d32(cbuf, disp);
12268   %}
12269   ins_pipe(pipe_jcc);
12270   ins_pc_relative(1);
12271 %}
12272 
12273 // ============================================================================
12274 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12275 // superklass array for an instance of the superklass.  Set a hidden
12276 // internal cache on a hit (cache is checked with exposed code in
12277 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12278 // encoding ALSO sets flags.
12279 
// Value-producing form of the partial subtype check: the result is pinned to
// RDI, sub to RSI and super to RAX; RCX and the flags are killed.
// opcode(0x1) is the switch that makes enc_PartialSubtypeCheck emit the
// trailing XOR of RDI on a hit (the _vs_Zero variant below passes 0x0).
// NOTE(review): the format text spells "arrayOopDex" where arrayOopDesc is
// presumably meant, and the "Hit: rdi zero" remark lacks the leading '#';
// both only affect the printed disassembly.
12280 instruct partialSubtypeCheck(rdi_RegP result,
12281                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12282                              rFlagsReg cr)
12283 %{
12284   match(Set result (PartialSubtypeCheck sub super));
12285   effect(KILL rcx, KILL cr);
12286 
12287   ins_cost(1100);  // slightly larger than the next version
12288   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12289             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12290             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12291             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12292             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12293             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12294             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
12295     "miss:\t" %}
12296 
12297   opcode(0x1); // Force a XOR of RDI
12298   ins_encode(enc_PartialSubtypeCheck());
12299   ins_pipe(pipe_slow);
12300 %}
12301 
// Flags-producing form of the partial subtype check: matches the check
// compared against a null pointer constant, so only the flags are defined.
// RDI (result) and RCX are declared KILL. Uses the same
// enc_PartialSubtypeCheck encoding with opcode(0x0), which omits the final
// XOR of RDI, and has a slightly lower cost (1000 vs 1100) than the
// value-producing form.
// NOTE(review): the format text spells "arrayOopDex" where arrayOopDesc is
// presumably meant; this only affects the printed disassembly.
12302 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12303                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12304                                      immP0 zero,
12305                                      rdi_RegP result)
12306 %{
12307   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12308   effect(KILL rcx, KILL result);
12309 
12310   ins_cost(1000);
12311   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12312             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12313             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12314             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12315             "jne,s   miss\t\t# Missed: flags nz\n\t"
12316             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12317     "miss:\t" %}
12318 
12319   opcode(0x0); // No need to XOR RDI
12320   ins_encode(enc_PartialSubtypeCheck());
12321   ins_pipe(pipe_slow);
12322 %}
12323 
12324 // ============================================================================
12325 // Branch Instructions -- short offset versions
12326 //
12327 // These instructions are used to replace jumps of a long offset (the default
12328 // match) with jumps of a shorter offset.  These instructions are all tagged
12329 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12330 // match rules in general matching.  Instead, the ADLC generates a conversion
12331 // method in the MachNode which can be used to do in-place replacement of the
12332 // long variant with the shorter variant.  The compiler will determine if a
12333 // branch can be taken by the is_short_branch_offset() predicate in the machine
12334 // specific code section of the file.
12335 
12336 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: opcode EB plus the LblShort encoding,
// 2 bytes in total. Tagged with ins_short_branch(1), so it is substituted for
// the long form rather than matched directly (see the section comment above).
12337 instruct jmpDir_short(label labl) %{
12338   match(Goto);
12339   effect(USE labl);
12340 
12341   ins_cost(300);
12342   format %{ "jmp,s   $labl" %}
12343   size(2);
12344   opcode(0xEB);
12345   ins_encode(OpcP, LblShort(labl));
12346   ins_pipe(pipe_jmp);
12347   ins_pc_relative(1);
12348   ins_short_branch(1);
12349 %}
12350 
12351 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: base opcode 0x70 passed to the JccShort
// encoding class, 2 bytes in total, tagged with ins_short_branch(1).
12352 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12353   match(If cop cr);
12354   effect(USE labl);
12355 
12356   ins_cost(300);
12357   format %{ "j$cop,s   $labl" %}
12358   size(2);
12359   opcode(0x70);
12360   ins_encode(JccShort(cop, labl));
12361   ins_pipe(pipe_jcc);
12362   ins_pc_relative(1);
12363   ins_short_branch(1);
12364 %}
12365 
12366 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd (matches CountedLoopEnd): JccShort
// with base opcode 0x70, 2 bytes, tagged with ins_short_branch(1).
12367 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12368   match(CountedLoopEnd cop cr);
12369   effect(USE labl);
12370 
12371   ins_cost(300);
12372   format %{ "j$cop,s   $labl\t# loop end" %}
12373   size(2);
12374   opcode(0x70);
12375   ins_encode(JccShort(cop, labl));
12376   ins_pipe(pipe_jcc);
12377   ins_pc_relative(1);
12378   ins_short_branch(1);
12379 %}
12380 
12381 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU (unsigned flags, CountedLoopEnd):
// JccShort with base opcode 0x70, 2 bytes, tagged with ins_short_branch(1).
12382 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12383   match(CountedLoopEnd cop cmp);
12384   effect(USE labl);
12385 
12386   ins_cost(300);
12387   format %{ "j$cop,us  $labl\t# loop end" %}
12388   size(2);
12389   opcode(0x70);
12390   ins_encode(JccShort(cop, labl));
12391   ins_pipe(pipe_jcc);
12392   ins_pc_relative(1);
12393   ins_short_branch(1);
12394 %}
12395 
// Short-offset replacement for jmpLoopEndUCF: JccShort with base opcode 0x70,
// 2 bytes, tagged with ins_short_branch(1). Note that ins_cost is 300 here,
// whereas the long form declares 200.
12396 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12397   match(CountedLoopEnd cop cmp);
12398   effect(USE labl);
12399 
12400   ins_cost(300);
12401   format %{ "j$cop,us  $labl\t# loop end" %}
12402   size(2);
12403   opcode(0x70);
12404   ins_encode(JccShort(cop, labl));
12405   ins_pipe(pipe_jcc);
12406   ins_pc_relative(1);
12407   ins_short_branch(1);
12408 %}
12409 
12410 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: JccShort with base opcode 0x70,
// 2 bytes, tagged with ins_short_branch(1).
12411 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12412   match(If cop cmp);
12413   effect(USE labl);
12414 
12415   ins_cost(300);
12416   format %{ "j$cop,us  $labl" %}
12417   size(2);
12418   opcode(0x70);
12419   ins_encode(JccShort(cop, labl));
12420   ins_pipe(pipe_jcc);
12421   ins_pc_relative(1);
12422   ins_short_branch(1);
12423 %}
12424 
// Short-offset replacement for jmpConUCF: JccShort with base opcode 0x70,
// 2 bytes, tagged with ins_short_branch(1). Note that ins_cost is 300 here,
// whereas the long form declares 200.
12425 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12426   match(If cop cmp);
12427   effect(USE labl);
12428 
12429   ins_cost(300);
12430   format %{ "j$cop,us  $labl" %}
12431   size(2);
12432   opcode(0x70);
12433   ins_encode(JccShort(cop, labl));
12434   ins_pipe(pipe_jcc);
12435   ins_pc_relative(1);
12436   ins_short_branch(1);
12437 %}
12438 
// Short-offset replacement for jmpConUCF2: two 2-byte conditional jumps
// (4 bytes in total), first on parity and then on $cop, each with an 8-bit
// displacement.
//  - notEqual: the parity jump targets the same label as the second jump.
//  - equal:    the parity jump uses a fixed displacement of 2, i.e. it skips
//              the second (2-byte) jump and lands on "done".
// Any other condition code reaches ShouldNotReachHere(). The range asserts
// run after both displacements have already been emitted.
12439 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12440   match(If cop cmp);
12441   effect(USE labl);
12442 
12443   ins_cost(300);
12444   format %{ $$template
12445     if ($cop$$cmpcode == Assembler::notEqual) {
12446       $$emit$$"jp,u,s   $labl\n\t"
12447       $$emit$$"j$cop,u,s   $labl"
12448     } else {
12449       $$emit$$"jp,u,s   done\n\t"
12450       $$emit$$"j$cop,u,s  $labl\n\t"
12451       $$emit$$"done:"
12452     }
12453   %}
12454   size(4);
12455   opcode(0x70);
12456   ins_encode %{
12457     Label* l = $labl$$label;
12458     emit_cc(cbuf, $primary, Assembler::parity);
12459     int parity_disp = -1;
12460     if ($cop$$cmpcode == Assembler::notEqual) {
12461       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12462     } else if ($cop$$cmpcode == Assembler::equal) {
12463       parity_disp = 2;
12464     } else {
12465       ShouldNotReachHere();
12466     }
12467     emit_d8(cbuf, parity_disp);
12468     emit_cc(cbuf, $primary, $cop$$cmpcode);
12469     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12470     emit_d8(cbuf, disp);
12471     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12472     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12473   %}
12474   ins_pipe(pipe_jcc);
12475   ins_pc_relative(1);
12476   ins_short_branch(1);
12477 %}
12478 
12479 // ============================================================================
12480 // inlined locking and unlocking
12481 
// Inlined monitor enter: matches FastLock on (object, box) and defines the
// flags register as its result. tmp is pinned to RAX and, together with scr,
// declared TEMP. All code generation is delegated to the Fast_Lock encoding
// class, which is defined elsewhere in this file.
12482 instruct cmpFastLock(rFlagsReg cr,
12483                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12484 %{
12485   match(Set cr (FastLock object box));
12486   effect(TEMP tmp, TEMP scr);
12487 
12488   ins_cost(300);
12489   format %{ "fastlock $object,$box,$tmp,$scr" %}
12490   ins_encode(Fast_Lock(object, box, tmp, scr));
12491   ins_pipe(pipe_slow);
12492   ins_pc_relative(1);
12493 %}
12494 
// Inlined monitor exit: matches FastUnlock on (object, box) and defines the
// flags register as its result. Here the box operand itself is pinned to RAX
// and a single pointer TEMP is reserved. All code generation is delegated to
// the Fast_Unlock encoding class, which is defined elsewhere in this file.
12495 instruct cmpFastUnlock(rFlagsReg cr,
12496                        rRegP object, rax_RegP box, rRegP tmp)
12497 %{
12498   match(Set cr (FastUnlock object box));
12499   effect(TEMP tmp);
12500 
12501   ins_cost(300);
12502   format %{ "fastunlock $object, $box, $tmp" %}
12503   ins_encode(Fast_Unlock(object, box, tmp));
12504   ins_pipe(pipe_slow);
12505   ins_pc_relative(1);
12506 %}
12507 
12508 
12509 // ============================================================================
12510 // Safepoint Instructions
12511 instruct safePoint_poll(rFlagsReg cr)
12512 %{
        // Safepoint poll: matches the SafePoint ideal node. The format string
        // describes it as a RIP-relative testl against the poll page, so the
        // flags register is declared KILLed rather than defined.
12513   match(SafePoint);
12514   effect(KILL cr);
12515 
12516   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12517             "# Safepoint: poll for GC" %}
        // Fixed instruction size; must stay in sync with what
        // enc_safepoint_poll (defined elsewhere in this file) actually emits.
12518   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12519   ins_cost(125);
12520   ins_encode(enc_safepoint_poll);
12521   ins_pipe(ialu_reg_mem);
12522 %}
12523 
12524 // ============================================================================
12525 // Procedure Call/Return Instructions
12526 // Call Java Static Instruction
12527 // Note: If this code changes, the corresponding ret_addr_offset() and
12528 //       compute_padding() functions will have to be adjusted.
12529 instruct CallStaticJavaDirect(method meth)
12530 %{
        // Direct call to a statically bound Java method: matches the
        // CallStaticJava ideal node; meth is only USEd (it names the target).
12531   match(CallStaticJava);
12532   effect(USE meth);
12533 
12534   ins_cost(300);
12535   format %{ "call,static " %}
12536   opcode(0xE8); /* E8 cd */
        // Java_Static_Call emits the call; call_epilog is appended after it.
        // Both enc_classes are defined outside this section.
12537   ins_encode(Java_Static_Call(meth), call_epilog);
12538   ins_pipe(pipe_slow);
12539   ins_pc_relative(1);
        // Alignment request for this instruction; see the note above the
        // instruct about keeping compute_padding() in step with any change.
12540   ins_alignment(4);
12541 %}
12542 
12543 // Call Java Dynamic Instruction
12544 // Note: If this code changes, the corresponding ret_addr_offset() and
12545 //       compute_padding() functions will have to be adjusted.
12546 instruct CallDynamicJavaDirect(method meth)
12547 %{
        // Dynamically dispatched Java call: matches the CallDynamicJava ideal
        // node; meth is only USEd (it names the target).
12548   match(CallDynamicJava);
12549   effect(USE meth);
12550 
12551   ins_cost(300);
        // Per the format string, the call is preceded by a movq that loads
        // Universe::non_oop_word() into rax.
12552   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12553             "call,dynamic " %}
12554   opcode(0xE8); /* E8 cd */
        // Java_Dynamic_Call emits the sequence; call_epilog is appended after
        // it. Both enc_classes are defined outside this section.
12555   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12556   ins_pipe(pipe_slow);
12557   ins_pc_relative(1);
        // Alignment request for this instruction; see the note above the
        // instruct about keeping compute_padding() in step with any change.
12558   ins_alignment(4);
12559 %}
12560 
12561 // Call Runtime Instruction
12562 instruct CallRuntimeDirect(method meth)
12563 %{
        // Call into the VM runtime: matches the CallRuntime ideal node;
        // meth is only USEd (it names the target).
12564   match(CallRuntime);
12565   effect(USE meth);
12566 
12567   ins_cost(300);
12568   format %{ "call,runtime " %}
12569   opcode(0xE8); /* E8 cd */
        // Unlike the Java call rules, no call_epilog is appended and no
        // ins_alignment is requested. Java_To_Runtime is defined elsewhere.
12570   ins_encode(Java_To_Runtime(meth));
12571   ins_pipe(pipe_slow);
12572   ins_pc_relative(1);
12573 %}
12574 
12575 // Call runtime without safepoint
12576 instruct CallLeafDirect(method meth)
12577 %{
        // Leaf runtime call: matches the CallLeaf ideal node; meth is only
        // USEd (it names the target).
12578   match(CallLeaf);
12579   effect(USE meth);
12580 
12581   ins_cost(300);
12582   format %{ "call_leaf,runtime " %}
12583   opcode(0xE8); /* E8 cd */
        // Same encoding as CallRuntimeDirect; only the matched node and the
        // format text differ.
12584   ins_encode(Java_To_Runtime(meth));
12585   ins_pipe(pipe_slow);
12586   ins_pc_relative(1);
12587 %}
12588 
12589 // Call runtime without safepoint
12590 instruct CallLeafNoFPDirect(method meth)
12591 %{
        // Leaf runtime call, NoFP flavour: matches the CallLeafNoFP ideal
        // node; meth is only USEd (it names the target).
        // NOTE(review): presumably "NoFP" means the callee uses no
        // floating-point state -- confirm against the CallLeafNoFP node.
12592   match(CallLeafNoFP);
12593   effect(USE meth);
12594 
12595   ins_cost(300);
12596   format %{ "call_leaf_nofp,runtime " %}
12597   opcode(0xE8); /* E8 cd */
        // Same encoding as CallLeafDirect; only the matched node and the
        // format text differ.
12598   ins_encode(Java_To_Runtime(meth));
12599   ins_pipe(pipe_slow);
12600   ins_pc_relative(1);
12601 %}
12602 
12603 // Return Instruction
12604 // Remove the return address & jump to it.
12605 // Notice: We always emit a nop after a ret to make sure there is room
12606 // for safepoint patching
12607 instruct Ret()
12608 %{
        // Method return: matches the Return ideal node and takes no operands.
12609   match(Return);
12610 
12611   format %{ "ret" %}
12612   opcode(0xC3);
        // OpcP emits the primary opcode byte declared above.
        // NOTE(review): only that single byte is encoded by this rule; the
        // trailing nop mentioned in the comment preceding this instruct is
        // not emitted here -- confirm whether that comment is stale.
12613   ins_encode(OpcP);
12614   ins_pipe(pipe_jmp);
12615 %}
12616 
12617 // Tail Call; Jump from runtime stub to Java code.
12618 // Also known as an 'interprocedural jump'.
12619 // Target of jump will eventually return to caller.
12620 // TailJump below removes the return address.
12621 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12622 %{
        // Tail call via indirect jump: matches TailCall on
        // (jump_target, method_oop). method_oop is pinned to the rbx_RegP
        // operand class and jump_target to no_rbp_RegP.
12623   match(TailCall jump_target method_oop);
12624 
12625   ins_cost(300);
12626   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12627   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Encoding order: REX_reg for the target register, the primary opcode
        // byte, then reg_opc for the target register with the secondary
        // opcode (/4). REX_reg and reg_opc are defined elsewhere in the file.
12628   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12629   ins_pipe(pipe_jmp);
12630 %}
12631 
12632 // Tail Jump; remove the return address; jump to target.
12633 // TailCall above leaves the return address around.
12634 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12635 %{
        // Tail jump: matches TailJump on (jump_target, ex_oop). ex_oop is
        // pinned to the rax_RegP operand class and jump_target to no_rbp_RegP.
12636   match(TailJump jump_target ex_oop);
12637 
12638   ins_cost(300);
12639   format %{ "popq    rdx\t# pop return address\n\t"
12640             "jmp     $jump_target" %}
12641   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // First a literal 0x5a byte (popq rdx) removes the return address from
        // the stack, then the same FF /4 indirect-jump encoding as
        // TailCalljmpInd is emitted.
12642   ins_encode(Opcode(0x5a), // popq rdx
12643              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12644   ins_pipe(pipe_jmp);
12645 %}
12646 
12647 // Create exception oop: created by stack-crawling runtime code.
12648 // Created exception is now available to this handler, and is setup
12649 // just prior to jumping to this handler.  No code emitted.
12650 instruct CreateException(rax_RegP ex_oop)
12651 %{
        // Placeholder for the CreateEx ideal node: it only tells the register
        // allocator that the result lives in the rax_RegP operand class.
12652   match(Set ex_oop (CreateEx));
12653 
        // Zero bytes of machine code: size is 0, the encoding list is empty,
        // and the pipeline class is 'empty'.
12654   size(0);
12655   // use the following format syntax
12656   format %{ "# exception oop is in rax; no code emitted" %}
12657   ins_encode();
12658   ins_pipe(empty);
12659 %}
12660 
12661 // Rethrow exception:
12662 // The exception oop will come in the first argument position.
12663 // Then JUMP (not call) to the rethrow stub code.
12664 instruct RethrowException()
12665 %{
        // Rethrow: matches the Rethrow ideal node and takes no operands.
12666   match(Rethrow);
12667 
12668   // use the following format syntax
12669   format %{ "jmp     rethrow_stub" %}
        // The jump to the rethrow stub is produced by the enc_rethrow
        // enc_class, which is defined outside this section of the file.
12670   ins_encode(enc_rethrow);
12671   ins_pipe(pipe_jmp);
12672 %}
12673 
12674 
12675 //----------PEEPHOLE RULES-----------------------------------------------------
12676 // These must follow all instruction definitions as they use the names
12677 // defined in the instructions definitions.
12678 //
12679 // peepmatch ( root_instr_name [preceding_instruction]* );
12680 //
12681 // peepconstraint (
12682 //  instruction_number.operand_name relational_op instruction_number.operand_name
12683 //  [, ...] );
12684 // // instruction numbers are zero-based using left to right order in peepmatch
12685 //
12686 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12687 // // provide an instruction_number.operand_name for each operand that appears
12688 // // in the replacement instruction's match rule
12689 //
12690 // ---------VM FLAGS---------------------------------------------------------
12691 //
12692 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12693 //
12694 // Each peephole rule is given an identifying number starting with zero and
12695 // increasing by one in the order seen by the parser.  An individual peephole
12696 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12697 // on the command-line.
12698 //
12699 // ---------CURRENT LIMITATIONS----------------------------------------------
12700 //
12701 // Only match adjacent instructions in same basic block
12702 // Only equality constraints
12703 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12704 // Only one replacement instruction
12705 //
12706 // ---------EXAMPLE----------------------------------------------------------
12707 //
12708 // // pertinent parts of existing instructions in architecture description
12709 // instruct movI(rRegI dst, rRegI src)
12710 // %{
12711 //   match(Set dst (CopyI src));
12712 // %}
12713 //
12714 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12715 // %{
12716 //   match(Set dst (AddI dst src));
12717 //   effect(KILL cr);
12718 // %}
12719 //
12720 // // Change (inc mov) to lea
12721 // peephole %{
12722 //   // increment preceded by register-register move
12723 //   peepmatch ( incI_rReg movI );
12724 //   // require that the destination register of the increment
12725 //   // match the destination register of the move
12726 //   peepconstraint ( 0.dst == 1.dst );
12727 //   // construct a replacement instruction that sets
12728 //   // the destination to ( move's source register + one )
12729 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12730 // %}
12731 //
12732 
12733 // Implementation no longer uses movX instructions since
12734 // machine-independent system no longer uses CopyX nodes.
12735 //
12736 // peephole
12737 // %{
12738 //   peepmatch (incI_rReg movI);
12739 //   peepconstraint (0.dst == 1.dst);
12740 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12741 // %}
12742 
12743 // peephole
12744 // %{
12745 //   peepmatch (decI_rReg movI);
12746 //   peepconstraint (0.dst == 1.dst);
12747 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12748 // %}
12749 
12750 // peephole
12751 // %{
12752 //   peepmatch (addI_rReg_imm movI);
12753 //   peepconstraint (0.dst == 1.dst);
12754 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12755 // %}
12756 
12757 // peephole
12758 // %{
12759 //   peepmatch (incL_rReg movL);
12760 //   peepconstraint (0.dst == 1.dst);
12761 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12762 // %}
12763 
12764 // peephole
12765 // %{
12766 //   peepmatch (decL_rReg movL);
12767 //   peepconstraint (0.dst == 1.dst);
12768 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12769 // %}
12770 
12771 // peephole
12772 // %{
12773 //   peepmatch (addL_rReg_imm movL);
12774 //   peepconstraint (0.dst == 1.dst);
12775 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12776 // %}
12777 
12778 // peephole
12779 // %{
12780 //   peepmatch (addP_rReg_imm movP);
12781 //   peepconstraint (0.dst == 1.dst);
12782 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12783 // %}
12784 
12785 // // Change load of spilled value to only a spill
12786 // instruct storeI(memory mem, rRegI src)
12787 // %{
12788 //   match(Set mem (StoreI mem src));
12789 // %}
12790 //
12791 // instruct loadI(rRegI dst, memory mem)
12792 // %{
12793 //   match(Set dst (LoadI mem));
12794 // %}
12795 //
12796 
12797 peephole
12798 %{
        // Store-then-reload elimination for ints. Instruction 0 is the root
        // loadI; instruction 1 is the storeI listed after it in peepmatch.
12799   peepmatch (loadI storeI);
        // Applies only when the load's destination register is the register
        // that was stored and both instructions use the same memory operand.
12800   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // The pair is replaced by a single storeI built from the store's own
        // operands, dropping the load.
12801   peepreplace (storeI(1.mem 1.mem 1.src));
12802 %}
12803 
12804 peephole
12805 %{
        // Store-then-reload elimination for longs. Instruction 0 is the root
        // loadL; instruction 1 is the storeL listed after it in peepmatch.
12806   peepmatch (loadL storeL);
        // Applies only when the load's destination register is the register
        // that was stored and both instructions use the same memory operand.
12807   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // The pair is replaced by a single storeL built from the store's own
        // operands, dropping the load.
12808   peepreplace (storeL(1.mem 1.mem 1.src));
12809 %}
12810 
12811 //----------SMARTSPILL RULES---------------------------------------------------
12812 // These must follow all instruction definitions as they use the names
12813 // defined in the instructions definitions.