1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP (R12 and R15 are also left out)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
// Class for all pointer registers except RBP and RSP
// (like ptr_reg, this list also leaves out R12 and R15)
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers except RSP (R12 and R15 are also left out)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers except RSP (R12 and R15 are also left out)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Shorthand for Assembler operand-format constants.  RELOC_DISP32 is what the
// emit helpers below pass as the `format` argument when relocating a 32-bit
// displacement; RELOC_IMM64 is presumably the 64-bit immediate counterpart
// (its users are not in this part of the file).
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// `__ insn(...)` expands to `_masm.insn(...)`, so any function using it must
// have a local MacroAssembler named `_masm` in scope.
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
// Non-product builds only: print the textual form of a breakpoint node.
// The PhaseRegAlloc argument is unused; the mnemonic "INT3" is written to st.
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("INT3");
}
#endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 623 {
 624   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 625   *(cbuf.code_end()) = c;
 626   cbuf.set_code_end(cbuf.code_end() + 1);
 627 }
 628 
 629 // EMIT_CC()
 630 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 631 {
 632   unsigned char c = (unsigned char) (f1 | f2);
 633   *(cbuf.code_end()) = c;
 634   cbuf.set_code_end(cbuf.code_end() + 1);
 635 }
 636 
 637 // EMIT_OPCODE()
 638 void emit_opcode(CodeBuffer &cbuf, int code)
 639 {
 640   *(cbuf.code_end()) = (unsigned char) code;
 641   cbuf.set_code_end(cbuf.code_end() + 1);
 642 }
 643 
 644 // EMIT_OPCODE() w/ relocation information
 645 void emit_opcode(CodeBuffer &cbuf,
 646                  int code, relocInfo::relocType reloc, int offset, int format)
 647 {
 648   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 649   emit_opcode(cbuf, code);
 650 }
 651 
 652 // EMIT_D8()
 653 void emit_d8(CodeBuffer &cbuf, int d8)
 654 {
 655   *(cbuf.code_end()) = (unsigned char) d8;
 656   cbuf.set_code_end(cbuf.code_end() + 1);
 657 }
 658 
 659 // EMIT_D16()
 660 void emit_d16(CodeBuffer &cbuf, int d16)
 661 {
 662   *((short *)(cbuf.code_end())) = d16;
 663   cbuf.set_code_end(cbuf.code_end() + 2);
 664 }
 665 
 666 // EMIT_D32()
 667 void emit_d32(CodeBuffer &cbuf, int d32)
 668 {
 669   *((int *)(cbuf.code_end())) = d32;
 670   cbuf.set_code_end(cbuf.code_end() + 4);
 671 }
 672 
 673 // EMIT_D64()
 674 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 675 {
 676   *((int64_t*) (cbuf.code_end())) = d64;
 677   cbuf.set_code_end(cbuf.code_end() + 8);
 678 }
 679 
 680 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 681 void emit_d32_reloc(CodeBuffer& cbuf,
 682                     int d32,
 683                     relocInfo::relocType reloc,
 684                     int format)
 685 {
 686   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 687   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 688 
 689   *((int*) (cbuf.code_end())) = d32;
 690   cbuf.set_code_end(cbuf.code_end() + 4);
 691 }
 692 
 693 // emit 32 bit value and construct relocation entry from RelocationHolder
 694 void emit_d32_reloc(CodeBuffer& cbuf,
 695                     int d32,
 696                     RelocationHolder const& rspec,
 697                     int format)
 698 {
 699 #ifdef ASSERT
 700   if (rspec.reloc()->type() == relocInfo::oop_type &&
 701       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 702     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 703   }
 704 #endif
 705   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 706 
 707   *((int* )(cbuf.code_end())) = d32;
 708   cbuf.set_code_end(cbuf.code_end() + 4);
 709 }
 710 
 711 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 712   address next_ip = cbuf.code_end() + 4;
 713   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 714                  external_word_Relocation::spec(addr),
 715                  RELOC_DISP32);
 716 }
 717 
 718 
 719 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 720 void emit_d64_reloc(CodeBuffer& cbuf,
 721                     int64_t d64,
 722                     relocInfo::relocType reloc,
 723                     int format)
 724 {
 725   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 726 
 727   *((int64_t*) (cbuf.code_end())) = d64;
 728   cbuf.set_code_end(cbuf.code_end() + 8);
 729 }
 730 
 731 // emit 64 bit value and construct relocation entry from RelocationHolder
 732 void emit_d64_reloc(CodeBuffer& cbuf,
 733                     int64_t d64,
 734                     RelocationHolder const& rspec,
 735                     int format)
 736 {
 737 #ifdef ASSERT
 738   if (rspec.reloc()->type() == relocInfo::oop_type &&
 739       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 740     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 741            "cannot embed scavengable oops in code");
 742   }
 743 #endif
 744   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 745 
 746   *((int64_t*) (cbuf.code_end())) = d64;
 747   cbuf.set_code_end(cbuf.code_end() + 8);
 748 }
 749 
 750 // Access stack slot for load or store
 751 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 752 {
 753   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 754   if (-0x80 <= disp && disp < 0x80) {
 755     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 756     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 757     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 758   } else {
 759     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 760     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 761     emit_d32(cbuf, disp);     // Displacement // R/M byte
 762   }
 763 }
 764 
 765    // rRegI ereg, memory mem) %{    // emit_reg_mem
 766 void encode_RegMem(CodeBuffer &cbuf,
 767                    int reg,
 768                    int base, int index, int scale, int disp, bool disp_is_oop)
 769 {
 770   assert(!disp_is_oop, "cannot have disp");
 771   int regenc = reg & 7;
 772   int baseenc = base & 7;
 773   int indexenc = index & 7;
 774 
 775   // There is no index & no scale, use form without SIB byte
 776   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 777     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 778     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 779       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 780     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 781       // If 8-bit displacement, mode 0x1
 782       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 783       emit_d8(cbuf, disp);
 784     } else {
 785       // If 32-bit displacement
 786       if (base == -1) { // Special flag for absolute address
 787         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 788         if (disp_is_oop) {
 789           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 790         } else {
 791           emit_d32(cbuf, disp);
 792         }
 793       } else {
 794         // Normal base + offset
 795         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 796         if (disp_is_oop) {
 797           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 798         } else {
 799           emit_d32(cbuf, disp);
 800         }
 801       }
 802     }
 803   } else {
 804     // Else, encode with the SIB byte
 805     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 806     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 807       // If no displacement
 808       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 809       emit_rm(cbuf, scale, indexenc, baseenc);
 810     } else {
 811       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 812         // If 8-bit displacement, mode 0x1
 813         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 814         emit_rm(cbuf, scale, indexenc, baseenc);
 815         emit_d8(cbuf, disp);
 816       } else {
 817         // If 32-bit displacement
 818         if (base == 0x04 ) {
 819           emit_rm(cbuf, 0x2, regenc, 0x4);
 820           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 821         } else {
 822           emit_rm(cbuf, 0x2, regenc, 0x4);
 823           emit_rm(cbuf, scale, indexenc, baseenc); // *
 824         }
 825         if (disp_is_oop) {
 826           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 827         } else {
 828           emit_d32(cbuf, disp);
 829         }
 830       }
 831     }
 832   }
 833 }
 834 
 835 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 836 {
 837   if (dstenc != srcenc) {
 838     if (dstenc < 8) {
 839       if (srcenc >= 8) {
 840         emit_opcode(cbuf, Assembler::REX_B);
 841         srcenc -= 8;
 842       }
 843     } else {
 844       if (srcenc < 8) {
 845         emit_opcode(cbuf, Assembler::REX_R);
 846       } else {
 847         emit_opcode(cbuf, Assembler::REX_RB);
 848         srcenc -= 8;
 849       }
 850       dstenc -= 8;
 851     }
 852 
 853     emit_opcode(cbuf, 0x8B);
 854     emit_rm(cbuf, 0x3, dstenc, srcenc);
 855   }
 856 }
 857 
 858 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 859   if( dst_encoding == src_encoding ) {
 860     // reg-reg copy, use an empty encoding
 861   } else {
 862     MacroAssembler _masm(&cbuf);
 863 
 864     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 865   }
 866 }
 867 
 868 
 869 //=============================================================================
 870 #ifndef PRODUCT
 871 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 872 {
 873   Compile* C = ra_->C;
 874 
 875   int framesize = C->frame_slots() << LogBytesPerInt;
 876   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 877   // Remove wordSize for return adr already pushed
 878   // and another for the RBP we are going to save
 879   framesize -= 2*wordSize;
 880   bool need_nop = true;
 881 
 882   // Calls to C2R adapters often do not accept exceptional returns.
 883   // We require that their callers must bang for them.  But be
 884   // careful, because some VM calls (such as call site linkage) can
 885   // use several kilobytes of stack.  But the stack safety zone should
 886   // account for that.  See bugs 4446381, 4468289, 4497237.
 887   if (C->need_stack_bang(framesize)) {
 888     st->print_cr("# stack bang"); st->print("\t");
 889     need_nop = false;
 890   }
 891   st->print_cr("pushq   rbp"); st->print("\t");
 892 
 893   if (VerifyStackAtCalls) {
 894     // Majik cookie to verify stack depth
 895     st->print_cr("pushq   0xffffffffbadb100d"
 896                   "\t# Majik cookie for stack depth check");
 897     st->print("\t");
 898     framesize -= wordSize; // Remove 2 for cookie
 899     need_nop = false;
 900   }
 901 
 902   if (framesize) {
 903     st->print("subq    rsp, #%d\t# Create frame", framesize);
 904     if (framesize < 0x80 && need_nop) {
 905       st->print("\n\tnop\t# nop for patch_verified_entry");
 906     }
 907   }
 908 }
 909 #endif
 910 
 911 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 912 {
 913   Compile* C = ra_->C;
 914 
 915   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 916   // NativeJump::patch_verified_entry will be able to patch out the entry
 917   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 918   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 919   // 6 bytes. So if we don't do the fldcw or the push then we must
 920   // use the 6 byte frame allocation even if we have no frame. :-(
 921   // If method sets FPU control word do it now
 922 
 923   int framesize = C->frame_slots() << LogBytesPerInt;
 924   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 925   // Remove wordSize for return adr already pushed
 926   // and another for the RBP we are going to save
 927   framesize -= 2*wordSize;
 928   bool need_nop = true;
 929 
 930   // Calls to C2R adapters often do not accept exceptional returns.
 931   // We require that their callers must bang for them.  But be
 932   // careful, because some VM calls (such as call site linkage) can
 933   // use several kilobytes of stack.  But the stack safety zone should
 934   // account for that.  See bugs 4446381, 4468289, 4497237.
 935   if (C->need_stack_bang(framesize)) {
 936     MacroAssembler masm(&cbuf);
 937     masm.generate_stack_overflow_check(framesize);
 938     need_nop = false;
 939   }
 940 
 941   // We always push rbp so that on return to interpreter rbp will be
 942   // restored correctly and we can correct the stack.
 943   emit_opcode(cbuf, 0x50 | RBP_enc);
 944 
 945   if (VerifyStackAtCalls) {
 946     // Majik cookie to verify stack depth
 947     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 948     emit_d32(cbuf, 0xbadb100d);
 949     framesize -= wordSize; // Remove 2 for cookie
 950     need_nop = false;
 951   }
 952 
 953   if (framesize) {
 954     emit_opcode(cbuf, Assembler::REX_W);
 955     if (framesize < 0x80) {
 956       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 957       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 958       emit_d8(cbuf, framesize);
 959       if (need_nop) {
 960         emit_opcode(cbuf, 0x90); // nop
 961       }
 962     } else {
 963       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 964       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 965       emit_d32(cbuf, framesize);
 966     }
 967   }
 968 
 969   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 970 
 971 #ifdef ASSERT
 972   if (VerifyStackAtCalls) {
 973     Label L;
 974     MacroAssembler masm(&cbuf);
 975     masm.push(rax);
 976     masm.mov(rax, rsp);
 977     masm.andptr(rax, StackAlignmentInBytes-1);
 978     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 979     masm.pop(rax);
 980     masm.jcc(Assembler::equal, L);
 981     masm.stop("Stack is not properly aligned!");
 982     masm.bind(L);
 983   }
 984 #endif
 985 }
 986 
 987 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 988 {
 989   return MachNode::size(ra_); // too many variables; just compute it
 990                               // the hard way
 991 }
 992 
 993 int MachPrologNode::reloc() const
 994 {
 995   return 0; // a large enough number
 996 }
 997 
 998 //=============================================================================
 999 #ifndef PRODUCT
1000 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1001 {
1002   Compile* C = ra_->C;
1003   int framesize = C->frame_slots() << LogBytesPerInt;
1004   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1005   // Remove word for return adr already pushed
1006   // and RBP
1007   framesize -= 2*wordSize;
1008 
1009   if (framesize) {
1010     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1011     st->print("\t");
1012   }
1013 
1014   st->print_cr("popq\trbp");
1015   if (do_polling() && C->is_method_compilation()) {
1016     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1017                   "# Safepoint: poll for GC");
1018     st->print("\t");
1019   }
1020 }
1021 #endif
1022 
1023 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1024 {
1025   Compile* C = ra_->C;
1026   int framesize = C->frame_slots() << LogBytesPerInt;
1027   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1028   // Remove word for return adr already pushed
1029   // and RBP
1030   framesize -= 2*wordSize;
1031 
1032   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1033 
1034   if (framesize) {
1035     emit_opcode(cbuf, Assembler::REX_W);
1036     if (framesize < 0x80) {
1037       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1038       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1039       emit_d8(cbuf, framesize);
1040     } else {
1041       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1042       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1043       emit_d32(cbuf, framesize);
1044     }
1045   }
1046 
1047   // popq rbp
1048   emit_opcode(cbuf, 0x58 | RBP_enc);
1049 
1050   if (do_polling() && C->is_method_compilation()) {
1051     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1052     // XXX reg_mem doesn't support RIP-relative addressing yet
1053     cbuf.set_inst_mark();
1054     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1055     emit_opcode(cbuf, 0x85); // testl
1056     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1057     // cbuf.inst_mark() is beginning of instruction
1058     emit_d32_reloc(cbuf, os::get_polling_page());
1059 //                    relocInfo::poll_return_type,
1060   }
1061 }
1062 
1063 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1064 {
1065   Compile* C = ra_->C;
1066   int framesize = C->frame_slots() << LogBytesPerInt;
1067   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1068   // Remove word for return adr already pushed
1069   // and RBP
1070   framesize -= 2*wordSize;
1071 
1072   uint size = 0;
1073 
1074   if (do_polling() && C->is_method_compilation()) {
1075     size += 6;
1076   }
1077 
1078   // count popq rbp
1079   size++;
1080 
1081   if (framesize) {
1082     if (framesize < 0x80) {
1083       size += 4;
1084     } else if (framesize) {
1085       size += 7;
1086     }
1087   }
1088 
1089   return size;
1090 }
1091 
1092 int MachEpilogNode::reloc() const
1093 {
1094   return 2; // a large enough number
1095 }
1096 
1097 const Pipeline* MachEpilogNode::pipeline() const
1098 {
1099   return MachNode::pipeline_class();
1100 }
1101 
1102 int MachEpilogNode::safepoint_offset() const
1103 {
1104   return 0;
1105 }
1106 
1107 //=============================================================================
1108 
// Register classes used to pick the form of a spill copy; rc_class() maps
// an OptoReg::Name onto one of them.
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1115 
1116 static enum RC rc_class(OptoReg::Name reg)
1117 {
1118   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1119 
1120   if (OptoReg::is_stack(reg)) return rc_stack;
1121 
1122   VMReg r = OptoReg::as_VMReg(reg);
1123 
1124   if (r->is_Register()) return rc_int;
1125 
1126   assert(r->is_XMMRegister(), "must be");
1127   return rc_float;
1128 }
1129 
1130 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1131                                        PhaseRegAlloc* ra_,
1132                                        bool do_size,
1133                                        outputStream* st) const
1134 {
1135 
1136   // Get registers to move
1137   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1138   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1139   OptoReg::Name dst_second = ra_->get_reg_second(this);
1140   OptoReg::Name dst_first = ra_->get_reg_first(this);
1141 
1142   enum RC src_second_rc = rc_class(src_second);
1143   enum RC src_first_rc = rc_class(src_first);
1144   enum RC dst_second_rc = rc_class(dst_second);
1145   enum RC dst_first_rc = rc_class(dst_first);
1146 
1147   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1148          "must move at least 1 register" );
1149 
1150   if (src_first == dst_first && src_second == dst_second) {
1151     // Self copy, no move
1152     return 0;
1153   } else if (src_first_rc == rc_stack) {
1154     // mem ->
1155     if (dst_first_rc == rc_stack) {
1156       // mem -> mem
1157       assert(src_second != dst_first, "overlap");
1158       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1159           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1160         // 64-bit
1161         int src_offset = ra_->reg2offset(src_first);
1162         int dst_offset = ra_->reg2offset(dst_first);
1163         if (cbuf) {
1164           emit_opcode(*cbuf, 0xFF);
1165           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1166 
1167           emit_opcode(*cbuf, 0x8F);
1168           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1169 
1170 #ifndef PRODUCT
1171         } else if (!do_size) {
1172           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1173                      "popq    [rsp + #%d]",
1174                      src_offset,
1175                      dst_offset);
1176 #endif
1177         }
1178         return
1179           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1180           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1181       } else {
1182         // 32-bit
1183         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1184         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1185         // No pushl/popl, so:
1186         int src_offset = ra_->reg2offset(src_first);
1187         int dst_offset = ra_->reg2offset(dst_first);
1188         if (cbuf) {
1189           emit_opcode(*cbuf, Assembler::REX_W);
1190           emit_opcode(*cbuf, 0x89);
1191           emit_opcode(*cbuf, 0x44);
1192           emit_opcode(*cbuf, 0x24);
1193           emit_opcode(*cbuf, 0xF8);
1194 
1195           emit_opcode(*cbuf, 0x8B);
1196           encode_RegMem(*cbuf,
1197                         RAX_enc,
1198                         RSP_enc, 0x4, 0, src_offset,
1199                         false);
1200 
1201           emit_opcode(*cbuf, 0x89);
1202           encode_RegMem(*cbuf,
1203                         RAX_enc,
1204                         RSP_enc, 0x4, 0, dst_offset,
1205                         false);
1206 
1207           emit_opcode(*cbuf, Assembler::REX_W);
1208           emit_opcode(*cbuf, 0x8B);
1209           emit_opcode(*cbuf, 0x44);
1210           emit_opcode(*cbuf, 0x24);
1211           emit_opcode(*cbuf, 0xF8);
1212 
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1216                      "movl    rax, [rsp + #%d]\n\t"
1217                      "movl    [rsp + #%d], rax\n\t"
1218                      "movq    rax, [rsp - #8]",
1219                      src_offset,
1220                      dst_offset);
1221 #endif
1222         }
1223         return
1224           5 + // movq
1225           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1226           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1227           5; // movq
1228       }
1229     } else if (dst_first_rc == rc_int) {
1230       // mem -> gpr
1231       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1232           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1233         // 64-bit
1234         int offset = ra_->reg2offset(src_first);
1235         if (cbuf) {
1236           if (Matcher::_regEncode[dst_first] < 8) {
1237             emit_opcode(*cbuf, Assembler::REX_W);
1238           } else {
1239             emit_opcode(*cbuf, Assembler::REX_WR);
1240           }
1241           emit_opcode(*cbuf, 0x8B);
1242           encode_RegMem(*cbuf,
1243                         Matcher::_regEncode[dst_first],
1244                         RSP_enc, 0x4, 0, offset,
1245                         false);
1246 #ifndef PRODUCT
1247         } else if (!do_size) {
1248           st->print("movq    %s, [rsp + #%d]\t# spill",
1249                      Matcher::regName[dst_first],
1250                      offset);
1251 #endif
1252         }
1253         return
1254           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1255       } else {
1256         // 32-bit
1257         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1258         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1259         int offset = ra_->reg2offset(src_first);
1260         if (cbuf) {
1261           if (Matcher::_regEncode[dst_first] >= 8) {
1262             emit_opcode(*cbuf, Assembler::REX_R);
1263           }
1264           emit_opcode(*cbuf, 0x8B);
1265           encode_RegMem(*cbuf,
1266                         Matcher::_regEncode[dst_first],
1267                         RSP_enc, 0x4, 0, offset,
1268                         false);
1269 #ifndef PRODUCT
1270         } else if (!do_size) {
1271           st->print("movl    %s, [rsp + #%d]\t# spill",
1272                      Matcher::regName[dst_first],
1273                      offset);
1274 #endif
1275         }
1276         return
1277           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1278           ((Matcher::_regEncode[dst_first] < 8)
1279            ? 3
1280            : 4); // REX
1281       }
1282     } else if (dst_first_rc == rc_float) {
1283       // mem-> xmm
1284       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1285           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1286         // 64-bit
1287         int offset = ra_->reg2offset(src_first);
1288         if (cbuf) {
1289           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1290           if (Matcher::_regEncode[dst_first] >= 8) {
1291             emit_opcode(*cbuf, Assembler::REX_R);
1292           }
1293           emit_opcode(*cbuf, 0x0F);
1294           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1295           encode_RegMem(*cbuf,
1296                         Matcher::_regEncode[dst_first],
1297                         RSP_enc, 0x4, 0, offset,
1298                         false);
1299 #ifndef PRODUCT
1300         } else if (!do_size) {
1301           st->print("%s  %s, [rsp + #%d]\t# spill",
1302                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1303                      Matcher::regName[dst_first],
1304                      offset);
1305 #endif
1306         }
1307         return
1308           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1309           ((Matcher::_regEncode[dst_first] < 8)
1310            ? 5
1311            : 6); // REX
1312       } else {
1313         // 32-bit
1314         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1315         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1316         int offset = ra_->reg2offset(src_first);
1317         if (cbuf) {
1318           emit_opcode(*cbuf, 0xF3);
1319           if (Matcher::_regEncode[dst_first] >= 8) {
1320             emit_opcode(*cbuf, Assembler::REX_R);
1321           }
1322           emit_opcode(*cbuf, 0x0F);
1323           emit_opcode(*cbuf, 0x10);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[dst_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movss   %s, [rsp + #%d]\t# spill",
1331                      Matcher::regName[dst_first],
1332                      offset);
1333 #endif
1334         }
1335         return
1336           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1337           ((Matcher::_regEncode[dst_first] < 8)
1338            ? 5
1339            : 6); // REX
1340       }
1341     }
1342   } else if (src_first_rc == rc_int) {
1343     // gpr ->
1344     if (dst_first_rc == rc_stack) {
1345       // gpr -> mem
1346       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1347           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1348         // 64-bit
1349         int offset = ra_->reg2offset(dst_first);
1350         if (cbuf) {
1351           if (Matcher::_regEncode[src_first] < 8) {
1352             emit_opcode(*cbuf, Assembler::REX_W);
1353           } else {
1354             emit_opcode(*cbuf, Assembler::REX_WR);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movq    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1369       } else {
1370         // 32-bit
1371         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1372         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1373         int offset = ra_->reg2offset(dst_first);
1374         if (cbuf) {
1375           if (Matcher::_regEncode[src_first] >= 8) {
1376             emit_opcode(*cbuf, Assembler::REX_R);
1377           }
1378           emit_opcode(*cbuf, 0x89);
1379           encode_RegMem(*cbuf,
1380                         Matcher::_regEncode[src_first],
1381                         RSP_enc, 0x4, 0, offset,
1382                         false);
1383 #ifndef PRODUCT
1384         } else if (!do_size) {
1385           st->print("movl    [rsp + #%d], %s\t# spill",
1386                      offset,
1387                      Matcher::regName[src_first]);
1388 #endif
1389         }
1390         return
1391           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1392           ((Matcher::_regEncode[src_first] < 8)
1393            ? 3
1394            : 4); // REX
1395       }
1396     } else if (dst_first_rc == rc_int) {
1397       // gpr -> gpr
1398       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1399           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1400         // 64-bit
1401         if (cbuf) {
1402           if (Matcher::_regEncode[dst_first] < 8) {
1403             if (Matcher::_regEncode[src_first] < 8) {
1404               emit_opcode(*cbuf, Assembler::REX_W);
1405             } else {
1406               emit_opcode(*cbuf, Assembler::REX_WB);
1407             }
1408           } else {
1409             if (Matcher::_regEncode[src_first] < 8) {
1410               emit_opcode(*cbuf, Assembler::REX_WR);
1411             } else {
1412               emit_opcode(*cbuf, Assembler::REX_WRB);
1413             }
1414           }
1415           emit_opcode(*cbuf, 0x8B);
1416           emit_rm(*cbuf, 0x3,
1417                   Matcher::_regEncode[dst_first] & 7,
1418                   Matcher::_regEncode[src_first] & 7);
1419 #ifndef PRODUCT
1420         } else if (!do_size) {
1421           st->print("movq    %s, %s\t# spill",
1422                      Matcher::regName[dst_first],
1423                      Matcher::regName[src_first]);
1424 #endif
1425         }
1426         return 3; // REX
1427       } else {
1428         // 32-bit
1429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1431         if (cbuf) {
1432           if (Matcher::_regEncode[dst_first] < 8) {
1433             if (Matcher::_regEncode[src_first] >= 8) {
1434               emit_opcode(*cbuf, Assembler::REX_B);
1435             }
1436           } else {
1437             if (Matcher::_regEncode[src_first] < 8) {
1438               emit_opcode(*cbuf, Assembler::REX_R);
1439             } else {
1440               emit_opcode(*cbuf, Assembler::REX_RB);
1441             }
1442           }
1443           emit_opcode(*cbuf, 0x8B);
1444           emit_rm(*cbuf, 0x3,
1445                   Matcher::_regEncode[dst_first] & 7,
1446                   Matcher::_regEncode[src_first] & 7);
1447 #ifndef PRODUCT
1448         } else if (!do_size) {
1449           st->print("movl    %s, %s\t# spill",
1450                      Matcher::regName[dst_first],
1451                      Matcher::regName[src_first]);
1452 #endif
1453         }
1454         return
1455           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1456           ? 2
1457           : 3; // REX
1458       }
1459     } else if (dst_first_rc == rc_float) {
1460       // gpr -> xmm
1461       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1462           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1463         // 64-bit
1464         if (cbuf) {
1465           emit_opcode(*cbuf, 0x66);
1466           if (Matcher::_regEncode[dst_first] < 8) {
1467             if (Matcher::_regEncode[src_first] < 8) {
1468               emit_opcode(*cbuf, Assembler::REX_W);
1469             } else {
1470               emit_opcode(*cbuf, Assembler::REX_WB);
1471             }
1472           } else {
1473             if (Matcher::_regEncode[src_first] < 8) {
1474               emit_opcode(*cbuf, Assembler::REX_WR);
1475             } else {
1476               emit_opcode(*cbuf, Assembler::REX_WRB);
1477             }
1478           }
1479           emit_opcode(*cbuf, 0x0F);
1480           emit_opcode(*cbuf, 0x6E);
1481           emit_rm(*cbuf, 0x3,
1482                   Matcher::_regEncode[dst_first] & 7,
1483                   Matcher::_regEncode[src_first] & 7);
1484 #ifndef PRODUCT
1485         } else if (!do_size) {
1486           st->print("movdq   %s, %s\t# spill",
1487                      Matcher::regName[dst_first],
1488                      Matcher::regName[src_first]);
1489 #endif
1490         }
1491         return 5; // REX
1492       } else {
1493         // 32-bit
1494         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1495         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1496         if (cbuf) {
1497           emit_opcode(*cbuf, 0x66);
1498           if (Matcher::_regEncode[dst_first] < 8) {
1499             if (Matcher::_regEncode[src_first] >= 8) {
1500               emit_opcode(*cbuf, Assembler::REX_B);
1501             }
1502           } else {
1503             if (Matcher::_regEncode[src_first] < 8) {
1504               emit_opcode(*cbuf, Assembler::REX_R);
1505             } else {
1506               emit_opcode(*cbuf, Assembler::REX_RB);
1507             }
1508           }
1509           emit_opcode(*cbuf, 0x0F);
1510           emit_opcode(*cbuf, 0x6E);
1511           emit_rm(*cbuf, 0x3,
1512                   Matcher::_regEncode[dst_first] & 7,
1513                   Matcher::_regEncode[src_first] & 7);
1514 #ifndef PRODUCT
1515         } else if (!do_size) {
1516           st->print("movdl   %s, %s\t# spill",
1517                      Matcher::regName[dst_first],
1518                      Matcher::regName[src_first]);
1519 #endif
1520         }
1521         return
1522           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1523           ? 4
1524           : 5; // REX
1525       }
1526     }
1527   } else if (src_first_rc == rc_float) {
1528     // xmm ->
1529     if (dst_first_rc == rc_stack) {
1530       // xmm -> mem
1531       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1532           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1533         // 64-bit
1534         int offset = ra_->reg2offset(dst_first);
1535         if (cbuf) {
1536           emit_opcode(*cbuf, 0xF2);
1537           if (Matcher::_regEncode[src_first] >= 8) {
1538               emit_opcode(*cbuf, Assembler::REX_R);
1539           }
1540           emit_opcode(*cbuf, 0x0F);
1541           emit_opcode(*cbuf, 0x11);
1542           encode_RegMem(*cbuf,
1543                         Matcher::_regEncode[src_first],
1544                         RSP_enc, 0x4, 0, offset,
1545                         false);
1546 #ifndef PRODUCT
1547         } else if (!do_size) {
1548           st->print("movsd   [rsp + #%d], %s\t# spill",
1549                      offset,
1550                      Matcher::regName[src_first]);
1551 #endif
1552         }
1553         return
1554           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1555           ((Matcher::_regEncode[src_first] < 8)
1556            ? 5
1557            : 6); // REX
1558       } else {
1559         // 32-bit
1560         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1561         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1562         int offset = ra_->reg2offset(dst_first);
1563         if (cbuf) {
1564           emit_opcode(*cbuf, 0xF3);
1565           if (Matcher::_regEncode[src_first] >= 8) {
1566               emit_opcode(*cbuf, Assembler::REX_R);
1567           }
1568           emit_opcode(*cbuf, 0x0F);
1569           emit_opcode(*cbuf, 0x11);
1570           encode_RegMem(*cbuf,
1571                         Matcher::_regEncode[src_first],
1572                         RSP_enc, 0x4, 0, offset,
1573                         false);
1574 #ifndef PRODUCT
1575         } else if (!do_size) {
1576           st->print("movss   [rsp + #%d], %s\t# spill",
1577                      offset,
1578                      Matcher::regName[src_first]);
1579 #endif
1580         }
1581         return
1582           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1583           ((Matcher::_regEncode[src_first] < 8)
1584            ? 5
1585            : 6); // REX
1586       }
1587     } else if (dst_first_rc == rc_int) {
1588       // xmm -> gpr
1589       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1590           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1591         // 64-bit
1592         if (cbuf) {
1593           emit_opcode(*cbuf, 0x66);
1594           if (Matcher::_regEncode[dst_first] < 8) {
1595             if (Matcher::_regEncode[src_first] < 8) {
1596               emit_opcode(*cbuf, Assembler::REX_W);
1597             } else {
1598               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1599             }
1600           } else {
1601             if (Matcher::_regEncode[src_first] < 8) {
1602               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1603             } else {
1604               emit_opcode(*cbuf, Assembler::REX_WRB);
1605             }
1606           }
1607           emit_opcode(*cbuf, 0x0F);
1608           emit_opcode(*cbuf, 0x7E);
1609           emit_rm(*cbuf, 0x3,
1610                   Matcher::_regEncode[dst_first] & 7,
1611                   Matcher::_regEncode[src_first] & 7);
1612 #ifndef PRODUCT
1613         } else if (!do_size) {
1614           st->print("movdq   %s, %s\t# spill",
1615                      Matcher::regName[dst_first],
1616                      Matcher::regName[src_first]);
1617 #endif
1618         }
1619         return 5; // REX
1620       } else {
1621         // 32-bit
1622         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1623         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1624         if (cbuf) {
1625           emit_opcode(*cbuf, 0x66);
1626           if (Matcher::_regEncode[dst_first] < 8) {
1627             if (Matcher::_regEncode[src_first] >= 8) {
1628               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1629             }
1630           } else {
1631             if (Matcher::_regEncode[src_first] < 8) {
1632               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1633             } else {
1634               emit_opcode(*cbuf, Assembler::REX_RB);
1635             }
1636           }
1637           emit_opcode(*cbuf, 0x0F);
1638           emit_opcode(*cbuf, 0x7E);
1639           emit_rm(*cbuf, 0x3,
1640                   Matcher::_regEncode[dst_first] & 7,
1641                   Matcher::_regEncode[src_first] & 7);
1642 #ifndef PRODUCT
1643         } else if (!do_size) {
1644           st->print("movdl   %s, %s\t# spill",
1645                      Matcher::regName[dst_first],
1646                      Matcher::regName[src_first]);
1647 #endif
1648         }
1649         return
1650           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1651           ? 4
1652           : 5; // REX
1653       }
1654     } else if (dst_first_rc == rc_float) {
1655       // xmm -> xmm
1656       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1657           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1658         // 64-bit
1659         if (cbuf) {
1660           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1661           if (Matcher::_regEncode[dst_first] < 8) {
1662             if (Matcher::_regEncode[src_first] >= 8) {
1663               emit_opcode(*cbuf, Assembler::REX_B);
1664             }
1665           } else {
1666             if (Matcher::_regEncode[src_first] < 8) {
1667               emit_opcode(*cbuf, Assembler::REX_R);
1668             } else {
1669               emit_opcode(*cbuf, Assembler::REX_RB);
1670             }
1671           }
1672           emit_opcode(*cbuf, 0x0F);
1673           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1674           emit_rm(*cbuf, 0x3,
1675                   Matcher::_regEncode[dst_first] & 7,
1676                   Matcher::_regEncode[src_first] & 7);
1677 #ifndef PRODUCT
1678         } else if (!do_size) {
1679           st->print("%s  %s, %s\t# spill",
1680                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1681                      Matcher::regName[dst_first],
1682                      Matcher::regName[src_first]);
1683 #endif
1684         }
1685         return
1686           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1687           ? 4
1688           : 5; // REX
1689       } else {
1690         // 32-bit
1691         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1692         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1693         if (cbuf) {
1694           if (!UseXmmRegToRegMoveAll)
1695             emit_opcode(*cbuf, 0xF3);
1696           if (Matcher::_regEncode[dst_first] < 8) {
1697             if (Matcher::_regEncode[src_first] >= 8) {
1698               emit_opcode(*cbuf, Assembler::REX_B);
1699             }
1700           } else {
1701             if (Matcher::_regEncode[src_first] < 8) {
1702               emit_opcode(*cbuf, Assembler::REX_R);
1703             } else {
1704               emit_opcode(*cbuf, Assembler::REX_RB);
1705             }
1706           }
1707           emit_opcode(*cbuf, 0x0F);
1708           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1709           emit_rm(*cbuf, 0x3,
1710                   Matcher::_regEncode[dst_first] & 7,
1711                   Matcher::_regEncode[src_first] & 7);
1712 #ifndef PRODUCT
1713         } else if (!do_size) {
1714           st->print("%s  %s, %s\t# spill",
1715                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1716                      Matcher::regName[dst_first],
1717                      Matcher::regName[src_first]);
1718 #endif
1719         }
1720         return
1721           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1722           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1723           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1724       }
1725     }
1726   }
1727 
1728   assert(0," foo ");
1729   Unimplemented();
1730 
1731   return 0;
1732 }
1733 
1734 #ifndef PRODUCT
     // Debug-build formatter.  Calls the shared implementation() with no code
     // buffer and do_size == false, the combination under which
     // implementation() prints the spill instruction to `st`.
1735 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1736 {
1737   implementation(NULL, ra_, false, st);
1738 }
1739 #endif
1740 
1741 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1742 {
       // Emission mode: a non-NULL code buffer makes implementation() write the
       // instruction bytes into cbuf; no output stream is passed.
1743   implementation(&cbuf, ra_, false, NULL);
1744 }
1745 
1746 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1747 {
       // Sizing mode: no buffer and do_size == true, so implementation()
       // neither emits nor prints; its returned byte count is passed on.
1748   return implementation(NULL, ra_, true, NULL);
1749 }
1750 
1751 //=============================================================================
1752 #ifndef PRODUCT
     // Debug-build formatter: prints a one-line description of the padding,
     // including the value of _count.
1753 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1754 {
1755   st->print("nop \t# %d bytes pad for loops and calls", _count);
1756 }
1757 #endif
1758 
1759 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1760 {
       // Hands _count to the macro assembler's nop().  `__` is presumably the
       // usual shorthand for `_masm.` (defined outside this section).
1761   MacroAssembler _masm(&cbuf);
1762   __ nop(_count);
1763 }
1764 
1765 uint MachNopNode::size(PhaseRegAlloc*) const
1766 {
       // The node's size is _count, the same value emit() passes to nop().
1767   return _count;
1768 }
1769 
1770 
1771 //=============================================================================
1772 #ifndef PRODUCT
     // Debug-build formatter: prints the leaq that emit() encodes, using the
     // offset obtained from the first element of input 0's register mask and
     // the register assigned to this node.
1773 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1774 {
1775   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1776   int reg = ra_->get_reg_first(this);
1777   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1778             Matcher::regName[reg], offset);
1779 }
1780 #endif
1781 
1782 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1783 {
1784   // Encodes "leaq reg, [rsp + disp]" for the lock box stack slot.
1785   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1786   const int dst_enc = ra_->get_encode(this);
1787   // Displacements of 0x80 and above take the disp32 form, others disp8.
1788   const bool needs_disp32 = (disp >= 0x80);
1789   // REX.W always; REX.R is added when the destination encoding is >= 8.
1790   emit_opcode(cbuf, dst_enc >= 8 ? Assembler::REX_WR : Assembler::REX_W);
1791   emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1792   emit_rm(cbuf, needs_disp32 ? 0x2 : 0x1, dst_enc & 7, 0x04); // ModRM, r/m = 100: SIB follows
1793   emit_rm(cbuf, 0x0, 0x04, RSP_enc);                          // SIB: no index, base = RSP
1794   if (needs_disp32) {
1795     emit_d32(cbuf, disp);
1796   } else {
1797     emit_d8(cbuf, disp);
1798   }
1799 }
1800 
1801 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1802 {
1803   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1804   return (disp >= 0x80) ? 8 : 5; // disp32 form : disp8 form, REX included
1805 }
1806 
1807 //=============================================================================
1808 
1809 // Emit the call stub used when compiled Java code calls the interpreter.
     // Returns without emitting anything if start_a_stub() yields NULL.
1810 void emit_java_to_interp(CodeBuffer& cbuf)
1811 {
1812   // Stub is fixed up when the corresponding call is converted from
1813   // calling compiled code to calling interpreted code.
1814   // movq rbx, 0
1815   // jmp -5 # to self
1816 
1817   address mark = cbuf.inst_mark();  // get mark within main instrs section
1818 
1819   // Note that the code buffer's inst_mark is always relative to insts.
1820   // That's why we must use the macroassembler to generate a stub.
1821   MacroAssembler _masm(&cbuf);
1822 
1823   address base =
1824   __ start_a_stub(Compile::MAX_stubs_size);
1825   if (base == NULL)  return;  // CodeBuffer::expand failed
1826   // static stub relocation stores the instruction address of the call
1827   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1828   // static stub relocation also tags the methodOop in the code-stream.
1829   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1830   // This is recognized as unresolved by relocs/nativeinst/ic code
       // (the jump targets its own address, i.e. the "jmp to self" above).
1831   __ jump(RuntimeAddress(__ pc()));
1832 
1833   // Update current stubs pointer and restore code_end.
1834   __ end_a_stub();
1835 }
1836 
1837 // Size in bytes of the compiled-Java-to-interpreter call stub.
1838 uint size_java_to_interp()
1839 {
1840   return 15;  // movq (1+1+8); jmp (1+4)
1841 }
1842 
1843 // Number of relocation entries for the compiled-Java-to-interpreter call stub.
1844 uint reloc_java_to_interp()
1845 {
1846   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1847 }
1848 
1849 //=============================================================================
1850 #ifndef PRODUCT
     // Debug-build formatter: prints a textual rendering of the inline cache
     // check generated by MachUEPNode::emit().  The output is informational
     // only; the number of nop lines mirrors the padding emit() produces.
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   if (UseCompressedOops) {
1854     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1855     if (Universe::narrow_oop_shift() != 0) {
           // The index operand is the value just loaded into rscratch1.
1856       st->print_cr("leaq    rscratch1, [r12_heapbase, rscratch1, Address::times_8, 0]");
1857     }
1858     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1859   } else {
1860     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1861                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1862   }
1863   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
       // Same count as emit(): one nop always, one more when OptoBreakpoint
       // is off, and one more when compressed oops are in use.
1864   int nops_cnt = 1;
1865   if (!OptoBreakpoint) {
1866     nops_cnt += 1;
1867   }
       if (UseCompressedOops) {
         nops_cnt += 1;
       }
       for (int i = 0; i < nops_cnt; i++) {
         st->print_cr("\tnop");
       }
1868 }
1869 #endif
1870 
1871 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1872 {
1873   // Inline cache check: compare rax with the klass of the object in
1874   // j_rarg0 and jump to the inline cache miss stub when they differ.
1875   MacroAssembler masm(&cbuf);
1876 #ifdef ASSERT
1877   uint start_size = cbuf.code_size();
1878 #endif
1879 
1880   if (!UseCompressedOops) {
1881     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1882   } else {
1883     masm.load_klass(rscratch1, j_rarg0);
1884     masm.cmpptr(rax, rscratch1);
1885   }
1886   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1887 
1888   /* WARNING these NOPs are critical so that verified entry point is properly
1889      aligned for patching by NativeJump::patch_verified_entry() */
1890   int nops_cnt = 1;
1891   // Leave space for int3
1892   nops_cnt += OptoBreakpoint ? 0 : 1;
1893   // ??? divisible by 4 is aligned?
1894   nops_cnt += UseCompressedOops ? 1 : 0;
1895   masm.nop(nops_cnt);
1896 
1897   // The bytes emitted above must add up to exactly what size() reports.
1898   assert(cbuf.code_size() - start_size == size(ra_),
1899          "checking code size of inline cache node");
1900 }
1902 
1903 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1904 {
1905   // Sizes with OptoBreakpoint set; emit() pads one more nop otherwise.
1906   uint bytes = 11;
1907   if (UseCompressedOops) {
1908     bytes = (Universe::narrow_oop_shift() == 0) ? 15 : 19;
1909   }
1910   if (!OptoBreakpoint) {
1911     bytes += 1;
1912   }
1913   return bytes;
1914 }
1915 
1916 
1917 //=============================================================================
1918 uint size_exception_handler()
1919 {
       // Space requested from start_a_stub() by emit_exception_handler().
1920   // NativeCall instruction size is the same as NativeJump.
1921   // Note that this value is also credited (in output.cpp) to
1922   // the size of the code section.
1923   return NativeJump::instruction_size;
1924 }
1925 
1926 // Emit exception handler code: a single jump to the start of the
     // OptoRuntime exception blob, placed in a stub.
     // Returns the assembler offset sampled right after the stub is opened,
     // or 0 if start_a_stub() returns NULL.
1927 int emit_exception_handler(CodeBuffer& cbuf)
1928 {
1929 
1930   // Note that the code buffer's inst_mark is always relative to insts.
1931   // That's why we must use the macroassembler to generate a handler.
1932   MacroAssembler _masm(&cbuf);
1933   address base =
1934   __ start_a_stub(size_exception_handler());
1935   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1936   int offset = __ offset();
1937   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
       // The emitted code must fit in the space reserved above.
1938   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1939   __ end_a_stub();
1940   return offset;
1941 }
1942 
1943 uint size_deopt_handler()
1944 {
       // Space requested from start_a_stub() by emit_deopt_handler().
1945   // three 5 byte instructions
1946   return 15;
1947 }
1948 
1949 // Emit deopt handler code: push the handler's own start address on the
     // stack without touching any register, then jump to the deopt blob's
     // unpack entry.
     // Returns the assembler offset sampled right after the stub is opened,
     // or 0 if start_a_stub() returns NULL.
1950 int emit_deopt_handler(CodeBuffer& cbuf)
1951 {
1952 
1953   // Note that the code buffer's inst_mark is always relative to insts.
1954   // That's why we must use the macroassembler to generate a handler.
1955   MacroAssembler _masm(&cbuf);
1956   address base =
1957   __ start_a_stub(size_deopt_handler());
1958   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1959   int offset = __ offset();
1960   address the_pc = (address) __ pc();
1961   Label next;
1962   // push a "the_pc" on the stack without destroying any registers
1963   // as they all may be live.
1964 
1965   // push address of "next"
1966   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1967   __ bind(next);
1968   // adjust it so it matches "the_pc"
       // (subtract the bytes emitted since the handler start from the pushed address)
1969   __ subptr(Address(rsp, 0), __ offset() - offset);
1970   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1971   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1972   __ end_a_stub();
1973   return offset;
1974 }
1975 
1976 static void emit_double_constant(CodeBuffer& cbuf, double x) {
       // Obtains an address for the constant x from double_constant() and emits
       // a relocated 32-bit displacement to it.  The displacement is measured
       // from the end of the 4-byte field being emitted (code_end() + 4).
1977   int mark = cbuf.insts()->mark_off();
1978   MacroAssembler _masm(&cbuf);
1979   address double_address = __ double_constant(x);
1980   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1981   emit_d32_reloc(cbuf,
1982                  (int) (double_address - cbuf.code_end() - 4),
1983                  internal_word_Relocation::spec(double_address),
1984                  RELOC_DISP32);
1985 }
1986 
1987 static void emit_float_constant(CodeBuffer& cbuf, float x) {
       // Obtains an address for the constant x from float_constant() and emits
       // a relocated 32-bit displacement to it.  The displacement is measured
       // from the end of the 4-byte field being emitted (code_end() + 4).
1988   int mark = cbuf.insts()->mark_off();
1989   MacroAssembler _masm(&cbuf);
1990   address float_address = __ float_constant(x);
1991   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1992   emit_d32_reloc(cbuf,
1993                  (int) (float_address - cbuf.code_end() - 4),
1994                  internal_word_Relocation::spec(float_address),
1995                  RELOC_DISP32);
1996 }
1997 
1998 
1999 const bool Matcher::match_rule_supported(int opcode) {
2000   // Per default match rules are supported, so the answer is simply
2001   // whether a match rule exists for this opcode.
2002   if (has_match_rule(opcode)) return true;
2003   return false;
2004 }
2005 
2006 int Matcher::regnum_to_fpu_offset(int regnum)
2007 {
       // Converts a register number to an offset by subtracting 32.
2008   return regnum - 32; // The FP registers are in the second chunk
2009 }
2010 
2011 // This query is UltraSparc specific; returning true just means we have a
     // fast l2f conversion.
2012 const bool Matcher::convL2FSupported(void) {
2013   return true;
2014 }
2015 
2016 // Vector width in bytes (fixed at 8 here).
2017 const uint Matcher::vector_width_in_bytes(void) {
2018   return 8;
2019 }
2020 
2021 // Ideal register type used for vectors: Op_RegD.
2022 const uint Matcher::vector_ideal_reg(void) {
2023   return Op_RegD;
2024 }
2025 
2026 // Is this branch offset short enough that a short branch can be used?
2027 //
2028 // NOTE: If the platform does not provide any short branch variants, then
2029 //       this method should return false for offset 0.
2030 bool Matcher::is_short_branch_offset(int rule, int offset) {
2031   // the short version of jmpConUCF2 contains multiple branches, so its reach is 2 less on each side
2032   const int lost_reach = (rule == jmpConUCF2_rule) ? 2 : 0;
2033   const int lowest = -128 + lost_reach;
2034   const int highest = 127 - lost_reach;
2035   return lowest <= offset && offset <= highest;
2036 }
2037 
2038 const bool Matcher::isSimpleConstant64(jlong value) {
2039   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
       // The narrower test below (value fits in 32 bits) is kept for reference
       // but disabled; every value is currently treated as simple.
2040   //return value == (int) value;  // Cf. storeImmL and immL32.
2041 
2042   // Probably always true, even if a temp register is required.
2043   return true;
2044 }
2045 
2046 // The ecx parameter to rep stosq for the ClearArray node is in words,
     // not bytes.
2047 const bool Matcher::init_array_count_is_in_bytes = false;
2048 
2049 // Threshold size for ClearArray: 8 * BytesPerLong.
2050 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2051 
2052 // Should the Matcher clone shifts on addressing modes, expecting them
2053 // to be subsumed into complex addressing expressions, or compute them
2054 // into registers?  True for Intel but false for most RISCs.
2055 const bool Matcher::clone_shift_expressions = true;
2056 
2057 // Is it better to copy float constants, or load them directly from
2058 // memory?  Intel can load a float constant from a direct address,
2059 // requiring no extra registers.  Most RISCs will have to materialize
2060 // an address into a register first, so they would do better to copy
2061 // the constant from stack.
2062 const bool Matcher::rematerialize_float_constants = true; // XXX
2063 
2064 // If the CPU can load and store misaligned doubles directly then no
2065 // fixup is needed.  Else we split the double into 2 integer pieces
2066 // and move it piece-by-piece.  Only happens when passing doubles into
2067 // C code as the Java calling convention forces doubles to be aligned.
2068 const bool Matcher::misaligned_doubles_ok = true;
2069 
2070 // No-op on amd64: the body is intentionally empty.
2071 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2072 
2073 // Advertise here whether the CPU requires explicit rounding operations
2074 // to implement the UseStrictFP mode.
2075 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2076 
2077 // Are floats converted to double when stored to stack during deoptimization?
2078 // On x64 they are stored without conversion, so normal access can be used.
2079 bool Matcher::float_in_double() { return false; }
2080 
2081 // Do ints take an entire long register or just half?  (true here)
2082 const bool Matcher::int_in_long = true;
2083 
2084 // Return whether or not this register is ever used as an argument.
2085 // This function is used on startup to build the trampoline stubs in
2086 // generateOptoStub.  Registers not mentioned will be killed by the VM
2087 // call in the trampoline, and arguments in those registers will not be
2088 // available to the callee.
2089 bool Matcher::can_be_java_arg(int reg)
2090 {
2091   // Each test covers both halves (_num and _H_num) of one listed register.
2092   if (reg ==  RDI_num || reg ==  RDI_H_num) return true;
2093   if (reg ==  RSI_num || reg ==  RSI_H_num) return true;
2094   if (reg ==  RDX_num || reg ==  RDX_H_num) return true;
2095   if (reg ==  RCX_num || reg ==  RCX_H_num) return true;
2096   if (reg ==   R8_num || reg ==   R8_H_num) return true;
2097   if (reg ==   R9_num || reg ==   R9_H_num) return true;
2098   if (reg ==  R12_num || reg ==  R12_H_num) return true;
2099   if (reg == XMM0_num || reg == XMM0_H_num) return true;
2100   if (reg == XMM1_num || reg == XMM1_H_num) return true;
2101   if (reg == XMM2_num || reg == XMM2_H_num) return true;
2102   if (reg == XMM3_num || reg == XMM3_H_num) return true;
2103   if (reg == XMM4_num || reg == XMM4_H_num) return true;
2104   if (reg == XMM5_num || reg == XMM5_H_num) return true;
2105   if (reg == XMM6_num || reg == XMM6_H_num) return true;
2106   return reg == XMM7_num || reg == XMM7_H_num;
2107 }
2108 
2109 bool Matcher::is_spillable_arg(int reg)
2110 {
       // Same answer as can_be_java_arg() for every register.
2111   return can_be_java_arg(reg);
2112 }
2113 
2114 // Register mask for the DIVI projection of divmodI: INT_RAX_REG_mask.
2115 RegMask Matcher::divI_proj_mask() {
2116   return INT_RAX_REG_mask;
2117 }
2118 
2119 // Register mask for the MODI projection of divmodI: INT_RDX_REG_mask.
2120 RegMask Matcher::modI_proj_mask() {
2121   return INT_RDX_REG_mask;
2122 }
2123 
2124 // Register mask for the DIVL projection of divmodL: LONG_RAX_REG_mask.
2125 RegMask Matcher::divL_proj_mask() {
2126   return LONG_RAX_REG_mask;
2127 }
2128 
2129 // Register mask for the MODL projection of divmodL: LONG_RDX_REG_mask.
2130 RegMask Matcher::modL_proj_mask() {
2131   return LONG_RDX_REG_mask;
2132 }
2133 
2134 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
       // Returns PTR_RBP_REG_mask; presumably RBP holds the saved SP across a
       // method handle invoke (inferred from the names, confirm against callers).
2135   return PTR_RBP_REG_mask;
2136 }
2137 
2138 static Address build_address(int b, int i, int s, int d) {
2139   // Builds an Address from base/index/scale/displacement encodings; an
2140   // index that decodes to rsp means "no index", which also drops the scale.
2141   const Register base = as_Register(b);
2142   const Register index = as_Register(i);
2143   if (index == rsp) {
2144     return Address(base, noreg, Address::no_scale, d);
2145   }
2146   return Address(base, index, (Address::ScaleFactor)s, d);
2147 }
2148 
2149 %}
2150 
2151 //----------ENCODING BLOCK-----------------------------------------------------
2152 // This block specifies the encoding classes used by the compiler to
2153 // output byte streams.  Encoding classes are parameterized macros
2154 // used by Machine Instruction Nodes in order to generate the bit
2155 // encoding of the instruction.  Operands specify their base encoding
2156 // interface with the interface keyword.  Four interfaces are
2157 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2158 // COND_INTER.  REG_INTER causes an operand to generate a function
2159 // which returns its register number when queried.  CONST_INTER causes
2160 // an operand to generate a function which returns the value of the
2161 // constant when queried.  MEMORY_INTER causes an operand to generate
2162 // four functions which return the Base Register, the Index Register,
2163 // the Scale Value, and the Offset Value of the operand when queried.
2164 // COND_INTER causes an operand to generate six functions which return
2165 // the encoding code (ie - encoding bits for the instruction)
2166 // associated with each basic boolean condition for a conditional
2167 // instruction.
2168 //
2169 // Instructions specify two basic values for encoding.  Again, a
2170 // function is available to check if the constant displacement is an
2171 // oop. They use the ins_encode keyword to specify their encoding
2172 // classes (which must be a sequence of enc_class names, and their
2173 // parameters, specified in the encoding block), and they use the
2174 // opcode keyword to specify, in order, their primary, secondary, and
2175 // tertiary opcode.  Only the opcode sections which a particular
2176 // instruction needs for encoding need to be specified.
2177 encode %{
2178   // Build emit functions for each basic byte or larger field in the
2179   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2180   // from C++ code in the enc_class source block.  Emit functions will
2181   // live in the main source block for now.  In future, we can
2182   // generalize this by adding a syntax that specifies the sizes of
2183   // fields in an order, so that the adlc can build the emit functions
2184   // automagically
2185 
2186   // Emit primary opcode ($primary of the instruction using this class)
2187   enc_class OpcP
2188   %{
2189     emit_opcode(cbuf, $primary);
2190   %}
2191 
2192   // Emit secondary opcode ($secondary of the instruction using this class)
2193   enc_class OpcS
2194   %{
2195     emit_opcode(cbuf, $secondary);
2196   %}
2197 
2198   // Emit tertiary opcode ($tertiary of the instruction using this class)
2199   enc_class OpcT
2200   %{
2201     emit_opcode(cbuf, $tertiary);
2202   %}
2203 
2204   // Emit opcode directly: the byte is taken from the constant operand d8
2205   enc_class Opcode(immI d8)
2206   %{
2207     emit_opcode(cbuf, $d8$$constant);
2208   %}
2209 
2210   // Emit size prefix (the 0x66 operand-size override byte)
2211   enc_class SizePrefix
2212   %{
2213     emit_opcode(cbuf, 0x66);
2214   %}
2215 
2216   enc_class reg(rRegI reg)
2217   %{
         // ModRM byte: mod = 11 (register direct), reg field = 0,
         // r/m = low three bits of the register encoding.
2218     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2219   %}
2220 
2221   enc_class reg_reg(rRegI dst, rRegI src)
2222   %{
         // ModRM byte: mod = 11 (register direct), reg field = dst,
         // r/m = src; only the low three bits of each encoding go here.
2223     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2224   %}
2225 
2226   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2227   %{
         // Opcode byte taken from the constant operand, followed by a
         // register-direct ModRM byte (reg field = dst, r/m = src).
2228     emit_opcode(cbuf, $opcode$$constant);
2229     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2230   %}
2231 
2232   enc_class cmpfp_fixup()
2233   %{
         // Flag fixup: when the parity flag is set, the flags are pushed,
         // masked on the stack and popped back; otherwise the fixup is skipped.
2234     // jnp,s exit
2235     emit_opcode(cbuf, 0x7B);
2236     emit_d8(cbuf, 0x0A); // skips pushfq (1) + andq (8) + popfq (1) bytes
2237 
2238     // pushfq
2239     emit_opcode(cbuf, 0x9C);
2240 
2241     // andq $0xffffff2b, (%rsp)
         // (the mask clears bits 2, 4, 6 and 7 of the low flags byte: PF, AF, ZF, SF)
2242     emit_opcode(cbuf, Assembler::REX_W);
2243     emit_opcode(cbuf, 0x81);
2244     emit_opcode(cbuf, 0x24);
2245     emit_opcode(cbuf, 0x24);
2246     emit_d32(cbuf, 0xffffff2b);
2247 
2248     // popfq
2249     emit_opcode(cbuf, 0x9D);
2250 
2251     // nop (target for branch to avoid branch to branch)
2252     emit_opcode(cbuf, 0x90);
2253   %}
2254 
2255   enc_class cmpfp3(rRegI dst)
2256   %{
         // Materializes a three-way compare result in $dst from the current
         // flags: -1 if the parity or carry flag is set, otherwise 1 if ZF is
         // clear and 0 if ZF is set.
2257     int dstenc = $dst$$reg;
2258 
2259     // movl $dst, -1
2260     if (dstenc >= 8) {
2261       emit_opcode(cbuf, Assembler::REX_B);
2262     }
2263     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2264     emit_d32(cbuf, -1);
2265 
2266     // jp,s done
         // (displacement = jb (2) + setne + movzbl, each 3 bytes or 4 with REX)
2267     emit_opcode(cbuf, 0x7A);
2268     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2269 
2270     // jb,s done
         // (displacement = setne + movzbl)
2271     emit_opcode(cbuf, 0x72);
2272     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2273 
2274     // setne $dst
         // (encodings 4..7 need a plain REX to select the byte register;
         //  encodings 8 and above need REX.B)
2275     if (dstenc >= 4) {
2276       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2277     }
2278     emit_opcode(cbuf, 0x0F);
2279     emit_opcode(cbuf, 0x95);
2280     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2281 
2282     // movzbl $dst, $dst
         // ($dst is both the reg and the r/m operand, hence REX_RB for 8 and above)
2283     if (dstenc >= 4) {
2284       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2285     }
2286     emit_opcode(cbuf, 0x0F);
2287     emit_opcode(cbuf, 0xB6);
2288     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2289   %}
2290 
2291   enc_class cdql_enc(no_rax_rdx_RegI div)
2292   %{
2293     // Full implementation of Java idiv and irem; checks for
2294     // special case as described in JVM spec., p.243 & p.271.
2295     //
2296     //         normal case                           special case
2297     //
2298     // input : rax: dividend                         min_int
2299     //         reg: divisor                          -1
2300     //
2301     // output: rax: quotient  (= rax idiv reg)       min_int
2302     //         rdx: remainder (= rax irem reg)       0
2303     //
2304     //  Code sequence:
2305     //
2306     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2307     //    5:   75 07/08                jne    e <normal>
2308     //    7:   33 d2                   xor    %edx,%edx
2309     //  [div >= 8 -> offset + 1]
2310     //  [REX_B]
2311     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2312     //    c:   74 03/04                je     11 <done>
2313     // 000000000000000e <normal>:
2314     //    e:   99                      cltd
2315     //  [div >= 8 -> offset + 1]
2316     //  [REX_B]
2317     //    f:   f7 f9                   idiv   $div
2318     // 0000000000000011 <done>:
2319 
2320     // cmp    $0x80000000,%eax
2321     emit_opcode(cbuf, 0x3d);
2322     emit_d8(cbuf, 0x00);
2323     emit_d8(cbuf, 0x00);
2324     emit_d8(cbuf, 0x00);
2325     emit_d8(cbuf, 0x80);
2326 
2327     // jne    e <normal>
         // (one byte further when the cmp below carries a REX_B prefix)
2328     emit_opcode(cbuf, 0x75);
2329     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2330 
2331     // xor    %edx,%edx
2332     emit_opcode(cbuf, 0x33);
2333     emit_d8(cbuf, 0xD2);
2334 
2335     // cmp    $0xffffffffffffffff,$div
2336     if ($div$$reg >= 8) {
2337       emit_opcode(cbuf, Assembler::REX_B);
2338     }
2339     emit_opcode(cbuf, 0x83);
2340     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2341     emit_d8(cbuf, 0xFF);
2342 
2343     // je     11 <done>
         // (skips cltd plus the idiv emitted by the user of this class,
         //  which is one byte longer when it carries a REX_B prefix)
2344     emit_opcode(cbuf, 0x74);
2345     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2346 
2347     // <normal>
2348     // cltd
2349     emit_opcode(cbuf, 0x99);
2350 
2351     // idivl (note: must be emitted by the user of this rule)
2352     // <done>
2353   %}
2354 
2355   enc_class cdqq_enc(no_rax_rdx_RegL div)
2356   %{
2357     // Full implementation of Java ldiv and lrem; checks for
2358     // special case as described in JVM spec., p.243 & p.271.
2359     //
2360     //         normal case                           special case
2361     //
2362     // input : rax: dividend                         min_long
2363     //         reg: divisor                          -1
2364     //
2365     // output: rax: quotient  (= rax idiv reg)       min_long
2366     //         rdx: remainder (= rax irem reg)       0
2367     //
2368     //  Code sequence:
2369     //
2370     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2371     //    7:   00 00 80
2372     //    a:   48 39 d0                cmp    %rdx,%rax
2373     //    d:   75 08                   jne    17 <normal>
2374     //    f:   33 d2                   xor    %edx,%edx
2375     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2376     //   15:   74 05                   je     1c <done>
2377     // 0000000000000017 <normal>:
2378     //   17:   48 99                   cqto
2379     //   19:   48 f7 f9                idiv   $div
2380     // 000000000000001c <done>:
2381 
2382     // mov    $0x8000000000000000,%rdx
2383     emit_opcode(cbuf, Assembler::REX_W);
2384     emit_opcode(cbuf, 0xBA);
2385     emit_d8(cbuf, 0x00);
2386     emit_d8(cbuf, 0x00);
2387     emit_d8(cbuf, 0x00);
2388     emit_d8(cbuf, 0x00);
2389     emit_d8(cbuf, 0x00);
2390     emit_d8(cbuf, 0x00);
2391     emit_d8(cbuf, 0x00);
2392     emit_d8(cbuf, 0x80);
2393 
2394     // cmp    %rdx,%rax
2395     emit_opcode(cbuf, Assembler::REX_W);
2396     emit_opcode(cbuf, 0x39);
2397     emit_d8(cbuf, 0xD0);
2398 
2399     // jne    17 <normal>
2400     emit_opcode(cbuf, 0x75);
2401     emit_d8(cbuf, 0x08);
2402 
2403     // xor    %edx,%edx
2404     emit_opcode(cbuf, 0x33);
2405     emit_d8(cbuf, 0xD2);
2406 
2407     // cmp    $0xffffffffffffffff,$div
         // (a REX prefix is always present here, so the branch offsets are fixed)
2408     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2409     emit_opcode(cbuf, 0x83);
2410     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2411     emit_d8(cbuf, 0xFF);
2412 
2413     // je     1c <done>
         // (skips cqto (2) plus the 3-byte idiv emitted by the user of this class)
2414     emit_opcode(cbuf, 0x74);
2415     emit_d8(cbuf, 0x05);
2416 
2417     // <normal>
2418     // cqto
2419     emit_opcode(cbuf, Assembler::REX_W);
2420     emit_opcode(cbuf, 0x99);
2421 
2422     // idivq (note: must be emitted by the user of this rule)
2423     // <done>
2424   %}
2425 
2426   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2427   enc_class OpcSE(immI imm)
2428   %{
2429     // Emit the primary opcode.  When the immediate fits in a signed byte
2430     // the sign-extend bit (0x02) is set in the opcode; otherwise the
2431     // opcode is emitted unchanged for a 32-bit immediate.
2432     emit_opcode(cbuf,
2433                 (-0x80 <= $imm$$constant && $imm$$constant < 0x80)
2434                 ? ($primary | 0x02)
2435                 : $primary);
2437   %}
2438 
2439   enc_class OpcSErm(rRegI dst, immI imm)
2440   %{
2441     // OpcSEr/m: optional REX_B, sign-extend-aware primary opcode, then a
2442     // register-direct ModRM byte carrying the secondary opcode.
2443     int dstenc = $dst$$reg;
2444     if (dstenc >= 8) {
2445       emit_opcode(cbuf, Assembler::REX_B);
2446     }
2447     // Set the sign-extend bit (0x02) in the opcode when the immediate
2448     // fits in a signed byte; a 32-bit immediate uses the plain opcode.
2449     emit_opcode(cbuf,
2450                 (-0x80 <= $imm$$constant && $imm$$constant < 0x80)
2451                 ? ($primary | 0x02)
2452                 : $primary);
2453     // Emit r/m byte with secondary opcode, after primary opcode.  Only
2454     // the low part of an extended register encoding goes into r/m.
2455     emit_rm(cbuf, 0x3, $secondary, dstenc >= 8 ? dstenc - 8 : dstenc);
2457   %}
2458 
2459   enc_class OpcSErm_wide(rRegL dst, immI imm)
2460   %{
2461     // OpcSEr/m, 64-bit form: a REX.W prefix is always emitted, with the
2462     // B bit added for register encodings of 8 and above.
2463     int dstenc = $dst$$reg;
2464     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2465     // Set the sign-extend bit (0x02) in the opcode when the immediate
2466     // fits in a signed byte; a 32-bit immediate uses the plain opcode.
2467     emit_opcode(cbuf,
2468                 (-0x80 <= $imm$$constant && $imm$$constant < 0x80)
2469                 ? ($primary | 0x02)
2470                 : $primary);
2471     // Emit r/m byte with secondary opcode, after primary opcode.  Only
2472     // the low part of an extended register encoding goes into r/m.
2473     emit_rm(cbuf, 0x3, $secondary, dstenc >= 8 ? dstenc - 8 : dstenc);
2479   %}
2480 
2481   enc_class Con8or32(immI imm)
2482   %{
         // Emits the immediate itself, sized by the same signed-byte range
         // test that OpcSE / OpcSErm apply to the opcode.
2483     // Check for 8-bit immediate: emit it as a single byte
2484     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2485       $$$emit8$imm$$constant;
2486     } else {
2487       // 32-bit immediate
2488       $$$emit32$imm$$constant;
2489     }
2490   %}
2491 
2492   enc_class Lbl(label labl)
2493   %{
2494     // JMP, CALL
         // 32-bit displacement to the label, measured from the end of the
         // 4-byte field being emitted; 0 is emitted when there is no label.
2495     Label* l = $labl$$label;
2496     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2497   %}
2498 
2499   enc_class LblShort(label labl)
2500   %{
2501     // JMP, CALL
         // 8-bit displacement to the label, measured from the end of the
         // 1-byte field being emitted; 0 is used when there is no label.
2502     Label* l = $labl$$label;
2503     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2504     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2505     emit_d8(cbuf, disp);
2506   %}
2507 
2508   enc_class opc2_reg(rRegI dst)
2509   %{
2510     // BSWAP
         // Passes the secondary opcode and the register encoding to emit_cc();
         // presumably they are combined into one byte (emit_cc is defined
         // outside this section).
2511     emit_cc(cbuf, $secondary, $dst$$reg);
2512   %}
2513 
2514   enc_class opc3_reg(rRegI dst)
2515   %{
2516     // BSWAP
         // Same as opc2_reg, but uses the tertiary opcode.
2517     emit_cc(cbuf, $tertiary, $dst$$reg);
2518   %}
2519 
2520   enc_class reg_opc(rRegI div)
2521   %{
2522     // INC, DEC, IDIV, IMOD, JMP indirect, ...
         // Register-direct ModRM byte whose reg field carries the secondary
         // opcode and whose r/m field is the low three bits of the register.
2523     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2524   %}
2525 
2526   enc_class Jcc(cmpOp cop, label labl)
2527   %{
2528     // JCC
         // Primary opcode byte, then the secondary opcode passed to emit_cc()
         // together with the condition code, then a 32-bit displacement
         // measured from the end of the displacement field (0 if no label).
2529     Label* l = $labl$$label;
2530     $$$emit8$primary;
2531     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2532     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2533   %}
2534 
2535   enc_class JccShort (cmpOp cop, label labl)
2536   %{
2537   // JCC
2538     Label *l = $labl$$label;
2539     emit_cc(cbuf, $primary, $cop$$cmpcode);
2540     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2541     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2542     emit_d8(cbuf, disp);
2543   %}
2544 
  // Opcode bytes of a conditional move: the primary opcode byte followed by
  // the secondary opcode combined with the condition code of cop.
  // Prefix and ModRM bytes are not emitted here.
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
2551 
2552   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2553   %{
2554     // Invert sense of branch from sense of cmov
2555     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2556     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2557                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2558                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2559     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2560     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2561     if ($dst$$reg < 8) {
2562       if ($src$$reg >= 8) {
2563         emit_opcode(cbuf, Assembler::REX_B);
2564       }
2565     } else {
2566       if ($src$$reg < 8) {
2567         emit_opcode(cbuf, Assembler::REX_R);
2568       } else {
2569         emit_opcode(cbuf, Assembler::REX_RB);
2570       }
2571     }
2572     emit_opcode(cbuf, 0x0F);
2573     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2574     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2575   %}
2576 
2577   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2578   %{
2579     // Invert sense of branch from sense of cmov
2580     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2581     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2582 
2583     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2584     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2585     if ($dst$$reg < 8) {
2586       if ($src$$reg >= 8) {
2587         emit_opcode(cbuf, Assembler::REX_B);
2588       }
2589     } else {
2590       if ($src$$reg < 8) {
2591         emit_opcode(cbuf, Assembler::REX_R);
2592       } else {
2593         emit_opcode(cbuf, Assembler::REX_RB);
2594       }
2595     }
2596     emit_opcode(cbuf, 0x0F);
2597     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2598     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2599   %}
2600 
2601   enc_class enc_PartialSubtypeCheck()
2602   %{
2603     Register Rrdi = as_Register(RDI_enc); // result register
2604     Register Rrax = as_Register(RAX_enc); // super class
2605     Register Rrcx = as_Register(RCX_enc); // killed
2606     Register Rrsi = as_Register(RSI_enc); // sub class
2607     Label miss;
2608     const bool set_cond_codes = true;
2609 
2610     MacroAssembler _masm(&cbuf);
2611     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2612                                      NULL, &miss,
2613                                      /*set_cond_codes:*/ true);
2614     if ($primary) {
2615       __ xorptr(Rrdi, Rrdi);
2616     }
2617     __ bind(miss);
2618   %}
2619 
2620   enc_class Java_To_Interpreter(method meth)
2621   %{
2622     // CALL Java_To_Interpreter
2623     // This is the instruction starting address for relocation info.
2624     cbuf.set_inst_mark();
2625     $$$emit8$primary;
2626     // CALL directly to the runtime
2627     emit_d32_reloc(cbuf,
2628                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2629                    runtime_call_Relocation::spec(),
2630                    RELOC_DISP32);
2631   %}
2632 
2633   enc_class preserve_SP %{
2634     debug_only(int off0 = cbuf.code_size());
2635     MacroAssembler _masm(&cbuf);
2636     // RBP is preserved across all calls, even compiled calls.
2637     // Use it to preserve RSP in places where the callee might change the SP.
2638     __ movptr(rbp, rsp);
2639     debug_only(int off1 = cbuf.code_size());
2640     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2641   %}
2642 
  // Counterpart of preserve_SP: copy RBP back into RSP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp);
  %}
2647 
2648   enc_class Java_Static_Call(method meth)
2649   %{
2650     // JAVA STATIC CALL
2651     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2652     // determine who we intended to call.
2653     cbuf.set_inst_mark();
2654     $$$emit8$primary;
2655 
2656     if (!_method) {
2657       emit_d32_reloc(cbuf,
2658                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2659                      runtime_call_Relocation::spec(),
2660                      RELOC_DISP32);
2661     } else if (_optimized_virtual) {
2662       emit_d32_reloc(cbuf,
2663                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2664                      opt_virtual_call_Relocation::spec(),
2665                      RELOC_DISP32);
2666     } else {
2667       emit_d32_reloc(cbuf,
2668                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2669                      static_call_Relocation::spec(),
2670                      RELOC_DISP32);
2671     }
2672     if (_method) {
2673       // Emit stub for static call
2674       emit_java_to_interp(cbuf);
2675     }
2676   %}
2677 
2678   enc_class Java_Dynamic_Call(method meth)
2679   %{
2680     // JAVA DYNAMIC CALL
2681     // !!!!!
2682     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2683     // emit_call_dynamic_prologue( cbuf );
2684     cbuf.set_inst_mark();
2685 
2686     // movq rax, -1
2687     emit_opcode(cbuf, Assembler::REX_W);
2688     emit_opcode(cbuf, 0xB8 | RAX_enc);
2689     emit_d64_reloc(cbuf,
2690                    (int64_t) Universe::non_oop_word(),
2691                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2692     address virtual_call_oop_addr = cbuf.inst_mark();
2693     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2694     // who we intended to call.
2695     cbuf.set_inst_mark();
2696     $$$emit8$primary;
2697     emit_d32_reloc(cbuf,
2698                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2699                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2700                    RELOC_DISP32);
2701   %}
2702 
2703   enc_class Java_Compiled_Call(method meth)
2704   %{
2705     // JAVA COMPILED CALL
2706     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2707 
2708     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2709     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2710 
2711     // callq *disp(%rax)
2712     cbuf.set_inst_mark();
2713     $$$emit8$primary;
2714     if (disp < 0x80) {
2715       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2716       emit_d8(cbuf, disp); // Displacement
2717     } else {
2718       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2719       emit_d32(cbuf, disp); // Displacement
2720     }
2721   %}
2722 
2723   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2724   %{
2725     // SAL, SAR, SHR
2726     int dstenc = $dst$$reg;
2727     if (dstenc >= 8) {
2728       emit_opcode(cbuf, Assembler::REX_B);
2729       dstenc -= 8;
2730     }
2731     $$$emit8$primary;
2732     emit_rm(cbuf, 0x3, $secondary, dstenc);
2733     $$$emit8$shift$$constant;
2734   %}
2735 
2736   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2737   %{
2738     // SAL, SAR, SHR
2739     int dstenc = $dst$$reg;
2740     if (dstenc < 8) {
2741       emit_opcode(cbuf, Assembler::REX_W);
2742     } else {
2743       emit_opcode(cbuf, Assembler::REX_WB);
2744       dstenc -= 8;
2745     }
2746     $$$emit8$primary;
2747     emit_rm(cbuf, 0x3, $secondary, dstenc);
2748     $$$emit8$shift$$constant;
2749   %}
2750 
2751   enc_class load_immI(rRegI dst, immI src)
2752   %{
2753     int dstenc = $dst$$reg;
2754     if (dstenc >= 8) {
2755       emit_opcode(cbuf, Assembler::REX_B);
2756       dstenc -= 8;
2757     }
2758     emit_opcode(cbuf, 0xB8 | dstenc);
2759     $$$emit32$src$$constant;
2760   %}
2761 
2762   enc_class load_immL(rRegL dst, immL src)
2763   %{
2764     int dstenc = $dst$$reg;
2765     if (dstenc < 8) {
2766       emit_opcode(cbuf, Assembler::REX_W);
2767     } else {
2768       emit_opcode(cbuf, Assembler::REX_WB);
2769       dstenc -= 8;
2770     }
2771     emit_opcode(cbuf, 0xB8 | dstenc);
2772     emit_d64(cbuf, $src$$constant);
2773   %}
2774 
2775   enc_class load_immUL32(rRegL dst, immUL32 src)
2776   %{
2777     // same as load_immI, but this time we care about zeroes in the high word
2778     int dstenc = $dst$$reg;
2779     if (dstenc >= 8) {
2780       emit_opcode(cbuf, Assembler::REX_B);
2781       dstenc -= 8;
2782     }
2783     emit_opcode(cbuf, 0xB8 | dstenc);
2784     $$$emit32$src$$constant;
2785   %}
2786 
2787   enc_class load_immL32(rRegL dst, immL32 src)
2788   %{
2789     int dstenc = $dst$$reg;
2790     if (dstenc < 8) {
2791       emit_opcode(cbuf, Assembler::REX_W);
2792     } else {
2793       emit_opcode(cbuf, Assembler::REX_WB);
2794       dstenc -= 8;
2795     }
2796     emit_opcode(cbuf, 0xC7);
2797     emit_rm(cbuf, 0x03, 0x00, dstenc);
2798     $$$emit32$src$$constant;
2799   %}
2800 
2801   enc_class load_immP31(rRegP dst, immP32 src)
2802   %{
2803     // same as load_immI, but this time we care about zeroes in the high word
2804     int dstenc = $dst$$reg;
2805     if (dstenc >= 8) {
2806       emit_opcode(cbuf, Assembler::REX_B);
2807       dstenc -= 8;
2808     }
2809     emit_opcode(cbuf, 0xB8 | dstenc);
2810     $$$emit32$src$$constant;
2811   %}
2812 
2813   enc_class load_immP(rRegP dst, immP src)
2814   %{
2815     int dstenc = $dst$$reg;
2816     if (dstenc < 8) {
2817       emit_opcode(cbuf, Assembler::REX_W);
2818     } else {
2819       emit_opcode(cbuf, Assembler::REX_WB);
2820       dstenc -= 8;
2821     }
2822     emit_opcode(cbuf, 0xB8 | dstenc);
2823     // This next line should be generated from ADLC
2824     if ($src->constant_is_oop()) {
2825       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2826     } else {
2827       emit_d64(cbuf, $src$$constant);
2828     }
2829   %}
2830 
  // Operand bytes for loading a float constant: a ModRM byte with mod = 00
  // and r/m = 101 (reg = low three bits of dst), followed by whatever
  // emit_float_constant produces for the constant.
  // Prefix and opcode bytes are not emitted here.
  enc_class load_immF(regF dst, immF con)
  %{
    // XXX reg_mem doesn't support RIP-relative addressing yet
    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
    emit_float_constant(cbuf, $con$$constant);
  %}
2837 
  // Operand bytes for loading a double constant: a ModRM byte with mod = 00
  // and r/m = 101 (reg = low three bits of dst), followed by whatever
  // emit_double_constant produces for the constant.
  // Prefix and opcode bytes are not emitted here.
  enc_class load_immD(regD dst, immD con)
  %{
    // XXX reg_mem doesn't support RIP-relative addressing yet
    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
    emit_double_constant(cbuf, $con$$constant);
  %}
2844 
2845   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2846     emit_opcode(cbuf, 0xF3);
2847     if ($dst$$reg >= 8) {
2848       emit_opcode(cbuf, Assembler::REX_R);
2849     }
2850     emit_opcode(cbuf, 0x0F);
2851     emit_opcode(cbuf, 0x10);
2852     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2853     emit_float_constant(cbuf, $con$$constant);
2854   %}
2855 
2856   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2857     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2858     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2859     if ($dst$$reg >= 8) {
2860       emit_opcode(cbuf, Assembler::REX_R);
2861     }
2862     emit_opcode(cbuf, 0x0F);
2863     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2864     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2865     emit_double_constant(cbuf, $con$$constant);
2866   %}
2867 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_copy, which receives the raw
  // encodings of dst and src.
  enc_class enc_copy(rRegI dst, rRegI src)
  %{
    encode_copy(cbuf, $dst$$reg, $src$$reg);
  %}
2873 
  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_CopyXD, which receives the raw
  // encodings of dst and src.
  enc_class enc_CopyXD( RegD dst, RegD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}
2878 
2879   enc_class enc_copy_always(rRegI dst, rRegI src)
2880   %{
2881     int srcenc = $src$$reg;
2882     int dstenc = $dst$$reg;
2883 
2884     if (dstenc < 8) {
2885       if (srcenc >= 8) {
2886         emit_opcode(cbuf, Assembler::REX_B);
2887         srcenc -= 8;
2888       }
2889     } else {
2890       if (srcenc < 8) {
2891         emit_opcode(cbuf, Assembler::REX_R);
2892       } else {
2893         emit_opcode(cbuf, Assembler::REX_RB);
2894         srcenc -= 8;
2895       }
2896       dstenc -= 8;
2897     }
2898 
2899     emit_opcode(cbuf, 0x8B);
2900     emit_rm(cbuf, 0x3, dstenc, srcenc);
2901   %}
2902 
2903   enc_class enc_copy_wide(rRegL dst, rRegL src)
2904   %{
2905     int srcenc = $src$$reg;
2906     int dstenc = $dst$$reg;
2907 
2908     if (dstenc != srcenc) {
2909       if (dstenc < 8) {
2910         if (srcenc < 8) {
2911           emit_opcode(cbuf, Assembler::REX_W);
2912         } else {
2913           emit_opcode(cbuf, Assembler::REX_WB);
2914           srcenc -= 8;
2915         }
2916       } else {
2917         if (srcenc < 8) {
2918           emit_opcode(cbuf, Assembler::REX_WR);
2919         } else {
2920           emit_opcode(cbuf, Assembler::REX_WRB);
2921           srcenc -= 8;
2922         }
2923         dstenc -= 8;
2924       }
2925       emit_opcode(cbuf, 0x8B);
2926       emit_rm(cbuf, 0x3, dstenc, srcenc);
2927     }
2928   %}
2929 
  // Emit the integer constant src as a 32-bit immediate, using the ADLC
  // emit32 macro.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2935 
2936   enc_class Con64(immL src)
2937   %{
2938     // Output immediate
2939     emit_d64($src$$constant);
2940   %}
2941 
2942   enc_class Con32F_as_bits(immF src)
2943   %{
2944     // Output Float immediate bits
2945     jfloat jf = $src$$constant;
2946     jint jf_as_bits = jint_cast(jf);
2947     emit_d32(cbuf, jf_as_bits);
2948   %}
2949 
  // Emit the integer constant src as a 16-bit immediate, using the ADLC
  // emit16 macro.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2955 
  // How is this different from Con32??? XXX
  // Emits the integer constant src as 32 bits by calling emit_d32 directly,
  // where Con32 goes through the ADLC emit32 macro.
  enc_class Con_d32(immI src)
  %{
    emit_d32(cbuf,$src$$constant);
  %}
2961 
  // Emit a ModRM byte with mod = 00 and r/m = 101 whose reg field is the
  // encoding of t1, followed by a 32-bit zero displacement.
  // The register encoding is passed to emit_rm unmasked.
  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
2967 
2968   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2969     MacroAssembler masm(&cbuf);
2970 
2971     Register switch_reg = as_Register($switch_val$$reg);
2972     Register dest_reg   = as_Register($dest$$reg);
2973     address table_base  = masm.address_table_constant(_index2label);
2974 
2975     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2976     // to do that and the compiler is using that register as one it can allocate.
2977     // So we build it all by hand.
2978     // Address index(noreg, switch_reg, Address::times_1);
2979     // ArrayAddress dispatch(table, index);
2980 
2981     Address dispatch(dest_reg, switch_reg, Address::times_1);
2982 
2983     masm.lea(dest_reg, InternalAddress(table_base));
2984     masm.jmp(dispatch);
2985   %}
2986 
2987   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2988     MacroAssembler masm(&cbuf);
2989 
2990     Register switch_reg = as_Register($switch_val$$reg);
2991     Register dest_reg   = as_Register($dest$$reg);
2992     address table_base  = masm.address_table_constant(_index2label);
2993 
2994     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2995     // to do that and the compiler is using that register as one it can allocate.
2996     // So we build it all by hand.
2997     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2998     // ArrayAddress dispatch(table, index);
2999 
3000     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
3001 
3002     masm.lea(dest_reg, InternalAddress(table_base));
3003     masm.jmp(dispatch);
3004   %}
3005 
3006   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
3007     MacroAssembler masm(&cbuf);
3008 
3009     Register switch_reg = as_Register($switch_val$$reg);
3010     Register dest_reg   = as_Register($dest$$reg);
3011     address table_base  = masm.address_table_constant(_index2label);
3012 
3013     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
3014     // to do that and the compiler is using that register as one it can allocate.
3015     // So we build it all by hand.
3016     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3017     // ArrayAddress dispatch(table, index);
3018 
3019     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3020     masm.lea(dest_reg, InternalAddress(table_base));
3021     masm.jmp(dispatch);
3022 
3023   %}
3024 
  // Emit the LOCK prefix byte (F0), but only when os::is_MP() reports a
  // multiprocessor system; otherwise nothing is emitted.
  enc_class lock_prefix()
  %{
    if (os::is_MP()) {
      emit_opcode(cbuf, 0xF0); // lock
    }
  %}
3031 
3032   enc_class REX_mem(memory mem)
3033   %{
3034     if ($mem$$base >= 8) {
3035       if ($mem$$index < 8) {
3036         emit_opcode(cbuf, Assembler::REX_B);
3037       } else {
3038         emit_opcode(cbuf, Assembler::REX_XB);
3039       }
3040     } else {
3041       if ($mem$$index >= 8) {
3042         emit_opcode(cbuf, Assembler::REX_X);
3043       }
3044     }
3045   %}
3046 
3047   enc_class REX_mem_wide(memory mem)
3048   %{
3049     if ($mem$$base >= 8) {
3050       if ($mem$$index < 8) {
3051         emit_opcode(cbuf, Assembler::REX_WB);
3052       } else {
3053         emit_opcode(cbuf, Assembler::REX_WXB);
3054       }
3055     } else {
3056       if ($mem$$index < 8) {
3057         emit_opcode(cbuf, Assembler::REX_W);
3058       } else {
3059         emit_opcode(cbuf, Assembler::REX_WX);
3060       }
3061     }
3062   %}
3063 
3064   // for byte regs
3065   enc_class REX_breg(rRegI reg)
3066   %{
3067     if ($reg$$reg >= 4) {
3068       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3069     }
3070   %}
3071 
3072   // for byte regs
3073   enc_class REX_reg_breg(rRegI dst, rRegI src)
3074   %{
3075     if ($dst$$reg < 8) {
3076       if ($src$$reg >= 4) {
3077         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3078       }
3079     } else {
3080       if ($src$$reg < 8) {
3081         emit_opcode(cbuf, Assembler::REX_R);
3082       } else {
3083         emit_opcode(cbuf, Assembler::REX_RB);
3084       }
3085     }
3086   %}
3087 
3088   // for byte regs
3089   enc_class REX_breg_mem(rRegI reg, memory mem)
3090   %{
3091     if ($reg$$reg < 8) {
3092       if ($mem$$base < 8) {
3093         if ($mem$$index >= 8) {
3094           emit_opcode(cbuf, Assembler::REX_X);
3095         } else if ($reg$$reg >= 4) {
3096           emit_opcode(cbuf, Assembler::REX);
3097         }
3098       } else {
3099         if ($mem$$index < 8) {
3100           emit_opcode(cbuf, Assembler::REX_B);
3101         } else {
3102           emit_opcode(cbuf, Assembler::REX_XB);
3103         }
3104       }
3105     } else {
3106       if ($mem$$base < 8) {
3107         if ($mem$$index < 8) {
3108           emit_opcode(cbuf, Assembler::REX_R);
3109         } else {
3110           emit_opcode(cbuf, Assembler::REX_RX);
3111         }
3112       } else {
3113         if ($mem$$index < 8) {
3114           emit_opcode(cbuf, Assembler::REX_RB);
3115         } else {
3116           emit_opcode(cbuf, Assembler::REX_RXB);
3117         }
3118       }
3119     }
3120   %}
3121 
  // REX prefix for a single register operand (32-bit operation): REX.B is
  // emitted for encodings 8 and above, nothing otherwise.
  enc_class REX_reg(rRegI reg)
  %{
    if ($reg$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
  %}
3128 
3129   enc_class REX_reg_wide(rRegI reg)
3130   %{
3131     if ($reg$$reg < 8) {
3132       emit_opcode(cbuf, Assembler::REX_W);
3133     } else {
3134       emit_opcode(cbuf, Assembler::REX_WB);
3135     }
3136   %}
3137 
3138   enc_class REX_reg_reg(rRegI dst, rRegI src)
3139   %{
3140     if ($dst$$reg < 8) {
3141       if ($src$$reg >= 8) {
3142         emit_opcode(cbuf, Assembler::REX_B);
3143       }
3144     } else {
3145       if ($src$$reg < 8) {
3146         emit_opcode(cbuf, Assembler::REX_R);
3147       } else {
3148         emit_opcode(cbuf, Assembler::REX_RB);
3149       }
3150     }
3151   %}
3152 
3153   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3154   %{
3155     if ($dst$$reg < 8) {
3156       if ($src$$reg < 8) {
3157         emit_opcode(cbuf, Assembler::REX_W);
3158       } else {
3159         emit_opcode(cbuf, Assembler::REX_WB);
3160       }
3161     } else {
3162       if ($src$$reg < 8) {
3163         emit_opcode(cbuf, Assembler::REX_WR);
3164       } else {
3165         emit_opcode(cbuf, Assembler::REX_WRB);
3166       }
3167     }
3168   %}
3169 
3170   enc_class REX_reg_mem(rRegI reg, memory mem)
3171   %{
3172     if ($reg$$reg < 8) {
3173       if ($mem$$base < 8) {
3174         if ($mem$$index >= 8) {
3175           emit_opcode(cbuf, Assembler::REX_X);
3176         }
3177       } else {
3178         if ($mem$$index < 8) {
3179           emit_opcode(cbuf, Assembler::REX_B);
3180         } else {
3181           emit_opcode(cbuf, Assembler::REX_XB);
3182         }
3183       }
3184     } else {
3185       if ($mem$$base < 8) {
3186         if ($mem$$index < 8) {
3187           emit_opcode(cbuf, Assembler::REX_R);
3188         } else {
3189           emit_opcode(cbuf, Assembler::REX_RX);
3190         }
3191       } else {
3192         if ($mem$$index < 8) {
3193           emit_opcode(cbuf, Assembler::REX_RB);
3194         } else {
3195           emit_opcode(cbuf, Assembler::REX_RXB);
3196         }
3197       }
3198     }
3199   %}
3200 
3201   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3202   %{
3203     if ($reg$$reg < 8) {
3204       if ($mem$$base < 8) {
3205         if ($mem$$index < 8) {
3206           emit_opcode(cbuf, Assembler::REX_W);
3207         } else {
3208           emit_opcode(cbuf, Assembler::REX_WX);
3209         }
3210       } else {
3211         if ($mem$$index < 8) {
3212           emit_opcode(cbuf, Assembler::REX_WB);
3213         } else {
3214           emit_opcode(cbuf, Assembler::REX_WXB);
3215         }
3216       }
3217     } else {
3218       if ($mem$$base < 8) {
3219         if ($mem$$index < 8) {
3220           emit_opcode(cbuf, Assembler::REX_WR);
3221         } else {
3222           emit_opcode(cbuf, Assembler::REX_WRX);
3223         }
3224       } else {
3225         if ($mem$$index < 8) {
3226           emit_opcode(cbuf, Assembler::REX_WRB);
3227         } else {
3228           emit_opcode(cbuf, Assembler::REX_WRXB);
3229         }
3230       }
3231     }
3232   %}
3233 
3234   enc_class reg_mem(rRegI ereg, memory mem)
3235   %{
3236     // High registers handle in encode_RegMem
3237     int reg = $ereg$$reg;
3238     int base = $mem$$base;
3239     int index = $mem$$index;
3240     int scale = $mem$$scale;
3241     int disp = $mem$$disp;
3242     bool disp_is_oop = $mem->disp_is_oop();
3243 
3244     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3245   %}
3246 
3247   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3248   %{
3249     int rm_byte_opcode = $rm_opcode$$constant;
3250 
3251     // High registers handle in encode_RegMem
3252     int base = $mem$$base;
3253     int index = $mem$$index;
3254     int scale = $mem$$scale;
3255     int displace = $mem$$disp;
3256 
3257     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3258                                             // working with static
3259                                             // globals
3260     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3261                   disp_is_oop);
3262   %}
3263 
3264   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3265   %{
3266     int reg_encoding = $dst$$reg;
3267     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3268     int index        = 0x04;            // 0x04 indicates no index
3269     int scale        = 0x00;            // 0x00 indicates no scale
3270     int displace     = $src1$$constant; // 0x00 indicates no displacement
3271     bool disp_is_oop = false;
3272     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3273                   disp_is_oop);
3274   %}
3275 
3276   enc_class neg_reg(rRegI dst)
3277   %{
3278     int dstenc = $dst$$reg;
3279     if (dstenc >= 8) {
3280       emit_opcode(cbuf, Assembler::REX_B);
3281       dstenc -= 8;
3282     }
3283     // NEG $dst
3284     emit_opcode(cbuf, 0xF7);
3285     emit_rm(cbuf, 0x3, 0x03, dstenc);
3286   %}
3287 
3288   enc_class neg_reg_wide(rRegI dst)
3289   %{
3290     int dstenc = $dst$$reg;
3291     if (dstenc < 8) {
3292       emit_opcode(cbuf, Assembler::REX_W);
3293     } else {
3294       emit_opcode(cbuf, Assembler::REX_WB);
3295       dstenc -= 8;
3296     }
3297     // NEG $dst
3298     emit_opcode(cbuf, 0xF7);
3299     emit_rm(cbuf, 0x3, 0x03, dstenc);
3300   %}
3301 
3302   enc_class setLT_reg(rRegI dst)
3303   %{
3304     int dstenc = $dst$$reg;
3305     if (dstenc >= 8) {
3306       emit_opcode(cbuf, Assembler::REX_B);
3307       dstenc -= 8;
3308     } else if (dstenc >= 4) {
3309       emit_opcode(cbuf, Assembler::REX);
3310     }
3311     // SETLT $dst
3312     emit_opcode(cbuf, 0x0F);
3313     emit_opcode(cbuf, 0x9C);
3314     emit_rm(cbuf, 0x3, 0x0, dstenc);
3315   %}
3316 
3317   enc_class setNZ_reg(rRegI dst)
3318   %{
3319     int dstenc = $dst$$reg;
3320     if (dstenc >= 8) {
3321       emit_opcode(cbuf, Assembler::REX_B);
3322       dstenc -= 8;
3323     } else if (dstenc >= 4) {
3324       emit_opcode(cbuf, Assembler::REX);
3325     }
3326     // SETNZ $dst
3327     emit_opcode(cbuf, 0x0F);
3328     emit_opcode(cbuf, 0x95);
3329     emit_rm(cbuf, 0x3, 0x0, dstenc);
3330   %}
3331 
3332   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3333                        rcx_RegI tmp)
3334   %{
3335     // cadd_cmpLT
3336 
3337     int tmpReg = $tmp$$reg;
3338 
3339     int penc = $p$$reg;
3340     int qenc = $q$$reg;
3341     int yenc = $y$$reg;
3342 
3343     // subl $p,$q
3344     if (penc < 8) {
3345       if (qenc >= 8) {
3346         emit_opcode(cbuf, Assembler::REX_B);
3347       }
3348     } else {
3349       if (qenc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_R);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_RB);
3353       }
3354     }
3355     emit_opcode(cbuf, 0x2B);
3356     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3357 
3358     // sbbl $tmp, $tmp
3359     emit_opcode(cbuf, 0x1B);
3360     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3361 
3362     // andl $tmp, $y
3363     if (yenc >= 8) {
3364       emit_opcode(cbuf, Assembler::REX_B);
3365     }
3366     emit_opcode(cbuf, 0x23);
3367     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3368 
3369     // addl $p,$tmp
3370     if (penc >= 8) {
3371         emit_opcode(cbuf, Assembler::REX_R);
3372     }
3373     emit_opcode(cbuf, 0x03);
3374     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3375   %}
3376 
3377   // Compare the lonogs and set -1, 0, or 1 into dst
3378   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3379   %{
3380     int src1enc = $src1$$reg;
3381     int src2enc = $src2$$reg;
3382     int dstenc = $dst$$reg;
3383 
3384     // cmpq $src1, $src2
3385     if (src1enc < 8) {
3386       if (src2enc < 8) {
3387         emit_opcode(cbuf, Assembler::REX_W);
3388       } else {
3389         emit_opcode(cbuf, Assembler::REX_WB);
3390       }
3391     } else {
3392       if (src2enc < 8) {
3393         emit_opcode(cbuf, Assembler::REX_WR);
3394       } else {
3395         emit_opcode(cbuf, Assembler::REX_WRB);
3396       }
3397     }
3398     emit_opcode(cbuf, 0x3B);
3399     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3400 
3401     // movl $dst, -1
3402     if (dstenc >= 8) {
3403       emit_opcode(cbuf, Assembler::REX_B);
3404     }
3405     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3406     emit_d32(cbuf, -1);
3407 
3408     // jl,s done
3409     emit_opcode(cbuf, 0x7C);
3410     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3411 
3412     // setne $dst
3413     if (dstenc >= 4) {
3414       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3415     }
3416     emit_opcode(cbuf, 0x0F);
3417     emit_opcode(cbuf, 0x95);
3418     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3419 
3420     // movzbl $dst, $dst
3421     if (dstenc >= 4) {
3422       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3423     }
3424     emit_opcode(cbuf, 0x0F);
3425     emit_opcode(cbuf, 0xB6);
3426     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3427   %}
3428 
3429   enc_class Push_ResultXD(regD dst) %{
3430     int dstenc = $dst$$reg;
3431 
3432     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3433 
3434     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3435     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3436     if (dstenc >= 8) {
3437       emit_opcode(cbuf, Assembler::REX_R);
3438     }
3439     emit_opcode  (cbuf, 0x0F );
3440     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3441     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3442 
3443     // add rsp,8
3444     emit_opcode(cbuf, Assembler::REX_W);
3445     emit_opcode(cbuf,0x83);
3446     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3447     emit_d8(cbuf,0x08);
3448   %}
3449 
3450   enc_class Push_SrcXD(regD src) %{
3451     int srcenc = $src$$reg;
3452 
3453     // subq rsp,#8
3454     emit_opcode(cbuf, Assembler::REX_W);
3455     emit_opcode(cbuf, 0x83);
3456     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3457     emit_d8(cbuf, 0x8);
3458 
3459     // movsd [rsp],src
3460     emit_opcode(cbuf, 0xF2);
3461     if (srcenc >= 8) {
3462       emit_opcode(cbuf, Assembler::REX_R);
3463     }
3464     emit_opcode(cbuf, 0x0F);
3465     emit_opcode(cbuf, 0x11);
3466     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3467 
3468     // fldd [rsp]
3469     emit_opcode(cbuf, 0x66);
3470     emit_opcode(cbuf, 0xDD);
3471     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3472   %}
3473 
3474 
  // Load from memory into an XMM register using the macro assembler's movq.
  enc_class movq_ld(regD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3479 
  // Store an XMM register to memory using the macro assembler's movq.
  enc_class movq_st(memory mem, regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3484 
  // Three-step sequence on XMM registers: copy src into dst (encode_CopyXD),
  // then punpcklbw dst, dst, then pshuflw dst, dst with shuffle mode 0x00.
  // NOTE(review): the name suggests this replicates one byte across eight
  // byte lanes -- confirm against the instruction that uses it.
  enc_class pshufd_8x8(regF dst, regF src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3492 
  // Emit pshuflw dst, src with shuffle mode 0x00 via the macro assembler.
  // Despite the enc_class name, the instruction emitted is pshuflw.
  enc_class pshufd_4x16(regF dst, regF src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3498 
  // Emit pshufd dst, src with the caller-supplied shuffle mode via the macro
  // assembler.
  enc_class pshufd(regD dst, regD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3504 
  // Emit pxor dst, src on XMM registers via the macro assembler.
  enc_class pxor(regD dst, regD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3510 
  // Move a general-purpose register into an XMM register using the macro
  // assembler's movdl.
  enc_class mov_i2x(regD dst, rRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3516 
3517   // obj: object to lock
3518   // box: box address (header location) -- killed
3519   // tmp: rax -- killed
3520   // scr: rbx -- killed
3521   //
3522   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3523   // from i486.ad.  See that file for comments.
3524   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3525   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3526 
3527 
  // Emits the inline monitor-enter sequence for obj, using box as the
  // on-stack lock record.  EmitSync selects which variant is generated:
  //   EmitSync & 1 : store unused_mark into the box and compare rsp with
  //                  NULL_WORD; no inline acquisition is attempted.
  //   EmitSync & 2 : optional biased-locking entry, then a CAS of the box
  //                  address into the mark word, with a recursive-lock check.
  //   otherwise    : triage on bit 0x02 of the mark word; either the same
  //                  CAS-plus-recursive-check sequence, or a CAS of the
  //                  current thread into the owner field of the monitor.
  // When _counters is non-NULL, counter updates are emitted as well.
  // NOTE(review): the result appears to be reported to the caller through
  // the condition flags (ZF) left by the last instruction -- confirm against
  // the instruct that uses this encoding.
  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
  %{
    Register objReg = as_Register((int)$obj$$reg);
    Register boxReg = as_Register((int)$box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);
    MacroAssembler masm(&cbuf);

    // Verify uniqueness of register assignments -- necessary but not sufficient
    assert (objReg != boxReg && objReg != tmpReg &&
            objReg != scrReg && tmpReg != scrReg, "invariant") ;

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
        // NOTE(review): presumably compares rsp with 0 only to leave ZF
        // clear so that the slow path is always taken -- confirm.
        masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }
        // QQQ was movl...
        // tmpReg = mark word | 1; this value is saved in the box as the
        // displaced header and is also the expected value for the CAS.
        masm.movptr(tmpReg, 0x1);
        masm.orptr(tmpReg, Address(objReg, 0));
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (os::is_MP()) {
          masm.lock();
        }
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);

        // Recursive locking
        // tmpReg = (value left by the failed CAS - rsp) & (7 - page size);
        // the result is stored into the box as the displaced header.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);

        masm.bind(DONE_LABEL);
        masm.nop(); // avoid branch to branch
    } else {
        // NOTE(review): Egress is declared here but never bound or
        // referenced anywhere in this block.
        Label DONE_LABEL, IsInflated, Egress;

        masm.movptr(tmpReg, Address(objReg, 0)) ; 
        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
        masm.jcc   (Assembler::notZero, IsInflated) ; 
         
        // it's stack-locked, biased or neutral
        // TODO: optimize markword triage order to reduce the number of
        // conditional branches in the most common cases.
        // Beware -- there's a subtle invariant that fetch of the markword
        // at [FETCH], below, will never observe a biased encoding (*101b).
        // If this invariant is not held we'll suffer exclusion (safety) failure.

        if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
        }

        // was q will it destroy high?
        // tmpReg = mark word | 1, saved in the box as the displaced header
        // and used as the expected value of the CAS below.
        masm.orl   (tmpReg, 1) ; 
        masm.movptr(Address(boxReg, 0), tmpReg) ;  
        if (os::is_MP()) { masm.lock(); } 
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jcc   (Assembler::equal, DONE_LABEL);

        // Recursive locking
        // Same computation as in the EmitSync & 2 variant: the masked
        // difference between the observed mark word and rsp goes into the box.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jmp   (DONE_LABEL) ;

        masm.bind  (IsInflated) ;
        // It's inflated

        // TODO: someday avoid the ST-before-CAS penalty by
        // relocating (deferring) the following ST.
        // We should also think about trying a CAS without having
        // fetched _owner.  If the CAS is successful we may
        // avoid an RTO->RTS upgrade on the $line.
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 

        // From here on boxReg holds the mark word that was loaded into
        // tmpReg above.  NOTE(review): the -2 on the field offsets presumably
        // compensates for the 0x02 tag bit still present in that value -- confirm.
        masm.mov    (boxReg, tmpReg) ; 
        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
        masm.testptr(tmpReg, tmpReg) ;   
        masm.jcc    (Assembler::notZero, DONE_LABEL) ; 

        // It's inflated and appears unlocked
        // tmpReg is zero on this path, so the CAS installs r15_thread into
        // the owner field only if that field is still zero.
        if (os::is_MP()) { masm.lock(); } 
        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
        // Intentional fall-through into DONE_LABEL ...

        masm.bind  (DONE_LABEL) ;
        masm.nop   () ;                 // avoid jmp to jmp
    }
  %}
3636 
3637   // obj: object to unlock
3638   // box: box address (displaced header location), killed
3639   // RBX: killed tmp; cannot be obj nor box
  // Emits the inline monitor-exit sequence for obj, using box as the
  // on-stack lock record.  EmitSync selects which variant is generated:
  //   EmitSync & 4 : only compare rsp with 0; nothing is released inline.
  //   EmitSync & 8 : optional biased-locking exit, then restore the
  //                  displaced header into the mark word with a CAS unless
  //                  the displaced header is 0.
  //   otherwise    : additionally handles the case where bit 0x02 of the
  //                  mark word is set (the inflated path), including the
  //                  successor check guarded by (EmitSync & 65536) == 0.
  // The comments on the orl/testl instructions below indicate that ZF is
  // the success (ZF=1) or failure (ZF=0) indication left for the caller.
  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
  %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    MacroAssembler masm(&cbuf);

    if (EmitSync & 4) { 
       // NOTE(review): presumably only leaves ZF clear so that the slow
       // path is always taken -- confirm.
       masm.cmpptr(rsp, 0) ; 
    } else
    if (EmitSync & 8) {
       Label DONE_LABEL;
       if (UseBiasedLocking) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }

       // Check whether the displaced header is 0
       //(=> recursive unlock)
       masm.movptr(tmpReg, Address(boxReg, 0));
       masm.testptr(tmpReg, tmpReg);
       masm.jcc(Assembler::zero, DONE_LABEL);

       // If not recursive lock, reset the header to displaced header
       if (os::is_MP()) {
         masm.lock();
       }
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
       masm.bind(DONE_LABEL);
       masm.nop(); // avoid branch to branch
    } else {
       Label DONE_LABEL, Stacked, CheckSucc ;

       if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }
        
       // tmpReg = mark word.  A displaced header of 0 in the box ends the
       // sequence immediately (the EmitSync & 8 variant above describes
       // that case as a recursive unlock).
       masm.movptr(tmpReg, Address(objReg, 0)) ; 
       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
       masm.jcc   (Assembler::zero, DONE_LABEL) ; 
       masm.testl (tmpReg, 0x02) ; 
       masm.jcc   (Assembler::zero, Stacked) ; 
        
       // It's inflated
       // boxReg = (owner ^ current thread) | recursions.  A non-zero result
       // leaves for DONE_LABEL with ZF clear.
       // NOTE(review): the -2 on the field offsets presumably compensates
       // for the 0x02 tag bit still present in tmpReg -- confirm.
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
       masm.xorptr(boxReg, r15_thread) ; 
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
       masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
       // boxReg = cxq | EntryList.  Non-zero goes to CheckSucc; otherwise
       // the owner field is cleared and the sequence is finished.
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
       masm.jcc   (Assembler::notZero, CheckSucc) ; 
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
       masm.jmp   (DONE_LABEL) ; 
        
       if ((EmitSync & 65536) == 0) { 
         Label LSuccess, LGoSlowPath ;
         masm.bind  (CheckSucc) ;
         // A zero succ field goes straight to the slow path.
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::zero, LGoSlowPath) ;

         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
         // the explicit ST;MEMBAR combination, but masm doesn't currently support
         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
         // are all faster when the write buffer is populated.
         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         if (os::is_MP()) {
            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
         }
         // Re-read succ after the owner field has been cleared; if it is
         // still non-zero the unlock is reported as successful.
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::notZero, LSuccess) ;

         // Otherwise try to put the current thread back into the owner
         // field (expected value 0 in boxReg).  A failed CAS is treated as
         // success; a successful CAS falls into the slow path.
         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
         masm.jcc   (Assembler::notEqual, LSuccess) ;
         // Intentional fall-through into slow-path

         masm.bind  (LGoSlowPath) ;
         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
         masm.jmp   (DONE_LABEL) ;

         masm.bind  (LSuccess) ;
         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
         masm.jmp   (DONE_LABEL) ;
       }

       masm.bind  (Stacked) ; 
       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
       if (os::is_MP()) { masm.lock(); } 
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box

       // When the successor-check code above is not emitted, CheckSucc still
       // has to be bound somewhere; it is bound at the same place as DONE_LABEL.
       if (EmitSync & 65536) {
          masm.bind (CheckSucc) ;
       }
       masm.bind(DONE_LABEL);
       if (EmitSync & 32768) {
          masm.nop();                      // avoid branch to branch
       }
    }
  %}
3740 
3741 
3742   enc_class enc_rethrow()
3743   %{
3744     cbuf.set_inst_mark();
3745     emit_opcode(cbuf, 0xE9); // jmp entry
3746     emit_d32_reloc(cbuf,
3747                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3748                    runtime_call_Relocation::spec(),
3749                    RELOC_DISP32);
3750   %}
3751 
3752   enc_class absF_encoding(regF dst)
3753   %{
3754     int dstenc = $dst$$reg;
3755     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3756 
3757     cbuf.set_inst_mark();
3758     if (dstenc >= 8) {
3759       emit_opcode(cbuf, Assembler::REX_R);
3760       dstenc -= 8;
3761     }
3762     // XXX reg_mem doesn't support RIP-relative addressing yet
3763     emit_opcode(cbuf, 0x0F);
3764     emit_opcode(cbuf, 0x54);
3765     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3766     emit_d32_reloc(cbuf, signmask_address);
3767   %}
3768 
3769   enc_class absD_encoding(regD dst)
3770   %{
3771     int dstenc = $dst$$reg;
3772     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3773 
3774     cbuf.set_inst_mark();
3775     emit_opcode(cbuf, 0x66);
3776     if (dstenc >= 8) {
3777       emit_opcode(cbuf, Assembler::REX_R);
3778       dstenc -= 8;
3779     }
3780     // XXX reg_mem doesn't support RIP-relative addressing yet
3781     emit_opcode(cbuf, 0x0F);
3782     emit_opcode(cbuf, 0x54);
3783     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3784     emit_d32_reloc(cbuf, signmask_address);
3785   %}
3786 
3787   enc_class negF_encoding(regF dst)
3788   %{
3789     int dstenc = $dst$$reg;
3790     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3791 
3792     cbuf.set_inst_mark();
3793     if (dstenc >= 8) {
3794       emit_opcode(cbuf, Assembler::REX_R);
3795       dstenc -= 8;
3796     }
3797     // XXX reg_mem doesn't support RIP-relative addressing yet
3798     emit_opcode(cbuf, 0x0F);
3799     emit_opcode(cbuf, 0x57);
3800     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3801     emit_d32_reloc(cbuf, signflip_address);
3802   %}
3803 
3804   enc_class negD_encoding(regD dst)
3805   %{
3806     int dstenc = $dst$$reg;
3807     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3808 
3809     cbuf.set_inst_mark();
3810     emit_opcode(cbuf, 0x66);
3811     if (dstenc >= 8) {
3812       emit_opcode(cbuf, Assembler::REX_R);
3813       dstenc -= 8;
3814     }
3815     // XXX reg_mem doesn't support RIP-relative addressing yet
3816     emit_opcode(cbuf, 0x0F);
3817     emit_opcode(cbuf, 0x57);
3818     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3819     emit_d32_reloc(cbuf, signflip_address);
3820   %}
3821 
3822   enc_class f2i_fixup(rRegI dst, regF src)
3823   %{
3824     int dstenc = $dst$$reg;
3825     int srcenc = $src$$reg;
3826 
3827     // cmpl $dst, #0x80000000
3828     if (dstenc >= 8) {
3829       emit_opcode(cbuf, Assembler::REX_B);
3830     }
3831     emit_opcode(cbuf, 0x81);
3832     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3833     emit_d32(cbuf, 0x80000000);
3834 
3835     // jne,s done
3836     emit_opcode(cbuf, 0x75);
3837     if (srcenc < 8 && dstenc < 8) {
3838       emit_d8(cbuf, 0xF);
3839     } else if (srcenc >= 8 && dstenc >= 8) {
3840       emit_d8(cbuf, 0x11);
3841     } else {
3842       emit_d8(cbuf, 0x10);
3843     }
3844 
3845     // subq rsp, #8
3846     emit_opcode(cbuf, Assembler::REX_W);
3847     emit_opcode(cbuf, 0x83);
3848     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3849     emit_d8(cbuf, 8);
3850 
3851     // movss [rsp], $src
3852     emit_opcode(cbuf, 0xF3);
3853     if (srcenc >= 8) {
3854       emit_opcode(cbuf, Assembler::REX_R);
3855     }
3856     emit_opcode(cbuf, 0x0F);
3857     emit_opcode(cbuf, 0x11);
3858     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3859 
3860     // call f2i_fixup
3861     cbuf.set_inst_mark();
3862     emit_opcode(cbuf, 0xE8);
3863     emit_d32_reloc(cbuf,
3864                    (int)
3865                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
3866                    runtime_call_Relocation::spec(),
3867                    RELOC_DISP32);
3868 
3869     // popq $dst
3870     if (dstenc >= 8) {
3871       emit_opcode(cbuf, Assembler::REX_B);
3872     }
3873     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3874 
3875     // done:
3876   %}
3877 
3878   enc_class f2l_fixup(rRegL dst, regF src)
3879   %{
3880     int dstenc = $dst$$reg;
3881     int srcenc = $src$$reg;
3882     address const_address = (address) StubRoutines::x86::double_sign_flip();
3883 
3884     // cmpq $dst, [0x8000000000000000]
3885     cbuf.set_inst_mark();
3886     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3887     emit_opcode(cbuf, 0x39);
3888     // XXX reg_mem doesn't support RIP-relative addressing yet
3889     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3890     emit_d32_reloc(cbuf, const_address);
3891 
3892 
3893     // jne,s done
3894     emit_opcode(cbuf, 0x75);
3895     if (srcenc < 8 && dstenc < 8) {
3896       emit_d8(cbuf, 0xF);
3897     } else if (srcenc >= 8 && dstenc >= 8) {
3898       emit_d8(cbuf, 0x11);
3899     } else {
3900       emit_d8(cbuf, 0x10);
3901     }
3902 
3903     // subq rsp, #8
3904     emit_opcode(cbuf, Assembler::REX_W);
3905     emit_opcode(cbuf, 0x83);
3906     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3907     emit_d8(cbuf, 8);
3908 
3909     // movss [rsp], $src
3910     emit_opcode(cbuf, 0xF3);
3911     if (srcenc >= 8) {
3912       emit_opcode(cbuf, Assembler::REX_R);
3913     }
3914     emit_opcode(cbuf, 0x0F);
3915     emit_opcode(cbuf, 0x11);
3916     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3917 
3918     // call f2l_fixup
3919     cbuf.set_inst_mark();
3920     emit_opcode(cbuf, 0xE8);
3921     emit_d32_reloc(cbuf,
3922                    (int)
3923                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
3924                    runtime_call_Relocation::spec(),
3925                    RELOC_DISP32);
3926 
3927     // popq $dst
3928     if (dstenc >= 8) {
3929       emit_opcode(cbuf, Assembler::REX_B);
3930     }
3931     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3932 
3933     // done:
3934   %}
3935 
3936   enc_class d2i_fixup(rRegI dst, regD src)
3937   %{
3938     int dstenc = $dst$$reg;
3939     int srcenc = $src$$reg;
3940 
3941     // cmpl $dst, #0x80000000
3942     if (dstenc >= 8) {
3943       emit_opcode(cbuf, Assembler::REX_B);
3944     }
3945     emit_opcode(cbuf, 0x81);
3946     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3947     emit_d32(cbuf, 0x80000000);
3948 
3949     // jne,s done
3950     emit_opcode(cbuf, 0x75);
3951     if (srcenc < 8 && dstenc < 8) {
3952       emit_d8(cbuf, 0xF);
3953     } else if (srcenc >= 8 && dstenc >= 8) {
3954       emit_d8(cbuf, 0x11);
3955     } else {
3956       emit_d8(cbuf, 0x10);
3957     }
3958 
3959     // subq rsp, #8
3960     emit_opcode(cbuf, Assembler::REX_W);
3961     emit_opcode(cbuf, 0x83);
3962     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3963     emit_d8(cbuf, 8);
3964 
3965     // movsd [rsp], $src
3966     emit_opcode(cbuf, 0xF2);
3967     if (srcenc >= 8) {
3968       emit_opcode(cbuf, Assembler::REX_R);
3969     }
3970     emit_opcode(cbuf, 0x0F);
3971     emit_opcode(cbuf, 0x11);
3972     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3973 
3974     // call d2i_fixup
3975     cbuf.set_inst_mark();
3976     emit_opcode(cbuf, 0xE8);
3977     emit_d32_reloc(cbuf,
3978                    (int)
3979                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
3980                    runtime_call_Relocation::spec(),
3981                    RELOC_DISP32);
3982 
3983     // popq $dst
3984     if (dstenc >= 8) {
3985       emit_opcode(cbuf, Assembler::REX_B);
3986     }
3987     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3988 
3989     // done:
3990   %}
3991 
3992   enc_class d2l_fixup(rRegL dst, regD src)
3993   %{
3994     int dstenc = $dst$$reg;
3995     int srcenc = $src$$reg;
3996     address const_address = (address) StubRoutines::x86::double_sign_flip();
3997 
3998     // cmpq $dst, [0x8000000000000000]
3999     cbuf.set_inst_mark();
4000     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4001     emit_opcode(cbuf, 0x39);
4002     // XXX reg_mem doesn't support RIP-relative addressing yet
4003     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4004     emit_d32_reloc(cbuf, const_address);
4005 
4006 
4007     // jne,s done
4008     emit_opcode(cbuf, 0x75);
4009     if (srcenc < 8 && dstenc < 8) {
4010       emit_d8(cbuf, 0xF);
4011     } else if (srcenc >= 8 && dstenc >= 8) {
4012       emit_d8(cbuf, 0x11);
4013     } else {
4014       emit_d8(cbuf, 0x10);
4015     }
4016 
4017     // subq rsp, #8
4018     emit_opcode(cbuf, Assembler::REX_W);
4019     emit_opcode(cbuf, 0x83);
4020     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4021     emit_d8(cbuf, 8);
4022 
4023     // movsd [rsp], $src
4024     emit_opcode(cbuf, 0xF2);
4025     if (srcenc >= 8) {
4026       emit_opcode(cbuf, Assembler::REX_R);
4027     }
4028     emit_opcode(cbuf, 0x0F);
4029     emit_opcode(cbuf, 0x11);
4030     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4031 
4032     // call d2l_fixup
4033     cbuf.set_inst_mark();
4034     emit_opcode(cbuf, 0xE8);
4035     emit_d32_reloc(cbuf,
4036                    (int)
4037                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4038                    runtime_call_Relocation::spec(),
4039                    RELOC_DISP32);
4040 
4041     // popq $dst
4042     if (dstenc >= 8) {
4043       emit_opcode(cbuf, Assembler::REX_B);
4044     }
4045     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4046 
4047     // done:
4048   %}
4049 
4050   // Safepoint Poll.  This polls the safepoint page, and causes an
4051   // exception if it is not readable. Unfortunately, it kills
4052   // RFLAGS in the process.
4053   enc_class enc_safepoint_poll
4054   %{
4055     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4056     // XXX reg_mem doesn't support RIP-relative addressing yet
4057     cbuf.set_inst_mark();
4058     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4059     emit_opcode(cbuf, 0x85); // testl
4060     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4061     // cbuf.inst_mark() is beginning of instruction
4062     emit_d32_reloc(cbuf, os::get_polling_page());
4063 //                    relocInfo::poll_type,
4064   %}
4065 %}
4066 
4067 
4068 
4069 //----------FRAME--------------------------------------------------------------
4070 // Definition of frame structure and management information.
4071 //
4072 //  S T A C K   L A Y O U T    Allocators stack-slot number
4073 //                             |   (to get allocators register number
4074 //  G  Owned by    |        |  v    add OptoReg::stack0())
4075 //  r   CALLER     |        |
4076 //  o     |        +--------+      pad to even-align allocators stack-slot
4077 //  w     V        |  pad0  |        numbers; owned by CALLER
4078 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4079 //  h     ^        |   in   |  5
4080 //        |        |  args  |  4   Holes in incoming args owned by SELF
4081 //  |     |        |        |  3
4082 //  |     |        +--------+
4083 //  V     |        | old out|      Empty on Intel, window on Sparc
4084 //        |    old |preserve|      Must be even aligned.
4085 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4086 //        |        |   in   |  3   area for Intel ret address
4087 //     Owned by    |preserve|      Empty on Sparc.
4088 //       SELF      +--------+
4089 //        |        |  pad2  |  2   pad to align old SP
4090 //        |        +--------+  1
4091 //        |        | locks  |  0
4092 //        |        +--------+----> OptoReg::stack0(), even aligned
4093 //        |        |  pad1  | 11   pad to align new SP
4094 //        |        +--------+
4095 //        |        |        | 10
4096 //        |        | spills |  9   spills
4097 //        V        |        |  8   (pad0 slot for callee)
4098 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4099 //        ^        |  out   |  7
4100 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4101 //     Owned by    +--------+
4102 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4103 //        |    new |preserve|      Must be even-aligned.
4104 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4105 //        |        |        |
4106 //
4107 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4108 //         known from SELF's arguments and the Java calling convention.
4109 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
4117 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4118 //         even aligned with pad0 as needed.
4119 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4120 //         region 6-11 is even aligned; it may be padded out more so that
4121 //         the region from SP to FP meets the minimum stack alignment.
4122 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4123 //         alignment.  Region 11, pad1, may be dynamically extended so that
4124 //         SP meets the minimum alignment.
4125 
// Frame description: stack growth direction, the registers and stack slots
// that make up the compiled-code calling convention, and the location of
// return values.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // The expression below adds two further slots when VerifyStackAtCalls
  // is set.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot count (2 for the return address, 2 more under
  // VerifyStackAtCalls, plus the fixed slots of the current compilation) is
  // rounded up to a multiple of WordsPerLong * 2 before 2 is subtracted.
  return_addr(STACK - 2 +
              round_to(2 + 2 * VerifyStackAtCalls +
                       Compile::current()->fixed_slots(),
                       WordsPerLong * 2));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  // Both tables below are indexed by ideal_reg; lo gives the first half of
  // the returned register pair and hi the second half (OptoReg::Bad where
  // the table has no second half for that type).
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0_H_num,   // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Guards against a new ideal register type being added without the
    // tables above being extended.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4225 
4226 //----------ATTRIBUTES---------------------------------------------------------
4227 //----------Operand Attributes-------------------------------------------------
// Each declaration below names an attribute and gives the value used when
// an operand or instruction does not set that attribute itself.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4244 
4245 //----------OPERANDS-----------------------------------------------------------
4246 // Operand definitions must precede instruction definitions for correct parsing
4247 // in the ADLC because operands constitute user defined types which are used in
4248 // instruction definitions.
4249 
4250 //----------Simple Operands----------------------------------------------------
4251 // Immediate Operands
// Integer Immediate
// Matches any ConI node, with no predicate restricting the value.
// Declared with cost 10 and the constant interface.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4261 
// Constant for test vs zero
// Matches a ConI node only when its int value is exactly 0; cost 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4272 
// Constant for increment
// Matches a ConI node only when its int value is exactly 1; cost 0.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4283 
// Constant for decrement
// Matches a ConI node only when its int value is exactly -1; cost 0.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4294 
// Valid scale values for addressing modes
// Matches a ConI node whose int value lies in the range 0 through 3
// inclusive.  No op_cost is given in this operand.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4304 
// Integer immediate restricted to the signed 8-bit range: matches a ConI
// node whose value is at least -0x80 and below 0x80.  Cost 5.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4314 
// Integer immediate restricted to the signed 16-bit range: matches a ConI
// node whose value lies in -32768 through 32767 inclusive.  Cost 10.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4324 
// Constant for long shifts
// Matches a ConI node only when its int value is exactly 32; cost 0.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4335 
// Constant for long shifts
// Matches a ConI node only when its int value is exactly 64; cost 0.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4346 
// Pointer Immediate
// Matches any ConP node, with no predicate restricting the value.
// Declared with cost 10 and the constant interface.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4356 
// NULL Pointer Immediate
// Matches a ConP node only when get_ptr() returns 0; cost 5.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4367 
4368 // Narrow Pointer Immediate: matches any ConN node (no predicate).
4369 operand immN() %{
4370   match(ConN);
4371 
4372   op_cost(10);
4373   format %{ %}
4374   interface(CONST_INTER);
4375 %}
4376 
4377 // NULL Narrow Pointer Immediate: matches only ConN nodes whose narrow constant is 0.
4378 operand immN0() %{
4379   predicate(n->get_narrowcon() == 0);
4380   match(ConN);
4381 
4382   op_cost(5);
4383   format %{ %}
4384   interface(CONST_INTER);
4385 %}
4386 
4387 operand immP31()  // Pointer immediate that is not an oop pointer and whose value fits in 31 bits.
4388 %{
4389   predicate(!n->as_Type()->type()->isa_oopptr()
4390             && (n->get_ptr() >> 31) == 0);  // nothing set at or above bit 31
4391   match(ConP);
4392 
4393   op_cost(5);
4394   format %{ %}
4395   interface(CONST_INTER);
4396 %}
4397 
4398 
4399 // Long Immediate: matches any ConL node (no predicate).
4400 operand immL()
4401 %{
4402   match(ConL);
4403 
4404   op_cost(20);
4405   format %{ %}
4406   interface(CONST_INTER);
4407 %}
4408 
4409 // Long Immediate 8-bit: matches ConL values in the signed 8-bit range [-0x80, 0x7F].
4410 operand immL8()
4411 %{
4412   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4413   match(ConL);
4414 
4415   op_cost(5);
4416   format %{ %}
4417   interface(CONST_INTER);
4418 %}
4419 
4420 // Long Immediate 32-bit unsigned: matches ConL values unchanged by a cast to unsigned int.
4421 operand immUL32()
4422 %{
4423   predicate(n->get_long() == (unsigned int) (n->get_long()));
4424   match(ConL);
4425 
4426   op_cost(10);
4427   format %{ %}
4428   interface(CONST_INTER);
4429 %}
4430 
4431 // Long Immediate 32-bit signed: matches ConL values unchanged by a cast to int.
4432 operand immL32()
4433 %{
4434   predicate(n->get_long() == (int) (n->get_long()));
4435   match(ConL);
4436 
4437   op_cost(15);
4438   format %{ %}
4439   interface(CONST_INTER);
4440 %}
4441 
4442 // Long Immediate zero: matches only ConL nodes whose value is 0.
4443 operand immL0()
4444 %{
4445   predicate(n->get_long() == 0L);
4446   match(ConL);
4447 
4448   op_cost(10);
4449   format %{ %}
4450   interface(CONST_INTER);
4451 %}
4452 
4453 // Constant for increment: long immediate that matches only ConL nodes whose value is 1.
4454 operand immL1()
4455 %{
4456   predicate(n->get_long() == 1);
4457   match(ConL);
4458 
4459   format %{ %}
4460   interface(CONST_INTER);
4461 %}
4462 
4463 // Constant for decrement: long immediate that matches only ConL nodes whose value is -1.
4464 operand immL_M1()
4465 %{
4466   predicate(n->get_long() == -1);
4467   match(ConL);
4468 
4469   format %{ %}
4470   interface(CONST_INTER);
4471 %}
4472 
4473 // Long Immediate: the value 10 (matches only ConL nodes whose value is 10).
4474 operand immL10()
4475 %{
4476   predicate(n->get_long() == 10);
4477   match(ConL);
4478 
4479   format %{ %}
4480   interface(CONST_INTER);
4481 %}
4482 
4483 // Long immediate from 0 to 127 (matches ConL values v with 0 <= v < 0x80).
4484 // Used for a shorter form of long mul by 10.
4485 operand immL_127()
4486 %{
4487   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4488   match(ConL);
4489 
4490   op_cost(10);
4491   format %{ %}
4492   interface(CONST_INTER);
4493 %}
4494 
4495 // Long Immediate: low 32-bit mask (matches only the ConL value 0xFFFFFFFF).
4496 operand immL_32bits()
4497 %{
4498   predicate(n->get_long() == 0xFFFFFFFFL);
4499   match(ConL);
4500   op_cost(20);
4501 
4502   format %{ %}
4503   interface(CONST_INTER);
4504 %}
4505 
4506 // Float Immediate zero: matches ConF nodes for which jint_cast() of the value is 0.
4507 operand immF0()
4508 %{
4509   predicate(jint_cast(n->getf()) == 0);  // NOTE(review): presumably a raw-bits test, so -0.0f would not match; confirm against jint_cast
4510   match(ConF);
4511 
4512   op_cost(5);
4513   format %{ %}
4514   interface(CONST_INTER);
4515 %}
4516 
4517 // Float Immediate: matches any ConF node (no predicate).
4518 operand immF()
4519 %{
4520   match(ConF);
4521 
4522   op_cost(15);
4523   format %{ %}
4524   interface(CONST_INTER);
4525 %}
4526 
4527 // Double Immediate zero: matches ConD nodes for which jlong_cast() of the value is 0.
4528 operand immD0()
4529 %{
4530   predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): presumably a raw-bits test, so -0.0 would not match; confirm against jlong_cast
4531   match(ConD);
4532 
4533   op_cost(5);
4534   format %{ %}
4535   interface(CONST_INTER);
4536 %}
4537 
4538 // Double Immediate: matches any ConD node (no predicate).
4539 operand immD()
4540 %{
4541   match(ConD);
4542 
4543   op_cost(15);
4544   format %{ %}
4545   interface(CONST_INTER);
4546 %}
4547 
4548 // Immediates for special shifts (sign extend)
4549 
4550 // Constant 16 (a special-shift immediate): matches only ConI nodes whose value is 16.
4551 operand immI_16()
4552 %{
4553   predicate(n->get_int() == 16);
4554   match(ConI);
4555 
4556   format %{ %}
4557   interface(CONST_INTER);
4558 %}
4559 
4560 operand immI_24()  // Int immediate that matches only ConI nodes whose value is 24.
4561 %{
4562   predicate(n->get_int() == 24);
4563   match(ConI);
4564 
4565   format %{ %}
4566   interface(CONST_INTER);
4567 %}
4568 
4569 // Constant for byte-wide masking: matches only ConI nodes whose value is 255.
4570 operand immI_255()
4571 %{
4572   predicate(n->get_int() == 255);
4573   match(ConI);
4574 
4575   format %{ %}
4576   interface(CONST_INTER);
4577 %}
4578 
4579 // Constant for short-wide masking: matches only ConI nodes whose value is 65535.
4580 operand immI_65535()
4581 %{
4582   predicate(n->get_int() == 65535);
4583   match(ConI);
4584 
4585   format %{ %}
4586   interface(CONST_INTER);
4587 %}
4588 
4589 // Constant for byte-wide masking: matches only ConL nodes whose value is 255.
4590 operand immL_255()
4591 %{
4592   predicate(n->get_long() == 255);
4593   match(ConL);
4594 
4595   format %{ %}
4596   interface(CONST_INTER);
4597 %}
4598 
4599 // Constant for short-wide masking: matches only ConL nodes whose value is 65535.
4600 operand immL_65535()
4601 %{
4602   predicate(n->get_long() == 65535);
4603   match(ConL);
4604 
4605   format %{ %}
4606   interface(CONST_INTER);
4607 %}
4608 
4609 // Register Operands
4610 // Integer Register: allocated from int_reg; matches RegI and the fixed-register int operands below.
4611 operand rRegI()
4612 %{
4613   constraint(ALLOC_IN_RC(int_reg));
4614   match(RegI);
4615 
4616   match(rax_RegI);
4617   match(rbx_RegI);
4618   match(rcx_RegI);
4619   match(rdx_RegI);
4620   match(rdi_RegI);
4621 
4622   format %{ %}
4623   interface(REG_INTER);
4624 %}
4625 
4626 // Special Registers: int operand allocated from int_rax_reg; matches RegI and rRegI.
4627 operand rax_RegI()
4628 %{
4629   constraint(ALLOC_IN_RC(int_rax_reg));
4630   match(RegI);
4631   match(rRegI);
4632 
4633   format %{ "RAX" %}
4634   interface(REG_INTER);
4635 %}
4636 
4637 // Special Registers: int operand allocated from int_rbx_reg; matches RegI and rRegI.
4638 operand rbx_RegI()
4639 %{
4640   constraint(ALLOC_IN_RC(int_rbx_reg));
4641   match(RegI);
4642   match(rRegI);
4643 
4644   format %{ "RBX" %}
4645   interface(REG_INTER);
4646 %}
4647 
4648 operand rcx_RegI()  // Int operand allocated from int_rcx_reg; matches RegI and rRegI.
4649 %{
4650   constraint(ALLOC_IN_RC(int_rcx_reg));
4651   match(RegI);
4652   match(rRegI);
4653 
4654   format %{ "RCX" %}
4655   interface(REG_INTER);
4656 %}
4657 
4658 operand rdx_RegI()  // Int operand allocated from int_rdx_reg; matches RegI and rRegI.
4659 %{
4660   constraint(ALLOC_IN_RC(int_rdx_reg));
4661   match(RegI);
4662   match(rRegI);
4663 
4664   format %{ "RDX" %}
4665   interface(REG_INTER);
4666 %}
4667 
4668 operand rdi_RegI()  // Int operand allocated from int_rdi_reg; matches RegI and rRegI.
4669 %{
4670   constraint(ALLOC_IN_RC(int_rdi_reg));
4671   match(RegI);
4672   match(rRegI);
4673 
4674   format %{ "RDI" %}
4675   interface(REG_INTER);
4676 %}
4677 
4678 operand no_rcx_RegI()  // Int operand allocated from int_no_rcx_reg; matches RegI and the rax/rbx/rdx/rdi int operands.
4679 %{
4680   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4681   match(RegI);
4682   match(rax_RegI);
4683   match(rbx_RegI);
4684   match(rdx_RegI);
4685   match(rdi_RegI);
4686 
4687   format %{ %}
4688   interface(REG_INTER);
4689 %}
4690 
4691 operand no_rax_rdx_RegI()  // Int operand allocated from int_no_rax_rdx_reg; matches RegI and the rbx/rcx/rdi int operands.
4692 %{
4693   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4694   match(RegI);
4695   match(rbx_RegI);
4696   match(rcx_RegI);
4697   match(rdi_RegI);
4698 
4699   format %{ %}
4700   interface(REG_INTER);
4701 %}
4702 
4703 // Pointer Register: allocated from any_reg; matches RegP, rRegP and the fixed pointer operands below.
4704 operand any_RegP()
4705 %{
4706   constraint(ALLOC_IN_RC(any_reg));
4707   match(RegP);
4708   match(rax_RegP);
4709   match(rbx_RegP);
4710   match(rdi_RegP);
4711   match(rsi_RegP);
4712   match(rbp_RegP);
4713   match(r15_RegP);
4714   match(rRegP);
4715 
4716   format %{ %}
4717   interface(REG_INTER);
4718 %}
4719 
4720 operand rRegP()  // Pointer operand allocated from ptr_reg; matches RegP and the fixed pointer operands below.
4721 %{
4722   constraint(ALLOC_IN_RC(ptr_reg));
4723   match(RegP);
4724   match(rax_RegP);
4725   match(rbx_RegP);
4726   match(rdi_RegP);
4727   match(rsi_RegP);
4728   match(rbp_RegP);
4729   match(r15_RegP);  // See Q&A below about r15_RegP.
4730 
4731   format %{ %}
4732   interface(REG_INTER);
4733 %}
4734 
4735 operand rRegN() %{  // Narrow pointer (RegN) operand; allocated from the int_reg class.
4736   constraint(ALLOC_IN_RC(int_reg));
4737   match(RegN);
4738 
4739   format %{ %}
4740   interface(REG_INTER);
4741 %}
4742 
4743 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4744 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4745 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4746 // The output of an instruction is controlled by the allocator, which respects
4747 // register class masks, not match rules.  Unless an instruction mentions
4748 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4749 // by the allocator as an input.
4750 
4751 operand no_rax_RegP()  // Pointer operand allocated from ptr_no_rax_reg; matches RegP and the rbx/rsi/rdi pointer operands.
4752 %{
4753   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4754   match(RegP);
4755   match(rbx_RegP);
4756   match(rsi_RegP);
4757   match(rdi_RegP);
4758 
4759   format %{ %}
4760   interface(REG_INTER);
4761 %}
4762 
4763 operand no_rbp_RegP()  // Pointer operand allocated from ptr_no_rbp_reg; matches RegP and the rbx/rsi/rdi pointer operands.
4764 %{
4765   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4766   match(RegP);
4767   match(rbx_RegP);
4768   match(rsi_RegP);
4769   match(rdi_RegP);
4770 
4771   format %{ %}
4772   interface(REG_INTER);
4773 %}
4774 
4775 operand no_rax_rbx_RegP()  // Pointer operand allocated from ptr_no_rax_rbx_reg; matches RegP and the rsi/rdi pointer operands.
4776 %{
4777   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4778   match(RegP);
4779   match(rsi_RegP);
4780   match(rdi_RegP);
4781 
4782   format %{ %}
4783   interface(REG_INTER);
4784 %}
4785 
4786 // Special Registers
4787 // Return a pointer value: pointer operand allocated from ptr_rax_reg; matches RegP and rRegP.
4788 operand rax_RegP()
4789 %{
4790   constraint(ALLOC_IN_RC(ptr_rax_reg));
4791   match(RegP);
4792   match(rRegP);
4793 
4794   format %{ %}
4795   interface(REG_INTER);
4796 %}
4797 
4798 // Special Registers
4799 // Return a compressed pointer value: narrow operand allocated from int_rax_reg; matches RegN and rRegN.
4800 operand rax_RegN()
4801 %{
4802   constraint(ALLOC_IN_RC(int_rax_reg));
4803   match(RegN);
4804   match(rRegN);
4805 
4806   format %{ %}
4807   interface(REG_INTER);
4808 %}
4809 
4810 // Used in AtomicAdd: pointer operand allocated from ptr_rbx_reg; matches RegP and rRegP.
4811 operand rbx_RegP()
4812 %{
4813   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4814   match(RegP);
4815   match(rRegP);
4816 
4817   format %{ %}
4818   interface(REG_INTER);
4819 %}
4820 
4821 operand rsi_RegP()  // Pointer operand allocated from ptr_rsi_reg; matches RegP and rRegP.
4822 %{
4823   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4824   match(RegP);
4825   match(rRegP);
4826 
4827   format %{ %}
4828   interface(REG_INTER);
4829 %}
4830 
4831 // Used in rep stosq: pointer operand allocated from ptr_rdi_reg; matches RegP and rRegP.
4832 operand rdi_RegP()
4833 %{
4834   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4835   match(RegP);
4836   match(rRegP);
4837 
4838   format %{ %}
4839   interface(REG_INTER);
4840 %}
4841 
4842 operand rbp_RegP()  // Pointer operand allocated from ptr_rbp_reg; matches RegP and rRegP.
4843 %{
4844   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4845   match(RegP);
4846   match(rRegP);
4847 
4848   format %{ %}
4849   interface(REG_INTER);
4850 %}
4851 
4852 operand r15_RegP()  // Pointer operand allocated from ptr_r15_reg; matches RegP and rRegP.
4853 %{
4854   constraint(ALLOC_IN_RC(ptr_r15_reg));
4855   match(RegP);
4856   match(rRegP);
4857 
4858   format %{ %}
4859   interface(REG_INTER);
4860 %}
4861 
4862 operand rRegL()  // Long operand allocated from long_reg; matches RegL, rax_RegL and rdx_RegL.
4863 %{
4864   constraint(ALLOC_IN_RC(long_reg));
4865   match(RegL);
4866   match(rax_RegL);
4867   match(rdx_RegL);
4868 
4869   format %{ %}
4870   interface(REG_INTER);
4871 %}
4872 
4873 // Special Registers: long operand allocated from long_no_rax_rdx_reg; matches RegL and rRegL.
4874 operand no_rax_rdx_RegL()
4875 %{
4876   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4877   match(RegL);
4878   match(rRegL);
4879 
4880   format %{ %}
4881   interface(REG_INTER);
4882 %}
4883 
4884 operand no_rax_RegL()  // Long operand; matches RegL, rRegL and rdx_RegL.
4885 %{
4886   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although rdx_RegL is matched below; confirm this is intended
4887   match(RegL);
4888   match(rRegL);
4889   match(rdx_RegL);
4890 
4891   format %{ %}
4892   interface(REG_INTER);
4893 %}
4894 
4895 operand no_rcx_RegL()  // Long operand allocated from long_no_rcx_reg; matches RegL and rRegL.
4896 %{
4897   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4898   match(RegL);
4899   match(rRegL);
4900 
4901   format %{ %}
4902   interface(REG_INTER);
4903 %}
4904 
4905 operand rax_RegL()  // Long operand allocated from long_rax_reg; matches RegL and rRegL.
4906 %{
4907   constraint(ALLOC_IN_RC(long_rax_reg));
4908   match(RegL);
4909   match(rRegL);
4910 
4911   format %{ "RAX" %}
4912   interface(REG_INTER);
4913 %}
4914 
4915 operand rcx_RegL()  // Long operand allocated from long_rcx_reg; matches RegL and rRegL.
4916 %{
4917   constraint(ALLOC_IN_RC(long_rcx_reg));
4918   match(RegL);
4919   match(rRegL);
4920 
4921   format %{ %}
4922   interface(REG_INTER);
4923 %}
4924 
4925 operand rdx_RegL()  // Long operand allocated from long_rdx_reg; matches RegL and rRegL.
4926 %{
4927   constraint(ALLOC_IN_RC(long_rdx_reg));
4928   match(RegL);
4929   match(rRegL);
4930 
4931   format %{ %}
4932   interface(REG_INTER);
4933 %}
4934 
4935 // Flags register, used as output of compare instructions (allocated from int_flags; matches RegFlags).
4936 operand rFlagsReg()
4937 %{
4938   constraint(ALLOC_IN_RC(int_flags));
4939   match(RegFlags);
4940 
4941   format %{ "RFLAGS" %}
4942   interface(REG_INTER);
4943 %}
4944 
4945 // Flags register, used as output of FLOATING POINT compare instructions.
4946 operand rFlagsRegU()  // Same class and match rule as rFlagsReg; only the operand type and format string differ.
4947 %{
4948   constraint(ALLOC_IN_RC(int_flags));
4949   match(RegFlags);
4950 
4951   format %{ "RFLAGS_U" %}
4952   interface(REG_INTER);
4953 %}
4954 
4955 operand rFlagsRegUCF() %{  // Flags operand allocated from int_flags; formatted as "RFLAGS_U_CF".
4956   constraint(ALLOC_IN_RC(int_flags));
4957   match(RegFlags);
4958   predicate(false);  // predicate is constant false, so the match rule above can never be selected by predicate evaluation
4959 
4960   format %{ "RFLAGS_U_CF" %}
4961   interface(REG_INTER);
4962 %}
4963 
4964 // Float register operands: allocated from float_reg; matches RegF.
4965 operand regF()
4966 %{
4967   constraint(ALLOC_IN_RC(float_reg));
4968   match(RegF);
4969 
4970   format %{ %}
4971   interface(REG_INTER);
4972 %}
4973 
4974 // Double register operands: allocated from double_reg; matches RegD.
4975 operand regD() 
4976 %{
4977   constraint(ALLOC_IN_RC(double_reg));
4978   match(RegD);
4979 
4980   format %{ %}
4981   interface(REG_INTER);
4982 %}
4983 
4984 
4985 //----------Memory Operands----------------------------------------------------
4986 // Direct Memory Operand
4987 // operand direct(immP addr)
4988 // %{
4989 //   match(addr);
4990 
4991 //   format %{ "[$addr]" %}
4992 //   interface(MEMORY_INTER) %{
4993 //     base(0xFFFFFFFF);
4994 //     index(0x4);
4995 //     scale(0x0);
4996 //     disp($addr);
4997 //   %}
4998 // %}
4999 
5000 // Indirect Memory Operand: the address is the pointer register alone ([$reg]).
5001 operand indirect(any_RegP reg)
5002 %{
5003   constraint(ALLOC_IN_RC(ptr_reg));
5004   match(reg);
5005 
5006   format %{ "[$reg]" %}
5007   interface(MEMORY_INTER) %{
5008     base($reg);
5009     index(0x4);  // No Index
5010     scale(0x0);  // No Scale
5011     disp(0x0);   // No Displacement
5012   %}
5013 %}
5014 
5015 // Indirect Memory Plus Short Offset Operand: base register plus an immL8 displacement.
5016 operand indOffset8(any_RegP reg, immL8 off)
5017 %{
5018   constraint(ALLOC_IN_RC(ptr_reg));
5019   match(AddP reg off);
5020 
5021   format %{ "[$reg + $off (8-bit)]" %}
5022   interface(MEMORY_INTER) %{
5023     base($reg);
5024     index(0x4);  // No Index
5025     scale(0x0);  // No Scale
5026     disp($off);
5027   %}
5028 %}
5029 
5030 // Indirect Memory Plus Long Offset Operand: base register plus an immL32 displacement.
5031 operand indOffset32(any_RegP reg, immL32 off)
5032 %{
5033   constraint(ALLOC_IN_RC(ptr_reg));
5034   match(AddP reg off);
5035 
5036   format %{ "[$reg + $off (32-bit)]" %}
5037   interface(MEMORY_INTER) %{
5038     base($reg);
5039     index(0x4);  // No Index
5040     scale(0x0);  // No Scale
5041     disp($off);
5042   %}
5043 %}
5044 
5045 // Indirect Memory Plus Index Register Plus Offset Operand: base + unscaled long index + immL32 displacement.
5046 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5047 %{
5048   constraint(ALLOC_IN_RC(ptr_reg));
5049   match(AddP (AddP reg lreg) off);
5050 
5051   op_cost(10);
5052   format %{"[$reg + $off + $lreg]" %}
5053   interface(MEMORY_INTER) %{
5054     base($reg);
5055     index($lreg);
5056     scale(0x0);  // No Scale
5057     disp($off);
5058   %}
5059 %}
5060 
5061 // Indirect Memory Plus Index Register Operand: base + unscaled long index, no displacement.
5062 operand indIndex(any_RegP reg, rRegL lreg)
5063 %{
5064   constraint(ALLOC_IN_RC(ptr_reg));
5065   match(AddP reg lreg);
5066 
5067   op_cost(10);
5068   format %{"[$reg + $lreg]" %}
5069   interface(MEMORY_INTER) %{
5070     base($reg);
5071     index($lreg);
5072     scale(0x0);  // No Scale
5073     disp(0x0);   // No Displacement
5074   %}
5075 %}
5076 
5077 // Indirect Memory Times Scale Plus Index Register: base + (long index << scale), scale restricted by immI2.
5078 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5079 %{
5080   constraint(ALLOC_IN_RC(ptr_reg));
5081   match(AddP reg (LShiftL lreg scale));
5082 
5083   op_cost(10);
5084   format %{"[$reg + $lreg << $scale]" %}
5085   interface(MEMORY_INTER) %{
5086     base($reg);
5087     index($lreg);
5088     scale($scale);
5089     disp(0x0);  // No Displacement
5090   %}
5091 %}
5092 
5093 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: base + (long index << scale) + immL32 displacement.
5094 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5095 %{
5096   constraint(ALLOC_IN_RC(ptr_reg));
5097   match(AddP (AddP reg (LShiftL lreg scale)) off);
5098 
5099   op_cost(10);
5100   format %{"[$reg + $off + $lreg << $scale]" %}
5101   interface(MEMORY_INTER) %{
5102     base($reg);
5103     index($lreg);
5104     scale($scale);
5105     disp($off);
5106   %}
5107 %}
5108 
5109 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: int index used directly (the ConvI2L is folded into the operand).
5110 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5111 %{
5112   constraint(ALLOC_IN_RC(ptr_reg));
5113   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // requires a long type with lower bound >= 0; presumably the node reached is the ConvI2L of the match tree
5114   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5115 
5116   op_cost(10);
5117   format %{"[$reg + $off + $idx << $scale]" %}
5118   interface(MEMORY_INTER) %{
5119     base($reg);
5120     index($idx);
5121     scale($scale);
5122     disp($off);
5123   %}
5124 %}
5125 
5126 // Indirect Narrow Oop Plus Offset Operand: folds a DecodeN into the address as R12 + (narrow reg << 3) + offset.
5127 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5128 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
5129 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5130   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // only when compressed oops are on and the shift is non-zero
5131   constraint(ALLOC_IN_RC(ptr_reg));
5132   match(AddP (DecodeN reg) off);
5133 
5134   op_cost(10);
5135   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5136   interface(MEMORY_INTER) %{
5137     base(0xc); // R12
5138     index($reg);
5139     scale(0x3);
5140     disp($off);
5141   %}
5142 %}
5143 
5144 // Indirect Memory Operand (narrow base): uses the narrow register itself as base when narrow_oop_shift() == 0.
5145 operand indirectNarrow(rRegN reg)
5146 %{
5147   predicate(Universe::narrow_oop_shift() == 0);
5148   constraint(ALLOC_IN_RC(ptr_reg));
5149   match(DecodeN reg);
5150 
5151   format %{ "[$reg]" %}
5152   interface(MEMORY_INTER) %{
5153     base($reg);
5154     index(0x4);  // No Index
5155     scale(0x0);  // No Scale
5156     disp(0x0);   // No Displacement
5157   %}
5158 %}
5159 
5160 // Indirect Memory Plus Short Offset Operand (narrow base): only when narrow_oop_shift() == 0.
5161 operand indOffset8Narrow(rRegN reg, immL8 off)
5162 %{
5163   predicate(Universe::narrow_oop_shift() == 0);
5164   constraint(ALLOC_IN_RC(ptr_reg));
5165   match(AddP (DecodeN reg) off);
5166 
5167   format %{ "[$reg + $off (8-bit)]" %}
5168   interface(MEMORY_INTER) %{
5169     base($reg);
5170     index(0x4);  // No Index
5171     scale(0x0);  // No Scale
5172     disp($off);
5173   %}
5174 %}
5175 
5176 // Indirect Memory Plus Long Offset Operand (narrow base): only when narrow_oop_shift() == 0.
5177 operand indOffset32Narrow(rRegN reg, immL32 off)
5178 %{
5179   predicate(Universe::narrow_oop_shift() == 0);
5180   constraint(ALLOC_IN_RC(ptr_reg));
5181   match(AddP (DecodeN reg) off);
5182 
5183   format %{ "[$reg + $off (32-bit)]" %}
5184   interface(MEMORY_INTER) %{
5185     base($reg);
5186     index(0x4);  // No Index
5187     scale(0x0);  // No Scale
5188     disp($off);
5189   %}
5190 %}
5191 
5192 // Indirect Memory Plus Index Register Plus Offset Operand (narrow base): only when narrow_oop_shift() == 0.
5193 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5194 %{
5195   predicate(Universe::narrow_oop_shift() == 0);
5196   constraint(ALLOC_IN_RC(ptr_reg));
5197   match(AddP (AddP (DecodeN reg) lreg) off);
5198 
5199   op_cost(10);
5200   format %{"[$reg + $off + $lreg]" %}
5201   interface(MEMORY_INTER) %{
5202     base($reg);
5203     index($lreg);
5204     scale(0x0);  // No Scale
5205     disp($off);
5206   %}
5207 %}
5208 
5209 // Indirect Memory Plus Index Register Operand (narrow base, no displacement): only when narrow_oop_shift() == 0.
5210 operand indIndexNarrow(rRegN reg, rRegL lreg)
5211 %{
5212   predicate(Universe::narrow_oop_shift() == 0);
5213   constraint(ALLOC_IN_RC(ptr_reg));
5214   match(AddP (DecodeN reg) lreg);
5215 
5216   op_cost(10);
5217   format %{"[$reg + $lreg]" %}
5218   interface(MEMORY_INTER) %{
5219     base($reg);
5220     index($lreg);
5221     scale(0x0);  // No Scale
5222     disp(0x0);   // No Displacement
5223   %}
5224 %}
5225 
5226 // Indirect Memory Times Scale Plus Index Register (narrow base): only when narrow_oop_shift() == 0.
5227 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5228 %{
5229   predicate(Universe::narrow_oop_shift() == 0);
5230   constraint(ALLOC_IN_RC(ptr_reg));
5231   match(AddP (DecodeN reg) (LShiftL lreg scale));
5232 
5233   op_cost(10);
5234   format %{"[$reg + $lreg << $scale]" %}
5235   interface(MEMORY_INTER) %{
5236     base($reg);
5237     index($lreg);
5238     scale($scale);
5239     disp(0x0);  // No Displacement
5240   %}
5241 %}
5242 
5243 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow base): only when narrow_oop_shift() == 0.
5244 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5245 %{
5246   predicate(Universe::narrow_oop_shift() == 0);
5247   constraint(ALLOC_IN_RC(ptr_reg));
5248   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5249 
5250   op_cost(10);
5251   format %{"[$reg + $off + $lreg << $scale]" %}
5252   interface(MEMORY_INTER) %{
5253     base($reg);
5254     index($lreg);
5255     scale($scale);
5256     disp($off);
5257   %}
5258 %}
5259 
5260 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow base).
5261 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5262 %{
5263   constraint(ALLOC_IN_RC(ptr_reg));
5264   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero shift, and a long type with lower bound >= 0 (presumably the ConvI2L node)
5265   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5266 
5267   op_cost(10);
5268   format %{"[$reg + $off + $idx << $scale]" %}
5269   interface(MEMORY_INTER) %{
5270     base($reg);
5271     index($idx);
5272     scale($scale);
5273     disp($off);
5274   %}
5275 %}
5276 
5277 
5278 //----------Special Memory Operands--------------------------------------------
5279 // Stack Slot Operand - This operand is used for loading and storing temporary
5280 //                      values on the stack where a match requires a value to
5281 //                      flow through memory.
5282 operand stackSlotP(sRegP reg)  // Stack slot over an sRegP: RSP-based address whose displacement is the slot's stack offset.
5283 %{
5284   constraint(ALLOC_IN_RC(stack_slots));
5285   // No match rule because this operand is only generated in matching
5286 
5287   format %{ "[$reg]" %}
5288   interface(MEMORY_INTER) %{
5289     base(0x4);   // RSP
5290     index(0x4);  // No Index
5291     scale(0x0);  // No Scale
5292     disp($reg);  // Stack Offset
5293   %}
5294 %}
5295 
5296 operand stackSlotI(sRegI reg)  // Stack slot over an sRegI: RSP-based address whose displacement is the slot's stack offset.
5297 %{
5298   constraint(ALLOC_IN_RC(stack_slots));
5299   // No match rule because this operand is only generated in matching
5300 
5301   format %{ "[$reg]" %}
5302   interface(MEMORY_INTER) %{
5303     base(0x4);   // RSP
5304     index(0x4);  // No Index
5305     scale(0x0);  // No Scale
5306     disp($reg);  // Stack Offset
5307   %}
5308 %}
5309 
5310 operand stackSlotF(sRegF reg)  // Stack slot over an sRegF: RSP-based address whose displacement is the slot's stack offset.
5311 %{
5312   constraint(ALLOC_IN_RC(stack_slots));
5313   // No match rule because this operand is only generated in matching
5314 
5315   format %{ "[$reg]" %}
5316   interface(MEMORY_INTER) %{
5317     base(0x4);   // RSP
5318     index(0x4);  // No Index
5319     scale(0x0);  // No Scale
5320     disp($reg);  // Stack Offset
5321   %}
5322 %}
5323 
5324 operand stackSlotD(sRegD reg)  // Stack slot over an sRegD: RSP-based address whose displacement is the slot's stack offset.
5325 %{
5326   constraint(ALLOC_IN_RC(stack_slots));
5327   // No match rule because this operand is only generated in matching
5328 
5329   format %{ "[$reg]" %}
5330   interface(MEMORY_INTER) %{
5331     base(0x4);   // RSP
5332     index(0x4);  // No Index
5333     scale(0x0);  // No Scale
5334     disp($reg);  // Stack Offset
5335   %}
5336 %}
5337 operand stackSlotL(sRegL reg)  // Stack slot over an sRegL: RSP-based address whose displacement is the slot's stack offset.
5338 %{
5339   constraint(ALLOC_IN_RC(stack_slots));
5340   // No match rule because this operand is only generated in matching
5341 
5342   format %{ "[$reg]" %}
5343   interface(MEMORY_INTER) %{
5344     base(0x4);   // RSP
5345     index(0x4);  // No Index
5346     scale(0x0);  // No Scale
5347     disp($reg);  // Stack Offset
5348   %}
5349 %}
5350 
5351 //----------Conditional Branch Operands----------------------------------------
5352 // Comparison Op  - This is the operation of the comparison, and is limited to
5353 //                  the following set of codes:
5354 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5355 //
5356 // Other attributes of the comparison, such as unsignedness, are specified
5357 // by the comparison instruction that sets a condition code flags register.
5358 // That result is represented by a flags operand whose subtype is appropriate
5359 // to the unsignedness (etc.) of the comparison.
5360 //
5361 // Later, the instruction which matches both the Comparison Op (a Bool) and
5362 // the flags (produced by the Cmp) specifies the coding of the comparison op
5363 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5364 
5365 // Comparison Code: maps each Bool test to a condition encoding and mnemonic suffix (l/ge/le/g forms).
5366 operand cmpOp()
5367 %{
5368   match(Bool);
5369 
5370   format %{ "" %}
5371   interface(COND_INTER) %{
5372     equal(0x4, "e");
5373     not_equal(0x5, "ne");
5374     less(0xC, "l");
5375     greater_equal(0xD, "ge");
5376     less_equal(0xE, "le");
5377     greater(0xF, "g");
5378   %}
5379 %}
5380 
5381 // Comparison Code, unsigned compare.  Used by FP also, with
5382 // C2 (unordered) turned into GT or LT already.  The other bits
5383 // C0 and C3 are turned into Carry & Zero flags.
5384 operand cmpOpU()  // Same equal/not_equal codes as cmpOp; ordering tests use the b/nb/be/nbe forms.
5385 %{
5386   match(Bool);
5387 
5388   format %{ "" %}
5389   interface(COND_INTER) %{
5390     equal(0x4, "e");
5391     not_equal(0x5, "ne");
5392     less(0x2, "b");
5393     greater_equal(0x3, "nb");
5394     less_equal(0x6, "be");
5395     greater(0x7, "nbe");
5396   %}
5397 %}
5398 
5399 
5400 // Floating comparisons that don't require any fixup for the unordered case
5401 operand cmpOpUCF() %{  // Restricted by predicate to the lt, ge, le and gt tests; encodings are the same as cmpOpU.
5402   match(Bool);
5403   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5404             n->as_Bool()->_test._test == BoolTest::ge ||
5405             n->as_Bool()->_test._test == BoolTest::le ||
5406             n->as_Bool()->_test._test == BoolTest::gt);
5407   format %{ "" %}
5408   interface(COND_INTER) %{
5409     equal(0x4, "e");
5410     not_equal(0x5, "ne");
5411     less(0x2, "b");
5412     greater_equal(0x3, "nb");
5413     less_equal(0x6, "be");
5414     greater(0x7, "nbe");
5415   %}
5416 %}
5417 
5418 
5419 // Floating comparisons that can be fixed up with extra conditional jumps
5420 operand cmpOpUCF2() %{  // Restricted by predicate to the ne and eq tests; encodings are the same as cmpOpU.
5421   match(Bool);
5422   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5423             n->as_Bool()->_test._test == BoolTest::eq);
5424   format %{ "" %}
5425   interface(COND_INTER) %{
5426     equal(0x4, "e");
5427     not_equal(0x5, "ne");
5428     less(0x2, "b");
5429     greater_equal(0x3, "nb");
5430     less_equal(0x6, "be");
5431     greater(0x7, "nbe");
5432   %}
5433 %}
5434 
5435 
5436 //----------OPERAND CLASSES----------------------------------------------------
5437 // Operand Classes are groups of operands that are used to simplify
5438 // instruction definitions by not requiring the AD writer to specify separate
5439 // instructions for every form of operand when the instruction accepts
5440 // multiple operand types with the same basic encoding and format.  The classic
5441 // case of this is memory operands.
5442 
5443 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // the plain pointer-based addressing forms
5444                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5445                indCompressedOopOffset,
5446                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // the *Narrow forms that fold a DecodeN into the address
5447                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5448                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5449 
5450 //----------PIPELINE-----------------------------------------------------------
5451 // Rules which define the behavior of the target architecture's pipeline.
5452 pipeline %{
5453 
5454 //----------ATTRIBUTES---------------------------------------------------------
5455 attributes %{
5456   variable_size_instructions;        // Instructions are variable size
5457   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5458   instruction_unit_size = 1;         // Instruction size is expressed in units of 1 byte
5459   instruction_fetch_unit_size = 16;  // The processor fetches one line
5460   instruction_fetch_units = 1;       // of 16 bytes
5461 
5462   // List of nop instructions
5463   nops( MachNop );
5464 %}
5465 
5466 //----------RESOURCES----------------------------------------------------------
5467 // Resources are the functional units available to the machine.
5468 // DECODE, MEM and ALU are each declared as the OR of the individual units listed before them.
5469 // Generic P2/P3 pipeline
5470 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5471 // 3 instructions decoded per cycle.
5472 // 2 load/store ops per cycle, 1 branch, 1 FPU,  (NOTE(review): three MS units are declared below; confirm the count)
5473 // 3 ALU op, only ALU0 handles mul instructions.
5474 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5475            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5476            BR, FPU,
5477            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5478 
5479 //----------PIPELINE DESCRIPTION-----------------------------------------------
5480 // Pipeline Description specifies the stages in the machine's pipeline
5481 
5482 // Generic P2/P3 pipeline: six stages named S0 through S5, referenced by the pipe classes.
5483 pipe_desc(S0, S1, S2, S3, S4, S5);
5484 
5485 //----------PIPELINE CLASSES---------------------------------------------------
5486 // Pipeline Classes describe the stages in which input and output are
5487 // referenced by the hardware pipeline.
5488 
5489 // Naming convention: ialu or fpu
5490 // Then: _reg
5491 // Then: _reg if there is a 2nd register
5492 // Then: _long if it's a pair of instructions implementing a long
5493 // Then: _fat if it requires the big decoder
5494 //   Or: _mem if it requires the big decoder and a memory unit.
5495 
5496 // Integer ALU reg operation: single instruction that reads dst in S3 and writes it in S4.
5497 pipe_class ialu_reg(rRegI dst)
5498 %{
5499     single_instruction;
5500     dst    : S4(write);
5501     dst    : S3(read);
5502     DECODE : S0;        // any decoder
5503     ALU    : S3;        // any alu
5504 %}
5505 
5506 // Long ALU reg operation: counted as 2 instructions; reads dst in S3 and writes it in S4.
5507 pipe_class ialu_reg_long(rRegL dst)
5508 %{
5509     instruction_count(2);
5510     dst    : S4(write);
5511     dst    : S3(read);
5512     DECODE : S0(2);     // any 2 decoders
5513     ALU    : S3(2);     // any 2 alus
5514 %}
5515 
5516 // Integer ALU reg operation using big decoder: like ialu_reg but decoding is restricted to D0.
5517 pipe_class ialu_reg_fat(rRegI dst)
5518 %{
5519     single_instruction;
5520     dst    : S4(write);
5521     dst    : S3(read);
5522     D0     : S0;        // big decoder only
5523     ALU    : S3;        // any alu
5524 %}
5525 
5526 // Long ALU reg operation using big decoder: like ialu_reg_long but decoding is restricted to D0.
5527 pipe_class ialu_reg_long_fat(rRegL dst)
5528 %{
5529     instruction_count(2);
5530     dst    : S4(write);
5531     dst    : S3(read);
5532     D0     : S0(2);     // big decoder only; twice
5533     ALU    : S3(2);     // any 2 alus
5534 %}
5535 
5536 // Integer ALU reg-reg operation: single instruction that reads src in S3 and writes dst in S4.
5537 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5538 %{
5539     single_instruction;
5540     dst    : S4(write);
5541     src    : S3(read);
5542     DECODE : S0;        // any decoder
5543     ALU    : S3;        // any alu
5544 %}
5545 
5546 // Long ALU reg-reg operation: counted as 2 instructions; reads src in S3 and writes dst in S4.
5547 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5548 %{
5549     instruction_count(2);
5550     dst    : S4(write);
5551     src    : S3(read);
5552     DECODE : S0(2);     // any 2 decoders
5553     ALU    : S3(2);     // any 2 alus
5554 %}
5555 
5556 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here).
5557 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5558 %{
5559     single_instruction;
5560     dst    : S4(write);
5561     src    : S3(read);
5562     D0     : S0;        // big decoder only
5563     ALU    : S3;        // any alu
5564 %}
5565 
5566 // Long ALU reg-reg operation using big decoder: like ialu_reg_reg_long but decoding is restricted to D0.
5567 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5568 %{
5569     instruction_count(2);
5570     dst    : S4(write);
5571     src    : S3(read);
5572     D0     : S0(2);     // big decoder only; twice
5573     ALU    : S3(2);     // any 2 alus
5574 %}
5575 
5576 // Integer ALU reg-mem operation: reads mem in S3, uses an ALU in S4 and writes dst in S5.
5577 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5578 %{
5579     single_instruction;
5580     dst    : S5(write);
5581     mem    : S3(read);
5582     D0     : S0;        // big decoder only
5583     ALU    : S4;        // any alu
5584     MEM    : S3;        // any mem
5585 %}
5586 
5587 // Integer mem operation (prefetch)
// One instruction that only reads mem in S3 on a MEM unit; it reserves no
// ALU and declares no result operand.
5588 pipe_class ialu_mem(memory mem)
5589 %{
5590     single_instruction;
5591     mem    : S3(read);
5592     D0     : S0;        // big decoder only
5593     MEM    : S3;        // any mem
5594 %}
5595 
5596 // Integer Store to Memory
// One instruction: the memory operand is read in S3 and the source register
// in S5; both operands are declared as reads (no operand is written).
5597 pipe_class ialu_mem_reg(memory mem, rRegI src)
5598 %{
5599     single_instruction;
5600     mem    : S3(read);
5601     src    : S5(read);
5602     D0     : S0;        // big decoder only
5603     ALU    : S4;        // any alu
5604     MEM    : S3;        // any mem
5605 %}
5606 
5607 // // Long Store to Memory
5608 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5609 // %{
5610 //     instruction_count(2);
5611 //     mem    : S3(read);
5612 //     src    : S5(read);
5613 //     D0     : S0(2);          // big decoder only; twice
5614 //     ALU    : S4(2);     // any 2 alus
5615 //     MEM    : S3(2);  // Both mems
5616 // %}
5617 
5618 // Integer Store to Memory (no register source: only the memory operand
// is tracked by this class)
5619 pipe_class ialu_mem_imm(memory mem)
5620 %{
5621     single_instruction;
5622     mem    : S3(read);
5623     D0     : S0;        // big decoder only
5624     ALU    : S4;        // any alu
5625     MEM    : S3;        // any mem
5626 %}
5627 
5628 // Integer ALU0 reg-reg operation
// Same operand timing as ialu_reg_reg (src read in S3, dst written in S4)
// but restricted to the D0 decoder and the ALU0 unit.
5629 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5630 %{
5631     single_instruction;
5632     dst    : S4(write);
5633     src    : S3(read);
5634     D0     : S0;        // Big decoder only
5635     ALU0   : S3;        // only alu0
5636 %}
5637 
5638 // Integer ALU0 reg-mem operation
// Same operand timing as ialu_reg_mem (mem read in S3, dst written in S5)
// but the ALU stage is restricted to ALU0.
5639 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5640 %{
5641     single_instruction;
5642     dst    : S5(write);
5643     mem    : S3(read);
5644     D0     : S0;        // big decoder only
5645     ALU0   : S4;        // ALU0 only
5646     MEM    : S3;        // any mem
5647 %}
5648 
5649 // Integer ALU reg-reg operation whose result is the flags register
// One instruction: src1 and src2 are read in S3, cr is written in S4.
5650 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5651 %{
5652     single_instruction;
5653     cr     : S4(write);
5654     src1   : S3(read);
5655     src2   : S3(read);
5656     DECODE : S0;        // any decoder
5657     ALU    : S3;        // any alu
5658 %}
5659 
5660 // Integer ALU reg-imm operation whose result is the flags register
// One instruction: src1 is read in S3, cr is written in S4.  The immediate
// is not an operand of this class.
5661 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5662 %{
5663     single_instruction;
5664     cr     : S4(write);
5665     src1   : S3(read);
5666     DECODE : S0;        // any decoder
5667     ALU    : S3;        // any alu
5668 %}
5669 
5670 // Integer ALU reg-mem operation whose result is the flags register
// One instruction: src1 and the memory operand src2 are read in S3 (MEM
// unit in S3), the ALU is used in S4 and cr is declared written in S4.
5671 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5672 %{
5673     single_instruction;
5674     cr     : S4(write);
5675     src1   : S3(read);
5676     src2   : S3(read);
5677     D0     : S0;        // big decoder only
5678     ALU    : S4;        // any alu
5679     MEM    : S3;        // any mem
5680 %}
5681 
5682 // Conditional move reg-reg
// NOTE(review): the title above matches pipe_cmov_reg and looks copied; this
// class models a four-instruction sequence that only reads its operands
// (p and q in S3, y in S4) and reserves four decoder slots, no ALU.
5683 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5684 %{
5685     instruction_count(4);
5686     y      : S4(read);
5687     q      : S3(read);
5688     p      : S3(read);
5689     DECODE : S0(4);     // any decoder
5690 %}
5691 
5692 // Conditional move reg-reg
// One instruction: src and the flags cr are read in S3, dst is written in
// S4.  Only a decoder slot is reserved.
5693 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5694 %{
5695     single_instruction;
5696     dst    : S4(write);
5697     src    : S3(read);
5698     cr     : S3(read);
5699     DECODE : S0;        // any decoder
5700 %}
5701 
5702 // Conditional move reg-mem
// One instruction: the memory source and the flags cr are read in S3 (MEM
// unit in S3), dst is written in S4.  Note the parameter order differs from
// pipe_cmov_reg: cr comes first here.
5703 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5704 %{
5705     single_instruction;
5706     dst    : S4(write);
5707     src    : S3(read);
5708     cr     : S3(read);
5709     DECODE : S0;        // any decoder
5710     MEM    : S3;        // any mem
5711 %}
5712 
5713 // Conditional move reg-reg long
// Declared as a single instruction but reserves two decoder slots; src and
// cr are read in S3, dst is written in S4.
5714 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5715 %{
5716     single_instruction;
5717     dst    : S4(write);
5718     src    : S3(read);
5719     cr     : S3(read);
5720     DECODE : S0(2);     // any 2 decoders
5721 %}
5722 
5723 // XXX
5724 // // Conditional move double reg-reg
5725 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5726 // %{
5727 //     single_instruction;
5728 //     dst    : S4(write);
5729 //     src    : S3(read);
5730 //     cr     : S3(read);
5731 //     DECODE : S0;     // any decoder
5732 // %}
5733 
5734 // Float single-register operation
// Counted as two instructions; the only operand, dst, is declared as read in
// S3 (no write is declared).  One FPU slot in S3.
5735 pipe_class fpu_reg(regD dst)
5736 %{
5737     instruction_count(2);
5738     dst    : S3(read);
5739     DECODE : S0(2);     // any 2 decoders
5740     FPU    : S3;        // fpu unit
5741 %}
5742 
5743 // Float reg-reg operation
// Counted as two instructions: src is read in S3, dst is written in S4.
5744 pipe_class fpu_reg_reg(regD dst, regD src)
5745 %{
5746     instruction_count(2);
5747     dst    : S4(write);
5748     src    : S3(read);
5749     DECODE : S0(2);     // any 2 decoders
5750     FPU    : S3;        // fpu unit
5751 %}
5752 
5753 // Float reg-reg-reg operation (two register sources)
// Counted as three instructions: src1 and src2 are read in S3, dst is
// written in S4.
5754 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5755 %{
5756     instruction_count(3);
5757     dst    : S4(write);
5758     src1   : S3(read);
5759     src2   : S3(read);
5760     DECODE : S0(3);     // any 3 decoders
5761     FPU    : S3(2);     // fpu unit, twice
5762 %}
5763 
5764 // Float operation with three register sources
// Counted as four instructions: src1..src3 are read in S3, dst is written
// in S4.
5765 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5766 %{
5767     instruction_count(4);
5768     dst    : S4(write);
5769     src1   : S3(read);
5770     src2   : S3(read);
5771     src3   : S3(read);
5772     DECODE : S0(4);     // any 4 decoders (matches instruction_count)
5773     FPU    : S3(2);     // fpu unit, twice
5774 %}
5775 
5776 // Float operation with one memory source and two register sources
// Counted as four instructions: one goes through the big decoder in S0, the
// other three through any decoder in S1.  All sources are read in S3, dst is
// written in S4.
5777 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5778 %{
5779     instruction_count(4);
5780     dst    : S4(write);
5781     src1   : S3(read);
5782     src2   : S3(read);
5783     src3   : S3(read);
5784     DECODE : S1(3);     // any 3 decoders
5785     D0     : S0;        // Big decoder only
5786     FPU    : S3(2);     // fpu unit, twice
5787     MEM    : S3;        // any mem
5788 %}
5789 
5790 // Float reg-mem operation
// Counted as two instructions: mem is read in S3 (MEM unit), the FPU is
// used in S4 and dst is written in S5.
5791 pipe_class fpu_reg_mem(regD dst, memory mem)
5792 %{
5793     instruction_count(2);
5794     dst    : S5(write);
5795     mem    : S3(read);
5796     D0     : S0;        // big decoder only
5797     DECODE : S1;        // any decoder for FPU POP
5798     FPU    : S4;        // fpu unit
5799     MEM    : S3;        // any mem
5800 %}
5801 
5802 // Float reg-reg-mem operation (register source plus memory source)
// Counted as three instructions: src1 and mem are read in S3, the FPU is
// used in S4 and dst is written in S5.
5803 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5804 %{
5805     instruction_count(3);
5806     dst    : S5(write);
5807     src1   : S3(read);
5808     mem    : S3(read);
5809     D0     : S0;        // big decoder only
5810     DECODE : S1(2);     // any decoder for FPU POP
5811     FPU    : S4;        // fpu unit
5812     MEM    : S3;        // any mem
5813 %}
5814 
5815 // Float mem-reg operation
// Counted as two instructions: mem is read in S3 and src in S5; no operand
// is declared written.  Here the any-decoder slot (S0) precedes the
// big-decoder slot (S1).
5816 pipe_class fpu_mem_reg(memory mem, regD src)
5817 %{
5818     instruction_count(2);
5819     src    : S5(read);
5820     mem    : S3(read);
5821     DECODE : S0;        // any decoder for FPU PUSH
5822     D0     : S1;        // big decoder only
5823     FPU    : S4;        // fpu unit
5824     MEM    : S3;        // any mem
5825 %}
5826 
// Float mem-reg-reg operation
// Counted as three instructions: src1, src2 and mem are all read in S3; no
// operand is declared written.
5827 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5828 %{
5829     instruction_count(3);
5830     src1   : S3(read);
5831     src2   : S3(read);
5832     mem    : S3(read);
5833     DECODE : S0(2);     // any decoder for FPU PUSH
5834     D0     : S1;        // big decoder only
5835     FPU    : S4;        // fpu unit
5836     MEM    : S3;        // any mem
5837 %}
5838 
// Float mem-reg-mem operation (register source plus memory source)
// Counted as three instructions: src1 and src2 are read in S3, mem in S4;
// two MEM slots are reserved in S3.
5839 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5840 %{
5841     instruction_count(3);
5842     src1   : S3(read);
5843     src2   : S3(read);
5844     mem    : S4(read);
5845     DECODE : S0;        // any decoder for FPU PUSH
5846     D0     : S0(2);     // big decoder only
5847     FPU    : S4;        // fpu unit
5848     MEM    : S3(2);     // any mem
5849 %}
5850 
// Float mem-mem operation
// Counted as two instructions: src1 is read in S3 and dst in S4; no FPU
// resource is reserved, only two big-decoder and two MEM slots.
5851 pipe_class fpu_mem_mem(memory dst, memory src1)
5852 %{
5853     instruction_count(2);
5854     src1   : S3(read);
5855     dst    : S4(read);
5856     D0     : S0(2);     // big decoder only
5857     MEM    : S3(2);     // any mem
5858 %}
5859 
// Float mem-mem-mem operation (two memory sources)
// Counted as three instructions: src1 and src2 are read in S3, dst in S4;
// three big-decoder and three MEM slots plus one FPU slot.
5860 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5861 %{
5862     instruction_count(3);
5863     src1   : S3(read);
5864     src2   : S3(read);
5865     dst    : S4(read);
5866     D0     : S0(3);     // big decoder only
5867     FPU    : S4;        // fpu unit
5868     MEM    : S3(3);     // any mem
5869 %}
5870 
// Float mem-reg operation with a constant (the constant is not an operand
// of this class)
// Counted as three instructions: src1 and mem are both read in S4; two MEM
// slots are reserved in S3.
5871 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5872 %{
5873     instruction_count(3);
5874     src1   : S4(read);
5875     mem    : S4(read);
5876     DECODE : S0;        // any decoder for FPU PUSH
5877     D0     : S0(2);     // big decoder only
5878     FPU    : S4;        // fpu unit
5879     MEM    : S3(2);     // any mem
5880 %}
5881 
5882 // Float load constant
// Counted as two instructions: the load uses the big decoder and a MEM slot
// in S3, the FPU is used in S4 and dst is written in S5.
5883 pipe_class fpu_reg_con(regD dst)
5884 %{
5885     instruction_count(2);
5886     dst    : S5(write);
5887     D0     : S0;        // big decoder only for the load
5888     DECODE : S1;        // any decoder for FPU POP
5889     FPU    : S4;        // fpu unit
5890     MEM    : S3;        // any mem
5891 %}
5892 
5893 // Float load constant combined with a register source
// Counted as three instructions: src is read in S3, the FPU is used in S4
// and dst is written in S5.
5894 pipe_class fpu_reg_reg_con(regD dst, regD src)
5895 %{
5896     instruction_count(3);
5897     dst    : S5(write);
5898     src    : S3(read);
5899     D0     : S0;        // big decoder only for the load
5900     DECODE : S1(2);     // any decoder for FPU POP
5901     FPU    : S4;        // fpu unit
5902     MEM    : S3;        // any mem
5903 %}
5904 
5905 // UnConditional branch
// One instruction that only reserves the BR resource in S3; the label
// operand has no read/write stage.
5906 pipe_class pipe_jmp(label labl)
5907 %{
5908     single_instruction;
5909     BR   : S3;
5910 %}
5911 
5912 // Conditional branch
// One instruction: the flags cr are read in S1 and the BR resource is used
// in S3.  cmp and labl have no read/write stage.
5913 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5914 %{
5915     single_instruction;
5916     cr    : S1(read);
5917     BR    : S3;
5918 %}
5919 
5920 // Allocation idiom
// Counted as one instruction with forced serialization and a fixed latency
// of 6.  heap_ptr is read in S3 and dst is written in S5.
5921 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5922 %{
5923     instruction_count(1); force_serialization;
5924     fixed_latency(6);
5925     heap_ptr : S3(read);
5926     DECODE   : S0(3);   // three any-decoder slots
5927     D0       : S2;      // plus the big decoder in S2
5928     MEM      : S3;
5929     ALU      : S3(2);
5930     dst      : S5(write);
5931     BR       : S5;
5932 %}
5933 
5934 // Generic big/slow expanded idiom
// Catch-all class: counted as 10 instructions spanning multiple bundles,
// serializing, with a fixed latency of 100.  Takes no operands.  Used in
// this file by the XMM load instructions (loadF, loadD, packed loads).
5935 pipe_class pipe_slow()
5936 %{
5937     instruction_count(10); multiple_bundles; force_serialization;
5938     fixed_latency(100);
5939     D0  : S0(2);
5940     MEM : S3(2);
5941 %}
5942 
5943 // The real do-nothing guy
// Zero instructions, no operands, no resources.  Bound to MachNop by the
// define block that follows.
5944 pipe_class empty()
5945 %{
5946     instruction_count(0);
5947 %}
5948 
5949 // Define the class for the Nop node
// Nop nodes are scheduled with the zero-instruction pipe_class "empty".
5950 define
5951 %{
5952    MachNop = empty;
5953 %}
5954 
5955 %}
5956 
5957 //----------INSTRUCTIONS-------------------------------------------------------
5958 //
5959 // match      -- States which machine-independent subtree may be replaced
5960 //               by this instruction.
5961 // ins_cost   -- The estimated cost of this instruction is used by instruction
5962 //               selection to identify a minimum cost tree of machine
5963 //               instructions that matches a tree of machine-independent
5964 //               instructions.
5965 // format     -- A string providing the disassembly for this instruction.
5966 //               The value of an instruction's operand may be inserted
5967 //               by referring to it with a '$' prefix.
5968 // opcode     -- Three instruction opcodes may be provided.  These are referred
5969 //               to within an encode class as $primary, $secondary, and $tertiary
5970 //               respectively.  The primary opcode is commonly used to
5971 //               indicate the type of machine instruction, while secondary
5972 //               and tertiary are often used for prefix options or addressing
5973 //               modes.
5974 // ins_encode -- A list of encode classes with parameters. The encode class
5975 //               name must have been defined in an 'enc_class' specification
5976 //               in the encode section of the architecture description.
5977 
5978 
5979 //----------Load/Store/Move Instructions---------------------------------------
5980 //----------Load Instructions--------------------------------------------------
5981 
5982 // Load Byte (8 bit signed)
// Matches an ideal LoadB and emits one movsbl (sign-extending byte load)
// from $mem into the int register $dst.
5983 instruct loadB(rRegI dst, memory mem)
5984 %{
5985   match(Set dst (LoadB mem));
5986 
5987   ins_cost(125);
5988   format %{ "movsbl  $dst, $mem\t# byte" %}
5989 
5990   ins_encode %{
5991     __ movsbl($dst$$Register, $mem$$Address);
5992   %}
5993 
5994   ins_pipe(ialu_reg_mem);
5995 %}
5996 
5997 // Load Byte (8 bit signed) into Long Register
// Folds ConvI2L(LoadB) into one movsbq, which sign-extends the byte
// directly to 64 bits.
5998 instruct loadB2L(rRegL dst, memory mem)
5999 %{
6000   match(Set dst (ConvI2L (LoadB mem)));
6001 
6002   ins_cost(125);
6003   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6004 
6005   ins_encode %{
6006     __ movsbq($dst$$Register, $mem$$Address);
6007   %}
6008 
6009   ins_pipe(ialu_reg_mem);
6010 %}
6011 
6012 // Load Unsigned Byte (8 bit UNsigned)
// Matches an ideal LoadUB and emits one movzbl (zero-extending byte load).
6013 instruct loadUB(rRegI dst, memory mem)
6014 %{
6015   match(Set dst (LoadUB mem));
6016 
6017   ins_cost(125);
6018   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6019 
6020   ins_encode %{
6021     __ movzbl($dst$$Register, $mem$$Address);
6022   %}
6023 
6024   ins_pipe(ialu_reg_mem);
6025 %}
6026 
6027 // Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUB) into one movzbq (zero-extend byte to 64 bits).
6028 instruct loadUB2L(rRegL dst, memory mem)
6029 %{
6030   match(Set dst (ConvI2L (LoadUB mem)));
6031 
6032   ins_cost(125);
6033   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6034 
6035   ins_encode %{
6036     __ movzbq($dst$$Register, $mem$$Address);
6037   %}
6038 
6039   ins_pipe(ialu_reg_mem);
6040 %}
6041 
6042 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
// Folds ConvI2L(AndI(LoadUB, mask)) into a zero-extending load followed by
// a 32-bit AND.  The loaded value is in 0..255, so the masked result is
// never negative and zero extension equals the sign extension ConvI2L
// asks for.  The AND modifies the flags, hence KILL cr.  No ins_cost is
// given here.
6043 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6044   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6045   effect(KILL cr);
6046 
6047   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6048             "andl    $dst, $mask" %}
6049   ins_encode %{
6050     Register Rdst = $dst$$Register;
6051     __ movzbq(Rdst, $mem$$Address);
6052     __ andl(Rdst, $mask$$constant);
6053   %}
6054   ins_pipe(ialu_reg_mem);
6055 %}
6056 
6057 // Load Short (16 bit signed)
// Matches an ideal LoadS and emits one movswl (sign-extending 16-bit load).
6058 instruct loadS(rRegI dst, memory mem)
6059 %{
6060   match(Set dst (LoadS mem));
6061 
6062   ins_cost(125);
6063   format %{ "movswl $dst, $mem\t# short" %}
6064 
6065   ins_encode %{
6066     __ movswl($dst$$Register, $mem$$Address);
6067   %}
6068 
6069   ins_pipe(ialu_reg_mem);
6070 %}
6071 
6072 // Load Short (16 bit signed) to Byte (8 bit signed)
// The matched (x << 24) >> 24 keeps only the sign-extended low byte of the
// loaded value, so the shift pair is replaced by a single sign-extending
// byte load from the same address (x86 is little-endian: the low byte is
// at $mem).
6073 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6074   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6075 
6076   ins_cost(125);
6077   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6078   ins_encode %{
6079     __ movsbl($dst$$Register, $mem$$Address);
6080   %}
6081   ins_pipe(ialu_reg_mem);
6082 %}
6083 
6084 // Load Short (16 bit signed) into Long Register
// Folds ConvI2L(LoadS) into one movswq (sign-extend 16 bits to 64 bits).
6085 instruct loadS2L(rRegL dst, memory mem)
6086 %{
6087   match(Set dst (ConvI2L (LoadS mem)));
6088 
6089   ins_cost(125);
6090   format %{ "movswq $dst, $mem\t# short -> long" %}
6091 
6092   ins_encode %{
6093     __ movswq($dst$$Register, $mem$$Address);
6094   %}
6095 
6096   ins_pipe(ialu_reg_mem);
6097 %}
6098 
6099 // Load Unsigned Short/Char (16 bit UNsigned)
// Matches an ideal LoadUS and emits one movzwl (zero-extending 16-bit load).
6100 instruct loadUS(rRegI dst, memory mem)
6101 %{
6102   match(Set dst (LoadUS mem));
6103 
6104   ins_cost(125);
6105   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6106 
6107   ins_encode %{
6108     __ movzwl($dst$$Register, $mem$$Address);
6109   %}
6110 
6111   ins_pipe(ialu_reg_mem);
6112 %}
6113 
6114 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The matched (x << 24) >> 24 depends only on the low byte of the loaded
// value, so it is replaced by a single sign-extending byte load from $mem.
6115 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6116   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6117 
6118   ins_cost(125);
6119   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6120   ins_encode %{
6121     __ movsbl($dst$$Register, $mem$$Address);
6122   %}
6123   ins_pipe(ialu_reg_mem);
6124 %}
6125 
6126 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUS) into one movzwq (zero-extend 16 bits to 64 bits).
6127 instruct loadUS2L(rRegL dst, memory mem)
6128 %{
6129   match(Set dst (ConvI2L (LoadUS mem)));
6130 
6131   ins_cost(125);
6132   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6133 
6134   ins_encode %{
6135     __ movzwq($dst$$Register, $mem$$Address);
6136   %}
6137 
6138   ins_pipe(ialu_reg_mem);
6139 %}
6140 
6141 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Masking with 0xFF keeps only the low byte, so load + and + convert is
// replaced by one zero-extending byte load.  No ins_cost is given here.
6142 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6143   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6144 
6145   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6146   ins_encode %{
6147     __ movzbq($dst$$Register, $mem$$Address);
6148   %}
6149   ins_pipe(ialu_reg_mem);
6150 %}
6151 
6152 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
// Zero-extending 16-bit load followed by a 32-bit AND.  The loaded value is
// in 0..65535, so the masked result is never negative and zero extension
// matches ConvI2L.  The AND modifies the flags, hence KILL cr.
6153 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6154   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6155   effect(KILL cr);
6156 
6157   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6158             "andl    $dst, $mask" %}
6159   ins_encode %{
6160     Register Rdst = $dst$$Register;
6161     __ movzwq(Rdst, $mem$$Address);
6162     __ andl(Rdst, $mask$$constant);
6163   %}
6164   ins_pipe(ialu_reg_mem);
6165 %}
6166 
6167 // Load Integer
// Matches an ideal LoadI and emits one 32-bit movl from $mem.
6168 instruct loadI(rRegI dst, memory mem)
6169 %{
6170   match(Set dst (LoadI mem));
6171 
6172   ins_cost(125);
6173   format %{ "movl    $dst, $mem\t# int" %}
6174 
6175   ins_encode %{
6176     __ movl($dst$$Register, $mem$$Address);
6177   %}
6178 
6179   ins_pipe(ialu_reg_mem);
6180 %}
6181 
6182 // Load Integer (32 bit signed) to Byte (8 bit signed)
// The matched (x << 24) >> 24 depends only on the low byte of the int, so
// it is replaced by a single sign-extending byte load from $mem.
6183 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6184   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6185 
6186   ins_cost(125);
6187   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6188   ins_encode %{
6189     __ movsbl($dst$$Register, $mem$$Address);
6190   %}
6191   ins_pipe(ialu_reg_mem);
6192 %}
6193 
6194 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// LoadI & 0xFF keeps only the low byte, so it is replaced by a single
// zero-extending byte load from $mem.
6195 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6196   match(Set dst (AndI (LoadI mem) mask));
6197 
6198   ins_cost(125);
6199   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6200   ins_encode %{
6201     __ movzbl($dst$$Register, $mem$$Address);
6202   %}
6203   ins_pipe(ialu_reg_mem);
6204 %}
6205 
6206 // Load Integer (32 bit signed) to Short (16 bit signed)
// The matched (x << 16) >> 16 depends only on the low 16 bits of the int,
// so it is replaced by a single sign-extending 16-bit load from $mem.
6207 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6208   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6209 
6210   ins_cost(125);
6211   format %{ "movswl  $dst, $mem\t# int -> short" %}
6212   ins_encode %{
6213     __ movswl($dst$$Register, $mem$$Address);
6214   %}
6215   ins_pipe(ialu_reg_mem);
6216 %}
6217 
6218 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// LoadI & 0xFFFF keeps only the low 16 bits, so it is replaced by a single
// zero-extending 16-bit load from $mem.
6219 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6220   match(Set dst (AndI (LoadI mem) mask));
6221 
6222   ins_cost(125);
6223   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6224   ins_encode %{
6225     __ movzwl($dst$$Register, $mem$$Address);
6226   %}
6227   ins_pipe(ialu_reg_mem);
6228 %}
6229 
6230 // Load Integer into Long Register
// Folds ConvI2L(LoadI) into one movslq (sign-extend 32 bits to 64 bits).
6231 instruct loadI2L(rRegL dst, memory mem)
6232 %{
6233   match(Set dst (ConvI2L (LoadI mem)));
6234 
6235   ins_cost(125);
6236   format %{ "movslq  $dst, $mem\t# int -> long" %}
6237 
6238   ins_encode %{
6239     __ movslq($dst$$Register, $mem$$Address);
6240   %}
6241 
6242   ins_pipe(ialu_reg_mem);
6243 %}
6244 
6245 // Load Integer with mask 0xFF into Long Register
// (LoadI & 0xFF) is a non-negative byte value, so load + and + convert is
// replaced by one zero-extending byte load.  No ins_cost is given here.
6246 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6247   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6248 
6249   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6250   ins_encode %{
6251     __ movzbq($dst$$Register, $mem$$Address);
6252   %}
6253   ins_pipe(ialu_reg_mem);
6254 %}
6255 
6256 // Load Integer with mask 0xFFFF into Long Register
// (LoadI & 0xFFFF) is a non-negative 16-bit value, so load + and + convert
// is replaced by one zero-extending 16-bit load.  No ins_cost is given here.
6257 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6258   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6259 
6260   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6261   ins_encode %{
6262     __ movzwq($dst$$Register, $mem$$Address);
6263   %}
6264   ins_pipe(ialu_reg_mem);
6265 %}
6266 
6267 // Load Integer with a 32-bit mask into Long Register
//
// Matches ConvI2L(AndI(LoadI mem, mask)) for an arbitrary 32-bit mask.
// When bit 31 of the mask is set, the int result of the AND can be negative
// and ConvI2L must sign-extend it.  A 32-bit movl/andl pair always clears the
// upper 32 bits of $dst and therefore produces a wrong (positive) long for
// such inputs.  The load is sign-extended instead (movslq) and masked with a
// 64-bit AND, whose imm32 the hardware sign-extends; because sign extension
// distributes over bitwise AND, the result equals sign-extending
// (mem & mask) for every mask value.
// The AND modifies the flags, hence KILL cr.
6268 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6269   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6270   effect(KILL cr);
6271 
6272   format %{ "movslq  $dst, $mem\t# int & 32-bit mask -> long\n\t"
6273             "andq    $dst, $mask" %}
6274   ins_encode %{
6275     Register Rdst = $dst$$Register;
6276     __ movslq(Rdst, $mem$$Address);   // sign-extending 32 -> 64 bit load
6277     __ andq(Rdst, $mask$$constant);   // imm32 is sign-extended to 64 bits
6278   %}
6279   ins_pipe(ialu_reg_mem);
6280 %}
6281 
6282 // Load Unsigned Integer into Long Register
// Matches the ideal LoadUI2L node with one movl; on x86-64 a 32-bit
// register write clears the upper 32 bits, giving the zero-extended long.
6283 instruct loadUI2L(rRegL dst, memory mem)
6284 %{
6285   match(Set dst (LoadUI2L mem));
6286 
6287   ins_cost(125);
6288   format %{ "movl    $dst, $mem\t# uint -> long" %}
6289 
6290   ins_encode %{
6291     __ movl($dst$$Register, $mem$$Address);
6292   %}
6293 
6294   ins_pipe(ialu_reg_mem);
6295 %}
6296 
6297 // Load Long
// Matches an ideal LoadL and emits one 64-bit movq from $mem.  The "XXX" on
// ins_pipe is the original author's marker: the integer reg-mem class is
// reused for the long load.
6298 instruct loadL(rRegL dst, memory mem)
6299 %{
6300   match(Set dst (LoadL mem));
6301 
6302   ins_cost(125);
6303   format %{ "movq    $dst, $mem\t# long" %}
6304 
6305   ins_encode %{
6306     __ movq($dst$$Register, $mem$$Address);
6307   %}
6308 
6309   ins_pipe(ialu_reg_mem); // XXX
6310 %}
6311 
6312 // Load Range
// Matches an ideal LoadRange with a 32-bit load.  Encoded through opcode
// bytes rather than the assembler: 0x8B is MOV r32, r/m32.  REX_reg_mem,
// OpcP and reg_mem are enc_classes defined elsewhere in this file.
6313 instruct loadRange(rRegI dst, memory mem)
6314 %{
6315   match(Set dst (LoadRange mem));
6316 
6317   ins_cost(125); // XXX
6318   format %{ "movl    $dst, $mem\t# range" %}
6319   opcode(0x8B);
6320   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6321   ins_pipe(ialu_reg_mem);
6322 %}
6323 
6324 // Load Pointer
// Matches an ideal LoadP with a 64-bit load: opcode 0x8B (MOV r, r/m) with
// the REX_reg_mem_wide prefix enc_class (defined elsewhere in this file).
6325 instruct loadP(rRegP dst, memory mem)
6326 %{
6327   match(Set dst (LoadP mem));
6328 
6329   ins_cost(125); // XXX
6330   format %{ "movq    $dst, $mem\t# ptr" %}
6331   opcode(0x8B);
6332   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6333   ins_pipe(ialu_reg_mem); // XXX
6334 %}
6335 
6336 // Load Compressed Pointer
// Matches an ideal LoadN: a compressed pointer is loaded as a 32-bit value
// with movl into a narrow-oop register.  (This block uses a three-space
// indent, unlike its neighbours.)
6337 instruct loadN(rRegN dst, memory mem)
6338 %{
6339    match(Set dst (LoadN mem));
6340 
6341    ins_cost(125); // XXX
6342    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6343    ins_encode %{
6344      __ movl($dst$$Register, $mem$$Address);
6345    %}
6346    ins_pipe(ialu_reg_mem); // XXX
6347 %}
6348 
6349 
6350 // Load Klass Pointer
// Matches an ideal LoadKlass with a 64-bit load; same encoding as loadP
// (opcode 0x8B with the wide REX prefix enc_class).
6351 instruct loadKlass(rRegP dst, memory mem)
6352 %{
6353   match(Set dst (LoadKlass mem));
6354 
6355   ins_cost(125); // XXX
6356   format %{ "movq    $dst, $mem\t# class" %}
6357   opcode(0x8B);
6358   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6359   ins_pipe(ialu_reg_mem); // XXX
6360 %}
6361 
6362 // Load narrow Klass Pointer
// Matches an ideal LoadNKlass: the compressed klass pointer is loaded as a
// 32-bit value with movl into a narrow-oop register.
6363 instruct loadNKlass(rRegN dst, memory mem)
6364 %{
6365   match(Set dst (LoadNKlass mem));
6366 
6367   ins_cost(125); // XXX
6368   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6369   ins_encode %{
6370     __ movl($dst$$Register, $mem$$Address);
6371   %}
6372   ins_pipe(ialu_reg_mem); // XXX
6373 %}
6374 
6375 // Load Float
// Matches an ideal LoadF into an XMM register.  The three opcode bytes
// F3 0F 10 are MOVSS xmm, m32; the mandatory F3 prefix (OpcP) is emitted
// before the REX prefix, then 0F (OpcS) and 10 (OpcT).
6376 instruct loadF(regF dst, memory mem)
6377 %{
6378   match(Set dst (LoadF mem));
6379 
6380   ins_cost(145); // XXX
6381   format %{ "movss   $dst, $mem\t# float" %}
6382   opcode(0xF3, 0x0F, 0x10);
6383   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6384   ins_pipe(pipe_slow); // XXX
6385 %}
6386 
6387 // Load Double
// Variant selected when UseXmmLoadAndClearUpper is false: 66 0F 12 is
// MOVLPD xmm, m64, which writes only the low 64 bits of the XMM register.
// The loadD instruction covers the case where the flag is true.
6388 instruct loadD_partial(regD dst, memory mem)
6389 %{
6390   predicate(!UseXmmLoadAndClearUpper);
6391   match(Set dst (LoadD mem));
6392 
6393   ins_cost(145); // XXX
6394   format %{ "movlpd  $dst, $mem\t# double" %}
6395   opcode(0x66, 0x0F, 0x12);
6396   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6397   ins_pipe(pipe_slow); // XXX
6398 %}
6399 
// Load Double
// Variant selected when UseXmmLoadAndClearUpper is true: F2 0F 10 is
// MOVSD xmm, m64, which also clears the upper half of the XMM register.
// loadD_partial covers the case where the flag is false.
6400 instruct loadD(regD dst, memory mem)
6401 %{
6402   predicate(UseXmmLoadAndClearUpper);
6403   match(Set dst (LoadD mem));
6404 
6405   ins_cost(145); // XXX
6406   format %{ "movsd   $dst, $mem\t# double" %}
6407   opcode(0xF2, 0x0F, 0x10);
6408   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6409   ins_pipe(pipe_slow); // XXX
6410 %}
6411 
6412 // Load Aligned Packed Byte to XMM register
// Matches an ideal Load8B; encoded by the movq_ld enc_class (defined
// elsewhere in this file), shown as MOVQ in the format string.
6413 instruct loadA8B(regD dst, memory mem) %{
6414   match(Set dst (Load8B mem));
6415   ins_cost(125);
6416   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6417   ins_encode( movq_ld(dst, mem));
6418   ins_pipe( pipe_slow );
6419 %}
6420 
6421 // Load Aligned Packed Short to XMM register
// Matches an ideal Load4S; same movq_ld encoding as the other packed loads.
6422 instruct loadA4S(regD dst, memory mem) %{
6423   match(Set dst (Load4S mem));
6424   ins_cost(125);
6425   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6426   ins_encode( movq_ld(dst, mem));
6427   ins_pipe( pipe_slow );
6428 %}
6429 
6430 // Load Aligned Packed Char to XMM register
// Matches an ideal Load4C; same movq_ld encoding as the other packed loads.
6431 instruct loadA4C(regD dst, memory mem) %{
6432   match(Set dst (Load4C mem));
6433   ins_cost(125);
6434   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6435   ins_encode( movq_ld(dst, mem));
6436   ins_pipe( pipe_slow );
6437 %}
6438 
6439 // Load Aligned Packed Integer to XMM register
// Matches an ideal Load2I; same movq_ld encoding as the other packed loads.
// Note the name does not follow the loadA<n><type> pattern of its siblings.
6440 instruct load2IU(regD dst, memory mem) %{
6441   match(Set dst (Load2I mem));
6442   ins_cost(125);
6443   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6444   ins_encode( movq_ld(dst, mem));
6445   ins_pipe( pipe_slow );
6446 %}
6447 
6448 // Load Aligned Packed Single to XMM
// Matches an ideal Load2F; same movq_ld encoding as the other packed loads
// but with cost 145 instead of 125.
6449 instruct loadA2F(regD dst, memory mem) %{
6450   match(Set dst (Load2F mem));
6451   ins_cost(145);
6452   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6453   ins_encode( movq_ld(dst, mem));
6454   ins_pipe( pipe_slow );
6455 %}
6456 
6457 // Load Effective Address
// Matches the address expression described by the indOffset8 operand itself
// (not a load through it) and computes it into $dst with a 64-bit LEA
// (opcode 0x8D); no memory is accessed.
6458 instruct leaP8(rRegP dst, indOffset8 mem)
6459 %{
6460   match(Set dst mem);
6461 
6462   ins_cost(110); // XXX
6463   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6464   opcode(0x8D);
6465   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6466   ins_pipe(ialu_reg_reg_fat);
6467 %}
6468 
// Load Effective Address for an indOffset32 address operand: same 64-bit
// LEA encoding (opcode 0x8D) as leaP8.
6469 instruct leaP32(rRegP dst, indOffset32 mem)
6470 %{
6471   match(Set dst mem);
6472 
6473   ins_cost(110);
6474   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6475   opcode(0x8D);
6476   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6477   ins_pipe(ialu_reg_reg_fat);
6478 %}
6479 
6480 // instruct leaPIdx(rRegP dst, indIndex mem)
6481 // %{
6482 //   match(Set dst mem);
6483 
6484 //   ins_cost(110);
6485 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6486 //   opcode(0x8D);
6487 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6488 //   ins_pipe(ialu_reg_reg_fat);
6489 // %}
6490 
// Load Effective Address for an indIndexOffset address operand: same 64-bit
// LEA encoding (opcode 0x8D) as leaP8.
6491 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6492 %{
6493   match(Set dst mem);
6494 
6495   ins_cost(110);
6496   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6497   opcode(0x8D);
6498   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6499   ins_pipe(ialu_reg_reg_fat);
6500 %}
6501 
// Load Effective Address for an indIndexScale address operand: same 64-bit
// LEA encoding (opcode 0x8D) as leaP8.
6502 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6503 %{
6504   match(Set dst mem);
6505 
6506   ins_cost(110);
6507   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6508   opcode(0x8D);
6509   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6510   ins_pipe(ialu_reg_reg_fat);
6511 %}
6512 
// Load Effective Address for an indIndexScaleOffset address operand: same
// 64-bit LEA encoding (opcode 0x8D) as leaP8.
6513 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6514 %{
6515   match(Set dst mem);
6516 
6517   ins_cost(110);
6518   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6519   opcode(0x8D);
6520   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6521   ins_pipe(ialu_reg_reg_fat);
6522 %}
6523 
// Load Effective Address for an indPosIndexScaleOffset address operand:
// same 64-bit LEA encoding (opcode 0x8D) as leaP8.
6524 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6525 %{
6526   match(Set dst mem);
6527 
6528   ins_cost(110);
6529   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6530   opcode(0x8D);
6531   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6532   ins_pipe(ialu_reg_reg_fat);
6533 %}
6534 
6535 // Load Effective Address which uses Narrow (32-bits) oop
// Only selected when compressed oops are in use with a non-zero shift; the
// *Narrow lea variants in this file cover the zero-shift case.  Same 64-bit
// LEA encoding (opcode 0x8D) as the other lea instructions.
6536 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6537 %{
6538   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6539   match(Set dst mem);
6540 
6541   ins_cost(110);
6542   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6543   opcode(0x8D);
6544   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6545   ins_pipe(ialu_reg_reg_fat);
6546 %}
6547 
// Load Effective Address for an indOffset8Narrow address operand.  Only
// selected when the narrow-oop shift is zero.  64-bit LEA (opcode 0x8D).
6548 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6549 %{
6550   predicate(Universe::narrow_oop_shift() == 0);
6551   match(Set dst mem);
6552 
6553   ins_cost(110); // XXX
6554   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6555   opcode(0x8D);
6556   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6557   ins_pipe(ialu_reg_reg_fat);
6558 %}
6559 
// Load Effective Address for an indOffset32Narrow address operand.  Only
// selected when the narrow-oop shift is zero.  64-bit LEA (opcode 0x8D).
6560 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6561 %{
6562   predicate(Universe::narrow_oop_shift() == 0);
6563   match(Set dst mem);
6564 
6565   ins_cost(110);
6566   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6567   opcode(0x8D);
6568   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6569   ins_pipe(ialu_reg_reg_fat);
6570 %}
6571 
// Load Effective Address for an indIndexOffsetNarrow address operand.  Only
// selected when the narrow-oop shift is zero.  64-bit LEA (opcode 0x8D).
6572 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6573 %{
6574   predicate(Universe::narrow_oop_shift() == 0);
6575   match(Set dst mem);
6576 
6577   ins_cost(110);
6578   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6579   opcode(0x8D);
6580   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6581   ins_pipe(ialu_reg_reg_fat);
6582 %}
6583 
// Load Effective Address for an indIndexScaleNarrow address operand.  Only
// selected when the narrow-oop shift is zero.  64-bit LEA (opcode 0x8D).
6584 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6585 %{
6586   predicate(Universe::narrow_oop_shift() == 0);
6587   match(Set dst mem);
6588 
6589   ins_cost(110);
6590   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6591   opcode(0x8D);
6592   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6593   ins_pipe(ialu_reg_reg_fat);
6594 %}
6595 
// Load Effective Address for an indIndexScaleOffsetNarrow address operand.
// Only selected when the narrow-oop shift is zero.  64-bit LEA (opcode 0x8D).
6596 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6597 %{
6598   predicate(Universe::narrow_oop_shift() == 0);
6599   match(Set dst mem);
6600 
6601   ins_cost(110);
6602   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6603   opcode(0x8D);
6604   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6605   ins_pipe(ialu_reg_reg_fat);
6606 %}
6607 
// Load Effective Address for an indPosIndexScaleOffsetNarrow address
// operand.  Only selected when the narrow-oop shift is zero.  64-bit LEA
// (opcode 0x8D).
6608 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6609 %{
6610   predicate(Universe::narrow_oop_shift() == 0);
6611   match(Set dst mem);
6612 
6613   ins_cost(110);
6614   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6615   opcode(0x8D);
6616   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6617   ins_pipe(ialu_reg_reg_fat);
6618 %}
6619 
// Load int constant: materializes any immI into $dst.  Encoded by the
// load_immI enc_class (defined elsewhere in this file).  No ins_cost is
// given; the zero constant has its own cheaper rule, loadConI0.
6620 instruct loadConI(rRegI dst, immI src)
6621 %{
6622   match(Set dst src);
6623 
6624   format %{ "movl    $dst, $src\t# int" %}
6625   ins_encode(load_immI(dst, src));
6626   ins_pipe(ialu_reg_fat); // XXX
6627 %}
6628 
// Load int constant zero: clears $dst with xorl $dst, $dst (opcode 0x33 is
// XOR r32, r/m32).  XOR modifies the flags, hence KILL cr.
6629 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6630 %{
6631   match(Set dst src);
6632   effect(KILL cr);
6633 
6634   ins_cost(50);
6635   format %{ "xorl    $dst, $dst\t# int" %}
6636   opcode(0x33); /* + rd */
6637   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6638   ins_pipe(ialu_reg);
6639 %}
6640 
// Load long constant: general case for any immL, encoded by the load_immL
// enc_class (defined elsewhere in this file).  At cost 150 it is the most
// expensive long-constant rule; loadConL0 (50), loadConUL32 (60) and
// loadConL32 (70) are cheaper rules for narrower immediates.
6641 instruct loadConL(rRegL dst, immL src)
6642 %{
6643   match(Set dst src);
6644 
6645   ins_cost(150);
6646   format %{ "movq    $dst, $src\t# long" %}
6647   ins_encode(load_immL(dst, src));
6648   ins_pipe(ialu_reg);
6649 %}
6650 
// Load long constant zero: clears $dst with a xor of the register with
// itself (opcode 0x33).  The non-wide REX_reg_reg enc_class is used and the
// format shows xorl, i.e. presumably a 32-bit XOR; on x86-64 a 32-bit
// register write also zeroes the upper 32 bits, so the whole long becomes 0.
// XOR modifies the flags, hence KILL cr.
6651 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6652 %{
6653   match(Set dst src);
6654   effect(KILL cr);
6655 
6656   ins_cost(50);
6657   format %{ "xorl    $dst, $dst\t# long" %}
6658   opcode(0x33); /* + rd */
6659   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6660   ins_pipe(ialu_reg); // XXX
6661 %}
6662 
// Load a long constant that matches the immUL32 operand (unsigned 32-bit
// per the format comment) using a movl, via the load_immUL32 encoding
// class.  Cost 60: cheaper than loadConL32 and loadConL.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode(load_immUL32(dst, src));
  ins_pipe(ialu_reg);
%}
6672 
// Load a long constant that matches the immL32 operand using a movq, via
// the load_immL32 encoding class.  Cost 70: between loadConUL32 and the
// generic loadConL.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode(load_immL32(dst, src));
  ins_pipe(ialu_reg);
%}
6682 
// Materialize an arbitrary pointer constant (movq per the format) through
// the load_immP encoding class.  No ins_cost is given, so the default cost
// applies.
instruct loadConP(rRegP dst, immP src)
%{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# ptr" %}
  ins_encode(load_immP(dst, src));
  ins_pipe(ialu_reg_fat); // XXX
%}
6691 
// Load the NULL pointer constant by XORing the destination with itself
// (opcode 0x33, formatted as xorl).  XOR writes the condition codes, hence
// KILL cr.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  opcode(0x33); /* + rd */
  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
6703 
// Load a pointer constant that matches the immP31 operand (positive 32-bit
// value per the format comment) with a movl, via the load_immP31 encoding
// class.
// NOTE(review): a plain movl-immediate does not write the condition codes;
// confirm against load_immP31 whether KILL cr is actually required here.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode(load_immP31(dst, src));
  ins_pipe(ialu_reg);
%}
6714 
// Load an arbitrary float constant into an XMM register.  The format shows
// a movss from memory; the bytes come from the load_conF encoding class
// defined outside this section.
instruct loadConF(regF dst, immF src)
%{
  match(Set dst src);
  ins_cost(125);

  format %{ "movss   $dst, [$src]" %}
  ins_encode(load_conF(dst, src));
  ins_pipe(pipe_slow);
%}
6724 
// Load the compressed NULL pointer by XORing the destination with itself.
// XOR writes the condition codes, hence KILL cr.
// The format string names the destination twice so the printed assembly
// matches the xorq that is actually emitted (the constant operand is never
// part of the instruction).
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6734 
// Load a non-NULL compressed oop constant into a narrow-oop register.
// The NULL constant is not expected to reach this rule (it is matched by
// the immN0 rule); hitting it here is treated as a VM error.
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_encode %{
    if ((address)$src$$constant != NULL) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
6750 
// Load float 0.0 by XORing the XMM register with itself (xorps, opcode
// bytes 0F 57).  Cost 100 is lower than loadConF's 125, so this rule is
// preferred for zero.
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  opcode(0x0F, 0x57);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(pipe_slow);
%}
6761 
// Load an arbitrary double constant into an XMM register.  The format
// shows a movsd from memory; the bytes come from the load_conD encoding
// class defined outside this section.
// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD src)
%{
  match(Set dst src);
  ins_cost(125);

  format %{ "movsd   $dst, [$src]" %}
  ins_encode(load_conD(dst, src));
  ins_pipe(pipe_slow);
%}
6772 
// Load double 0.0 by XORing the XMM register with itself (xorpd, opcode
// bytes 66 0F 57).  The 0x66 prefix (OpcP) is emitted before the REX
// prefix; the remaining opcode bytes follow it.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  opcode(0x66, 0x0F, 0x57);
  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
  ins_pipe(pipe_slow);
%}
6783 
// Reload an int from a stack slot into a register (movl, opcode 0x8B).
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6794 
// Reload a long from a stack slot into a register (movq, opcode 0x8B with
// the wide REX encoding class).
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6805 
// Reload a pointer from a stack slot into a register (movq, opcode 0x8B
// with the wide REX encoding class).
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6816 
// Reload a float from a stack slot into an XMM register (movss, opcode
// bytes F3 0F 10).  The F3 prefix (OpcP) is emitted before the REX prefix.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  opcode(0xF3, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
6827 
// Reload a double from a stack slot into an XMM register.  The slot is
// addressed as rsp + the operand's displacement, and the load goes through
// the macro-assembler's movdbl helper rather than a fixed opcode sequence.
// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
6840 
6841 // Prefetch instructions.
6842 // Must be safe to execute with invalid address (cannot fault).
6843 
// Read prefetch selected when ReadPrefetchInstr == 3.
// Encoded as opcode bytes 0F 0D with ModRM reg field /0.
instruct prefetchr( memory mem ) %{
  predicate(ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6854 
// Read prefetch selected when ReadPrefetchInstr == 0 (PREFETCHNTA).
// Encoded as opcode bytes 0F 18 with ModRM reg field /0.
instruct prefetchrNTA( memory mem ) %{
  predicate(ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6865 
// Read prefetch selected when ReadPrefetchInstr == 1 (PREFETCHT0).
// Encoded as opcode bytes 0F 18 with ModRM reg field /1.
instruct prefetchrT0( memory mem ) %{
  predicate(ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6876 
// Read prefetch selected when ReadPrefetchInstr == 2 (PREFETCHT2).
// Encoded as opcode bytes 0F 18 with ModRM reg field /3.
instruct prefetchrT2( memory mem ) %{
  predicate(ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
  ins_pipe(ialu_mem);
%}
6887 
// Write prefetch selected when AllocatePrefetchInstr == 3 (PREFETCHW).
// Encoded as opcode bytes 0F 0D with ModRM reg field /1.
instruct prefetchw( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6898 
// Write prefetch selected when AllocatePrefetchInstr == 0 (PREFETCHNTA).
// Encoded as opcode bytes 0F 18 with ModRM reg field /0.
instruct prefetchwNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6909 
// Write prefetch selected when AllocatePrefetchInstr == 1 (PREFETCHT0).
// Encoded as opcode bytes 0F 18 with ModRM reg field /1.
instruct prefetchwT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6920 
// Write prefetch selected when AllocatePrefetchInstr == 2 (PREFETCHT2).
// Encoded as opcode bytes 0F 18 with ModRM reg field /3.
instruct prefetchwT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
  ins_pipe(ialu_mem);
%}
6931 
6932 //----------Store Instructions-------------------------------------------------
6933 
// Store Byte
// Stores the low byte of an int register to memory (movb, opcode 0x88).
// Uses the byte-register REX encoding class (REX_breg_mem) rather than the
// plain REX_reg_mem used by the wider stores.
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  opcode(0x88);
  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6945 
// Store Char/Short
// 16-bit store from an int register (movw): a size prefix is emitted ahead
// of the same 0x89 opcode used by the 32-bit store.
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  opcode(0x89);
  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6957 
// Store Integer
// 32-bit store from an int register (movl, opcode 0x89).
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6969 
// Store Long
// 64-bit store from a long register (movq, opcode 0x89 with the wide REX
// encoding class).
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg); // XXX
%}
6981 
// Store Pointer
// 64-bit store of a pointer register (movq, opcode 0x89 with the wide REX
// encoding class).  The source uses the any_RegP operand class.
instruct storeP(memory mem, any_RegP src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6993 
// Store a NULL pointer by storing R12.  Only selectable with compressed
// oops and a NULL narrow-oop base; per the format string R12 (heap base)
// is then zero, so a register store writes 0 without an immediate.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7006 
// Store NULL Pointer, mark word, or other simple pointer constant.
// The constant must match immP31; it is emitted as a 32-bit immediate
// (Con32) after opcode C7 /0 with the wide REX encoding class (movq).
instruct storeImmP(memory mem, immP31 src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7018 
// Store Compressed Pointer
// 32-bit store (movl) of a narrow-oop register, emitted through the
// assembler rather than a fixed opcode sequence.
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7031 
// Store a compressed NULL by storing the low 32 bits of R12.  Only
// selectable when the narrow-oop base is NULL; per the format string R12
// (heap base) is then zero.  Cheaper (125) than storeImmN (150).
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(Universe::narrow_oop_base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7044 
// Store a compressed oop constant directly to memory.  A non-NULL constant
// goes through set_narrow_oop; the NULL constant is written as a plain
// 32-bit zero immediate.
instruct storeImmN(memory mem, immN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    if ((address)$src$$constant != NULL) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, (int32_t)0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
7061 
// Store Integer Immediate
// Zero case: store the low 32 bits of R12 instead of an immediate.  Only
// selectable with compressed oops and a NULL narrow-oop base; per the
// format string R12 (heap base) is then zero.
instruct storeImmI0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7075 
// Store an arbitrary int immediate to memory: opcode C7 /0 followed by a
// 32-bit immediate (Con32).
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7086 
// Store Long Immediate
// Zero case: store R12 (64-bit) instead of an immediate.  Only selectable
// with compressed oops and a NULL narrow-oop base; per the format string
// R12 (heap base) is then zero.
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7100 
// Store a long constant that matches immL32: opcode C7 /0 with the wide
// REX encoding class (movq) followed by a 32-bit immediate (Con32).
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7111 
// Store Short/Char Immediate
// Zero case: store the low 16 bits of R12 instead of an immediate.  Only
// selectable with compressed oops and a NULL narrow-oop base; per the
// format string R12 (heap base) is then zero.
instruct storeImmC0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7125 
// Store a 16-bit immediate to memory (movw).  Only selectable when
// UseStoreImmI16 is set.  Encoded as a size prefix, opcode C7 /0 and a
// 16-bit immediate (Con16).
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
  ins_pipe(ialu_mem_imm);
%}
7137 
// Store Byte Immediate
// Zero case: store the low byte of R12 instead of an immediate.  Only
// selectable with compressed oops and a NULL narrow-oop base; per the
// format string R12 (heap base) is then zero.
// The format comment says "byte" (it previously said "short/char", copied
// from the 16-bit rule) so printed assembly describes the store correctly.
instruct storeImmB0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7151 
// Store a byte immediate (operand class immI8) to memory: opcode C6 /0
// followed by the constant emitted through the Con8or32 encoding class.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7162 
// Store Aligned Packed Byte XMM register to memory
// Matches Store8B; the bytes come from the movq_st encoding class defined
// outside this section.
instruct storeA8B(memory mem, regD src) %{
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7171 
// Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C; shares the movq_st encoding class with the other packed
// stores.
instruct storeA4C(memory mem, regD src) %{
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7180 
// Store Aligned Packed Integer XMM register to memory
// Matches Store2I; shares the movq_st encoding class with the other packed
// stores.
instruct storeA2I(memory mem, regD src) %{
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7189 
// Store CMS card-mark Immediate
// Zero case: store the low byte of R12 instead of an immediate.  Only
// selectable with compressed oops and a NULL narrow-oop base; per the
// format string R12 (heap base) is then zero.  Cheaper (125) than the
// immediate form storeImmCM0 (150).
instruct storeImmCM0_reg(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7203 
// Store a zero card-mark byte as an immediate: opcode C6 /0 followed by
// the constant emitted through Con8or32.  Has no predicate, so it is
// always available as the fallback for StoreCM of zero.
instruct storeImmCM0(memory mem, immI0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7214 
// Store Aligned Packed Single Float XMM register to memory
// Matches Store2F; shares the movq_st encoding class with the other packed
// stores.
instruct storeA2F(memory mem, regD src) %{
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7223 
// Store Float
// Stores an XMM float register to memory (movss, opcode bytes F3 0F 11).
// The F3 prefix (OpcP) is emitted before the REX prefix.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
  ins_pipe(pipe_slow); // XXX
%}
7235 
// Store immediate Float value (it is faster than store from XMM register)
// Zero case: float 0.0 is written by storing the low 32 bits of R12.  Only
// selectable with compressed oops and a NULL narrow-oop base; per the
// format string R12 (heap base) is then zero.  Lowest cost (25) of the
// float stores.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7249 
// Store a float constant with an integer move: opcode C7 /0 followed by
// the constant's bit pattern as a 32-bit immediate (Con32F_as_bits), so no
// XMM register is needed.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}
7260 
// Store Double
// Stores an XMM double register to memory (movsd, opcode bytes F2 0F 11).
// The F2 prefix (OpcP) is emitted before the REX prefix.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
  ins_pipe(pipe_slow); // XXX
%}
7272 
// Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form: opcode C7 /0 with the wide REX encoding class (movq) and
// a 32-bit immediate.  Its predicate is the exact complement of storeD0's,
// so this rule is used only when the R12-is-zero form is unavailable.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}
7285 
// Store double 0.0 by storing R12 (64-bit).  Only selectable with
// compressed oops and a NULL narrow-oop base; per the format string R12
// (heap base) is then zero.  Lowest cost (25) of the double stores.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7298 
// Spill an int register to a stack slot (movl, opcode 0x89).
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
7309 
// Spill a long register to a stack slot (movq, opcode 0x89 with the wide
// REX encoding class).
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7320 
// Spill a pointer register to a stack slot (movq, opcode 0x89 with the
// wide REX encoding class).
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7331 
// Spill an XMM float register to a stack slot (movss, opcode bytes
// F3 0F 11).  The F3 prefix (OpcP) is emitted before the REX prefix.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow); // XXX
%}
7342 
// Spill an XMM double register to a stack slot (movsd, opcode bytes
// F2 0F 11).  The F2 prefix (OpcP) is emitted before the REX prefix.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow); // XXX
%}
7353 
7354 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int in place (bswapl).  Source and result
// share the same register (dst appears on both sides of the match rule).
// The register number is folded into the second opcode byte (0F C8+r).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
  ins_pipe( ialu_reg );
%}
7363 
// Reverse the byte order of a long in place (bswapq).  Same opcode bytes
// as the int form, but with the wide REX encoding class.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}

  opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
  ins_pipe( ialu_reg);
%}
7373 
// Reverse the two bytes of a char held in an int register.  A 32-bit bswap
// moves the swapped pair into the upper half; a logical right shift by 16
// brings it back down with the upper 16 bits zero.
// SHR with a non-zero count writes the condition codes, so the rule must
// declare KILL cr; otherwise live flags could be silently destroyed.
instruct bytes_reverse_char(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesC dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "shrl    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
7385 
// Reverse the two bytes of a short held in an int register.  A 32-bit
// bswap moves the swapped pair into the upper half; an arithmetic right
// shift by 16 brings it back down sign-extended.
// SAR with a non-zero count writes the condition codes, so the rule must
// declare KILL cr; otherwise live flags could be silently destroyed.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "sarl    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
7397 
// Fused int load + byte reverse: a movl from memory into dst (opcode 0x8B)
// immediately followed by a bswap of dst (0F C8+r, per the opcode
// comment).  Only the result register is modified.
instruct loadI_reversed(rRegI dst, memory src) %{
  match(Set dst (ReverseBytesI (LoadI src)));

  format %{ "bswap_movl $dst, $src" %}
  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
  ins_pipe( ialu_reg_mem );
%}
7406 
// Fused long load + byte reverse: a movq from memory into dst (opcode 0x8B
// with the wide REX encoding class) immediately followed by a 64-bit bswap
// of dst.  Only the result register is modified.
instruct loadL_reversed(rRegL dst, memory src) %{
  match(Set dst (ReverseBytesL (LoadL src)));

  format %{ "bswap_movq $dst, $src" %}
  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
  ins_pipe( ialu_reg_mem );
%}
7415 
// Fused byte reverse + int store.
// The value is copied into a TEMP register and swapped there.  Swapping
// the input register in place (as this rule used to do) is unsafe: src is
// only an input, so it may have other uses after the store, and it may
// also be one of the registers forming the destination address.  In both
// cases an in-place bswap would corrupt a live value.
instruct storeI_reversed(memory dst, rRegI src, rRegI tmp) %{
  match(Set dst (StoreI dst (ReverseBytesI  src)));
  effect(TEMP tmp);

  format %{ "movl    $tmp, $src\t# byte-reversed int store\n\t"
            "bswapl  $tmp\n\t"
            "movl    $dst, $tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ bswapl($tmp$$Register);
    __ movl($dst$$Address, $tmp$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
7424 
// Fused byte reverse + long store.
// The value is copied into a TEMP register and swapped there.  Swapping
// the input register in place (as this rule used to do) is unsafe: src is
// only an input, so it may have other uses after the store, and it may
// also be one of the registers forming the destination address.  In both
// cases an in-place bswap would corrupt a live value.
instruct storeL_reversed(memory dst, rRegL src, rRegL tmp) %{
  match(Set dst (StoreL dst (ReverseBytesL  src)));
  effect(TEMP tmp);

  format %{ "movq    $tmp, $src\t# byte-reversed long store\n\t"
            "bswapq  $tmp\n\t"
            "movq    $dst, $tmp" %}
  ins_encode %{
    __ movq($tmp$$Register, $src$$Register);
    __ bswapq($tmp$$Register);
    __ movq($dst$$Address, $tmp$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
7433 
7434 
7435 //---------- Zeros Count Instructions ------------------------------------------
7436 
// Count leading zeros of an int with a single lzcntl.  Only selectable
// when UseCountLeadingZerosInstruction is set; the bsr-based rule covers
// the other case.  Declares the condition codes as killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7448 
// Count leading zeros of an int without lzcnt (used when
// UseCountLeadingZerosInstruction is off).  bsr yields the index of the
// highest set bit; the answer is 31 minus that index.  For a zero input
// the index is forced to -1, which makes the result 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label have_index;
    __ bsrl(result, $src$$Register);
    // Zero flag clear means a set bit was found and result holds its index.
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
7473 
// Count leading zeros of a long with a single lzcntq; the result is an
// int register.  Only selectable when UseCountLeadingZerosInstruction is
// set.  Declares the condition codes as killed.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7485 
// Count leading zeros of a long without lzcnt (used when
// UseCountLeadingZerosInstruction is off).  bsr yields the index of the
// highest set bit; the answer is 63 minus that index.  For a zero input
// the index is forced to -1, which makes the result 64.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 63" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label have_index;
    __ bsrq(result, $src$$Register);
    // Zero flag clear means a set bit was found and result holds its index.
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerLong - 1) - index
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
7510 
// Count trailing zeros of an int.  bsf yields the index of the lowest set
// bit, which is the answer for any non-zero input; a zero input is patched
// to BitsPerInt (32).
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label found_bit;
    __ bsfl(result, $src$$Register);
    // Zero flag clear means a set bit was found; result is already final.
    __ jccb(Assembler::notZero, found_bit);
    __ movl(result, BitsPerInt);
    __ bind(found_bit);
  %}
  ins_pipe(ialu_reg);
%}
7529 
// Count trailing zeros of a long (result is an int register).  bsf yields
// the index of the lowest set bit, which is the answer for any non-zero
// input; a zero input is patched to BitsPerLong (64).
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 64\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label found_bit;
    __ bsfq(result, $src$$Register);
    // Zero flag clear means a set bit was found; result is already final.
    __ jccb(Assembler::notZero, found_bit);
    __ movl(result, BitsPerLong);
    __ bind(found_bit);
  %}
  ins_pipe(ialu_reg);
%}
7548 
7549 
7550 //---------- Population Count Instructions -------------------------------------
7551 
// Population count of an int register (popcntl).  Only selectable when
// UsePopCountInstruction is set.
// NOTE(review): no KILL cr is declared although POPCNT writes the
// condition codes - confirm whether an effect is needed here.
instruct popCountI(rRegI dst, rRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7562 
// Population count of an int loaded directly from memory (popcntl with a
// memory operand), folding the LoadI into the instruction.  Only
// selectable when UsePopCountInstruction is set.
// NOTE(review): no KILL cr is declared although POPCNT writes the
// condition codes - confirm whether an effect is needed here.
instruct popCountI_mem(rRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
7573 
// Note: Long.bitCount(long) returns an int.
// Population count of a long register (popcntq) into an int register.
// Only selectable when UsePopCountInstruction is set.
// NOTE(review): no KILL cr is declared although POPCNT writes the
// condition codes - confirm whether an effect is needed here.
instruct popCountL(rRegI dst, rRegL src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7585 
// Note: Long.bitCount(long) returns an int.
// Population count of a long loaded directly from memory (popcntq with a
// memory operand), folding the LoadL into the instruction.  Only
// selectable when UsePopCountInstruction is set.
// NOTE(review): no KILL cr is declared although POPCNT writes the
// condition codes - confirm whether an effect is needed here.
instruct popCountL_mem(rRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
7597 
7598 
7599 //----------MemBar Instructions-----------------------------------------------
7600 // Memory barrier flavors
7601 
// Acquire barrier: emits no code (empty encoding, size 0, cost 0).
instruct membar_acquire()
%{
  match(MemBarAcquire);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7612 
// Acquire barrier that follows a FastLock (per the predicate and format
// string).  Also emits no code; it differs from membar_acquire only in its
// predicate and the text printed for it.
instruct membar_acquire_lock()
%{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7624 
// Release barrier: emits no code (empty encoding, size 0, cost 0).
instruct membar_release()
%{
  match(MemBarRelease);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7635 
// Release barrier that precedes a FastUnlock (per the predicate and format
// string).  Also emits no code; it differs from membar_release only in its
// predicate and the text printed for it.
instruct membar_release_lock()
%{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7647 
// Volatile (StoreLoad) barrier.  The code is produced by the assembler's
// membar(StoreLoad) helper.  The format template prints a locked add to
// the top of stack on multiprocessor systems and an empty encoding
// otherwise, mirroring what the helper is expected to emit.  Condition
// codes are declared killed, and the high cost (400) lets the zero-cost
// unnecessary_membar_volatile rule win whenever its predicate holds.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{ 
    $$template
    if (os::is_MP()) {
      $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
7666 
// Volatile barrier that the matcher has determined to be redundant
// (Matcher::post_store_load_barrier(n) holds).  Emits no code.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7678 
7679 //----------Move Instructions--------------------------------------------------
7680 
// Reinterpret a long as a pointer: a 64-bit register copy (movq per the
// format) via the enc_copy_wide encoding class.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode(enc_copy_wide(dst, src));
  ins_pipe(ialu_reg_reg); // XXX
%}
7689 
// Reinterpret a pointer as a long: a 64-bit register copy (movq per the
// format) via the enc_copy_wide encoding class.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode(enc_copy_wide(dst, src));
  ins_pipe(ialu_reg_reg); // XXX
%}
7698 
7699 
// Convert oop pointer into compressed form
// General case, used when the node's type is not known to be non-null.
// The oop is first copied into the destination (unless it is already
// there) and then compressed in place.  Condition codes are killed.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    if (narrow != wide) {
      __ movq(narrow, wide);
    }
    __ encode_heap_oop(narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
7716 
// EncodeP variant selected when the node's pointer type is TypePtr::NotNull;
// delegates to the two-register encode_heap_oop_not_null assembler routine.
7717 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7718   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7719   match(Set dst (EncodeP src));
7720   effect(KILL cr);
7721   format %{ "encode_heap_oop_not_null $dst,$src" %}
7722   ins_encode %{
7723     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7724   %}
7725   ins_pipe(ialu_reg_long);
7726 %}
7727 
// DecodeN for values whose oop type is neither NotNull nor Constant:
// copy the narrow oop into $dst if needed, then decode it in place.
7728 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7729   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7730             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7731   match(Set dst (DecodeN src));
7732   effect(KILL cr);
7733   format %{ "decode_heap_oop $dst,$src" %}
7734   ins_encode %{
7735     Register dst_reg = $dst$$Register;
7736     Register src_reg = $src$$Register;
7737     if (dst_reg != src_reg) {
7738       __ movq(dst_reg, src_reg);  // decode operates on a single register
7739     }
7740     __ decode_heap_oop(dst_reg);
7741   %}
7742   ins_pipe(ialu_reg_long);
7743 %}
7744 
// DecodeN variant for oop types that are NotNull or Constant. No flags effect is declared.
// Uses the one-register decode when source and destination coincide, else the two-register form.
7745 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
7746   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7747             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7748   match(Set dst (DecodeN src));
7749   format %{ "decode_heap_oop_not_null $dst,$src" %}
7750   ins_encode %{
7751     Register dst_reg = $dst$$Register;
7752     Register src_reg = $src$$Register;
7753     if (dst_reg == src_reg) {
7754       __ decode_heap_oop_not_null(dst_reg);
7755     } else {
7756       __ decode_heap_oop_not_null(dst_reg, src_reg);
7757     }
7758   %}
7759   ins_pipe(ialu_reg_long);
7760 %}
7761 
7762 
7763 //----------Conditional Move---------------------------------------------------
7764 // Jump through a table indexed by (switch_val << shift).
7765 // Dummy instruction for generating temp registers: predicate(false) below means it is never selected.
// NOTE(review): the temp is an rRegI although the format shows a leaq into it - confirm intended.
7766 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7767   match(Jump (LShiftL switch_val shift));
7768   ins_cost(350);
7769   predicate(false);
7770   effect(TEMP dest);
7771 
7772   format %{ "leaq    $dest, table_base\n\t"
7773             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7774   ins_encode(jump_enc_offset(switch_val, shift, dest));
7775   ins_pipe(pipe_jmp);
7776   ins_pc_relative(1);
7777 %}
7778 
// Jump through a table indexed by (switch_val << shift) + offset; dest is a TEMP register
// that the format string loads with table_base.
// NOTE(review): the temp is an rRegI although the format shows a leaq into it - confirm intended.
7779 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7780   match(Jump (AddL (LShiftL switch_val shift) offset));
7781   ins_cost(350);
7782   effect(TEMP dest);
7783 
7784   format %{ "leaq    $dest, table_base\n\t"
7785             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7786   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7787   ins_pipe(pipe_jmp);
7788   ins_pc_relative(1);
7789 %}
7790 
// Plain table jump: switch_val is used directly as the table offset; dest is a TEMP register
// that the format string loads with table_base.
// NOTE(review): the temp is an rRegI although the format shows a leaq into it - confirm intended.
7791 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7792   match(Jump switch_val);
7793   ins_cost(350);
7794   effect(TEMP dest);
7795 
7796   format %{ "leaq    $dest, table_base\n\t"
7797             "jmp     [$dest + $switch_val]\n\t" %}
7798   ins_encode(jump_enc(switch_val, dest));
7799   ins_pipe(pipe_jmp);
7800   ins_pc_relative(1);
7801 %}
7802 
7803 // Conditional move: int, register source, signed-compare flags (cmpOp / rFlagsReg).
// dst appears on both sides of the match, so it is the kept value as well as the result.
7804 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7805 %{
7806   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7807 
7808   ins_cost(200); // XXX
7809   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7810   opcode(0x0F, 0x40);
7811   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7812   ins_pipe(pipe_cmov_reg);
7813 %}
7814 
// Conditional move: int, register source, unsigned-compare flags (cmpOpU / rFlagsRegU).
// Same opcode and encoding pieces as cmovI_reg; only the operand classes differ.
7815 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7816   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7817 
7818   ins_cost(200); // XXX
7819   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7820   opcode(0x0F, 0x40);
7821   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7822   ins_pipe(pipe_cmov_reg);
7823 %}
7824 
// Conditional move: int, register source, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovI_regU with the same operands.
7825 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7826   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7827   ins_cost(200);
7828   expand %{
7829     cmovI_regU(cop, cr, dst, src);
7830   %}
7831 %}
7832 
7833 // Conditional move: int loaded from memory (LoadI folded into the match), signed-compare flags.
7834 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7835   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7836 
7837   ins_cost(250); // XXX
7838   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7839   opcode(0x0F, 0x40);
7840   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7841   ins_pipe(pipe_cmov_mem);
7842 %}
7843 
7844 // Conditional move: int loaded from memory (LoadI folded into the match), unsigned-compare flags.
7845 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7846 %{
7847   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7848 
7849   ins_cost(250); // XXX
7850   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7851   opcode(0x0F, 0x40);
7852   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7853   ins_pipe(pipe_cmov_mem);
7854 %}
7855 
// Conditional move: int from memory, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovI_memU with the same operands.
7856 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7857   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7858   ins_cost(250);
7859   expand %{
7860     cmovI_memU(cop, cr, dst, src);
7861   %}
7862 %}
7863 
7864 // Conditional move: compressed pointer (rRegN), register source, signed-compare flags.
// Uses the non-wide REX_reg_reg prefix encoding; the format shows cmovl.
7865 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7866 %{
7867   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7868 
7869   ins_cost(200); // XXX
7870   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7871   opcode(0x0F, 0x40);
7872   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7873   ins_pipe(pipe_cmov_reg);
7874 %}
7875 
7876 // Conditional move: compressed pointer (rRegN), register source, unsigned-compare flags.
7877 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7878 %{
7879   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7880 
7881   ins_cost(200); // XXX
7882   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7883   opcode(0x0F, 0x40);
7884   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7885   ins_pipe(pipe_cmov_reg);
7886 %}
7887 
// Conditional move: compressed pointer, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovN_regU with the same operands.
7888 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7889   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7890   ins_cost(200);
7891   expand %{
7892     cmovN_regU(cop, cr, dst, src);
7893   %}
7894 %}
7895 
7896 // Conditional move: pointer, register source, signed-compare flags.
// Uses the REX_reg_reg_wide prefix encoding; the format shows cmovq.
7897 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7898 %{
7899   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7900 
7901   ins_cost(200); // XXX
7902   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7903   opcode(0x0F, 0x40);
7904   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7905   ins_pipe(pipe_cmov_reg);  // XXX
7906 %}
7907 
7908 // Conditional move: pointer, register source, unsigned-compare flags.
7909 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7910 %{
7911   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7912 
7913   ins_cost(200); // XXX
7914   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7915   opcode(0x0F, 0x40);
7916   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7917   ins_pipe(pipe_cmov_reg); // XXX
7918 %}
7919 
// Conditional move: pointer, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovP_regU with the same operands.
7920 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7921   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7922   ins_cost(200);
7923   expand %{
7924     cmovP_regU(cop, cr, dst, src);
7925   %}
7926 %}
7927 
7928 // DISABLED: Requires the ADLC to emit a bottom_type call that
7929 // correctly meets the two pointer arguments; one is an incoming
7930 // register but the other is a memory operand.  ALSO appears to
7931 // be buggy with implicit null checks.
7932 //
7933 //// Conditional move
7934 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7935 //%{
7936 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7937 //  ins_cost(250);
7938 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7939 //  opcode(0x0F,0x40);
7940 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7941 //  ins_pipe( pipe_cmov_mem );
7942 //%}
7943 //
7944 //// Conditional move
7945 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7946 //%{
7947 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7948 //  ins_cost(250);
7949 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7950 //  opcode(0x0F,0x40);
7951 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7952 //  ins_pipe( pipe_cmov_mem );
7953 //%}
7954 
// Conditional move: long, register source, signed-compare flags.
// Uses the REX_reg_reg_wide prefix encoding; the format shows cmovq.
7955 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7956 %{
7957   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7958 
7959   ins_cost(200); // XXX
7960   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7961   opcode(0x0F, 0x40);
7962   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7963   ins_pipe(pipe_cmov_reg);  // XXX
7964 %}
7965 
// Conditional move: long loaded from memory (LoadL folded into the match), signed-compare flags.
// NOTE(review): cost is 200 here, same as the register form, whereas cmovI_mem uses 250 - confirm intended.
7966 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7967 %{
7968   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7969 
7970   ins_cost(200); // XXX
7971   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7972   opcode(0x0F, 0x40);
7973   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7974   ins_pipe(pipe_cmov_mem);  // XXX
7975 %}
7976 
// Conditional move: long, register source, unsigned-compare flags.
7977 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7978 %{
7979   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7980 
7981   ins_cost(200); // XXX
7982   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7983   opcode(0x0F, 0x40);
7984   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7985   ins_pipe(pipe_cmov_reg); // XXX
7986 %}
7987 
// Conditional move: long, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovL_regU with the same operands.
7988 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7989   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7990   ins_cost(200);
7991   expand %{
7992     cmovL_regU(cop, cr, dst, src);
7993   %}
7994 %}
7995 
// Conditional move: long loaded from memory (LoadL folded into the match), unsigned-compare flags.
// NOTE(review): cost is 200 here, same as the register form, whereas cmovI_memU uses 250 - confirm intended.
7996 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7997 %{
7998   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7999 
8000   ins_cost(200); // XXX
8001   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8002   opcode(0x0F, 0x40);
8003   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8004   ins_pipe(pipe_cmov_mem); // XXX
8005 %}
8006 
// Conditional move: long from memory, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovL_memU with the same operands.
8007 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
8008   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8009   ins_cost(200);
8010   expand %{
8011     cmovL_memU(cop, cr, dst, src);
8012   %}
8013 %}
8014 
// Conditional move: float, signed-compare flags. Per the format string this is a branch
// around a movss (jump on the negated condition to "skip"), encoded by enc_cmovf_branch.
8015 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
8016 %{
8017   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8018 
8019   ins_cost(200); // XXX
8020   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8021             "movss     $dst, $src\n"
8022     "skip:" %}
8023   ins_encode(enc_cmovf_branch(cop, dst, src));
8024   ins_pipe(pipe_slow);
8025 %}
8026 
8027 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8028 // %{
8029 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8030 
8031 //   ins_cost(200); // XXX
8032 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8033 //             "movss     $dst, $src\n"
8034 //     "skip:" %}
8035 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8036 //   ins_pipe(pipe_slow);
8037 // %}
8038 
// Conditional move: float, unsigned-compare flags. Same branch-around-movss shape and
// the same enc_cmovf_branch encoding as cmovF_reg.
8039 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
8040 %{
8041   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8042 
8043   ins_cost(200); // XXX
8044   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8045             "movss     $dst, $src\n"
8046     "skip:" %}
8047   ins_encode(enc_cmovf_branch(cop, dst, src));
8048   ins_pipe(pipe_slow);
8049 %}
8050 
// Conditional move: float, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovF_regU with the same operands.
8051 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
8052   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8053   ins_cost(200);
8054   expand %{
8055     cmovF_regU(cop, cr, dst, src);
8056   %}
8057 %}
8058 
// Conditional move: double, signed-compare flags. Per the format string this is a branch
// around a movsd (jump on the negated condition to "skip"), encoded by enc_cmovd_branch.
8059 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
8060 %{
8061   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8062 
8063   ins_cost(200); // XXX
8064   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8065             "movsd     $dst, $src\n"
8066     "skip:" %}
8067   ins_encode(enc_cmovd_branch(cop, dst, src));
8068   ins_pipe(pipe_slow);
8069 %}
8070 
// Conditional move: double, unsigned-compare flags. Same branch-around-movsd shape and
// the same enc_cmovd_branch encoding as cmovD_reg.
8071 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
8072 %{
8073   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8074 
8075   ins_cost(200); // XXX
8076   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8077             "movsd     $dst, $src\n"
8078     "skip:" %}
8079   ins_encode(enc_cmovd_branch(cop, dst, src));
8080   ins_pipe(pipe_slow);
8081 %}
8082 
// Conditional move: double, cmpOpUCF / rFlagsRegUCF operands.
// No encoding of its own: expands to cmovD_regU with the same operands.
8083 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
8084   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8085   ins_cost(200);
8086   expand %{
8087     cmovD_regU(cop, cr, dst, src);
8088   %}
8089 %}
8090 
8091 //----------Arithmetic Instructions--------------------------------------------
8092 //----------Addition Instructions----------------------------------------------
8093 
// Int add, register += register (two-address: dst is also the first input). Kills flags.
8094 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8095 %{
8096   match(Set dst (AddI dst src));
8097   effect(KILL cr);
8098 
8099   format %{ "addl    $dst, $src\t# int" %}
8100   opcode(0x03);
8101   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8102   ins_pipe(ialu_reg_reg);
8103 %}
8104 
// Int add, register += immediate. Kills flags.
// Encoded via OpcSErm + Con8or32 (presumably picks an 8- or 32-bit immediate form - confirm in the encoding block).
8105 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8106 %{
8107   match(Set dst (AddI dst src));
8108   effect(KILL cr);
8109 
8110   format %{ "addl    $dst, $src\t# int" %}
8111   opcode(0x81, 0x00); /* /0 id */
8112   ins_encode(OpcSErm(dst, src), Con8or32(src));
8113   ins_pipe( ialu_reg );
8114 %}
8115 
// Int add, register += memory operand (LoadI folded into the match). Kills flags.
8116 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8117 %{
8118   match(Set dst (AddI dst (LoadI src)));
8119   effect(KILL cr);
8120 
8121   ins_cost(125); // XXX
8122   format %{ "addl    $dst, $src\t# int" %}
8123   opcode(0x03);
8124   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8125   ins_pipe(ialu_reg_mem);
8126 %}
8127 
// Int read-modify-write add: memory += register (load, add and store of the same address
// folded into one rule). Note the encoding helpers take (src, dst): the register is the reg field.
8128 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8129 %{
8130   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8131   effect(KILL cr);
8132 
8133   ins_cost(150); // XXX
8134   format %{ "addl    $dst, $src\t# int" %}
8135   opcode(0x01); /* Opcode 01 /r */
8136   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8137   ins_pipe(ialu_mem_reg);
8138 %}
8139 
// Int read-modify-write add: memory += immediate. The /0 opcode extension is passed
// explicitly to RM_opc_mem rather than through opcode().
8140 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
8141 %{
8142   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8143   effect(KILL cr);
8144 
8145   ins_cost(125); // XXX
8146   format %{ "addl    $dst, $src\t# int" %}
8147   opcode(0x81); /* Opcode 81 /0 id */
8148   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8149   ins_pipe(ialu_mem_imm);
8150 %}
8151 
// Int increment of a register: AddI with the immI1 constant operand, enabled only when UseIncDec is set.
// The immediate is matched but not encoded; the instruction is FF /0.
8152 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8153 %{
8154   predicate(UseIncDec);
8155   match(Set dst (AddI dst src));
8156   effect(KILL cr);
8157 
8158   format %{ "incl    $dst\t# int" %}
8159   opcode(0xFF, 0x00); // FF /0
8160   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8161   ins_pipe(ialu_reg);
8162 %}
8163 
// Int increment of a memory location (load/add/store of the same address with the immI1 operand),
// enabled only when UseIncDec is set. Encoded as FF /0 on the memory operand.
8164 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8165 %{
8166   predicate(UseIncDec);
8167   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8168   effect(KILL cr);
8169 
8170   ins_cost(125); // XXX
8171   format %{ "incl    $dst\t# int" %}
8172   opcode(0xFF); /* Opcode FF /0 */
8173   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8174   ins_pipe(ialu_mem_imm);
8175 %}
8176 
8177 // XXX why does that use AddI
// Int decrement of a register: matched as AddI with the immI_M1 operand, enabled only
// when UseIncDec is set. Encoded as FF /1.
8178 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8179 %{
8180   predicate(UseIncDec);
8181   match(Set dst (AddI dst src));
8182   effect(KILL cr);
8183 
8184   format %{ "decl    $dst\t# int" %}
8185   opcode(0xFF, 0x01); // FF /1
8186   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8187   ins_pipe(ialu_reg);
8188 %}
8189 
8190 // XXX why does that use AddI
// Int decrement of a memory location: matched as a load/AddI/store with the immI_M1 operand,
// enabled only when UseIncDec is set. Encoded as FF /1 on the memory operand.
8191 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8192 %{
8193   predicate(UseIncDec);
8194   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8195   effect(KILL cr);
8196 
8197   ins_cost(125); // XXX
8198   format %{ "decl    $dst\t# int" %}
8199   opcode(0xFF); /* Opcode FF /1 */
8200   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8201   ins_pipe(ialu_mem_imm);
8202 %}
8203 
// Three-address int add via lea: dst = src0 + src1 with distinct dst and src0.
// No flags effect is declared; a 0x67 prefix byte is emitted first (addr32 in the format).
8204 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8205 %{
8206   match(Set dst (AddI src0 src1));
8207 
8208   ins_cost(110);
8209   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8210   opcode(0x8D); /* 0x8D /r */
8211   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8212   ins_pipe(ialu_reg_reg);
8213 %}
8214 
// Long add, register += register (two-address). Kills flags. Wide REX prefix encoding.
8215 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8216 %{
8217   match(Set dst (AddL dst src));
8218   effect(KILL cr);
8219 
8220   format %{ "addq    $dst, $src\t# long" %}
8221   opcode(0x03);
8222   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8223   ins_pipe(ialu_reg_reg);
8224 %}
8225 
// Long add, register += immL32 immediate. Kills flags.
8226 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8227 %{
8228   match(Set dst (AddL dst src));
8229   effect(KILL cr);
8230 
8231   format %{ "addq    $dst, $src\t# long" %}
8232   opcode(0x81, 0x00); /* /0 id */
8233   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8234   ins_pipe( ialu_reg );
8235 %}
8236 
// Long add, register += memory operand (LoadL folded into the match). Kills flags.
8237 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8238 %{
8239   match(Set dst (AddL dst (LoadL src)));
8240   effect(KILL cr);
8241 
8242   ins_cost(125); // XXX
8243   format %{ "addq    $dst, $src\t# long" %}
8244   opcode(0x03);
8245   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8246   ins_pipe(ialu_reg_mem);
8247 %}
8248 
// Long read-modify-write add: memory += register. The encoding helpers take (src, dst):
// the register is the reg field, the memory operand is r/m.
8249 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8250 %{
8251   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8252   effect(KILL cr);
8253 
8254   ins_cost(150); // XXX
8255   format %{ "addq    $dst, $src\t# long" %}
8256   opcode(0x01); /* Opcode 01 /r */
8257   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8258   ins_pipe(ialu_mem_reg);
8259 %}
8260 
// Long read-modify-write add: memory += immL32 immediate. The /0 opcode extension is
// passed explicitly to RM_opc_mem.
8261 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8262 %{
8263   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8264   effect(KILL cr);
8265 
8266   ins_cost(125); // XXX
8267   format %{ "addq    $dst, $src\t# long" %}
8268   opcode(0x81); /* Opcode 81 /0 id */
8269   ins_encode(REX_mem_wide(dst),
8270              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8271   ins_pipe(ialu_mem_imm);
8272 %}
8273 
// Long increment of a register: AddL with the immL1 constant operand, enabled only when
// UseIncDec is set. Encoded as FF /0 with the wide REX prefix (incq in the format).
// dst is a long register (rRegL): the rule matches AddL and uses REX_reg_wide, in line
// with decL_rReg and the addL_rReg* rules. It was previously declared as rRegI.
8274 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8275 %{
8276   predicate(UseIncDec);
8277   match(Set dst (AddL dst src));
8278   effect(KILL cr);
8279 
8280   format %{ "incq    $dst\t# long" %}
8281   opcode(0xFF, 0x00); // FF /0
8282   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8283   ins_pipe(ialu_reg);
8284 %}
8285 
// Long increment of a memory location (load/AddL/store of the same address with the immL1
// operand), enabled only when UseIncDec is set. Encoded as FF /0 with a wide REX prefix.
8286 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8287 %{
8288   predicate(UseIncDec);
8289   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8290   effect(KILL cr);
8291 
8292   ins_cost(125); // XXX
8293   format %{ "incq    $dst\t# long" %}
8294   opcode(0xFF); /* Opcode FF /0 */
8295   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8296   ins_pipe(ialu_mem_imm);
8297 %}
8298 
8299 // XXX why does that use AddL
// Long decrement of a register: matched as AddL with the immL_M1 operand, enabled only
// when UseIncDec is set. Encoded as FF /1 with a wide REX prefix.
8300 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8301 %{
8302   predicate(UseIncDec);
8303   match(Set dst (AddL dst src));
8304   effect(KILL cr);
8305 
8306   format %{ "decq    $dst\t# long" %}
8307   opcode(0xFF, 0x01); // FF /1
8308   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8309   ins_pipe(ialu_reg);
8310 %}
8311 
8312 // XXX why does that use AddL
// Long decrement of a memory location: matched as a load/AddL/store with the immL_M1 operand,
// enabled only when UseIncDec is set. Encoded as FF /1 on the memory operand.
8313 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8314 %{
8315   predicate(UseIncDec);
8316   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8317   effect(KILL cr);
8318 
8319   ins_cost(125); // XXX
8320   format %{ "decq    $dst\t# long" %}
8321   opcode(0xFF); /* Opcode FF /1 */
8322   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8323   ins_pipe(ialu_mem_imm);
8324 %}
8325 
// Three-address long add via lea: dst = src0 + src1 with distinct dst and src0.
// No flags effect is declared.
8326 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8327 %{
8328   match(Set dst (AddL src0 src1));
8329 
8330   ins_cost(110);
8331   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8332   opcode(0x8D); /* 0x8D /r */
8333   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8334   ins_pipe(ialu_reg_reg);
8335 %}
8336 
// Pointer add, pointer register += long register (two-address). Kills flags.
8337 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8338 %{
8339   match(Set dst (AddP dst src));
8340   effect(KILL cr);
8341 
8342   format %{ "addq    $dst, $src\t# ptr" %}
8343   opcode(0x03);
8344   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8345   ins_pipe(ialu_reg_reg);
8346 %}
8347 
// Pointer add, pointer register += immL32 immediate. Kills flags.
8348 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8349 %{
8350   match(Set dst (AddP dst src));
8351   effect(KILL cr);
8352 
8353   format %{ "addq    $dst, $src\t# ptr" %}
8354   opcode(0x81, 0x00); /* /0 id */
8355   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8356   ins_pipe( ialu_reg );
8357 %}
8358 
8359 // XXX addP mem ops ????
8360 
// Three-address pointer add via lea: dst = src0 + src1 with distinct dst and src0.
// No flags effect is declared.
8361 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8362 %{
8363   match(Set dst (AddP src0 src1));
8364 
8365   ins_cost(110);
8366   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8367   opcode(0x8D); /* 0x8D /r */
8368   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8369   ins_pipe(ialu_reg_reg);
8370 %}
8371 
// CheckCastPP on a pointer register: in-place (dst is input and output), size 0, no code emitted.
8372 instruct checkCastPP(rRegP dst)
8373 %{
8374   match(Set dst (CheckCastPP dst));
8375 
8376   size(0);
8377   format %{ "# checkcastPP of $dst" %}
8378   ins_encode(/* empty encoding */);
8379   ins_pipe(empty);
8380 %}
8381 
// CastPP on a pointer register: in-place (dst is input and output), size 0, no code emitted.
8382 instruct castPP(rRegP dst)
8383 %{
8384   match(Set dst (CastPP dst));
8385 
8386   size(0);
8387   format %{ "# castPP of $dst" %}
8388   ins_encode(/* empty encoding */);
8389   ins_pipe(empty);
8390 %}
8391 
// CastII on an int register: in-place, size 0, cost 0, no code emitted.
8392 instruct castII(rRegI dst)
8393 %{
8394   match(Set dst (CastII dst));
8395 
8396   size(0);
8397   format %{ "# castII of $dst" %}
8398   ins_encode(/* empty encoding */);
8399   ins_cost(0);
8400   ins_pipe(empty);
8401 %}
8402 
8403 // LoadP-locked: same as a regular LoadP when used with compare-swap.
// Encoded as a wide-REX 0x8B load (movq in the format).
8404 instruct loadPLocked(rRegP dst, memory mem)
8405 %{
8406   match(Set dst (LoadPLocked mem));
8407 
8408   ins_cost(125); // XXX
8409   format %{ "movq    $dst, $mem\t# ptr locked" %}
8410   opcode(0x8B);
8411   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8412   ins_pipe(ialu_reg_mem); // XXX
8413 %}
8414 
8415 // LoadL-locked: same as a regular LoadL when used with compare-swap.
// Encoded as a wide-REX 0x8B load (movq in the format).
8416 instruct loadLLocked(rRegL dst, memory mem)
8417 %{
8418   match(Set dst (LoadLLocked mem));
8419 
8420   ins_cost(125); // XXX
8421   format %{ "movq    $dst, $mem\t# long locked" %}
8422   opcode(0x8B);
8423   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8424   ins_pipe(ialu_reg_mem); // XXX
8425 %}
8426 
8427 // Conditional-store of the updated heap-top.
8428 // Used during allocation of the shared heap.
8429 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// The result of the match is the flags register itself (Set cr ...); oldval is pinned to rax.
// NOTE(review): unlike storeIConditional / storeLConditional, no KILL oldval effect is
// declared here - confirm that this is intentional.
8430 
8431 instruct storePConditional(memory heap_top_ptr,
8432                            rax_RegP oldval, rRegP newval,
8433                            rFlagsReg cr)
8434 %{
8435   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8436  
8437   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8438             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8439   opcode(0x0F, 0xB1);
8440   ins_encode(lock_prefix,
8441              REX_reg_mem_wide(newval, heap_top_ptr),
8442              OpcP, OpcS,
8443              reg_mem(newval, heap_top_ptr));
8444   ins_pipe(pipe_cmpxchg);
8445 %}
8446 
8447 // Conditional-store of an int value.
8448 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match is the flags register; oldval is pinned to rax and declared killed.
8449 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8450 %{
8451   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8452   effect(KILL oldval);
8453 
8454   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8455   opcode(0x0F, 0xB1);
8456   ins_encode(lock_prefix,
8457              REX_reg_mem(newval, mem),
8458              OpcP, OpcS,
8459              reg_mem(newval, mem));
8460   ins_pipe(pipe_cmpxchg);
8461 %}
8462 
8463 // Conditional-store of a long value.
8464 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match is the flags register; oldval is pinned to rax and declared killed.
8465 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8466 %{
8467   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8468   effect(KILL oldval);
8469 
8470   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8471   opcode(0x0F, 0xB1);
8472   ins_encode(lock_prefix,
8473              REX_reg_mem_wide(newval, mem),
8474              OpcP, OpcS,
8475              reg_mem(newval, mem));
8476   ins_pipe(pipe_cmpxchg);
8477 %}
8478 
8479 
8480 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result in $res: a locked wide cmpxchg followed
// by sete and movzbl on $res (see the inline encoding comments). Kills flags and oldval (rax).
8481 instruct compareAndSwapP(rRegI res,
8482                          memory mem_ptr,
8483                          rax_RegP oldval, rRegP newval,
8484                          rFlagsReg cr)
8485 %{
8486   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8487   effect(KILL cr, KILL oldval);
8488 
8489   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8490             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8491             "sete    $res\n\t"
8492             "movzbl  $res, $res" %}
8493   opcode(0x0F, 0xB1);
8494   ins_encode(lock_prefix,
8495              REX_reg_mem_wide(newval, mem_ptr),
8496              OpcP, OpcS,
8497              reg_mem(newval, mem_ptr),
8498              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8499              REX_reg_breg(res, res), // movzbl
8500              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8501   ins_pipe( pipe_cmpxchg );
8502 %}
8503 
// Long compare-and-swap producing an int result in $res: a locked wide cmpxchg followed
// by sete and movzbl on $res (see the inline encoding comments). Kills flags and oldval (rax).
8504 instruct compareAndSwapL(rRegI res,
8505                          memory mem_ptr,
8506                          rax_RegL oldval, rRegL newval,
8507                          rFlagsReg cr)
8508 %{
8509   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8510   effect(KILL cr, KILL oldval);
8511 
8512   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8513             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8514             "sete    $res\n\t"
8515             "movzbl  $res, $res" %}
8516   opcode(0x0F, 0xB1);
8517   ins_encode(lock_prefix,
8518              REX_reg_mem_wide(newval, mem_ptr),
8519              OpcP, OpcS,
8520              reg_mem(newval, mem_ptr),
8521              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8522              REX_reg_breg(res, res), // movzbl
8523              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8524   ins_pipe( pipe_cmpxchg );
8525 %}
8526 
// Int compare-and-swap producing an int result in $res: a locked non-wide cmpxchg followed
// by sete and movzbl on $res (see the inline encoding comments). Kills flags and oldval (rax).
8527 instruct compareAndSwapI(rRegI res,
8528                          memory mem_ptr,
8529                          rax_RegI oldval, rRegI newval,
8530                          rFlagsReg cr)
8531 %{
8532   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8533   effect(KILL cr, KILL oldval);
8534 
8535   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8536             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8537             "sete    $res\n\t"
8538             "movzbl  $res, $res" %}
8539   opcode(0x0F, 0xB1);
8540   ins_encode(lock_prefix,
8541              REX_reg_mem(newval, mem_ptr),
8542              OpcP, OpcS,
8543              reg_mem(newval, mem_ptr),
8544              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8545              REX_reg_breg(res, res), // movzbl
8546              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8547   ins_pipe( pipe_cmpxchg );
8548 %}
8549 
8550 
// Compressed-pointer compare-and-swap producing an int result in $res: a locked non-wide
// cmpxchg followed by sete and movzbl on $res. Kills flags and oldval (rax).
8551 instruct compareAndSwapN(rRegI res,
8552                           memory mem_ptr,
8553                           rax_RegN oldval, rRegN newval,
8554                           rFlagsReg cr) %{
8555   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8556   effect(KILL cr, KILL oldval);
8557 
8558   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8559             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8560             "sete    $res\n\t"
8561             "movzbl  $res, $res" %}
8562   opcode(0x0F, 0xB1);
8563   ins_encode(lock_prefix,
8564              REX_reg_mem(newval, mem_ptr),
8565              OpcP, OpcS,
8566              reg_mem(newval, mem_ptr),
8567              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8568              REX_reg_breg(res, res), // movzbl
8569              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8570   ins_pipe( pipe_cmpxchg );
8571 %}
8572 
8573 //----------Subtraction Instructions-------------------------------------------
8574 
8575 // Integer Subtraction Instructions
// Int subtract, register -= register (two-address). Kills flags.
8576 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8577 %{
8578   match(Set dst (SubI dst src));
8579   effect(KILL cr);
8580 
8581   format %{ "subl    $dst, $src\t# int" %}
8582   opcode(0x2B);
8583   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8584   ins_pipe(ialu_reg_reg);
8585 %}
8586 
// Int subtract, register -= immediate. Kills flags. Opcode 81 with the /5 extension.
8587 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8588 %{
8589   match(Set dst (SubI dst src));
8590   effect(KILL cr);
8591 
8592   format %{ "subl    $dst, $src\t# int" %}
8593   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8594   ins_encode(OpcSErm(dst, src), Con8or32(src));
8595   ins_pipe(ialu_reg);
8596 %}
8597 
// Int subtract, register -= memory operand (LoadI folded into the match). Kills flags.
8598 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8599 %{
8600   match(Set dst (SubI dst (LoadI src)));
8601   effect(KILL cr);
8602 
8603   ins_cost(125);
8604   format %{ "subl    $dst, $src\t# int" %}
8605   opcode(0x2B);
8606   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8607   ins_pipe(ialu_reg_mem);
8608 %}
8609 
// Int read-modify-write subtract: memory -= register. The encoding helpers take (src, dst):
// the register is the reg field, the memory operand is r/m.
8610 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8611 %{
8612   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8613   effect(KILL cr);
8614 
8615   ins_cost(150);
8616   format %{ "subl    $dst, $src\t# int" %}
8617   opcode(0x29); /* Opcode 29 /r */
8618   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8619   ins_pipe(ialu_mem_reg);
8620 %}
8621 
// Int read-modify-write subtract: memory -= immediate. The /5 opcode extension is passed
// explicitly to RM_opc_mem.
8622 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8623 %{
8624   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8625   effect(KILL cr);
8626 
8627   ins_cost(125); // XXX
8628   format %{ "subl    $dst, $src\t# int" %}
8629   opcode(0x81); /* Opcode 81 /5 id */
8630   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8631   ins_pipe(ialu_mem_imm);
8632 %}
8633 
// Long subtract, register -= register (two-address). Kills flags. Wide REX prefix encoding.
8634 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8635 %{
8636   match(Set dst (SubL dst src));
8637   effect(KILL cr);
8638 
8639   format %{ "subq    $dst, $src\t# long" %}
8640   opcode(0x2B);
8641   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8642   ins_pipe(ialu_reg_reg);
8643 %}
8644 
// Long subtract, register -= immL32 immediate. Kills flags. Opcode 81 with the /5 extension,
// encoded through the wide OpcSErm_wide helper (subq in the format).
// dst is a long register (rRegL): the rule matches SubL, in line with subL_rReg and
// addL_rReg_imm. It was previously declared as rRegI.
8645 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8646 %{
8647   match(Set dst (SubL dst src));
8648   effect(KILL cr);
8649 
8650   format %{ "subq    $dst, $src\t# long" %}
8651   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8652   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8653   ins_pipe(ialu_reg);
8654 %}
8655 
8656 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8657 %{  // long subtract, register -= memory: dst = dst - LoadL(src)
8658   match(Set dst (SubL dst (LoadL src)));
8659   effect(KILL cr);  // flags register is clobbered
8660 
8661   ins_cost(125);
8662   format %{ "subq    $dst, $src\t# long" %}
8663   opcode(0x2B);
8664   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8665   ins_pipe(ialu_reg_mem);
8666 %}
8667 
8668 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8669 %{  // long subtract, memory -= register: store(dst, LoadL(dst) - src)
8670   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8671   effect(KILL cr);  // flags register is clobbered
8672 
8673   ins_cost(150);
8674   format %{ "subq    $dst, $src\t# long" %}
8675   opcode(0x29); /* Opcode 29 /r */
8676   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
8677   ins_pipe(ialu_mem_reg);
8678 %}
8679 
8680 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8681 %{  // long subtract, memory -= 32-bit immediate: store(dst, LoadL(dst) - src)
8682   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8683   effect(KILL cr);  // flags register is clobbered
8684 
8685   ins_cost(125); // XXX
8686   format %{ "subq    $dst, $src\t# long" %}
8687   opcode(0x81); /* Opcode 81 /5 id */
8688   ins_encode(REX_mem_wide(dst),
8689              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // 0x05 is the /5 opcode extension
8690   ins_pipe(ialu_mem_imm);
8691 %}
8692 
8693 // Subtract from a pointer: matches a pointer plus a negated int, AddP(dst, 0 - src)
8694 // XXX hmpf???  NOTE(review): src is an int register (rRegI) used in a wide subq -- confirm the upper 32 bits of src are well defined here
8695 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8696 %{
8697   match(Set dst (AddP dst (SubI zero src)));
8698   effect(KILL cr);  // flags register is clobbered
8699 
8700   format %{ "subq    $dst, $src\t# ptr - int" %}
8701   opcode(0x2B);
8702   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8703   ins_pipe(ialu_reg_reg);
8704 %}
8705 
8706 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8707 %{  // int negate in a register: the ideal graph expresses -dst as (0 - dst)
8708   match(Set dst (SubI zero dst));
8709   effect(KILL cr);  // flags register is clobbered
8710 
8711   format %{ "negl    $dst\t# int" %}
8712   opcode(0xF7, 0x03);  // Opcode F7 /3
8713   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8714   ins_pipe(ialu_reg);
8715 %}
8716 
8717 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8718 %{  // int negate in memory: store(dst, 0 - LoadI(dst))
8719   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8720   effect(KILL cr);  // flags register is clobbered
8721 
8722   format %{ "negl    $dst\t# int" %}
8723   opcode(0xF7, 0x03);  // Opcode F7 /3
8724   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8725   ins_pipe(ialu_reg);  // NOTE(review): memory form uses the register pipe class -- confirm intended
8726 %}
8727 
8728 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8729 %{  // long negate in a register: the ideal graph expresses -dst as (0L - dst)
8730   match(Set dst (SubL zero dst));
8731   effect(KILL cr);  // flags register is clobbered
8732 
8733   format %{ "negq    $dst\t# long" %}
8734   opcode(0xF7, 0x03);  // Opcode F7 /3
8735   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8736   ins_pipe(ialu_reg);
8737 %}
8738 
8739 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8740 %{  // long negate in memory: store(dst, 0L - LoadL(dst))
8741   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8742   effect(KILL cr);  // flags register is clobbered
8743 
8744   format %{ "negq    $dst\t# long" %}
8745   opcode(0xF7, 0x03);  // Opcode F7 /3
8746   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8747   ins_pipe(ialu_reg);  // NOTE(review): memory form uses the register pipe class -- confirm intended
8748 %}
8749 
8750 
8751 //----------Multiplication/Division Instructions-------------------------------
8752 // Integer Multiplication Instructions
8753 // Multiply Register
8754 
8755 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8756 %{  // int multiply, register *= register: dst = dst * src
8757   match(Set dst (MulI dst src));
8758   effect(KILL cr);  // flags register is clobbered
8759 
8760   ins_cost(300);
8761   format %{ "imull   $dst, $src\t# int" %}
8762   opcode(0x0F, 0xAF);  // two-byte opcode: OpcP then OpcS in ins_encode
8763   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8764   ins_pipe(ialu_reg_reg_alu0);
8765 %}
8766 
8767 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8768 %{  // int multiply by immediate, three-operand form: dst = src * imm (dst need not equal src)
8769   match(Set dst (MulI src imm));
8770   effect(KILL cr);  // flags register is clobbered
8771 
8772   ins_cost(300);
8773   format %{ "imull   $dst, $src, $imm\t# int" %}
8774   opcode(0x69); /* 69 /r id */
8775   ins_encode(REX_reg_reg(dst, src),
8776              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8777   ins_pipe(ialu_reg_reg_alu0);
8778 %}
8779 
8780 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8781 %{  // int multiply, register *= memory: dst = dst * LoadI(src)
8782   match(Set dst (MulI dst (LoadI src)));
8783   effect(KILL cr);  // flags register is clobbered
8784 
8785   ins_cost(350);
8786   format %{ "imull   $dst, $src\t# int" %}
8787   opcode(0x0F, 0xAF);  // two-byte opcode: OpcP then OpcS in ins_encode
8788   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8789   ins_pipe(ialu_reg_mem_alu0);
8790 %}
8791 
8792 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8793 %{  // int multiply of a memory operand by an immediate: dst = LoadI(src) * imm
8794   match(Set dst (MulI (LoadI src) imm));
8795   effect(KILL cr);  // flags register is clobbered
8796 
8797   ins_cost(300);
8798   format %{ "imull   $dst, $src, $imm\t# int" %}
8799   opcode(0x69); /* 69 /r id */
8800   ins_encode(REX_reg_mem(dst, src),
8801              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8802   ins_pipe(ialu_reg_mem_alu0);
8803 %}
8804 
8805 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8806 %{  // long multiply, register *= register: dst = dst * src
8807   match(Set dst (MulL dst src));
8808   effect(KILL cr);  // flags register is clobbered
8809 
8810   ins_cost(300);
8811   format %{ "imulq   $dst, $src\t# long" %}
8812   opcode(0x0F, 0xAF);  // two-byte opcode: OpcP then OpcS in ins_encode
8813   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8814   ins_pipe(ialu_reg_reg_alu0);
8815 %}
8816 
8817 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8818 %{  // long multiply by a 32-bit immediate, three-operand form: dst = src * imm
8819   match(Set dst (MulL src imm));
8820   effect(KILL cr);  // flags register is clobbered
8821 
8822   ins_cost(300);
8823   format %{ "imulq   $dst, $src, $imm\t# long" %}
8824   opcode(0x69); /* 69 /r id */
8825   ins_encode(REX_reg_reg_wide(dst, src),
8826              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8827   ins_pipe(ialu_reg_reg_alu0);
8828 %}
8829 
8830 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8831 %{  // long multiply, register *= memory: dst = dst * LoadL(src)
8832   match(Set dst (MulL dst (LoadL src)));
8833   effect(KILL cr);  // flags register is clobbered
8834 
8835   ins_cost(350);
8836   format %{ "imulq   $dst, $src\t# long" %}
8837   opcode(0x0F, 0xAF);  // two-byte opcode: OpcP then OpcS in ins_encode
8838   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8839   ins_pipe(ialu_reg_mem_alu0);
8840 %}
8841 
8842 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8843 %{  // long multiply of a memory operand by a 32-bit immediate: dst = LoadL(src) * imm
8844   match(Set dst (MulL (LoadL src) imm));
8845   effect(KILL cr);  // flags register is clobbered
8846 
8847   ins_cost(300);
8848   format %{ "imulq   $dst, $src, $imm\t# long" %}
8849   opcode(0x69); /* 69 /r id */
8850   ins_encode(REX_reg_mem_wide(dst, src),
8851              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8852   ins_pipe(ialu_reg_mem_alu0);
8853 %}
8854 
8855 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8856 %{  // high half of a long multiply (MulHiL): result operand is pinned to the rdx register class
8857   match(Set dst (MulHiL src rax));
8858   effect(USE_KILL rax, KILL cr);  // rax is an input that is destroyed; flags are clobbered
8859 
8860   ins_cost(300);
8861   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8862   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8863   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only src is encoded; rax/rdx do not appear in the encoding
8864   ins_pipe(ialu_reg_reg_alu0);
8865 %}
8866 
8867 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8868                    rFlagsReg cr)
8869 %{  // int divide: rax = rax / div; the divisor is kept out of rax and rdx by its register class
8870   match(Set rax (DivI rax div));
8871   effect(KILL rdx, KILL cr);  // rdx and the flags register are clobbered
8872 
8873   ins_cost(30*100+10*100); // XXX
8874   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8875             "jne,s   normal\n\t"
8876             "xorl    rdx, rdx\n\t"
8877             "cmpl    $div, -1\n\t"
8878             "je,s    done\n"
8879     "normal: cdql\n\t"
8880             "idivl   $div\n"
8881     "done:"        %}
8882   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8883   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // per the format text, the 0x80000000 / -1 case skips the idivl; presumably cdql_enc emits that guard and the cdql -- confirm in its enc_class
8884   ins_pipe(ialu_reg_reg_alu0);
8885 %}
8886 
8887 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8888                    rFlagsReg cr)
8889 %{  // long divide: rax = rax / div; the divisor is kept out of rax and rdx by its register class
8890   match(Set rax (DivL rax div));
8891   effect(KILL rdx, KILL cr);  // rdx and the flags register are clobbered
8892 
8893   ins_cost(30*100+10*100); // XXX
8894   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8895             "cmpq    rax, rdx\n\t"
8896             "jne,s   normal\n\t"
8897             "xorl    rdx, rdx\n\t"
8898             "cmpq    $div, -1\n\t"
8899             "je,s    done\n"
8900     "normal: cdqq\n\t"
8901             "idivq   $div\n"
8902     "done:"        %}
8903   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8904   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // per the format text, the 0x8000000000000000 / -1 case skips the idivq; presumably cdqq_enc emits that guard -- confirm in its enc_class
8905   ins_pipe(ialu_reg_reg_alu0);
8906 %}
8907 
8908 // Integer DIVMOD with Register, both quotient and mod results (matches a single DivModI node, no Set)
8909 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8910                              rFlagsReg cr)
8911 %{
8912   match(DivModI rax div);
8913   effect(KILL cr);  // only the flags are killed here; unlike divI_rReg/modI_rReg, neither rax nor rdx is listed as KILL
8914 
8915   ins_cost(30*100+10*100); // XXX
8916   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8917             "jne,s   normal\n\t"
8918             "xorl    rdx, rdx\n\t"
8919             "cmpl    $div, -1\n\t"
8920             "je,s    done\n"
8921     "normal: cdql\n\t"
8922             "idivl   $div\n"
8923     "done:"        %}
8924   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8925   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // same encoding sequence as divI_rReg
8926   ins_pipe(pipe_slow);
8927 %}
8928 
8929 // Long DIVMOD with Register, both quotient and mod results (matches a single DivModL node, no Set)
8930 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8931                              rFlagsReg cr)
8932 %{
8933   match(DivModL rax div);
8934   effect(KILL cr);  // only the flags are killed here; unlike divL_rReg/modL_rReg, neither rax nor rdx is listed as KILL
8935 
8936   ins_cost(30*100+10*100); // XXX
8937   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8938             "cmpq    rax, rdx\n\t"
8939             "jne,s   normal\n\t"
8940             "xorl    rdx, rdx\n\t"
8941             "cmpq    $div, -1\n\t"
8942             "je,s    done\n"
8943     "normal: cdqq\n\t"
8944             "idivq   $div\n"
8945     "done:"        %}
8946   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8947   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // same encoding sequence as divL_rReg
8948   ins_pipe(pipe_slow);
8949 %}
8950 
8951 //----------- DivL-By-Constant-Expansions--------------------------------------
8952 // DivI cases are handled by the compiler
8953 
8954 // Magic constant, reciprocal of 10 (expand-only helper: no match rule, instantiated by divL_10)
8955 instruct loadConL_0x6666666666666667(rRegL dst)
8956 %{
8957   effect(DEF dst);  // dst is written only
8958 
8959   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8960   ins_encode(load_immL(dst, 0x6666666666666667));  // the format text above must show this same 16-digit constant
8961   ins_pipe(ialu_reg);
8962 %}
8963 
8964 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8965 %{  // expand-only helper (no match rule): same F7 /5 encoding as mulHiL_rReg, used by divL_10
8966   effect(DEF dst, USE src, USE_KILL rax, KILL cr);  // dst (rdx class) is defined, rax is consumed and destroyed, flags clobbered
8967 
8968   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8969   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8970   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8971   ins_pipe(ialu_reg_reg_alu0);
8972 %}
8973 
8974 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8975 %{  // expand-only helper (no match rule): arithmetic right shift of dst by the fixed count 63
8976   effect(USE_DEF dst, KILL cr);  // dst is read and overwritten in place; flags clobbered
8977 
8978   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8979   opcode(0xC1, 0x7); /* C1 /7 ib */
8980   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // 0x3F == 63
8981   ins_pipe(ialu_reg);
8982 %}
8983 
8984 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8985 %{  // expand-only helper (no match rule): arithmetic right shift of dst by the fixed count 2
8986   effect(USE_DEF dst, KILL cr);  // dst is read and overwritten in place; flags clobbered
8987 
8988   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8989   opcode(0xC1, 0x7); /* C1 /7 ib */
8990   ins_encode(reg_opc_imm_wide(dst, 0x2));
8991   ins_pipe(ialu_reg);
8992 %}
8993 
8994 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8995 %{  // long divide by the constant 10 without idiv: dst = (mulhi(src, 0x6666666666666667) >> 2) - (src >> 63)
8996   match(Set dst (DivL src div));
8997 
8998   ins_cost((5+8)*100);
8999   expand %{
9000     rax_RegL rax;                     // Killed temp
9001     rFlagsReg cr;                     // Killed
9002     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9003     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9004     sarL_rReg_63(src, cr);            // sarq  src, 63  (src is shifted in place; it becomes the sign term)
9005     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9006     subL_rReg(dst, src, cr);          // subq  rdx, src
9007   %}
9008 %}
9009 
9010 //-----------------------------------------------------------------------------
9011 
9012 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9013                    rFlagsReg cr)
9014 %{  // int remainder: rdx = rax % div; same emitted sequence as divI_rReg, but the result operand is rdx
9015   match(Set rdx (ModI rax div));
9016   effect(KILL rax, KILL cr);  // rax and the flags register are clobbered
9017 
9018   ins_cost(300); // XXX
9019   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9020             "jne,s   normal\n\t"
9021             "xorl    rdx, rdx\n\t"
9022             "cmpl    $div, -1\n\t"
9023             "je,s    done\n"
9024     "normal: cdql\n\t"
9025             "idivl   $div\n"
9026     "done:"        %}
9027   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9028   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // per the format text, rdx is zeroed before the -1 check, so 0x80000000 % -1 yields 0
9029   ins_pipe(ialu_reg_reg_alu0);
9030 %}
9031 
9032 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9033                    rFlagsReg cr)
9034 %{  // long remainder: rdx = rax % div; same emitted sequence as divL_rReg, but the result operand is rdx
9035   match(Set rdx (ModL rax div));
9036   effect(KILL rax, KILL cr);  // rax and the flags register are clobbered
9037 
9038   ins_cost(300); // XXX
9039   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9040             "cmpq    rax, rdx\n\t"
9041             "jne,s   normal\n\t"
9042             "xorl    rdx, rdx\n\t"
9043             "cmpq    $div, -1\n\t"
9044             "je,s    done\n"
9045     "normal: cdqq\n\t"
9046             "idivq   $div\n"
9047     "done:"        %}
9048   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9049   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // per the format text, rdx is zeroed before the -1 check, so min_long % -1 yields 0
9050   ins_pipe(ialu_reg_reg_alu0);
9051 %}
9052 
9053 // Integer Shift Instructions
9054 // Shift Left by one: int register form, dst = dst << shift with shift an immI1 constant
9055 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9056 %{
9057   match(Set dst (LShiftI dst shift));
9058   effect(KILL cr);  // flags register is clobbered
9059 
9060   format %{ "sall    $dst, $shift" %}
9061   opcode(0xD1, 0x4); /* D1 /4 */
9062   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded: shift is not passed to any enc_class
9063   ins_pipe(ialu_reg);
9064 %}
9065 
9066 // Shift Left by one: int memory form, store(dst, LoadI(dst) << shift) with shift an immI1 constant
9067 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9068 %{
9069   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9070   effect(KILL cr);  // flags register is clobbered
9071 
9072   format %{ "sall    $dst, $shift\t" %}
9073   opcode(0xD1, 0x4); /* D1 /4 */
9074   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9075   ins_pipe(ialu_mem_imm);
9076 %}
9077 
9078 // Shift Left by 8-bit immediate: int register form, dst = dst << shift
9079 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9080 %{
9081   match(Set dst (LShiftI dst shift));
9082   effect(KILL cr);  // flags register is clobbered
9083 
9084   format %{ "sall    $dst, $shift" %}
9085   opcode(0xC1, 0x4); /* C1 /4 ib */
9086   ins_encode(reg_opc_imm(dst, shift));  // a single enc_class takes both the register and the shift count
9087   ins_pipe(ialu_reg);
9088 %}
9089 
9090 // Shift Left by 8-bit immediate: int memory form, store(dst, LoadI(dst) << shift)
9091 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9092 %{
9093   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9094   effect(KILL cr);  // flags register is clobbered
9095 
9096   format %{ "sall    $dst, $shift" %}
9097   opcode(0xC1, 0x4); /* C1 /4 ib */
9098   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9099   ins_pipe(ialu_mem_imm);
9100 %}
9101 
9102 // Shift Left by variable: int register form, dst = dst << shift with the count pinned to the rcx register class
9103 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9104 %{
9105   match(Set dst (LShiftI dst shift));
9106   effect(KILL cr);  // flags register is clobbered
9107 
9108   format %{ "sall    $dst, $shift" %}
9109   opcode(0xD3, 0x4); /* D3 /4 */
9110   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9111   ins_pipe(ialu_reg_reg);
9112 %}
9113 
9114 // Shift Left by variable: int memory form, store(dst, LoadI(dst) << shift) with the count pinned to the rcx register class
9115 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9116 %{
9117   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9118   effect(KILL cr);  // flags register is clobbered
9119 
9120   format %{ "sall    $dst, $shift" %}
9121   opcode(0xD3, 0x4); /* D3 /4 */
9122   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9123   ins_pipe(ialu_mem_reg);
9124 %}
9125 
9126 // Arithmetic shift right by one: int register form, dst = dst >> shift with shift an immI1 constant
9127 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9128 %{
9129   match(Set dst (RShiftI dst shift));
9130   effect(KILL cr);  // flags register is clobbered
9131 
9132   format %{ "sarl    $dst, $shift" %}
9133   opcode(0xD1, 0x7); /* D1 /7 */
9134   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded: shift is not passed to any enc_class
9135   ins_pipe(ialu_reg);
9136 %}
9137 
9138 // Arithmetic shift right by one: int memory form, store(dst, LoadI(dst) >> shift) with shift an immI1 constant
9139 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9140 %{
9141   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9142   effect(KILL cr);  // flags register is clobbered
9143 
9144   format %{ "sarl    $dst, $shift" %}
9145   opcode(0xD1, 0x7); /* D1 /7 */
9146   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9147   ins_pipe(ialu_mem_imm);
9148 %}
9149 
9150 // Arithmetic Shift Right by 8-bit immediate: int register form, dst = dst >> shift
9151 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9152 %{
9153   match(Set dst (RShiftI dst shift));
9154   effect(KILL cr);  // flags register is clobbered
9155 
9156   format %{ "sarl    $dst, $shift" %}
9157   opcode(0xC1, 0x7); /* C1 /7 ib */
9158   ins_encode(reg_opc_imm(dst, shift));
9159   ins_pipe(ialu_mem_imm);  // NOTE(review): register form uses the memory pipe class, unlike salI_rReg_imm/shrI_rReg_imm (ialu_reg) -- confirm intended
9160 %}
9161 
9162 // Arithmetic Shift Right by 8-bit immediate: int memory form, store(dst, LoadI(dst) >> shift)
9163 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9164 %{
9165   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9166   effect(KILL cr);  // flags register is clobbered
9167 
9168   format %{ "sarl    $dst, $shift" %}
9169   opcode(0xC1, 0x7); /* C1 /7 ib */
9170   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9171   ins_pipe(ialu_mem_imm);
9172 %}
9173 
9174 // Arithmetic Shift Right by variable: int register form, dst = dst >> shift with the count pinned to the rcx register class
9175 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9176 %{
9177   match(Set dst (RShiftI dst shift));
9178   effect(KILL cr);  // flags register is clobbered
9179 
9180   format %{ "sarl    $dst, $shift" %}
9181   opcode(0xD3, 0x7); /* D3 /7 */
9182   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9183   ins_pipe(ialu_reg_reg);
9184 %}
9185 
9186 // Arithmetic Shift Right by variable: int memory form, store(dst, LoadI(dst) >> shift) with the count pinned to the rcx register class
9187 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9188 %{
9189   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9190   effect(KILL cr);  // flags register is clobbered
9191 
9192   format %{ "sarl    $dst, $shift" %}
9193   opcode(0xD3, 0x7); /* D3 /7 */
9194   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9195   ins_pipe(ialu_mem_reg);
9196 %}
9197 
9198 // Logical shift right by one: int register form, dst = dst >>> shift with shift an immI1 constant
9199 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9200 %{
9201   match(Set dst (URShiftI dst shift));
9202   effect(KILL cr);  // flags register is clobbered
9203 
9204   format %{ "shrl    $dst, $shift" %}
9205   opcode(0xD1, 0x5); /* D1 /5 */
9206   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded: shift is not passed to any enc_class
9207   ins_pipe(ialu_reg);
9208 %}
9209 
9210 // Logical shift right by one: int memory form, store(dst, LoadI(dst) >>> shift) with shift an immI1 constant
9211 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9212 %{
9213   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9214   effect(KILL cr);  // flags register is clobbered
9215 
9216   format %{ "shrl    $dst, $shift" %}
9217   opcode(0xD1, 0x5); /* D1 /5 */
9218   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9219   ins_pipe(ialu_mem_imm);
9220 %}
9221 
9222 // Logical Shift Right by 8-bit immediate: int register form, dst = dst >>> shift
9223 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9224 %{
9225   match(Set dst (URShiftI dst shift));
9226   effect(KILL cr);  // flags register is clobbered
9227 
9228   format %{ "shrl    $dst, $shift" %}
9229   opcode(0xC1, 0x5); /* C1 /5 ib */
9230   ins_encode(reg_opc_imm(dst, shift));  // a single enc_class takes both the register and the shift count
9231   ins_pipe(ialu_reg);
9232 %}
9233 
9234 // Logical Shift Right by 8-bit immediate: int memory form, store(dst, LoadI(dst) >>> shift)
9235 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9236 %{
9237   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9238   effect(KILL cr);  // flags register is clobbered
9239 
9240   format %{ "shrl    $dst, $shift" %}
9241   opcode(0xC1, 0x5); /* C1 /5 ib */
9242   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9243   ins_pipe(ialu_mem_imm);
9244 %}
9245 
9246 // Logical Shift Right by variable: int register form, dst = dst >>> shift with the count pinned to the rcx register class
9247 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9248 %{
9249   match(Set dst (URShiftI dst shift));
9250   effect(KILL cr);  // flags register is clobbered
9251 
9252   format %{ "shrl    $dst, $shift" %}
9253   opcode(0xD3, 0x5); /* D3 /5 */
9254   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9255   ins_pipe(ialu_reg_reg);
9256 %}
9257 
9258 // Logical Shift Right by variable: int memory form, store(dst, LoadI(dst) >>> shift) with the count pinned to the rcx register class
9259 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9260 %{
9261   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9262   effect(KILL cr);  // flags register is clobbered
9263 
9264   format %{ "shrl    $dst, $shift" %}
9265   opcode(0xD3, 0x5); /* D3 /5 */
9266   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9267   ins_pipe(ialu_mem_reg);
9268 %}
9269 
9270 // Long Shift Instructions
9271 // Shift Left by one: long register form, dst = dst << shift with shift an immI1 constant
9272 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9273 %{
9274   match(Set dst (LShiftL dst shift));
9275   effect(KILL cr);  // flags register is clobbered
9276 
9277   format %{ "salq    $dst, $shift" %}
9278   opcode(0xD1, 0x4); /* D1 /4 */
9279   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // same opcode as salI_rReg_1; only the REX enc_class differs (_wide)
9280   ins_pipe(ialu_reg);
9281 %}
9282 
9283 // Shift Left by one: long memory form, store(dst, LoadL(dst) << shift) with shift an immI1 constant
9284 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9285 %{
9286   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9287   effect(KILL cr);  // flags register is clobbered
9288 
9289   format %{ "salq    $dst, $shift" %}
9290   opcode(0xD1, 0x4); /* D1 /4 */
9291   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9292   ins_pipe(ialu_mem_imm);
9293 %}
9294 
9295 // Shift Left by 8-bit immediate: long register form, dst = dst << shift
9296 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9297 %{
9298   match(Set dst (LShiftL dst shift));
9299   effect(KILL cr);  // flags register is clobbered
9300 
9301   format %{ "salq    $dst, $shift" %}
9302   opcode(0xC1, 0x4); /* C1 /4 ib */
9303   ins_encode(reg_opc_imm_wide(dst, shift));  // _wide counterpart of the reg_opc_imm used by salI_rReg_imm
9304   ins_pipe(ialu_reg);
9305 %}
9306 
9307 // Shift Left by 8-bit immediate: long memory form, store(dst, LoadL(dst) << shift)
9308 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9309 %{
9310   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9311   effect(KILL cr);  // flags register is clobbered
9312 
9313   format %{ "salq    $dst, $shift" %}
9314   opcode(0xC1, 0x4); /* C1 /4 ib */
9315   ins_encode(REX_mem_wide(dst), OpcP,
9316              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9317   ins_pipe(ialu_mem_imm);
9318 %}
9319 
9320 // Shift Left by variable: long register form, dst = dst << shift with the int count pinned to the rcx register class
9321 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9322 %{
9323   match(Set dst (LShiftL dst shift));
9324   effect(KILL cr);  // flags register is clobbered
9325 
9326   format %{ "salq    $dst, $shift" %}
9327   opcode(0xD3, 0x4); /* D3 /4 */
9328   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9329   ins_pipe(ialu_reg_reg);
9330 %}
9331 
9332 // Shift Left by variable: long memory form, store(dst, LoadL(dst) << shift) with the int count pinned to the rcx register class
9333 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9334 %{
9335   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9336   effect(KILL cr);  // flags register is clobbered
9337 
9338   format %{ "salq    $dst, $shift" %}
9339   opcode(0xD3, 0x4); /* D3 /4 */
9340   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9341   ins_pipe(ialu_mem_reg);
9342 %}
9343 
9344 // Arithmetic shift right by one: long register form, dst = dst >> shift with shift an immI1 constant
9345 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9346 %{
9347   match(Set dst (RShiftL dst shift));
9348   effect(KILL cr);  // flags register is clobbered
9349 
9350   format %{ "sarq    $dst, $shift" %}
9351   opcode(0xD1, 0x7); /* D1 /7 */
9352   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no immediate is encoded: shift is not passed to any enc_class
9353   ins_pipe(ialu_reg);
9354 %}
9355 
9356 // Arithmetic shift right by one: long memory form, store(dst, LoadL(dst) >> shift) with shift an immI1 constant
9357 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9358 %{
9359   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9360   effect(KILL cr);  // flags register is clobbered
9361 
9362   format %{ "sarq    $dst, $shift" %}
9363   opcode(0xD1, 0x7); /* D1 /7 */
9364   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9365   ins_pipe(ialu_mem_imm);
9366 %}
9367 
9368 // Arithmetic Shift Right by 8-bit immediate: long register form, dst = dst >> shift
9369 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9370 %{
9371   match(Set dst (RShiftL dst shift));
9372   effect(KILL cr);  // flags register is clobbered
9373 
9374   format %{ "sarq    $dst, $shift" %}
9375   opcode(0xC1, 0x7); /* C1 /7 ib */
9376   ins_encode(reg_opc_imm_wide(dst, shift));
9377   ins_pipe(ialu_mem_imm);  // NOTE(review): register form uses the memory pipe class, unlike salL_rReg_imm/shrL_rReg_imm (ialu_reg) -- confirm intended
9378 %}
9379 
9380 // Arithmetic Shift Right by 8-bit immediate: long memory form, store(dst, LoadL(dst) >> shift)
9381 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9382 %{
9383   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9384   effect(KILL cr);  // flags register is clobbered
9385 
9386   format %{ "sarq    $dst, $shift" %}
9387   opcode(0xC1, 0x7); /* C1 /7 ib */
9388   ins_encode(REX_mem_wide(dst), OpcP,
9389              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9390   ins_pipe(ialu_mem_imm);
9391 %}
9392 
9393 // Arithmetic Shift Right by variable: long register form, dst = dst >> shift with the int count pinned to the rcx register class
9394 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9395 %{
9396   match(Set dst (RShiftL dst shift));
9397   effect(KILL cr);  // flags register is clobbered
9398 
9399   format %{ "sarq    $dst, $shift" %}
9400   opcode(0xD3, 0x7); /* D3 /7 */
9401   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9402   ins_pipe(ialu_reg_reg);
9403 %}
9404 
9405 // Arithmetic Shift Right by variable: long memory form, store(dst, LoadL(dst) >> shift) with the int count pinned to the rcx register class
9406 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9407 %{
9408   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9409   effect(KILL cr);  // flags register is clobbered
9410 
9411   format %{ "sarq    $dst, $shift" %}
9412   opcode(0xD3, 0x7); /* D3 /7 */
9413   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9414   ins_pipe(ialu_mem_reg);
9415 %}
9416 
9417 // Logical shift right by one: long register form, dst = dst >>> shift with shift an immI1 constant
9418 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9419 %{
9420   match(Set dst (URShiftL dst shift));
9421   effect(KILL cr);  // flags register is clobbered
9422 
9423   format %{ "shrq    $dst, $shift" %}
9424   opcode(0xD1, 0x5); /* D1 /5 */
9425   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // no immediate is encoded: shift is not passed to any enc_class
9426   ins_pipe(ialu_reg);
9427 %}
9428 
9429 // Logical shift right by one: long memory form, store(dst, LoadL(dst) >>> shift) with shift an immI1 constant
9430 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9431 %{
9432   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9433   effect(KILL cr);  // flags register is clobbered
9434 
9435   format %{ "shrq    $dst, $shift" %}
9436   opcode(0xD1, 0x5); /* D1 /5 */
9437   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded: shift is not passed to any enc_class
9438   ins_pipe(ialu_mem_imm);
9439 %}
9440 
9441 // Logical Shift Right by 8-bit immediate: long register form, dst = dst >>> shift
9442 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9443 %{
9444   match(Set dst (URShiftL dst shift));
9445   effect(KILL cr);  // flags register is clobbered
9446 
9447   format %{ "shrq    $dst, $shift" %}
9448   opcode(0xC1, 0x5); /* C1 /5 ib */
9449   ins_encode(reg_opc_imm_wide(dst, shift));  // _wide counterpart of the reg_opc_imm used by shrI_rReg_imm
9450   ins_pipe(ialu_reg);
9451 %}
9452 
9453 
9454 // Logical Shift Right by 8-bit immediate: long memory form, store(dst, LoadL(dst) >>> shift)
9455 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9456 %{
9457   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9458   effect(KILL cr);  // flags register is clobbered
9459 
9460   format %{ "shrq    $dst, $shift" %}
9461   opcode(0xC1, 0x5); /* C1 /5 ib */
9462   ins_encode(REX_mem_wide(dst), OpcP,
9463              RM_opc_mem(secondary, dst), Con8or32(shift));  // the shift count follows the memory operand as the immediate
9464   ins_pipe(ialu_mem_imm);
9465 %}
9466 
9467 // Logical Shift Right by variable: long register form, dst = dst >>> shift with the int count pinned to the rcx register class
9468 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9469 %{
9470   match(Set dst (URShiftL dst shift));
9471   effect(KILL cr);  // flags register is clobbered
9472 
9473   format %{ "shrq    $dst, $shift" %}
9474   opcode(0xD3, 0x5); /* D3 /5 */
9475   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9476   ins_pipe(ialu_reg_reg);
9477 %}
9478 
9479 // Logical Shift Right by variable: long memory form, store(dst, LoadL(dst) >>> shift) with the int count pinned to the rcx register class
9480 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9481 %{
9482   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9483   effect(KILL cr);  // flags register is clobbered
9484 
9485   format %{ "shrq    $dst, $shift" %}
9486   opcode(0xD3, 0x5); /* D3 /5 */
9487   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; the count register is fixed by the operand class
9488   ins_pipe(ialu_mem_reg);
9489 %}
9490 
9491 // Shift Left by 24, followed by Arithmetic Shift Right by 24 (sign-extends the low byte).
9492 // This idiom is used by the compiler for the i2b bytecode.
9493 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9494 %{
9495   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // the same immI_24 operand is used for both shift counts
9496 
9497   format %{ "movsbl  $dst, $src\t# i2b" %}
9498   opcode(0x0F, 0xBE);
9499   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // REX_reg_breg (i2s uses REX_reg_reg): presumably because src is read as a byte register -- confirm in its enc_class
9500   ins_pipe(ialu_reg_reg);
9501 %}
9502 
9503 // Shift Left by 16, followed by Arithmetic Shift Right by 16 (sign-extends the low 16 bits).
9504 // This idiom is used by the compiler for the i2s bytecode.
9505 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9506 %{
9507   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // the same immI_16 operand is used for both shift counts
9508 
9509   format %{ "movswl  $dst, $src\t# i2s" %}
9510   opcode(0x0F, 0xBF);
9511   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9512   ins_pipe(ialu_reg_reg);
9513 %}
9514 
9515 // ROL/ROR instructions
9516 
9517 // ROL expand
9518 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{  // expand-only helper (no match rule): int rotate left of dst by one, used by rolI_rReg_i1
9519   effect(KILL cr, USE_DEF dst);  // dst is rotated in place; flags clobbered
9520 
9521   format %{ "roll    $dst" %}
9522   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9523   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9524   ins_pipe(ialu_reg);
9525 %}
9526 
9527 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{  // expand-only helper (no match rule): int rotate left of dst by an 8-bit immediate, used by rolI_rReg_i8
9528   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9529 
9530   format %{ "roll    $dst, $shift" %}
9531   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9532   ins_encode( reg_opc_imm(dst, shift) );
9533   ins_pipe(ialu_reg);
9534 %}
9535 
9536 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9537 %{  // expand-only helper (no match rule): int rotate left of dst by a variable count; dst's class excludes rcx, which holds the count
9538   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9539 
9540   format %{ "roll    $dst, $shift" %}
9541   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9542   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9543   ins_pipe(ialu_reg_reg);
9544 %}
9545 // end of ROL expand
9546 
9547 // Rotate Left by one: recognizes (dst << lshift) | (dst >>> rshift) and expands to rolI_rReg_imm1
9548 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9549 %{
9550   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // immI_M1 is presumably the constant -1 (as an int shift count, equivalent to 31) -- confirm against the operand definition
9551 
9552   expand %{
9553     rolI_rReg_imm1(dst, cr);
9554   %}
9555 %}
9556 
9557 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift) and expands to rolI_rReg_imm8
9558 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9559 %{
9560   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift counts sum to a multiple of 32, i.e. the OR really is a rotate
9561   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9562 
9563   expand %{
9564     rolI_rReg_imm8(dst, lshift, cr);  // rotate count is the left-shift amount
9565   %}
9566 %}
9567 
9568 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (0 - shift)) and expands to rolI_rReg_CL
9569 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9570 %{
9571   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9572 
9573   expand %{
9574     rolI_rReg_CL(dst, shift, cr);
9575   %}
9576 %}
9577 
9578 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (32 - shift)) and expands to rolI_rReg_CL
9579 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9580 %{
9581   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9582 
9583   expand %{
9584     rolI_rReg_CL(dst, shift, cr);
9585   %}
9586 %}
9587 
9588 // ROR expand
9589 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9590 %{  // expand-only helper (no match rule): int rotate right of dst by one, used by rorI_rReg_i1
9591   effect(USE_DEF dst, KILL cr);  // dst is rotated in place; flags clobbered
9592 
9593   format %{ "rorl    $dst" %}
9594   opcode(0xD1, 0x1); /* D1 /1 */
9595   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9596   ins_pipe(ialu_reg);
9597 %}
9598 
9599 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9600 %{  // expand-only helper (no match rule): int rotate right of dst by an 8-bit immediate, used by rorI_rReg_i8
9601   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9602 
9603   format %{ "rorl    $dst, $shift" %}
9604   opcode(0xC1, 0x1); /* C1 /1 ib */
9605   ins_encode(reg_opc_imm(dst, shift));
9606   ins_pipe(ialu_reg);
9607 %}
9608 
9609 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9610 %{  // expand-only helper (no match rule): int rotate right of dst by a variable count; dst's class excludes rcx, which holds the count
9611   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9612 
9613   format %{ "rorl    $dst, $shift" %}
9614   opcode(0xD3, 0x1); /* D3 /1 */
9615   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; the count register is fixed by the operand class
9616   ins_pipe(ialu_reg_reg);
9617 %}
9618 // end of ROR expand
9619 
9620 // Rotate Right by one: recognizes (dst >>> rshift) | (dst << lshift) and expands to rorI_rReg_imm1
9621 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9622 %{
9623   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // immI_M1 is presumably the constant -1 (as an int shift count, equivalent to 31) -- confirm against the operand definition
9624 
9625   expand %{
9626     rorI_rReg_imm1(dst, cr);
9627   %}
9628 %}
9629 
9630 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift) and expands to rorI_rReg_imm8
9631 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9632 %{
9633   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift counts sum to a multiple of 32, i.e. the OR really is a rotate
9634   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9635 
9636   expand %{
9637     rorI_rReg_imm8(dst, rshift, cr);  // rotate count is the right-shift amount
9638   %}
9639 %}
9640 
9641 // Rotate Right by variable: (dst >>> shift) | (dst << (zero - shift))
     // where zero is an immI0 operand.  Expands to rorI_rReg_CL.
9642 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9643 %{
9644   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9645 
9646   expand %{
9647     rorI_rReg_CL(dst, shift, cr);
9648   %}
9649 %}
9650 
9651 // Rotate Right by variable: (dst >>> shift) | (dst << (c32 - shift))
     // where c32 is an immI_32 operand.  Expands to rorI_rReg_CL, exactly like
     // the immI0 form of this pattern.
9652 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9653 %{
9654   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9655 
9656   expand %{
9657     rorI_rReg_CL(dst, shift, cr);
9658   %}
9659 %}
9660 
9661 // for long rotate
9662 // ROL expand
     // The three rolL_rReg_{imm1,imm8,CL} instructs have no match rule; they are
     // only instantiated through the expand rules of the long rotate-left
     // patterns.  This one emits D1 /0 on dst behind a REX_reg_wide prefix.
9663 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9664   effect(USE_DEF dst, KILL cr);
9665 
9666   format %{ "rolq    $dst" %}
9667   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9668   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9669   ins_pipe(ialu_reg);
9670 %}
9671 
9672 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
     // Expand-only helper (no match rule): rotate the long in dst left by the
     // 8-bit immediate 'shift' using opcode C1 /0 ib (reg_opc_imm_wide encoding).
9673   effect(USE_DEF dst, USE shift, KILL cr);
9674 
9675   format %{ "rolq    $dst, $shift" %}
9676   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9677   ins_encode( reg_opc_imm_wide(dst, shift) );
9678   ins_pipe(ialu_reg);
9679 %}
9680 
9681 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9682 %{
     // Expand-only helper (no match rule): rotate the long in dst left by a
     // variable count using opcode D3 /0.  The count is an int operand of class
     // rcx_RegI; dst is of class no_rcx_RegL.
9683   effect(USE_DEF dst, USE shift, KILL cr);
9684 
9685   format %{ "rolq    $dst, $shift" %}
9686   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9687   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9688   ins_pipe(ialu_reg_reg);
9689 %}
9690 // end of ROL expand
9691 
9692 // Rotate Left by one (long): (dst << lshift) | (dst >>> rshift) where
     // lshift is an immI1 operand and rshift an immI_M1 operand.  Expands to
     // rolL_rReg_imm1.
9693 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9694 %{
9695   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9696 
9697   expand %{
9698     rolL_rReg_imm1(dst, cr);
9699   %}
9700 %}
9701 
9702 // Rotate Left by 8-bit immediate (long): (dst << lshift) | (dst >>> rshift).
     // The predicate only admits the pattern when the two shift constants sum
     // to a value whose low six bits are zero (a multiple of 64).  The expansion
     // passes lshift as the rotate count.
9703 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9704 %{
9705   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9706   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9707 
9708   expand %{
9709     rolL_rReg_imm8(dst, lshift, cr);
9710   %}
9711 %}
9712 
9713 // Rotate Left by variable (long): (dst << shift) | (dst >>> (zero - shift))
     // where zero is an immI0 operand.  Expands to rolL_rReg_CL.
9714 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9715 %{
9716   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9717 
9718   expand %{
9719     rolL_rReg_CL(dst, shift, cr);
9720   %}
9721 %}
9722 
9723 // Rotate Left by variable (long): (dst << shift) | (dst >>> (c64 - shift))
     // where c64 is an immI_64 operand.  Expands to rolL_rReg_CL.
9724 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9725 %{
9726   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9727 
9728   expand %{
9729     rolL_rReg_CL(dst, shift, cr);
9730   %}
9731 %}
9732 
9733 // ROR expand
     // The three rorL_rReg_{imm1,imm8,CL} instructs have no match rule; they are
     // only instantiated through the expand rules of the long rotate-right
     // patterns.  This one emits D1 /1 on dst behind a REX_reg_wide prefix.
9734 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9735 %{
9736   effect(USE_DEF dst, KILL cr);
9737 
9738   format %{ "rorq    $dst" %}
9739   opcode(0xD1, 0x1); /* D1 /1 */
9740   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9741   ins_pipe(ialu_reg);
9742 %}
9743 
9744 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9745 %{
     // Expand-only helper (no match rule): rotate the long in dst right by the
     // 8-bit immediate 'shift' using opcode C1 /1 ib (reg_opc_imm_wide encoding).
9746   effect(USE_DEF dst, USE shift, KILL cr);
9747 
9748   format %{ "rorq    $dst, $shift" %}
9749   opcode(0xC1, 0x1); /* C1 /1 ib */
9750   ins_encode(reg_opc_imm_wide(dst, shift));
9751   ins_pipe(ialu_reg);
9752 %}
9753 
9754 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9755 %{
     // Expand-only helper (no match rule): rotate the long in dst right by a
     // variable count using opcode D3 /1.  The count is an int operand of class
     // rcx_RegI; dst is of class no_rcx_RegL.
9756   effect(USE_DEF dst, USE shift, KILL cr);
9757 
9758   format %{ "rorq    $dst, $shift" %}
9759   opcode(0xD3, 0x1); /* D3 /1 */
9760   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9761   ins_pipe(ialu_reg_reg);
9762 %}
9763 // end of ROR expand
9764 
9765 // Rotate Right by one (long): (dst >>> rshift) | (dst << lshift) where
     // rshift is an immI1 operand and lshift an immI_M1 operand.  Expands to
     // rorL_rReg_imm1.
9766 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9767 %{
9768   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9769 
9770   expand %{
9771     rorL_rReg_imm1(dst, cr);
9772   %}
9773 %}
9774 
9775 // Rotate Right by 8-bit immediate (long): (dst >>> rshift) | (dst << lshift).
     // The predicate only admits the pattern when the two shift constants sum
     // to a value whose low six bits are zero (a multiple of 64).  The expansion
     // passes rshift as the rotate count.
9776 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9777 %{
9778   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9779   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9780 
9781   expand %{
9782     rorL_rReg_imm8(dst, rshift, cr);
9783   %}
9784 %}
9785 
9786 // Rotate Right by variable (long): (dst >>> shift) | (dst << (zero - shift))
     // where zero is an immI0 operand.  Expands to rorL_rReg_CL.
9787 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9788 %{
9789   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9790 
9791   expand %{
9792     rorL_rReg_CL(dst, shift, cr);
9793   %}
9794 %}
9795 
9796 // Rotate Right by variable (long): (dst >>> shift) | (dst << (c64 - shift))
     // where c64 is an immI_64 operand.  Expands to rorL_rReg_CL.
9797 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9798 %{
9799   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9800 
9801   expand %{
9802     rorL_rReg_CL(dst, shift, cr);
9803   %}
9804 %}
9805 
9806 // Logical Instructions
9807 
9808 // Integer Logical Instructions
9809 
9810 // And Instructions
9811 // And Register with Register
     // Two-operand form: dst = dst & src (int), opcode 0x23 with a reg/reg
     // operand byte.  The flags register is declared killed.
9812 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9813 %{
9814   match(Set dst (AndI dst src));
9815   effect(KILL cr);
9816 
9817   format %{ "andl    $dst, $src\t# int" %}
9818   opcode(0x23);
9819   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9820   ins_pipe(ialu_reg_reg);
9821 %}
9822 
9823 // And Register with Immediate 255
     // Implemented as a byte zero-extension of dst onto itself (movzbl, opcode
     // 0F B6) instead of an and; unlike andI_rReg_imm it takes no rFlagsReg
     // operand and declares no KILL effect.  Uses the REX_reg_breg prefix
     // encoding for the byte source register.
9824 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9825 %{
9826   match(Set dst (AndI dst src));
9827 
9828   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9829   opcode(0x0F, 0xB6);
9830   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9831   ins_pipe(ialu_reg);
9832 %}
9833 
9834 // And Register with Immediate 255 and promote to long
     // Folds ConvI2L(src & mask) into a single movzbl from src into the long
     // register dst (opcode 0F B6); no flags effect is declared.
9835 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9836 %{
9837   match(Set dst (ConvI2L (AndI src mask)));
9838 
9839   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9840   opcode(0x0F, 0xB6);
9841   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9842   ins_pipe(ialu_reg);
9843 %}
9844 
9845 // And Register with Immediate 65535
     // Implemented as a 16-bit zero-extension of dst onto itself (movzwl,
     // opcode 0F B7); no rFlagsReg operand or KILL effect is declared.
9846 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9847 %{
9848   match(Set dst (AndI dst src));
9849 
9850   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9851   opcode(0x0F, 0xB7);
9852   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9853   ins_pipe(ialu_reg);
9854 %}
9855 
9856 // And Register with Immediate 65535 and promote to long
     // Folds ConvI2L(src & mask) into a single movzwl from src into the long
     // register dst (opcode 0F B7); no flags effect is declared.
9857 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9858 %{
9859   match(Set dst (ConvI2L (AndI src mask)));
9860 
9861   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9862   opcode(0x0F, 0xB7);
9863   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9864   ins_pipe(ialu_reg);
9865 %}
9866 
9867 // And Register with Immediate
     // dst = dst & src for a general int immediate, opcode 81 /4.  The
     // immediate is emitted by Con8or32; flags are declared killed.
9868 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9869 %{
9870   match(Set dst (AndI dst src));
9871   effect(KILL cr);
9872 
9873   format %{ "andl    $dst, $src\t# int" %}
9874   opcode(0x81, 0x04); /* Opcode 81 /4 */
9875   ins_encode(OpcSErm(dst, src), Con8or32(src));
9876   ins_pipe(ialu_reg);
9877 %}
9878 
9879 // And Register with Memory
     // dst = dst & LoadI(src): the load is folded into the and (opcode 0x23
     // with a memory operand).  Cost 125; flags are declared killed.
9880 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9881 %{
9882   match(Set dst (AndI dst (LoadI src)));
9883   effect(KILL cr);
9884 
9885   ins_cost(125);
9886   format %{ "andl    $dst, $src\t# int" %}
9887   opcode(0x23);
9888   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9889   ins_pipe(ialu_reg_mem);
9890 %}
9891 
9892 // And Memory with Register
     // Read-modify-write: matches a StoreI to dst of (LoadI dst) & src and
     // emits one and-to-memory (opcode 21 /r).  Note the encoding helpers take
     // (src, dst): the register is the reg field, memory the r/m operand.
9893 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9894 %{
9895   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9896   effect(KILL cr);
9897 
9898   ins_cost(150);
9899   format %{ "andl    $dst, $src\t# int" %}
9900   opcode(0x21); /* Opcode 21 /r */
9901   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9902   ins_pipe(ialu_mem_reg);
9903 %}
9904 
9905 // And Memory with Immediate
     // Read-modify-write: matches a StoreI to dst of (LoadI dst) & src for an
     // int immediate, opcode 81 /4.  The /4 extension comes from the secondary
     // opcode passed to RM_opc_mem; Con8or32 emits the immediate.
9906 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9907 %{
9908   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9909   effect(KILL cr);
9910 
9911   ins_cost(125);
9912   format %{ "andl    $dst, $src\t# int" %}
9913   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9914   ins_encode(REX_mem(dst), OpcSE(src),
9915              RM_opc_mem(secondary, dst), Con8or32(src));
9916   ins_pipe(ialu_mem_imm);
9917 %}
9918 
9919 // Or Instructions
9920 // Or Register with Register
     // Two-operand form: dst = dst | src (int), opcode 0x0B with a reg/reg
     // operand byte.  The flags register is declared killed.
9921 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9922 %{
9923   match(Set dst (OrI dst src));
9924   effect(KILL cr);
9925 
9926   format %{ "orl     $dst, $src\t# int" %}
9927   opcode(0x0B);
9928   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9929   ins_pipe(ialu_reg_reg);
9930 %}
9931 
9932 // Or Register with Immediate
     // dst = dst | src for an int immediate, opcode 81 /1.  The immediate is
     // emitted by Con8or32; flags are declared killed.
9933 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9934 %{
9935   match(Set dst (OrI dst src));
9936   effect(KILL cr);
9937 
9938   format %{ "orl     $dst, $src\t# int" %}
9939   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9940   ins_encode(OpcSErm(dst, src), Con8or32(src));
9941   ins_pipe(ialu_reg);
9942 %}
9943 
9944 // Or Register with Memory
     // dst = dst | LoadI(src): the load is folded into the or (opcode 0x0B with
     // a memory operand).  Cost 125; flags are declared killed.
9945 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9946 %{
9947   match(Set dst (OrI dst (LoadI src)));
9948   effect(KILL cr);
9949 
9950   ins_cost(125);
9951   format %{ "orl     $dst, $src\t# int" %}
9952   opcode(0x0B);
9953   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9954   ins_pipe(ialu_reg_mem);
9955 %}
9956 
9957 // Or Memory with Register
     // Read-modify-write: matches a StoreI to dst of (LoadI dst) | src and
     // emits one or-to-memory (opcode 09 /r).  The encoding helpers take
     // (src, dst): the register is the reg field, memory the r/m operand.
9958 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9959 %{
9960   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9961   effect(KILL cr);
9962 
9963   ins_cost(150);
9964   format %{ "orl     $dst, $src\t# int" %}
9965   opcode(0x09); /* Opcode 09 /r */
9966   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9967   ins_pipe(ialu_mem_reg);
9968 %}
9969 
9970 // Or Memory with Immediate
     // Read-modify-write: matches a StoreI to dst of (LoadI dst) | src for an
     // int immediate, opcode 81 /1.  The /1 extension comes from the secondary
     // opcode passed to RM_opc_mem; Con8or32 emits the immediate.
9971 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9972 %{
9973   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9974   effect(KILL cr);
9975 
9976   ins_cost(125);
9977   format %{ "orl     $dst, $src\t# int" %}
9978   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9979   ins_encode(REX_mem(dst), OpcSE(src),
9980              RM_opc_mem(secondary, dst), Con8or32(src));
9981   ins_pipe(ialu_mem_imm);
9982 %}
9983 
9984 // Xor Instructions
9985 // Xor Register with Register
     // Two-operand form: dst = dst ^ src (int), opcode 0x33 with a reg/reg
     // operand byte.  The flags register is declared killed.
9986 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9987 %{
9988   match(Set dst (XorI dst src));
9989   effect(KILL cr);
9990 
9991   format %{ "xorl    $dst, $src\t# int" %}
9992   opcode(0x33);
9993   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9994   ins_pipe(ialu_reg_reg);
9995 %}
9996 
9997 // Xor Register with Immediate -1
     // dst ^ -1 is emitted as a bitwise complement via the assembler's notl().
     // No rFlagsReg operand or KILL effect is declared for this form.
     // The format string now prints "notl" so the listing names the instruction
     // actually emitted, in line with "notq" in the long variant.
9998 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9999   match(Set dst (XorI dst imm));  
10000 
10001   format %{ "notl   $dst" %}  
10002   ins_encode %{
10003      __ notl($dst$$Register);
10004   %}
10005   ins_pipe(ialu_reg);
10006 %}
10007 
10008 // Xor Register with Immediate
      // dst = dst ^ src for an int immediate, opcode 81 /6.  The immediate is
      // emitted by Con8or32; flags are declared killed.
10009 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10010 %{
10011   match(Set dst (XorI dst src));
10012   effect(KILL cr);
10013 
10014   format %{ "xorl    $dst, $src\t# int" %}
10015   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10016   ins_encode(OpcSErm(dst, src), Con8or32(src));
10017   ins_pipe(ialu_reg);
10018 %}
10019 
10020 // Xor Register with Memory
      // dst = dst ^ LoadI(src): the load is folded into the xor (opcode 0x33
      // with a memory operand).  Cost 125; flags are declared killed.
10021 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10022 %{
10023   match(Set dst (XorI dst (LoadI src)));
10024   effect(KILL cr);
10025 
10026   ins_cost(125);
10027   format %{ "xorl    $dst, $src\t# int" %}
10028   opcode(0x33);
10029   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10030   ins_pipe(ialu_reg_mem);
10031 %}
10032 
10033 // Xor Memory with Register
      // Read-modify-write: matches a StoreI to dst of (LoadI dst) ^ src and
      // emits one xor-to-memory (opcode 31 /r).  The encoding helpers take
      // (src, dst): the register is the reg field, memory the r/m operand.
10034 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10035 %{
10036   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10037   effect(KILL cr);
10038 
10039   ins_cost(150);
10040   format %{ "xorl    $dst, $src\t# int" %}
10041   opcode(0x31); /* Opcode 31 /r */
10042   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10043   ins_pipe(ialu_mem_reg);
10044 %}
10045 
10046 // Xor Memory with Immediate
      // Read-modify-write: matches a StoreI to dst of (LoadI dst) ^ src for an
      // int immediate, opcode 81 /6.  The /6 extension comes from the secondary
      // opcode passed to RM_opc_mem; Con8or32 emits the immediate.
10047 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10048 %{
10049   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10050   effect(KILL cr);
10051 
10052   ins_cost(125);
10053   format %{ "xorl    $dst, $src\t# int" %}
10054   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10055   ins_encode(REX_mem(dst), OpcSE(src),
10056              RM_opc_mem(secondary, dst), Con8or32(src));
10057   ins_pipe(ialu_mem_imm);
10058 %}
10059 
10060 
10061 // Long Logical Instructions
10062 
10063 // And Instructions
10064 // And Register with Register
      // Long form: dst = dst & src, opcode 0x23 behind a REX_reg_reg_wide
      // prefix.  The flags register is declared killed.
10065 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10066 %{
10067   match(Set dst (AndL dst src));
10068   effect(KILL cr);
10069 
10070   format %{ "andq    $dst, $src\t# long" %}
10071   opcode(0x23);
10072   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10073   ins_pipe(ialu_reg_reg);
10074 %}
10075 
10076 // And Register with Immediate 255
      // Implemented as a byte zero-extension of dst onto itself (movzbq, opcode
      // 0F B6 with a REX_reg_reg_wide prefix); no rFlagsReg operand or KILL
      // effect is declared.
10077 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10078 %{
10079   match(Set dst (AndL dst src));
10080 
10081   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10082   opcode(0x0F, 0xB6);
10083   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10084   ins_pipe(ialu_reg);
10085 %}
10086 
10087 // And Register with Immediate 65535
      // Implemented as a 16-bit zero-extension of dst onto itself (movzwq,
      // opcode 0F B7 with a REX_reg_reg_wide prefix); no rFlagsReg operand or
      // KILL effect is declared.
10088 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10089 %{
10090   match(Set dst (AndL dst src));
10091 
10092   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10093   opcode(0x0F, 0xB7);
10094   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10095   ins_pipe(ialu_reg);
10096 %}
10097 
10098 // And Register with Immediate
      // dst = dst & src where src is an immL32 operand, opcode 81 /4 via the
      // wide OpcSErm_wide encoding; Con8or32 emits the immediate.  Flags are
      // declared killed.
10099 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10100 %{
10101   match(Set dst (AndL dst src));
10102   effect(KILL cr);
10103 
10104   format %{ "andq    $dst, $src\t# long" %}
10105   opcode(0x81, 0x04); /* Opcode 81 /4 */
10106   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10107   ins_pipe(ialu_reg);
10108 %}
10109 
10110 // And Register with Memory
      // dst = dst & LoadL(src): the load is folded into the and (opcode 0x23
      // with a memory operand, REX_reg_mem_wide prefix).  Cost 125; flags are
      // declared killed.
10111 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10112 %{
10113   match(Set dst (AndL dst (LoadL src)));
10114   effect(KILL cr);
10115 
10116   ins_cost(125);
10117   format %{ "andq    $dst, $src\t# long" %}
10118   opcode(0x23);
10119   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10120   ins_pipe(ialu_reg_mem);
10121 %}
10122 
10123 // And Memory with Register
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) & src and
      // emits one and-to-memory (opcode 21 /r, wide prefix).  The encoding
      // helpers take (src, dst): register as reg field, memory as r/m operand.
10124 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10125 %{
10126   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10127   effect(KILL cr);
10128 
10129   ins_cost(150);
10130   format %{ "andq    $dst, $src\t# long" %}
10131   opcode(0x21); /* Opcode 21 /r */
10132   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10133   ins_pipe(ialu_mem_reg);
10134 %}
10135 
10136 // And Memory with Immediate
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) & src for an
      // immL32 operand, opcode 81 /4 behind a REX_mem_wide prefix.  The /4
      // extension comes from the secondary opcode passed to RM_opc_mem.
10137 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10138 %{
10139   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10140   effect(KILL cr);
10141 
10142   ins_cost(125);
10143   format %{ "andq    $dst, $src\t# long" %}
10144   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10145   ins_encode(REX_mem_wide(dst), OpcSE(src),
10146              RM_opc_mem(secondary, dst), Con8or32(src));
10147   ins_pipe(ialu_mem_imm);
10148 %}
10149 
10150 // Or Instructions
10151 // Or Register with Register
      // Long form: dst = dst | src, opcode 0x0B behind a REX_reg_reg_wide
      // prefix.  The flags register is declared killed.
10152 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10153 %{
10154   match(Set dst (OrL dst src));
10155   effect(KILL cr);
10156 
10157   format %{ "orq     $dst, $src\t# long" %}
10158   opcode(0x0B);
10159   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10160   ins_pipe(ialu_reg_reg);
10161 %}
10162 
10163 // Use any_RegP to match R15 (TLS register) without spilling.
      // Matches dst | CastP2X(src): the pointer register is or-ed directly into
      // the long dst with the same opcode and encoding as orL_rReg, so the cast
      // itself produces no instruction.  Flags are declared killed.
10164 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10165   match(Set dst (OrL dst (CastP2X src)));
10166   effect(KILL cr);
10167 
10168   format %{ "orq     $dst, $src\t# long" %}
10169   opcode(0x0B);
10170   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10171   ins_pipe(ialu_reg_reg);
10172 %}
10173 
10174 
10175 // Or Register with Immediate
      // dst = dst | src where src is an immL32 operand, opcode 81 /1 via the
      // wide OpcSErm_wide encoding; Con8or32 emits the immediate.  Flags are
      // declared killed.
10176 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10177 %{
10178   match(Set dst (OrL dst src));
10179   effect(KILL cr);
10180 
10181   format %{ "orq     $dst, $src\t# long" %}
10182   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10183   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10184   ins_pipe(ialu_reg);
10185 %}
10186 
10187 // Or Register with Memory
      // dst = dst | LoadL(src): the load is folded into the or (opcode 0x0B with
      // a memory operand, REX_reg_mem_wide prefix).  Cost 125; flags are
      // declared killed.
10188 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10189 %{
10190   match(Set dst (OrL dst (LoadL src)));
10191   effect(KILL cr);
10192 
10193   ins_cost(125);
10194   format %{ "orq     $dst, $src\t# long" %}
10195   opcode(0x0B);
10196   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10197   ins_pipe(ialu_reg_mem);
10198 %}
10199 
10200 // Or Memory with Register
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) | src and
      // emits one or-to-memory (opcode 09 /r, wide prefix).  The encoding
      // helpers take (src, dst): register as reg field, memory as r/m operand.
10201 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10202 %{
10203   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10204   effect(KILL cr);
10205 
10206   ins_cost(150);
10207   format %{ "orq     $dst, $src\t# long" %}
10208   opcode(0x09); /* Opcode 09 /r */
10209   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10210   ins_pipe(ialu_mem_reg);
10211 %}
10212 
10213 // Or Memory with Immediate
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) | src for an
      // immL32 operand, opcode 81 /1 behind a REX_mem_wide prefix.  The /1
      // extension comes from the secondary opcode passed to RM_opc_mem.
10214 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10215 %{
10216   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10217   effect(KILL cr);
10218 
10219   ins_cost(125);
10220   format %{ "orq     $dst, $src\t# long" %}
10221   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10222   ins_encode(REX_mem_wide(dst), OpcSE(src),
10223              RM_opc_mem(secondary, dst), Con8or32(src));
10224   ins_pipe(ialu_mem_imm);
10225 %}
10226 
10227 // Xor Instructions
10228 // Xor Register with Register
      // Long form: dst = dst ^ src, opcode 0x33 behind a REX_reg_reg_wide
      // prefix.  The flags register is declared killed.
10229 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10230 %{
10231   match(Set dst (XorL dst src));
10232   effect(KILL cr);
10233 
10234   format %{ "xorq    $dst, $src\t# long" %}
10235   opcode(0x33);
10236   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10237   ins_pipe(ialu_reg_reg);
10238 %}
10239 
10240 // Xor Register with Immediate -1
      // dst ^ -1 (immL_M1 operand) is emitted as a bitwise complement via the
      // assembler's notq().  No rFlagsReg operand or KILL effect is declared.
10241 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10242   match(Set dst (XorL dst imm));  
10243 
10244   format %{ "notq   $dst" %}  
10245   ins_encode %{
10246      __ notq($dst$$Register);
10247   %}
10248   ins_pipe(ialu_reg);
10249 %}
10250 
10251 // Xor Register with Immediate
      // dst = dst ^ src where src is an immL32 operand, opcode 81 /6 via the
      // wide OpcSErm_wide encoding; Con8or32 emits the immediate.  Flags are
      // declared killed.
10252 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10253 %{
10254   match(Set dst (XorL dst src));
10255   effect(KILL cr);
10256 
10257   format %{ "xorq    $dst, $src\t# long" %}
10258   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10259   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10260   ins_pipe(ialu_reg);
10261 %}
10262 
10263 // Xor Register with Memory
      // dst = dst ^ LoadL(src): the load is folded into the xor (opcode 0x33
      // with a memory operand, REX_reg_mem_wide prefix).  Cost 125; flags are
      // declared killed.
10264 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10265 %{
10266   match(Set dst (XorL dst (LoadL src)));
10267   effect(KILL cr);
10268 
10269   ins_cost(125);
10270   format %{ "xorq    $dst, $src\t# long" %}
10271   opcode(0x33);
10272   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10273   ins_pipe(ialu_reg_mem);
10274 %}
10275 
10276 // Xor Memory with Register
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) ^ src and
      // emits one xor-to-memory (opcode 31 /r, wide prefix).  The encoding
      // helpers take (src, dst): register as reg field, memory as r/m operand.
10277 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10278 %{
10279   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10280   effect(KILL cr);
10281 
10282   ins_cost(150);
10283   format %{ "xorq    $dst, $src\t# long" %}
10284   opcode(0x31); /* Opcode 31 /r */
10285   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10286   ins_pipe(ialu_mem_reg);
10287 %}
10288 
10289 // Xor Memory with Immediate
      // Read-modify-write: matches a StoreL to dst of (LoadL dst) ^ src for an
      // immL32 operand, opcode 81 /6 behind a REX_mem_wide prefix.  The /6
      // extension comes from the secondary opcode passed to RM_opc_mem.
10290 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10291 %{
10292   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10293   effect(KILL cr);
10294 
10295   ins_cost(125);
10296   format %{ "xorq    $dst, $src\t# long" %}
10297   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10298   ins_encode(REX_mem_wide(dst), OpcSE(src),
10299              RM_opc_mem(secondary, dst), Con8or32(src));
10300   ins_pipe(ialu_mem_imm);
10301 %}
10302 
10303 // Convert Int to Boolean
      // Conv2B of an int register.  Three instructions are emitted in order:
      // testl src,src (opcode 0x85), setnz on dst, then movzbl dst,dst
      // (0F B6) to zero-extend the byte written by setnz.  Flags are killed.
10304 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10305 %{
10306   match(Set dst (Conv2B src));
10307   effect(KILL cr);
10308 
10309   format %{ "testl   $src, $src\t# ci2b\n\t"
10310             "setnz   $dst\n\t"
10311             "movzbl  $dst, $dst" %}
10312   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10313              setNZ_reg(dst),
10314              REX_reg_breg(dst, dst), // movzbl
10315              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10316   ins_pipe(pipe_slow); // XXX
10317 %}
10318 
10319 // Convert Pointer to Boolean
      // Conv2B of a pointer register.  Same sequence as convI2B except that the
      // test uses the wide prefix (testq): testq src,src; setnz dst;
      // movzbl dst,dst.  Flags are killed.
10320 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10321 %{
10322   match(Set dst (Conv2B src));
10323   effect(KILL cr);
10324 
10325   format %{ "testq   $src, $src\t# cp2b\n\t"
10326             "setnz   $dst\n\t"
10327             "movzbl  $dst, $dst" %}
10328   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10329              setNZ_reg(dst),
10330              REX_reg_breg(dst, dst), // movzbl
10331              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10332   ins_pipe(pipe_slow); // XXX
10333 %}
10334 
10335 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10336 %{
      // General CmpLTMask of two int registers.  Per the format string the
      // sequence is: cmpl p,q; setlt dst; movzbl dst,dst; negl dst -- i.e. the
      // 0/1 comparison result is negated into dst.  Cost 400 (marked XXX by
      // the original author); flags are killed.
10337   match(Set dst (CmpLTMask p q));
10338   effect(KILL cr);
10339 
10340   ins_cost(400); // XXX
10341   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10342             "setlt   $dst\n\t"
10343             "movzbl  $dst, $dst\n\t"
10344             "negl    $dst" %}
10345   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10346              setLT_reg(dst),
10347              REX_reg_breg(dst, dst), // movzbl
10348              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10349              neg_reg(dst));
10350   ins_pipe(pipe_slow);
10351 %}
10352 
10353 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10354 %{
      // Special case of CmpLTMask against an immI0 operand: a single arithmetic
      // right shift of dst by 31 (C1 /7 with immediate 0x1F) replaces the
      // four-instruction general sequence.  Cost 100 versus 400 for cmpLTMask.
10355   match(Set dst (CmpLTMask dst zero));
10356   effect(KILL cr);
10357 
10358   ins_cost(100); // XXX
10359   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10360   opcode(0xC1, 0x7);  /* C1 /7 ib */
10361   ins_encode(reg_opc_imm(dst, 0x1F));
10362   ins_pipe(ialu_reg);
10363 %}
10364 
10365 
10366 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10367                          rRegI tmp,
10368                          rFlagsReg cr)
10369 %{
      // Fused pattern: p = ((CmpLTMask p q) & y) + (p - q).  Per the format
      // string: subl p,q; sbbl tmp,tmp; andl tmp,y; addl p,tmp.  tmp is a
      // scratch register (TEMP) and the flags are killed.  The actual bytes
      // come from the enc_cmpLTP encoding class defined elsewhere in the file.
10370   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10371   effect(TEMP tmp, KILL cr);
10372 
10373   ins_cost(400); // XXX
10374   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10375             "sbbl    $tmp, $tmp\n\t"
10376             "andl    $tmp, $y\n\t"
10377             "addl    $p, $tmp" %}
10378   ins_encode(enc_cmpLTP(p, q, y, tmp));
10379   ins_pipe(pipe_cmplt);
10380 %}
10381 
10382 /* If I enable this, I encourage spilling in the inner loop of compress.
10383 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10384 %{
10385   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10386   effect( TEMP tmp, KILL cr );
10387   ins_cost(400);
10388 
10389   format %{ "SUB    $p,$q\n\t"
10390             "SBB    RCX,RCX\n\t"
10391             "AND    RCX,$y\n\t"
10392             "ADD    $p,RCX" %}
10393   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10394 %}
10395 */
10396 
10397 //---------- FP Instructions------------------------------------------------
10398 
10399 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10400 %{
      // Float compare of two registers into rFlagsRegU.  ucomiss (0F 2E) is
      // followed by the cmpfp_fixup encoding; per the format string that fixup
      // is skipped with jnp when no NaN was seen, and otherwise rewrites the
      // saved flags on the stack (pushfq / andq / popfq).
10401   match(Set cr (CmpF src1 src2));
10402 
10403   ins_cost(145);
10404   format %{ "ucomiss $src1, $src2\n\t"
10405             "jnp,s   exit\n\t"
10406             "pushfq\t# saw NaN, set CF\n\t"
10407             "andq    [rsp], #0xffffff2b\n\t"
10408             "popfq\n"
10409     "exit:   nop\t# avoid branch to branch" %}
10410   opcode(0x0F, 0x2E);
10411   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10412              cmpfp_fixup);
10413   ins_pipe(pipe_slow);
10414 %}
10415 
10416 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
      // Float register compare producing rFlagsRegUCF: a bare ucomiss with no
      // NaN fixup sequence appended.
      // NOTE(review): the cost here is 145, the same as the fixup variant
      // cmpF_cc_reg, whereas the other *CF compares in this file use 100 --
      // confirm whether this is intentional.
10417   match(Set cr (CmpF src1 src2));
10418 
10419   ins_cost(145);
10420   format %{ "ucomiss $src1, $src2" %}
10421   ins_encode %{
10422     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10423   %}
10424   ins_pipe(pipe_slow);
10425 %}
10426 
10427 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10428 %{
      // Float compare of a register against a folded LoadF into rFlagsRegU:
      // ucomiss with a memory operand followed by the cmpfp_fixup encoding
      // (the NaN handling shown in the format string).
10429   match(Set cr (CmpF src1 (LoadF src2)));
10430 
10431   ins_cost(145);
10432   format %{ "ucomiss $src1, $src2\n\t"
10433             "jnp,s   exit\n\t"
10434             "pushfq\t# saw NaN, set CF\n\t"
10435             "andq    [rsp], #0xffffff2b\n\t"
10436             "popfq\n"
10437     "exit:   nop\t# avoid branch to branch" %}
10438   opcode(0x0F, 0x2E);
10439   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10440              cmpfp_fixup);
10441   ins_pipe(pipe_slow);
10442 %}
10443 
10444 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
      // Float compare against a folded LoadF producing rFlagsRegUCF: a bare
      // ucomiss with a memory operand and no fixup.  Cost 100 versus 145 for
      // the fixup variant cmpF_cc_mem.
10445   match(Set cr (CmpF src1 (LoadF src2)));
10446 
10447   ins_cost(100);
10448   format %{ "ucomiss $src1, $src2" %}
10449   opcode(0x0F, 0x2E);
10450   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10451   ins_pipe(pipe_slow);
10452 %}
10453 
10454 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10455 %{
      // Float compare of a register against a float constant into rFlagsRegU.
      // The constant operand is encoded through load_immF (with a REX_reg_mem
      // prefix), then the cmpfp_fixup encoding is appended.
10456   match(Set cr (CmpF src1 src2));
10457 
10458   ins_cost(145);
10459   format %{ "ucomiss $src1, $src2\n\t"
10460             "jnp,s   exit\n\t"
10461             "pushfq\t# saw NaN, set CF\n\t"
10462             "andq    [rsp], #0xffffff2b\n\t"
10463             "popfq\n"
10464     "exit:   nop\t# avoid branch to branch" %}
10465   opcode(0x0F, 0x2E);
10466   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10467              cmpfp_fixup);
10468   ins_pipe(pipe_slow);
10469 %}
10470 
10471 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
      // Float compare against a float constant producing rFlagsRegUCF: a bare
      // ucomiss whose constant operand is encoded through load_immF, with no
      // fixup.  Cost 100 versus 145 for cmpF_cc_imm.
10472   match(Set cr (CmpF src1 src2));
10473 
10474   ins_cost(100);
10475   format %{ "ucomiss $src1, $src2" %}
10476   opcode(0x0F, 0x2E);
10477   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10478   ins_pipe(pipe_slow);
10479 %}
10480 
10481 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10482 %{
      // Double compare of two registers into rFlagsRegU.  ucomisd is encoded as
      // 66 [REX] 0F 2E: the 0x66 byte (OpcP) is emitted before the REX prefix.
      // The cmpfp_fixup encoding is then appended (NaN handling per the format).
10483   match(Set cr (CmpD src1 src2));
10484 
10485   ins_cost(145);
10486   format %{ "ucomisd $src1, $src2\n\t"
10487             "jnp,s   exit\n\t"
10488             "pushfq\t# saw NaN, set CF\n\t"
10489             "andq    [rsp], #0xffffff2b\n\t"
10490             "popfq\n"
10491     "exit:   nop\t# avoid branch to branch" %}
10492   opcode(0x66, 0x0F, 0x2E);
10493   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10494              cmpfp_fixup);
10495   ins_pipe(pipe_slow);
10496 %}
10497 
10498 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
      // Double register compare producing rFlagsRegUCF: a bare ucomisd emitted
      // through the assembler, with no NaN fixup sequence.  Cost 100 versus 145
      // for the fixup variant cmpD_cc_reg.
      // The format string previously carried a stray " test" suffix that leaked
      // into the printed assembly; it has been removed.
10499   match(Set cr (CmpD src1 src2));
10500 
10501   ins_cost(100);
10502   format %{ "ucomisd $src1, $src2" %}
10503   ins_encode %{
10504     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10505   %}
10506   ins_pipe(pipe_slow);
10507 %}
10508 
10509 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10510 %{
      // Double compare of a register against a folded LoadD into rFlagsRegU:
      // ucomisd (66 [REX] 0F 2E) with a memory operand followed by the
      // cmpfp_fixup encoding.
10511   match(Set cr (CmpD src1 (LoadD src2)));
10512 
10513   ins_cost(145);
10514   format %{ "ucomisd $src1, $src2\n\t"
10515             "jnp,s   exit\n\t"
10516             "pushfq\t# saw NaN, set CF\n\t"
10517             "andq    [rsp], #0xffffff2b\n\t"
10518             "popfq\n"
10519     "exit:   nop\t# avoid branch to branch" %}
10520   opcode(0x66, 0x0F, 0x2E);
10521   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10522              cmpfp_fixup);
10523   ins_pipe(pipe_slow);
10524 %}
10525 
10526 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
      // Double compare against a folded LoadD producing rFlagsRegUCF: a bare
      // ucomisd with a memory operand and no fixup.  Cost 100 versus 145 for
      // cmpD_cc_mem.
10527   match(Set cr (CmpD src1 (LoadD src2)));
10528 
10529   ins_cost(100);
10530   format %{ "ucomisd $src1, $src2" %}
10531   opcode(0x66, 0x0F, 0x2E);
10532   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10533   ins_pipe(pipe_slow);
10534 %}
10535 
10536 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10537 %{
      // Double compare of a register against a double constant into
      // rFlagsRegU.  The constant operand is encoded through load_immD (with a
      // REX_reg_mem prefix), then the cmpfp_fixup encoding is appended.
10538   match(Set cr (CmpD src1 src2));
10539 
10540   ins_cost(145);
10541   format %{ "ucomisd $src1, [$src2]\n\t"
10542             "jnp,s   exit\n\t"
10543             "pushfq\t# saw NaN, set CF\n\t"
10544             "andq    [rsp], #0xffffff2b\n\t"
10545             "popfq\n"
10546     "exit:   nop\t# avoid branch to branch" %}
10547   opcode(0x66, 0x0F, 0x2E);
10548   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10549              cmpfp_fixup);
10550   ins_pipe(pipe_slow);
10551 %}
10552 
10553 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
      // Double compare against a double constant producing rFlagsRegUCF: a
      // bare ucomisd whose constant operand is encoded through load_immD, with
      // no fixup.  Cost 100 versus 145 for cmpD_cc_imm.
10554   match(Set cr (CmpD src1 src2));
10555 
10556   ins_cost(100);
10557   format %{ "ucomisd $src1, [$src2]" %}
10558   opcode(0x66, 0x0F, 0x2E);
10559   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10560   ins_pipe(pipe_slow);
10561 %}
10562 
10563 // Compare into -1,0,1
      // CmpF3 of two float registers into an int register.  ucomiss is followed
      // by the cmpfp3(dst) encoding; per the format string dst is preloaded
      // with -1 and kept on the jp/jb paths, otherwise setne + movzbl produce
      // the remaining results.  Flags are killed.
10564 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10565 %{
10566   match(Set dst (CmpF3 src1 src2));
10567   effect(KILL cr);
10568 
10569   ins_cost(275);
10570   format %{ "ucomiss $src1, $src2\n\t"
10571             "movl    $dst, #-1\n\t"
10572             "jp,s    done\n\t"
10573             "jb,s    done\n\t"
10574             "setne   $dst\n\t"
10575             "movzbl  $dst, $dst\n"
10576     "done:" %}
10577 
10578   opcode(0x0F, 0x2E);
10579   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10580              cmpfp3(dst));
10581   ins_pipe(pipe_slow);
10582 %}
10583 
10584 // Compare into -1,0,1
      // CmpF3 of a float register against a folded LoadF.  Same result sequence
      // as cmpF_reg (cmpfp3(dst) after the compare), but ucomiss takes a
      // memory operand.  Flags are killed.
10585 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10586 %{
10587   match(Set dst (CmpF3 src1 (LoadF src2)));
10588   effect(KILL cr);
10589 
10590   ins_cost(275);
10591   format %{ "ucomiss $src1, $src2\n\t"
10592             "movl    $dst, #-1\n\t"
10593             "jp,s    done\n\t"
10594             "jb,s    done\n\t"
10595             "setne   $dst\n\t"
10596             "movzbl  $dst, $dst\n"
10597     "done:" %}
10598 
10599   opcode(0x0F, 0x2E);
10600   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10601              cmpfp3(dst));
10602   ins_pipe(pipe_slow);
10603 %}
10604 
10605 // Compare into -1,0,1
      // CmpF3 of a float register against a float constant.  The constant
      // operand is encoded through load_immF; cmpfp3(dst) then materializes the
      // result in dst as shown in the format string.  Flags are killed.
10606 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10607 %{
10608   match(Set dst (CmpF3 src1 src2));
10609   effect(KILL cr);
10610 
10611   ins_cost(275);
10612   format %{ "ucomiss $src1, [$src2]\n\t"
10613             "movl    $dst, #-1\n\t"
10614             "jp,s    done\n\t"
10615             "jb,s    done\n\t"
10616             "setne   $dst\n\t"
10617             "movzbl  $dst, $dst\n"
10618     "done:" %}
10619 
10620   opcode(0x0F, 0x2E);
10621   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10622              cmpfp3(dst));
10623   ins_pipe(pipe_slow);
10624 %}
10625 
10626 // Compare into -1,0,1
      // CmpD3 of two double registers into an int register.  ucomisd
      // (66 [REX] 0F 2E) is followed by the cmpfp3(dst) encoding; per the
      // format string dst is preloaded with -1 and kept on the jp/jb paths,
      // otherwise setne + movzbl produce the remaining results.
10627 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10628 %{
10629   match(Set dst (CmpD3 src1 src2));
10630   effect(KILL cr);
10631 
10632   ins_cost(275);
10633   format %{ "ucomisd $src1, $src2\n\t"
10634             "movl    $dst, #-1\n\t"
10635             "jp,s    done\n\t"
10636             "jb,s    done\n\t"
10637             "setne   $dst\n\t"
10638             "movzbl  $dst, $dst\n"
10639     "done:" %}
10640 
10641   opcode(0x66, 0x0F, 0x2E);
10642   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10643              cmpfp3(dst));
10644   ins_pipe(pipe_slow);
10645 %}
10646 
10647 // Compare into -1,0,1
10648 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10649 %{
        // Three-way double compare of $src1 against a double loaded from
        // $src2 (the LoadD is folded into ucomisd). Per the format: -1 if
        // jp/jb is taken, else 0 (equal) or 1. Flags are clobbered.
10650   match(Set dst (CmpD3 src1 (LoadD src2)));
10651   effect(KILL cr);
10652
10653   ins_cost(275);
10654   format %{ "ucomisd $src1, $src2\n\t"
10655             "movl    $dst, #-1\n\t"
10656             "jp,s    done\n\t"
10657             "jb,s    done\n\t"
10658             "setne   $dst\n\t"
10659             "movzbl  $dst, $dst\n"
10660     "done:" %}
10661
10662   opcode(0x66, 0x0F, 0x2E);
10663   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10664              cmpfp3(dst));
10665   ins_pipe(pipe_slow);
10666 %}
10667 
10668 // Compare into -1,0,1
10669 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10670 %{
        // Three-way double compare of $src1 against a double constant. The
        // format shows the constant as a memory operand ([$src2]); its operand
        // bytes come from load_immD. Per the format: -1 if jp/jb is taken,
        // else 0 (equal) or 1. Flags are clobbered.
10671   match(Set dst (CmpD3 src1 src2));
10672   effect(KILL cr);
10673
10674   ins_cost(275);
10675   format %{ "ucomisd $src1, [$src2]\n\t"
10676             "movl    $dst, #-1\n\t"
10677             "jp,s    done\n\t"
10678             "jb,s    done\n\t"
10679             "setne   $dst\n\t"
10680             "movzbl  $dst, $dst\n"
10681     "done:" %}
10682
10683   opcode(0x66, 0x0F, 0x2E);
10684   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10685              cmpfp3(dst));
10686   ins_pipe(pipe_slow);
10687 %}
10688 
10689 instruct addF_reg(regF dst, regF src)
10690 %{
        // Float add, register form: $dst = $dst + $src (addss).
10691   match(Set dst (AddF dst src));
10692
10693   format %{ "addss   $dst, $src" %}
10694   ins_cost(150); // XXX
10695   opcode(0xF3, 0x0F, 0x58);
10696   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10697   ins_pipe(pipe_slow);
10698 %}
10699 
10700 instruct addF_mem(regF dst, memory src)
10701 %{
        // Float add with the second operand loaded from memory; the LoadF
        // is folded into the addss memory operand.
10702   match(Set dst (AddF dst (LoadF src)));
10703
10704   format %{ "addss   $dst, $src" %}
10705   ins_cost(150); // XXX
10706   opcode(0xF3, 0x0F, 0x58);
10707   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10708   ins_pipe(pipe_slow);
10709 %}
10710 
10711 instruct addF_imm(regF dst, immF src)
10712 %{
        // Float add of a constant. The format shows the constant as a memory
        // operand ([$src]); its operand bytes come from load_immF.
10713   match(Set dst (AddF dst src));
10714
10715   format %{ "addss   $dst, [$src]" %}
10716   ins_cost(150); // XXX
10717   opcode(0xF3, 0x0F, 0x58);
10718   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10719   ins_pipe(pipe_slow);
10720 %}
10721 
10722 instruct addD_reg(regD dst, regD src)
10723 %{
        // Double add, register form: $dst = $dst + $src (addsd).
10724   match(Set dst (AddD dst src));
10725
10726   format %{ "addsd   $dst, $src" %}
10727   ins_cost(150); // XXX
10728   opcode(0xF2, 0x0F, 0x58);
10729   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10730   ins_pipe(pipe_slow);
10731 %}
10732 
10733 instruct addD_mem(regD dst, memory src)
10734 %{
        // Double add with the second operand loaded from memory; the LoadD
        // is folded into the addsd memory operand.
10735   match(Set dst (AddD dst (LoadD src)));
10736
10737   format %{ "addsd   $dst, $src" %}
10738   ins_cost(150); // XXX
10739   opcode(0xF2, 0x0F, 0x58);
10740   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10741   ins_pipe(pipe_slow);
10742 %}
10743 
10744 instruct addD_imm(regD dst, immD src)
10745 %{
        // Double add of a constant. The format shows the constant as a memory
        // operand ([$src]); its operand bytes come from load_immD.
10746   match(Set dst (AddD dst src));
10747
10748   format %{ "addsd   $dst, [$src]" %}
10749   ins_cost(150); // XXX
10750   opcode(0xF2, 0x0F, 0x58);
10751   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10752   ins_pipe(pipe_slow);
10753 %}
10754 
10755 instruct subF_reg(regF dst, regF src)
10756 %{
        // Float subtract, register form: $dst = $dst - $src (subss).
10757   match(Set dst (SubF dst src));
10758
10759   format %{ "subss   $dst, $src" %}
10760   ins_cost(150); // XXX
10761   opcode(0xF3, 0x0F, 0x5C);
10762   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10763   ins_pipe(pipe_slow);
10764 %}
10765 
10766 instruct subF_mem(regF dst, memory src)
10767 %{
        // Float subtract with the subtrahend loaded from memory; the LoadF
        // is folded into the subss memory operand.
10768   match(Set dst (SubF dst (LoadF src)));
10769
10770   format %{ "subss   $dst, $src" %}
10771   ins_cost(150); // XXX
10772   opcode(0xF3, 0x0F, 0x5C);
10773   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10774   ins_pipe(pipe_slow);
10775 %}
10776 
10777 instruct subF_imm(regF dst, immF src)
10778 %{
        // Float subtract of a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immF.
10779   match(Set dst (SubF dst src));
10780
10781   format %{ "subss   $dst, [$src]" %}
10782   ins_cost(150); // XXX
10783   opcode(0xF3, 0x0F, 0x5C);
10784   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10785   ins_pipe(pipe_slow);
10786 %}
10787 
10788 instruct subD_reg(regD dst, regD src)
10789 %{
        // Double subtract, register form: $dst = $dst - $src (subsd).
10790   match(Set dst (SubD dst src));
10791
10792   format %{ "subsd   $dst, $src" %}
10793   ins_cost(150); // XXX
10794   opcode(0xF2, 0x0F, 0x5C);
10795   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10796   ins_pipe(pipe_slow);
10797 %}
10798 
10799 instruct subD_mem(regD dst, memory src)
10800 %{
        // Double subtract with the subtrahend loaded from memory; the LoadD
        // is folded into the subsd memory operand.
10801   match(Set dst (SubD dst (LoadD src)));
10802
10803   format %{ "subsd   $dst, $src" %}
10804   ins_cost(150); // XXX
10805   opcode(0xF2, 0x0F, 0x5C);
10806   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10807   ins_pipe(pipe_slow);
10808 %}
10809 
10810 instruct subD_imm(regD dst, immD src)
10811 %{
        // Double subtract of a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immD.
10812   match(Set dst (SubD dst src));
10813
10814   format %{ "subsd   $dst, [$src]" %}
10815   ins_cost(150); // XXX
10816   opcode(0xF2, 0x0F, 0x5C);
10817   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10818   ins_pipe(pipe_slow);
10819 %}
10820 
10821 instruct mulF_reg(regF dst, regF src)
10822 %{
        // Float multiply, register form: $dst = $dst * $src (mulss).
10823   match(Set dst (MulF dst src));
10824
10825   format %{ "mulss   $dst, $src" %}
10826   ins_cost(150); // XXX
10827   opcode(0xF3, 0x0F, 0x59);
10828   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10829   ins_pipe(pipe_slow);
10830 %}
10831 
10832 instruct mulF_mem(regF dst, memory src)
10833 %{
        // Float multiply with the second operand loaded from memory; the
        // LoadF is folded into the mulss memory operand.
10834   match(Set dst (MulF dst (LoadF src)));
10835
10836   format %{ "mulss   $dst, $src" %}
10837   ins_cost(150); // XXX
10838   opcode(0xF3, 0x0F, 0x59);
10839   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10840   ins_pipe(pipe_slow);
10841 %}
10842 
10843 instruct mulF_imm(regF dst, immF src)
10844 %{
        // Float multiply by a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immF.
10845   match(Set dst (MulF dst src));
10846
10847   format %{ "mulss   $dst, [$src]" %}
10848   ins_cost(150); // XXX
10849   opcode(0xF3, 0x0F, 0x59);
10850   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10851   ins_pipe(pipe_slow);
10852 %}
10853 
10854 instruct mulD_reg(regD dst, regD src)
10855 %{
        // Double multiply, register form: $dst = $dst * $src (mulsd).
10856   match(Set dst (MulD dst src));
10857
10858   format %{ "mulsd   $dst, $src" %}
10859   ins_cost(150); // XXX
10860   opcode(0xF2, 0x0F, 0x59);
10861   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10862   ins_pipe(pipe_slow);
10863 %}
10864 
10865 instruct mulD_mem(regD dst, memory src)
10866 %{
        // Double multiply with the second operand loaded from memory; the
        // LoadD is folded into the mulsd memory operand.
10867   match(Set dst (MulD dst (LoadD src)));
10868
10869   format %{ "mulsd   $dst, $src" %}
10870   ins_cost(150); // XXX
10871   opcode(0xF2, 0x0F, 0x59);
10872   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10873   ins_pipe(pipe_slow);
10874 %}
10875 
10876 instruct mulD_imm(regD dst, immD src)
10877 %{
        // Double multiply by a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immD.
10878   match(Set dst (MulD dst src));
10879
10880   format %{ "mulsd   $dst, [$src]" %}
10881   ins_cost(150); // XXX
10882   opcode(0xF2, 0x0F, 0x59);
10883   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10884   ins_pipe(pipe_slow);
10885 %}
10886 
10887 instruct divF_reg(regF dst, regF src)
10888 %{
        // Float divide, register form: $dst = $dst / $src (divss).
10889   match(Set dst (DivF dst src));
10890
10891   format %{ "divss   $dst, $src" %}
10892   ins_cost(150); // XXX
10893   opcode(0xF3, 0x0F, 0x5E);
10894   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10895   ins_pipe(pipe_slow);
10896 %}
10897 
10898 instruct divF_mem(regF dst, memory src)
10899 %{
        // Float divide with the divisor loaded from memory; the LoadF is
        // folded into the divss memory operand.
10900   match(Set dst (DivF dst (LoadF src)));
10901
10902   format %{ "divss   $dst, $src" %}
10903   ins_cost(150); // XXX
10904   opcode(0xF3, 0x0F, 0x5E);
10905   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10906   ins_pipe(pipe_slow);
10907 %}
10908 
10909 instruct divF_imm(regF dst, immF src)
10910 %{
        // Float divide by a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immF.
10911   match(Set dst (DivF dst src));
10912
10913   format %{ "divss   $dst, [$src]" %}
10914   ins_cost(150); // XXX
10915   opcode(0xF3, 0x0F, 0x5E);
10916   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10917   ins_pipe(pipe_slow);
10918 %}
10919 
10920 instruct divD_reg(regD dst, regD src)
10921 %{
        // Double divide, register form: $dst = $dst / $src (divsd).
10922   match(Set dst (DivD dst src));
10923
10924   format %{ "divsd   $dst, $src" %}
10925   ins_cost(150); // XXX
10926   opcode(0xF2, 0x0F, 0x5E);
10927   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10928   ins_pipe(pipe_slow);
10929 %}
10930 
10931 instruct divD_mem(regD dst, memory src)
10932 %{
        // Double divide with the divisor loaded from memory; the LoadD is
        // folded into the divsd memory operand.
10933   match(Set dst (DivD dst (LoadD src)));
10934
10935   format %{ "divsd   $dst, $src" %}
10936   ins_cost(150); // XXX
10937   opcode(0xF2, 0x0F, 0x5E);
10938   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10939   ins_pipe(pipe_slow);
10940 %}
10941 
10942 instruct divD_imm(regD dst, immD src)
10943 %{
        // Double divide by a constant. The format shows the constant as a
        // memory operand ([$src]); its operand bytes come from load_immD.
10944   match(Set dst (DivD dst src));
10945
10946   format %{ "divsd   $dst, [$src]" %}
10947   ins_cost(150); // XXX
10948   opcode(0xF2, 0x0F, 0x5E);
10949   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10950   ins_pipe(pipe_slow);
10951 %}
10952 
10953 instruct sqrtF_reg(regF dst, regF src)
10954 %{
        // Float square root. Matches the whole ConvD2F(SqrtD(ConvF2D src))
        // subtree and, per the format, covers it with a single sqrtss.
10955   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10956
10957   format %{ "sqrtss  $dst, $src" %}
10958   ins_cost(150); // XXX
10959   opcode(0xF3, 0x0F, 0x51);
10960   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10961   ins_pipe(pipe_slow);
10962 %}
10963 
10964 instruct sqrtF_mem(regF dst, memory src)
10965 %{
        // Float square root of a value loaded from memory. Matches
        // ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits one sqrtss with a
        // memory operand.
10966   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10967
10968   format %{ "sqrtss  $dst, $src" %}
10969   ins_cost(150); // XXX
10970   opcode(0xF3, 0x0F, 0x51);
10971   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10972   ins_pipe(pipe_slow);
10973 %}
10974 
10975 instruct sqrtF_imm(regF dst, immF src)
10976 %{
        // Float square root of a constant. Same match shape as sqrtF_reg but
        // with an immF operand; the format shows the constant as a memory
        // operand ([$src]) whose operand bytes come from load_immF.
10977   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10978
10979   format %{ "sqrtss  $dst, [$src]" %}
10980   ins_cost(150); // XXX
10981   opcode(0xF3, 0x0F, 0x51);
10982   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10983   ins_pipe(pipe_slow);
10984 %}
10985 
10986 instruct sqrtD_reg(regD dst, regD src)
10987 %{
        // Double square root, register form: $dst = sqrt($src) (sqrtsd).
10988   match(Set dst (SqrtD src));
10989
10990   format %{ "sqrtsd  $dst, $src" %}
10991   ins_cost(150); // XXX
10992   opcode(0xF2, 0x0F, 0x51);
10993   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10994   ins_pipe(pipe_slow);
10995 %}
10996 
10997 instruct sqrtD_mem(regD dst, memory src)
10998 %{
        // Double square root of a value loaded from memory; the LoadD is
        // folded into the sqrtsd memory operand.
10999   match(Set dst (SqrtD (LoadD src)));
11000
11001   format %{ "sqrtsd  $dst, $src" %}
11002   ins_cost(150); // XXX
11003   opcode(0xF2, 0x0F, 0x51);
11004   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11005   ins_pipe(pipe_slow);
11006 %}
11007 
11008 instruct sqrtD_imm(regD dst, immD src)
11009 %{
        // Double square root of a constant. The format shows the constant as
        // a memory operand ([$src]); its operand bytes come from load_immD.
11010   match(Set dst (SqrtD src));
11011
11012   format %{ "sqrtsd  $dst, [$src]" %}
11013   ins_cost(150); // XXX
11014   opcode(0xF2, 0x0F, 0x51);
11015   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11016   ins_pipe(pipe_slow);
11017 %}
11018 
11019 instruct absF_reg(regF dst)
11020 %{
        // In-place float absolute value. Per the format this is an andps
        // against a 0x7fffffff mask; the bytes come from absF_encoding.
11021   match(Set dst (AbsF dst));
11022
11023   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
11024   ins_encode(absF_encoding(dst));
11025   ins_pipe(pipe_slow);
11026 %}
11027 
11028 instruct absD_reg(regD dst)
11029 %{
        // In-place double absolute value. Per the format this is an andpd
        // against a 0x7fffffffffffffff mask; the bytes come from
        // absD_encoding.
11030   match(Set dst (AbsD dst));
11031
11032   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11033             "# abs double by sign masking" %}
11034   ins_encode(absD_encoding(dst));
11035   ins_pipe(pipe_slow);
11036 %}
11037 
11038 instruct negF_reg(regF dst)
11039 %{
        // In-place float negation. Per the format this is an xorps against
        // a 0x80000000 mask; the bytes come from negF_encoding.
11040   match(Set dst (NegF dst));
11041
11042   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11043   ins_encode(negF_encoding(dst));
11044   ins_pipe(pipe_slow);
11045 %}
11046 
11047 instruct negD_reg(regD dst)
11048 %{
        // In-place double negation. Per the format this is an xorpd against
        // a 0x8000000000000000 mask; the bytes come from negD_encoding.
11049   match(Set dst (NegD dst));
11050
11051   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11052             "# neg double by sign flipping" %}
11053   ins_encode(negD_encoding(dst));
11054   ins_pipe(pipe_slow);
11055 %}
11056 
11057 // -----------Trig and Transcendental Instructions-----------------------------
11058 instruct cosD_reg(regD dst) %{
        // In-place double cosine. Opcode bytes D9 FF are emitted between
        // Push_SrcXD and Push_ResultXD.
        // NOTE(review): presumably those helpers move $dst through the x87
        // stack (and adjust rsp) -- confirm against their enc_class bodies.
11059   match(Set dst (CosD dst));
11060
11061   format %{ "dcos   $dst\n\t" %}
11062   opcode(0xD9, 0xFF);
11063   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11064   ins_pipe( pipe_slow );
11065 %}
11066 
11067 instruct sinD_reg(regD dst) %{
        // In-place double sine. Opcode bytes D9 FE are emitted between
        // Push_SrcXD and Push_ResultXD.
        // NOTE(review): presumably those helpers move $dst through the x87
        // stack (and adjust rsp) -- confirm against their enc_class bodies.
11068   match(Set dst (SinD dst));
11069
11070   format %{ "dsin   $dst\n\t" %}
11071   opcode(0xD9, 0xFE);
11072   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11073   ins_pipe( pipe_slow );
11074 %}
11075 
11076 instruct tanD_reg(regD dst) %{
        // In-place double tangent. Emits fptan followed by "fstp st" between
        // Push_SrcXD and Push_ResultXD.
        // NOTE(review): the fstp presumably discards the extra value fptan
        // leaves on the x87 stack -- confirm.
11077   match(Set dst (TanD dst));
11078
11079   format %{ "dtan   $dst\n\t" %}
11080   ins_encode( Push_SrcXD(dst),
11081               Opcode(0xD9), Opcode(0xF2),   //fptan
11082               Opcode(0xDD), Opcode(0xD8),   //fstp st
11083               Push_ResultXD(dst) );
11084   ins_pipe( pipe_slow );
11085 %}
11086 
11087 instruct log10D_reg(regD dst) %{
        // In-place base-10 logarithm of a double, computed as
        // log_10(2) * log_2(x). The constant is pushed first (fldlg2), then
        // the operand (Push_SrcXD), then fyl2x combines the two.
11088   // The source and result Double operands in XMM registers
11089   match(Set dst (Log10D dst));
11090   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11091   // fyl2x        ; compute log_10(2) * log_2(x)
11092   format %{ "fldlg2\t\t\t#Log10\n\t"
11093             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11094          %}
11095    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11096               Push_SrcXD(dst),
11097               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11098               Push_ResultXD(dst));
11099
11100   ins_pipe( pipe_slow );
11101 %}
11102 
11103 instruct logD_reg(regD dst) %{
        // In-place natural logarithm of a double, computed as
        // log_e(2) * log_2(x). The constant is pushed first (fldln2), then
        // the operand (Push_SrcXD), then fyl2x combines the two.
11104   // The source and result Double operands in XMM registers
11105   match(Set dst (LogD dst));
11106   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11107   // fyl2x        ; compute log_e(2) * log_2(x)
11108   format %{ "fldln2\t\t\t#Log_e\n\t"
11109             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11110          %}
11111   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11112               Push_SrcXD(dst),
11113               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11114               Push_ResultXD(dst));
11115   ins_pipe( pipe_slow );
11116 %}
11117 
11118 
11119 
11120 //----------Arithmetic Conversion Instructions---------------------------------
11121 
11122 instruct roundFloat_nop(regF dst)
11123 %{
        // RoundFloat produces no code here: zero cost, empty encoding.
        // NOTE(review): presumably because SSE float arithmetic already
        // rounds to single precision -- confirm.
11124   match(Set dst (RoundFloat dst));
11125
11126   ins_cost(0);
11127   ins_encode();
11128   ins_pipe(empty);
11129 %}
11130 
11131 instruct roundDouble_nop(regD dst)
11132 %{
        // RoundDouble produces no code here: zero cost, empty encoding.
        // NOTE(review): presumably because SSE double arithmetic already
        // rounds to double precision -- confirm.
11133   match(Set dst (RoundDouble dst));
11134
11135   ins_cost(0);
11136   ins_encode();
11137   ins_pipe(empty);
11138 %}
11139 
11140 instruct convF2D_reg_reg(regD dst, regF src)
11141 %{
        // Widen float to double, register form (cvtss2sd).
11142   match(Set dst (ConvF2D src));
11143
11144   format %{ "cvtss2sd $dst, $src" %}
11145   opcode(0xF3, 0x0F, 0x5A);
11146   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11147   ins_pipe(pipe_slow); // XXX
11148 %}
11149 
11150 instruct convF2D_reg_mem(regD dst, memory src)
11151 %{
        // Widen a float loaded from memory to double; the LoadF is folded
        // into the cvtss2sd memory operand.
11152   match(Set dst (ConvF2D (LoadF src)));
11153
11154   format %{ "cvtss2sd $dst, $src" %}
11155   opcode(0xF3, 0x0F, 0x5A);
11156   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11157   ins_pipe(pipe_slow); // XXX
11158 %}
11159 
11160 instruct convD2F_reg_reg(regF dst, regD src)
11161 %{
        // Narrow double to float, register form (cvtsd2ss).
11162   match(Set dst (ConvD2F src));
11163
11164   format %{ "cvtsd2ss $dst, $src" %}
11165   opcode(0xF2, 0x0F, 0x5A);
11166   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11167   ins_pipe(pipe_slow); // XXX
11168 %}
11169 
11170 instruct convD2F_reg_mem(regF dst, memory src)
11171 %{
        // Narrow a double loaded from memory to float; the LoadD is folded
        // into the cvtsd2ss memory operand.
11172   match(Set dst (ConvD2F (LoadD src)));
11173
11174   format %{ "cvtsd2ss $dst, $src" %}
11175   opcode(0xF2, 0x0F, 0x5A);
11176   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11177   ins_pipe(pipe_slow); // XXX
11178 %}
11179 
11180 // XXX do mem variants
11181 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11182 %{
        // Float -> int conversion (cvttss2si). Per the format, a result of
        // 0x80000000 takes a slow path: $src is spilled to the stack,
        // f2i_fixup is called and the result is popped into $dst. The
        // compare clobbers the flags (KILL cr).
11183   match(Set dst (ConvF2I src));
11184   effect(KILL cr);
11185
11186   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11187             "cmpl    $dst, #0x80000000\n\t"
11188             "jne,s   done\n\t"
11189             "subq    rsp, #8\n\t"
11190             "movss   [rsp], $src\n\t"
11191             "call    f2i_fixup\n\t"
11192             "popq    $dst\n"
11193     "done:   "%}
11194   opcode(0xF3, 0x0F, 0x2C);
11195   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11196              f2i_fixup(dst, src));
11197   ins_pipe(pipe_slow);
11198 %}
11199 
11200 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11201 %{
        // Float -> long conversion (cvttss2si, 64-bit form via the _wide REX
        // helper). Per the format, a result of 0x8000000000000000 takes a
        // slow path: $src is spilled to the stack, f2l_fixup is called and
        // the result is popped into $dst. Flags are clobbered (KILL cr).
11202   match(Set dst (ConvF2L src));
11203   effect(KILL cr);
11204
11205   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11206             "cmpq    $dst, [0x8000000000000000]\n\t"
11207             "jne,s   done\n\t"
11208             "subq    rsp, #8\n\t"
11209             "movss   [rsp], $src\n\t"
11210             "call    f2l_fixup\n\t"
11211             "popq    $dst\n"
11212     "done:   "%}
11213   opcode(0xF3, 0x0F, 0x2C);
11214   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11215              f2l_fixup(dst, src));
11216   ins_pipe(pipe_slow);
11217 %}
11218 
11219 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11220 %{
        // Double -> int conversion (cvttsd2si). Per the format, a result of
        // 0x80000000 takes a slow path: $src is spilled to the stack,
        // d2i_fixup is called and the result is popped into $dst. The
        // compare clobbers the flags (KILL cr).
11221   match(Set dst (ConvD2I src));
11222   effect(KILL cr);
11223
11224   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11225             "cmpl    $dst, #0x80000000\n\t"
11226             "jne,s   done\n\t"
11227             "subq    rsp, #8\n\t"
11228             "movsd   [rsp], $src\n\t"
11229             "call    d2i_fixup\n\t"
11230             "popq    $dst\n"
11231     "done:   "%}
11232   opcode(0xF2, 0x0F, 0x2C);
11233   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11234              d2i_fixup(dst, src));
11235   ins_pipe(pipe_slow);
11236 %}
11237 
11238 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11239 %{
        // Double -> long conversion (cvttsd2si, 64-bit form via the _wide
        // REX helper). Per the format, a result of 0x8000000000000000 takes
        // a slow path: $src is spilled to the stack, d2l_fixup is called and
        // the result is popped into $dst. Flags are clobbered (KILL cr).
11240   match(Set dst (ConvD2L src));
11241   effect(KILL cr);
11242
11243   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11244             "cmpq    $dst, [0x8000000000000000]\n\t"
11245             "jne,s   done\n\t"
11246             "subq    rsp, #8\n\t"
11247             "movsd   [rsp], $src\n\t"
11248             "call    d2l_fixup\n\t"
11249             "popq    $dst\n"
11250     "done:   "%}
11251   opcode(0xF2, 0x0F, 0x2C);
11252   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11253              d2l_fixup(dst, src));
11254   ins_pipe(pipe_slow);
11255 %}
11256 
11257 instruct convI2F_reg_reg(regF dst, rRegI src)
11258 %{
        // int -> float via cvtsi2ss. Selected only when UseXmmI2F is off;
        // convXI2F_reg matches the same node when the flag is on.
11259   predicate(!UseXmmI2F);
11260   match(Set dst (ConvI2F src));
11261
11262   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11263   opcode(0xF3, 0x0F, 0x2A);
11264   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11265   ins_pipe(pipe_slow); // XXX
11266 %}
11267 
11268 instruct convI2F_reg_mem(regF dst, memory src)
11269 %{
        // int -> float with the int loaded from memory (LoadI folded into
        // cvtsi2ss). No UseXmmI2F predicate, so this form is available
        // regardless of that flag.
11270   match(Set dst (ConvI2F (LoadI src)));
11271
11272   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11273   opcode(0xF3, 0x0F, 0x2A);
11274   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11275   ins_pipe(pipe_slow); // XXX
11276 %}
11277 
11278 instruct convI2D_reg_reg(regD dst, rRegI src)
11279 %{
        // int -> double via cvtsi2sd. Selected only when UseXmmI2D is off;
        // convXI2D_reg matches the same node when the flag is on.
11280   predicate(!UseXmmI2D);
11281   match(Set dst (ConvI2D src));
11282
11283   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11284   opcode(0xF2, 0x0F, 0x2A);
11285   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11286   ins_pipe(pipe_slow); // XXX
11287 %}
11288 
11289 instruct convI2D_reg_mem(regD dst, memory src)
11290 %{
        // int -> double with the int loaded from memory (LoadI folded into
        // cvtsi2sd). No UseXmmI2D predicate, so this form is available
        // regardless of that flag.
11291   match(Set dst (ConvI2D (LoadI src)));
11292
11293   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11294   opcode(0xF2, 0x0F, 0x2A);
11295   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11296   ins_pipe(pipe_slow); // XXX
11297 %}
11298 
11299 instruct convXI2F_reg(regF dst, rRegI src)
11300 %{
        // int -> float when UseXmmI2F is on: move the int into the XMM
        // register (movdl), then convert it in place with cvtdq2ps.
11301   predicate(UseXmmI2F);
11302   match(Set dst (ConvI2F src));
11303
11304   format %{ "movdl $dst, $src\n\t"
11305             "cvtdq2psl $dst, $dst\t# i2f" %}
11306   ins_encode %{
11307     __ movdl($dst$$XMMRegister, $src$$Register);
11308     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11309   %}
11310   ins_pipe(pipe_slow); // XXX
11311 %}
11312 
11313 instruct convXI2D_reg(regD dst, rRegI src)
11314 %{
        // int -> double when UseXmmI2D is on: move the int into the XMM
        // register (movdl), then convert it in place with cvtdq2pd.
11315   predicate(UseXmmI2D);
11316   match(Set dst (ConvI2D src));
11317
11318   format %{ "movdl $dst, $src\n\t"
11319             "cvtdq2pdl $dst, $dst\t# i2d" %}
11320   ins_encode %{
11321     __ movdl($dst$$XMMRegister, $src$$Register);
11322     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11323   %}
11324   ins_pipe(pipe_slow); // XXX
11325 %}
11326 
11327 instruct convL2F_reg_reg(regF dst, rRegL src)
11328 %{
        // long -> float (cvtsi2ssq). Same opcode bytes as the int form; the
        // _wide REX helper selects the 64-bit integer source.
11329   match(Set dst (ConvL2F src));
11330
11331   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11332   opcode(0xF3, 0x0F, 0x2A);
11333   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11334   ins_pipe(pipe_slow); // XXX
11335 %}
11336 
11337 instruct convL2F_reg_mem(regF dst, memory src)
11338 %{
        // long -> float with the long loaded from memory (LoadL folded into
        // cvtsi2ssq); uses the _wide REX helper for the 64-bit source.
11339   match(Set dst (ConvL2F (LoadL src)));
11340
11341   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11342   opcode(0xF3, 0x0F, 0x2A);
11343   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11344   ins_pipe(pipe_slow); // XXX
11345 %}
11346 
11347 instruct convL2D_reg_reg(regD dst, rRegL src)
11348 %{
        // long -> double (cvtsi2sdq). Same opcode bytes as the int form; the
        // _wide REX helper selects the 64-bit integer source.
11349   match(Set dst (ConvL2D src));
11350
11351   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11352   opcode(0xF2, 0x0F, 0x2A);
11353   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11354   ins_pipe(pipe_slow); // XXX
11355 %}
11356 
11357 instruct convL2D_reg_mem(regD dst, memory src)
11358 %{
        // long -> double with the long loaded from memory (LoadL folded into
        // cvtsi2sdq); uses the _wide REX helper for the 64-bit source.
11359   match(Set dst (ConvL2D (LoadL src)));
11360
11361   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11362   opcode(0xF2, 0x0F, 0x2A);
11363   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11364   ins_pipe(pipe_slow); // XXX
11365 %}
11366 
11367 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11368 %{
        // int -> long, emitted through the assembler's movslq. The
        // zero-extending *_zex variants cover the masked
        // (AndL (ConvI2L ...) mask) shape.
11369   match(Set dst (ConvI2L src));
11370
11371   ins_cost(125);
11372   format %{ "movslq  $dst, $src\t# i2l" %}
11373   ins_encode %{
11374     __ movslq($dst$$Register, $src$$Register);
11375   %}
11376   ins_pipe(ialu_reg_reg);
11377 %}
11378 
11379 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11380 // %{
11381 //   match(Set dst (ConvI2L src));
11382 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11383 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11384 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11385 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11386 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11387 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11388 
11389 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11390 //   ins_encode(enc_copy(dst, src));
11391 // //   opcode(0x63); // needs REX.W
11392 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11393 //   ins_pipe(ialu_reg_reg);
11394 // %}
11395 
11396 // Zero-extend convert int to long
11397 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11398 %{
        // Matches (ConvI2L src) & mask and, per the format, replaces the
        // pair with a single 32-bit movl.
        // NOTE(review): relies on immL_32bits being the 0xFFFFFFFF mask and
        // on 32-bit moves clearing the upper half -- confirm the operand
        // definition.
11399   match(Set dst (AndL (ConvI2L src) mask));
11400
11401   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11402   ins_encode(enc_copy(dst, src));
11403   ins_pipe(ialu_reg_reg);
11404 %}
11405 
11406 // Zero-extend convert int to long
11407 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11408 %{
        // Matches (ConvI2L (LoadI src)) & mask and, per the format, replaces
        // the load, conversion and mask with a single 32-bit movl from
        // memory (opcode 8B).
        // NOTE(review): relies on immL_32bits being the 0xFFFFFFFF mask --
        // confirm the operand definition.
11409   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11410
11411   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11412   opcode(0x8B);
11413   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11414   ins_pipe(ialu_reg_mem);
11415 %}
11416 
11417 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11418 %{
        // Masks a long with an immL_32bits operand; per the format this is
        // done with a 32-bit movl instead of an and.
        // NOTE(review): enc_copy_always (vs. enc_copy) presumably emits the
        // move even when $dst == $src so the upper half is still cleared --
        // confirm against the enc_class.
11419   match(Set dst (AndL src mask));
11420
11421   format %{ "movl    $dst, $src\t# zero-extend long" %}
11422   ins_encode(enc_copy_always(dst, src));
11423   ins_pipe(ialu_reg_reg);
11424 %}
11425 
11426 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11427 %{
        // long -> int, shown in the format as a 32-bit movl.
        // NOTE(review): enc_copy_always presumably emits the move even when
        // $dst == $src -- confirm against the enc_class.
11428   match(Set dst (ConvL2I src));
11429
11430   format %{ "movl    $dst, $src\t# l2i" %}
11431   ins_encode(enc_copy_always(dst, src));
11432   ins_pipe(ialu_reg_reg);
11433 %}
11434 
11435 
11436 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot: a 32-bit movl load into a
        // general register (opcode 8B).
11437   match(Set dst (MoveF2I src));
11438   effect(DEF dst, USE src);
11439
11440   ins_cost(125);
11441   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11442   opcode(0x8B);
11443   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11444   ins_pipe(ialu_reg_mem);
11445 %}
11446 
11447 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot: a movss load into an XMM register.
11448   match(Set dst (MoveI2F src));
11449   effect(DEF dst, USE src);
11450
11451   ins_cost(125);
11452   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11453   opcode(0xF3, 0x0F, 0x10);
11454   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11455   ins_pipe(pipe_slow);
11456 %}
11457 
11458 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot: a 64-bit movq load into a
        // general register (opcode 8B with the _wide REX helper).
11459   match(Set dst (MoveD2L src));
11460   effect(DEF dst, USE src);
11461
11462   ins_cost(125);
11463   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11464   opcode(0x8B);
11465   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11466   ins_pipe(ialu_reg_mem);
11467 %}
11468 
11469 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot, movlpd form. Selected only when
        // UseXmmLoadAndClearUpper is off; MoveL2D_stack_reg (movsd) is the
        // counterpart when the flag is on. The format string uses the same
        // "MoveL2D_stack_reg" tag as that counterpart.
11470   predicate(!UseXmmLoadAndClearUpper);
11471   match(Set dst (MoveL2D src));
11472   effect(DEF dst, USE src);
11473
11474   ins_cost(125);
11475   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11476   opcode(0x66, 0x0F, 0x12);
11477   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11478   ins_pipe(pipe_slow);
11479 %}
11480 
11481 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot, movsd form. Selected only when
        // UseXmmLoadAndClearUpper is on; MoveL2D_stack_reg_partial (movlpd)
        // is the counterpart when the flag is off.
11482   predicate(UseXmmLoadAndClearUpper);
11483   match(Set dst (MoveL2D src));
11484   effect(DEF dst, USE src);
11485
11486   ins_cost(125);
11487   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11488   opcode(0xF2, 0x0F, 0x10);
11489   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11490   ins_pipe(pipe_slow);
11491 %}
11492 
11493 
11494 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I into an int stack slot: a movss store from an XMM
        // register. This is a store, so the register operand is $src and the
        // memory operand is $dst -- hence the (src, dst) order in the
        // encoders.
11495   match(Set dst (MoveF2I src));
11496   effect(DEF dst, USE src);
11497
11498   ins_cost(95); // XXX
11499   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11500   opcode(0xF3, 0x0F, 0x11);
11501   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11502   ins_pipe(pipe_slow);
11503 %}
11504 
11505 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F into a float stack slot: a 32-bit movl store from a
        // general register (opcode 89). This is a store, so the register
        // operand is $src and the memory operand is $dst -- hence the
        // (src, dst) order in the encoders.
11506   match(Set dst (MoveI2F src));
11507   effect(DEF dst, USE src);
11508
11509   ins_cost(100);
11510   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11511   opcode(0x89);
11512   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11513   ins_pipe( ialu_mem_reg );
11514 %}
11515 
11516 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L into a long stack slot: a movsd store from an XMM
        // register. This is a store, so the register operand is $src and the
        // memory operand is $dst -- hence the (src, dst) order in the
        // encoders.
11517   match(Set dst (MoveD2L src));
11518   effect(DEF dst, USE src);
11519
11520   ins_cost(95); // XXX
        // The tag names this instruct; it previously read
        // "MoveL2D_reg_stack", which mislabelled it in disassembly output.
11521   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11522   opcode(0xF2, 0x0F, 0x11);
11523   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11524   ins_pipe(pipe_slow);
11525 %}
11526 
11527 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D into a double stack slot: a 64-bit movq store from a
        // general register (opcode 89 with the _wide REX helper). This is a
        // store, so the register operand is $src and the memory operand is
        // $dst -- hence the (src, dst) order in the encoders.
11528   match(Set dst (MoveL2D src));
11529   effect(DEF dst, USE src);
11530
11531   ins_cost(100);
11532   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11533   opcode(0x89);
11534   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11535   ins_pipe(ialu_mem_reg);
11536 %}
11537 
11538 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I register-to-register: XMM -> general register via the
        // assembler's movdl. Cost 85 is below the stack forms of the same
        // node (95-125).
11539   match(Set dst (MoveF2I src));
11540   effect(DEF dst, USE src);
11541   ins_cost(85);
11542   format %{ "movd    $dst,$src\t# MoveF2I" %}
11543   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11544   ins_pipe( pipe_slow );
11545 %}
11546 
11547 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L register-to-register: XMM -> general register via the
        // assembler's movdq (the format prints it as "movd"). Cost 85 is
        // below the stack forms of the same node (95-125).
11548   match(Set dst (MoveD2L src));
11549   effect(DEF dst, USE src);
11550   ins_cost(85);
11551   format %{ "movd    $dst,$src\t# MoveD2L" %}
11552   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11553   ins_pipe( pipe_slow );
11554 %}
11555 
11556 // The next instructions have long latency and use Int unit. Set high cost.
11557 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F register-to-register: general register -> XMM via the
        // assembler's movdl. Cost 300 is deliberately above the stack forms
        // of the same node (100-125).
11558   match(Set dst (MoveI2F src));
11559   effect(DEF dst, USE src);
11560   ins_cost(300);
11561   format %{ "movd    $dst,$src\t# MoveI2F" %}
11562   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11563   ins_pipe( pipe_slow );
11564 %}
11565 
11566 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D register-to-register: general register -> XMM via the
        // assembler's movdq (the format prints it as "movd"). Cost 300 is
        // deliberately above the stack forms of the same node (100-125).
11567   match(Set dst (MoveL2D src));
11568   effect(DEF dst, USE src);
11569   ins_cost(300);
11570   format %{ "movd    $dst,$src\t# MoveL2D" %}
11571   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11572   ins_pipe( pipe_slow );
11573 %}
11574 
11575 // Replicate scalar to packed byte (1 byte) values in xmm
11576 instruct Repl8B_reg(regD dst, regD src) %{
        // Replicate8B of a value already in an XMM register. The format
        // shows a copy, PUNPCKLBW and PSHUFLW 0x00; the whole sequence is
        // emitted by the pshufd_8x8 encoding.
11577   match(Set dst (Replicate8B src));
11578   format %{ "MOVDQA  $dst,$src\n\t"
11579             "PUNPCKLBW $dst,$dst\n\t"
11580             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11581   ins_encode( pshufd_8x8(dst, src));
11582   ins_pipe( pipe_slow );
11583 %}
11584 
11585 // Replicate scalar to packed byte (1 byte) values in xmm
11586 instruct Repl8B_rRegI(regD dst, rRegI src) %{
        // Replicate8B of a value in a general register: mov_i2x first moves
        // it into $dst, then pshufd_8x8 is applied with $dst as both
        // operands.
11587   match(Set dst (Replicate8B src));
11588   format %{ "MOVD    $dst,$src\n\t"
11589             "PUNPCKLBW $dst,$dst\n\t"
11590             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11591   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11592   ins_pipe( pipe_slow );
11593 %}
11594 
11595 // Replicate scalar zero to packed byte (1 byte) values in xmm
11596 instruct Repl8B_immI0(regD dst, immI0 zero) %{
        // Replicate8B of constant zero: xor the register with itself (PXOR)
        // rather than materialising and shuffling a zero.
11597   match(Set dst (Replicate8B zero));
11598   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11599   ins_encode( pxor(dst, dst));
11600   ins_pipe( fpu_reg_reg );
11601 %}
11602 
11603 // Replicate scalar to packed short (2 byte) values in xmm
11604 instruct Repl4S_reg(regD dst, regD src) %{
        // Replicate4S of a value already in an XMM register; the format
        // shows PSHUFLW with selector 0x00, emitted by the pshufd_4x16
        // encoding.
11605   match(Set dst (Replicate4S src));
11606   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11607   ins_encode( pshufd_4x16(dst, src));
11608   ins_pipe( fpu_reg_reg );
11609 %}
11610 
11611 // Replicate scalar to packed short (2 byte) values in xmm
11612 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11613   match(Set dst (Replicate4S src));
11614   format %{ "MOVD    $dst,$src\n\t"
11615             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11616   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11617   ins_pipe( fpu_reg_reg );
11618 %}
11619 
11620 // Replicate scalar zero to packed short (2 byte) values in xmm
      // The constant-zero case is a single pxor of dst with itself; the zero
      // operand is used for matching only.
11621 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11622   match(Set dst (Replicate4S zero));
11623   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11624   ins_encode( pxor(dst, dst));
11625   ins_pipe( fpu_reg_reg );
11626 %}
11627 
11628 // Replicate scalar to packed char (2 byte) values in xmm
      // Variant for a regD source.  Uses the same pshufd_4x16 encoding as the
      // Replicate4S rule; only the matched ideal node (Replicate4C) differs.
11629 instruct Repl4C_reg(regD dst, regD src) %{
11630   match(Set dst (Replicate4C src));
11631   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11632   ins_encode( pshufd_4x16(dst, src));
11633   ins_pipe( fpu_reg_reg );
11634 %}
11635 
11636 // Replicate scalar to packed char (2 byte) values in xmm
      // Variant for a source in an integer register (rRegI): mov_i2x(dst, src) is
      // encoded first, then pshufd_4x16 is applied with dst as both operands.
11637 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11638   match(Set dst (Replicate4C src));
11639   format %{ "MOVD    $dst,$src\n\t"
11640             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11641   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11642   ins_pipe( fpu_reg_reg );
11643 %}
11644 
11645 // Replicate scalar zero to packed char (2 byte) values in xmm
      // The constant-zero case is a single pxor of dst with itself; the zero
      // operand is used for matching only.
11646 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11647   match(Set dst (Replicate4C zero));
11648   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11649   ins_encode( pxor(dst, dst));
11650   ins_pipe( fpu_reg_reg );
11651 %}
11652 
11653 // Replicate scalar to packed integer (4 byte) values in xmm
      // Variant for a regD source; encoded as pshufd with immediate 0x00.
11654 instruct Repl2I_reg(regD dst, regD src) %{
11655   match(Set dst (Replicate2I src));
11656   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11657   ins_encode( pshufd(dst, src, 0x00));
11658   ins_pipe( fpu_reg_reg );
11659 %}
11660 
11661 // Replicate scalar to packed integer (4 byte) values in xmm
      // Variant for a source in an integer register (rRegI): mov_i2x(dst, src) is
      // encoded first, then pshufd 0x00 is applied with dst as both operands.
11662 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11663   match(Set dst (Replicate2I src));
11664   format %{ "MOVD   $dst,$src\n\t"
11665             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11666   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11667   ins_pipe( fpu_reg_reg );
11668 %}
11669 
11670 // Replicate scalar zero to packed integer (4 byte) values in xmm
      // The constant-zero case is a single pxor of dst with itself; the zero
      // operand is used for matching only.
11671 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11672   match(Set dst (Replicate2I zero));
11673   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11674   ins_encode( pxor(dst, dst));
11675   ins_pipe( fpu_reg_reg );
11676 %}
11677 
11678 // Replicate scalar to packed single precision floating point values in xmm
      // Variant whose source operand is declared regD; encoded as pshufd with
      // immediate 0xe0 (note: not 0x00 as in the integer Replicate2I rules).
11679 instruct Repl2F_reg(regD dst, regD src) %{
11680   match(Set dst (Replicate2F src));
11681   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11682   ins_encode( pshufd(dst, src, 0xe0));
11683   ins_pipe( fpu_reg_reg );
11684 %}
11685 
11686 // Replicate scalar to packed single precision floating point values in xmm
      // Variant whose source operand is declared regF; match rule, format and
      // encoding are otherwise the same as the regD-source variant.
11687 instruct Repl2F_regF(regD dst, regF src) %{
11688   match(Set dst (Replicate2F src));
11689   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11690   ins_encode( pshufd(dst, src, 0xe0));
11691   ins_pipe( fpu_reg_reg );
11692 %}
11693 
11694 // Replicate scalar zero to packed single precision floating point values in xmm
      // The immF0 constant case is a single pxor of dst with itself; the zero
      // operand is used for matching only.
11695 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11696   match(Set dst (Replicate2F zero));
11697   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11698   ins_encode( pxor(dst, dst));
11699   ins_pipe( fpu_reg_reg );
11700 %}
11701 
11702 
11703 // =======================================================================
11704 // fast clearing of an array
      // Matches ClearArray.  cnt and base use the rcx/rdi operand classes and are
      // USE_KILL (the format describes rcx counting down and rdi advancing); the
      // rax "zero" operand and the flags are KILLed.  The encoding is opcode 0x33
      // on RAX,RAX (xorl) followed by the raw bytes F3 48 AB (rep REX_W stos).
11705 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11706                   rFlagsReg cr)
11707 %{
11708   match(Set dummy (ClearArray cnt base));
11709   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11710 
11711   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11712             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11713   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11714              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11715   ins_pipe(pipe_slow);
11716 %}
11717 
11718 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11719                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11720 %{
        // Matches StrComp on two (string, count) pairs.  All four inputs are pinned
        // to specific register classes and are USE_KILL; tmp1/tmp2 are XMM TEMPs
        // and the flags are KILLed.  The code itself comes from the assembler's
        // string_compare routine (defined outside this file section).
11721   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11722   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11723 
11724   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11725   ins_encode %{
          // Note the argument order: both strings first, then both counts.
11726     __ string_compare($str1$$Register, $str2$$Register,
11727                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11728                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11734                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11735 %{
        // Matches StrIndexOf, but only when the UseSSE42Intrinsics flag is set.
        // Inputs are USE_KILL; tmp1 is an XMM TEMP, while tmp2 (rcx class) and the
        // flags are declared KILL.  The code comes from the assembler's
        // string_indexof routine (defined outside this file section).
11736   predicate(UseSSE42Intrinsics);
11737   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11738   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11739 
11740   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11741   ins_encode %{
          // Note the argument order: both strings first, then both counts.
11742     __ string_indexof($str1$$Register, $str2$$Register,
11743                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11744                       $tmp1$$XMMRegister, $tmp2$$Register);
11745   %}
11746   ins_pipe( pipe_slow );
11747 %}
11748 
11749 // fast string equals
      // Matches StrEquals on two strings and a shared count.  Delegates to the
      // assembler's char_arrays_equals with its first argument false (array_equals
      // passes true to the same routine).
11750 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11751                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11752 %{
11753   match(Set result (StrEquals (Binary str1 str2) cnt));
11754   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11755 
11756   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11757   ins_encode %{
11758     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11759                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11760                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11761   %}
11762   ins_pipe( pipe_slow );
11763 %}
11764 
11765 // fast array equals
      // Matches AryEq on two arrays.  Delegates to the assembler's
      // char_arrays_equals with its first argument true.  There is no count input
      // here: tmp3 (rcx class, KILL) is passed in the argument slot where
      // string_equals passes its cnt operand.
11766 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11767                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11768 %{
11769   match(Set result (AryEq ary1 ary2));
11770   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11771   //ins_cost(300);
11772 
11773   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11774   ins_encode %{
11775     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11776                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11777                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11778   %}
11779   ins_pipe( pipe_slow );
11780 %}
11781 
11782 //----------Control Flow Instructions------------------------------------------
11783 // Signed compare Instructions
11784 
11785 // XXX more variants!!
      // Signed 32-bit compare of two registers: matches CmpI and defines an
      // rFlagsReg result.  Encoded with opcode 0x3B /r; also used by the
      // minI_rReg / maxI_rReg expand rules.
11786 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11787 %{
11788   match(Set cr (CmpI op1 op2));
11789   effect(DEF cr, USE op1, USE op2);
11790 
11791   format %{ "cmpl    $op1, $op2" %}
11792   opcode(0x3B);  /* Opcode 3B /r */
11793   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11794   ins_pipe(ialu_cr_reg_reg);
11795 %}
11796 
11797 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11798 %{
        // CmpI of a register against an int constant (immI).  The base opcode is
        // 81 /7; OpcSErm and Con8or32 presumably select the shorter sign-extended
        // 8-bit immediate form when the constant fits -- confirm against the
        // enc_class definitions.
11799   match(Set cr (CmpI op1 op2));
11800 
11801   format %{ "cmpl    $op1, $op2" %}
11802   opcode(0x81, 0x07); /* Opcode 81 /7 */
11803   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11804   ins_pipe(ialu_cr_reg_imm);
11805 %}
11806 
11807 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11808 %{
        // CmpI whose second input is a LoadI: the load is folded into the compare
        // as a memory operand (opcode 3B /r).  The cost of 500 is flagged "XXX" by
        // the original author, i.e. not a tuned value.
11809   match(Set cr (CmpI op1 (LoadI op2)));
11810 
11811   ins_cost(500); // XXX
11812   format %{ "cmpl    $op1, $op2" %}
11813   opcode(0x3B); /* Opcode 3B /r */
11814   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11815   ins_pipe(ialu_cr_reg_mem);
11816 %}
11817 
11818 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11819 %{
        // CmpI against the constant zero is emitted as "testl src, src" (opcode
        // 0x85).  The zero operand takes part in matching only; it does not appear
        // in the encoding.
11820   match(Set cr (CmpI src zero));
11821 
11822   format %{ "testl   $src, $src" %}
11823   opcode(0x85);
11824   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11825   ins_pipe(ialu_cr_reg_imm);
11826 %}
11827 
11828 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11829 %{
        // (src & con) compared with zero is folded into one testl with an
        // immediate: opcode F7 /0 followed by the constant via Con32.  src is not
        // written; no effect() declares it modified.
11830   match(Set cr (CmpI (AndI src con) zero));
11831 
11832   format %{ "testl   $src, $con" %}
11833   opcode(0xF7, 0x00);
11834   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11835   ins_pipe(ialu_cr_reg_imm);
11836 %}
11837 
11838 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11839 %{
        // (src & LoadI(mem)) compared with zero: both the AndI and the load are
        // folded into a single testl with a memory operand (opcode 0x85).
11840   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11841 
11842   format %{ "testl   $src, $mem" %}
11843   opcode(0x85);
11844   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11845   ins_pipe(ialu_cr_reg_mem);
11846 %}
11847 
11848 // Unsigned compare Instructions; really, same as signed except they
11849 // produce an rFlagsRegU instead of rFlagsReg.
      // Register/register form: matches CmpU, opcode 3B /r (same opcode and
      // encoding classes as compI_rReg).
11850 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11851 %{
11852   match(Set cr (CmpU op1 op2));
11853 
11854   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11855   opcode(0x3B); /* Opcode 3B /r */
11856   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11857   ins_pipe(ialu_cr_reg_reg);
11858 %}
11859 
11860 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11861 %{
        // CmpU of a register against an int constant; produces rFlagsRegU.  Uses
        // the same opcode (81 /7) and encoding classes as compI_rReg_imm.
11862   match(Set cr (CmpU op1 op2));
11863 
11864   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11865   opcode(0x81,0x07); /* Opcode 81 /7 */
11866   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11867   ins_pipe(ialu_cr_reg_imm);
11868 %}
11869 
11870 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11871 %{
        // CmpU whose second input is a LoadI: the load is folded into the compare
        // as a memory operand (opcode 3B /r).  Produces rFlagsRegU.  The cost of
        // 500 is flagged "XXX" by the original author.
11872   match(Set cr (CmpU op1 (LoadI op2)));
11873 
11874   ins_cost(500); // XXX
11875   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11876   opcode(0x3B); /* Opcode 3B /r */
11877   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11878   ins_pipe(ialu_cr_reg_mem);
11879 %}
11880 
11881 // // // Cisc-spilled version of cmpU_rReg
11882 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11883 // //%{
11884 // //  match(Set cr (CmpU (LoadI op1) op2));
11885 // //
11886 // //  format %{ "CMPu   $op1,$op2" %}
11887 // //  ins_cost(500);
11888 // //  opcode(0x39);  /* Opcode 39 /r */
11889 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11890 // //%}
11891 
11892 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11893 %{
        // CmpU against the constant zero is emitted as "testl src, src" (opcode
        // 0x85); the zero operand is used for matching only.
11894   match(Set cr (CmpU src zero));
11895 
11896   format %{ "testl  $src, $src\t# unsigned" %}
11897   opcode(0x85);
11898   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11899   ins_pipe(ialu_cr_reg_imm);
11900 %}
11901 
11902 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11903 %{
        // Pointer compare of two registers: matches CmpP and produces rFlagsRegU.
        // Same opcode (3B /r) as the int compare, but with the "_wide" REX
        // encoding class; the format prints it as cmpq.
11904   match(Set cr (CmpP op1 op2));
11905 
11906   format %{ "cmpq    $op1, $op2\t# ptr" %}
11907   opcode(0x3B); /* Opcode 3B /r */
11908   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11909   ins_pipe(ialu_cr_reg_reg);
11910 %}
11911 
11912 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11913 %{
        // CmpP whose second input is a LoadP: the load is folded into a cmpq with
        // a memory operand (opcode 3B /r, "_wide" REX class).  The cost of 500 is
        // flagged "XXX" by the original author.
11914   match(Set cr (CmpP op1 (LoadP op2)));
11915 
11916   ins_cost(500); // XXX
11917   format %{ "cmpq    $op1, $op2\t# ptr" %}
11918   opcode(0x3B); /* Opcode 3B /r */
11919   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11920   ins_pipe(ialu_cr_reg_mem);
11921 %}
11922 
11923 // // // Cisc-spilled version of cmpP_rReg
11924 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11925 // //%{
11926 // //  match(Set cr (CmpP (LoadP op1) op2));
11927 // //
11928 // //  format %{ "CMPu   $op1,$op2" %}
11929 // //  ins_cost(500);
11930 // //  opcode(0x39);  /* Opcode 39 /r */
11931 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11932 // //%}
11933 
11934 // XXX this is generalized by compP_rReg_mem???
11935 // Compare raw pointer (used in out-of-heap check).
11936 // Only works because non-oop pointers must be raw pointers
11937 // and raw pointers have no anti-dependencies.
      // Match rule and encoding are identical to compP_rReg_mem; the differences
      // are the predicate below and the absence of ins_cost(500).
11938 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11939 %{
        // n is the matched node; n->in(2) corresponds to the LoadP in the match
        // rule, and its in(2) is presumably the load's address input -- confirm
        // against the ideal graph layout.  The rule applies only when that type is
        // not an oop pointer.
11940   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11941   match(Set cr (CmpP op1 (LoadP op2)));
11942 
11943   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11944   opcode(0x3B); /* Opcode 3B /r */
11945   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11946   ins_pipe(ialu_cr_reg_mem);
11947 %}
11948 
11949 // This will generate a signed flags result. This should be OK since
11950 // any compare to a zero should be eq/neq.
      // CmpP of a register against the null constant (immP0), emitted as
      // "testq src, src" (opcode 0x85, "_wide" REX class).  Unlike the other CmpP
      // rules here the result operand is rFlagsReg, not rFlagsRegU.
11951 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11952 %{
11953   match(Set cr (CmpP src zero));
11954 
11955   format %{ "testq   $src, $src\t# ptr" %}
11956   opcode(0x85);
11957   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11958   ins_pipe(ialu_cr_reg_imm);
11959 %}
11960 
11961 // This will generate a signed flags result. This should be OK since
11962 // any compare to a zero should be eq/neq.
      // Null test of a pointer loaded from memory.  The predicate is the exact
      // complement of testP_mem_reg0's, so exactly one of the two rules applies.
11963 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11964 %{
11965   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11966   match(Set cr (CmpP (LoadP op) zero));
11967 
11968   ins_cost(500); // XXX
11969   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11970   opcode(0xF7); /* Opcode F7 /0 */
        // Only a 32-bit immediate (0xFFFFFFFF) is emitted; the 64-bit mask shown
        // in the format presumably reflects the CPU sign-extending it for the
        // REX.W form -- confirm against the instruction reference.
11971   ins_encode(REX_mem_wide(op),
11972              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11973   ins_pipe(ialu_cr_reg_imm);
11974 %}
11975 
11976 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11977 %{
        // Null test of a pointer loaded from memory, for the configuration where
        // compressed oops are on and the narrow-oop base is NULL.  Per the format
        // text, R12 holds the heap base, which is then zero, so the memory word is
        // compared directly against R12 instead of an immediate.
11978   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11979   match(Set cr (CmpP (LoadP mem) zero));
11980 
11981   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11982   ins_encode %{
11983     __ cmpq(r12, $mem$$Address);
11984   %}
11985   ins_pipe(ialu_cr_reg_mem);
11986 %}
11987 
11988 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11989 %{
        // Compare of two compressed (narrow) oops held in registers: matches CmpN
        // and is emitted as a 32-bit cmpl.  Produces rFlagsRegU.
11990   match(Set cr (CmpN op1 op2));
11991 
11992   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11993   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11994   ins_pipe(ialu_cr_reg_reg);
11995 %}
11996 
11997 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11998 %{
        // CmpN whose second input is a LoadN: the load is folded into a 32-bit cmpl
        // with a memory operand.  Produces rFlagsRegU.
11999   match(Set cr (CmpN src (LoadN mem)));
12000 
12001   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12002   ins_encode %{
12003     __ cmpl($src$$Register, $mem$$Address);
12004   %}
12005   ins_pipe(ialu_cr_reg_mem);
12006 %}
12007 
12008 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // CmpN of a register against a narrow-oop constant (immN).  The constant is
        // passed as a jobject to the assembler's cmp_narrow_oop helper, which is
        // defined outside this file section.
12009   match(Set cr (CmpN op1 op2));
12010 
12011   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12012   ins_encode %{
12013     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12014   %}
12015   ins_pipe(ialu_cr_reg_imm);
12016 %}
12017 
12018 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12019 %{
        // CmpN of a narrow-oop constant against a LoadN: the load is folded so the
        // memory word is compared with the constant via cmp_narrow_oop.
        // NOTE(review): the match rule has the constant as the FIRST CmpN input,
        // but the emitted compare (and the format) put the memory operand first.
        // That is harmless only if consumers test eq/ne exclusively -- confirm.
12020   match(Set cr (CmpN src (LoadN mem)));
12021 
12022   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12023   ins_encode %{
12024     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12025   %}
12026   ins_pipe(ialu_cr_reg_mem);
12027 %}
12028 
12029 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null test of a compressed oop in a register: CmpN against immN0 is
        // emitted as "testl src, src"; the zero operand is used for matching only.
12030   match(Set cr (CmpN src zero));
12031 
12032   format %{ "testl   $src, $src\t# compressed ptr" %}
12033   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12034   ins_pipe(ialu_cr_reg_imm);
12035 %}
12036 
12037 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12038 %{
        // Null test of a compressed oop loaded from memory: matches
        // CmpN (LoadN mem) against immN0 when the narrow-oop base is non-NULL
        // (testN_mem_reg0 covers the base == NULL case).  The zero operand is used
        // for matching only.
12039   predicate(Universe::narrow_oop_base() != NULL);
12040   match(Set cr (CmpN (LoadN mem) zero));
12041 
12042   ins_cost(500); // XXX
12043   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
12044   ins_encode %{
          // Compare the 32-bit memory word with zero.  The previous encoding
          // compared against 0xFFFFFFFF (-1), which sets ZF only when the word is
          // all ones rather than when it is null, and did not match the "testl"
          // shown in the format.  "cmpl mem, 0" yields the same flags as
          // "testl mem, -1" (ZF/SF from the value, CF = OF = 0).
12045     __ cmpl($mem$$Address, (int)0);
12046   %}
12047   ins_pipe(ialu_cr_reg_mem);
12048 %}
12049 
12050 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12051 %{
        // Null test of a compressed oop loaded from memory, for the case where the
        // narrow-oop base is NULL.  Per the format text, R12 holds the heap base,
        // which is then zero, so the 32-bit memory word is compared against R12
        // instead of an immediate.
12052   predicate(Universe::narrow_oop_base() == NULL);
12053   match(Set cr (CmpN (LoadN mem) zero));
12054 
12055   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12056   ins_encode %{
12057     __ cmpl(r12, $mem$$Address);
12058   %}
12059   ins_pipe(ialu_cr_reg_mem);
12060 %}
12061 
12062 // Yanked all unsigned pointer compare operations.
12063 // Pointer compares are done with CmpP which is already unsigned.
12064 
12065 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12066 %{
        // Signed 64-bit compare of two registers: matches CmpL.  Same opcode
        // (3B /r) as compI_rReg but with the "_wide" REX class; printed as cmpq.
12067   match(Set cr (CmpL op1 op2));
12068 
12069   format %{ "cmpq    $op1, $op2" %}
12070   opcode(0x3B);  /* Opcode 3B /r */
12071   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12072   ins_pipe(ialu_cr_reg_reg);
12073 %}
12074 
12075 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12076 %{
        // CmpL of a register against a long constant restricted to the immL32
        // operand class.  Base opcode is 81 /7; OpcSErm_wide and Con8or32
        // presumably select an 8-bit immediate form when it fits -- confirm
        // against the enc_class definitions.
12077   match(Set cr (CmpL op1 op2));
12078 
12079   format %{ "cmpq    $op1, $op2" %}
12080   opcode(0x81, 0x07); /* Opcode 81 /7 */
12081   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12082   ins_pipe(ialu_cr_reg_imm);
12083 %}
12084 
12085 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12086 %{
        // CmpL whose second input is a LoadL: the load is folded into a cmpq with
        // a memory operand (opcode 3B /r).  Unlike the int and pointer
        // register/memory compares above, this rule sets no explicit ins_cost.
12087   match(Set cr (CmpL op1 (LoadL op2)));
12088 
12089   format %{ "cmpq    $op1, $op2" %}
12090   opcode(0x3B); /* Opcode 3B /r */
12091   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12092   ins_pipe(ialu_cr_reg_mem);
12093 %}
12094 
12095 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12096 %{
        // CmpL against the constant zero is emitted as "testq src, src" (opcode
        // 0x85, "_wide" REX class); the zero operand is used for matching only.
12097   match(Set cr (CmpL src zero));
12098 
12099   format %{ "testq   $src, $src" %}
12100   opcode(0x85);
12101   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12102   ins_pipe(ialu_cr_reg_imm);
12103 %}
12104 
12105 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12106 %{
        // (src & con) compared with zero, with con restricted to immL32, is folded
        // into one testq with an immediate: opcode F7 /0 followed by the constant
        // via Con32.
12107   match(Set cr (CmpL (AndL src con) zero));
12108 
12109   format %{ "testq   $src, $con\t# long" %}
12110   opcode(0xF7, 0x00);
12111   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12112   ins_pipe(ialu_cr_reg_imm);
12113 %}
12114 
12115 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12116 %{
        // (src & LoadL(mem)) compared with zero: both the AndL and the load are
        // folded into a single testq with a memory operand (opcode 0x85).
12117   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12118 
12119   format %{ "testq   $src, $mem" %}
12120   opcode(0x85);
12121   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12122   ins_pipe(ialu_cr_reg_mem);
12123 %}
12124 
12125 // Manifest a CmpL result in an integer register.  Very painful.
12126 // This is the test to avoid.
      // Matches CmpL3 and writes the three-way result to an int register; the
      // flags are KILLed.  Per the format: dst is preloaded with -1 and kept on
      // "less"; otherwise setne/movzbl leave 0 (equal) or 1 (not equal, i.e.
      // greater).  The actual bytes come from the cmpl3_flag enc_class.
12127 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12128 %{
12129   match(Set dst (CmpL3 src1 src2));
12130   effect(KILL flags);
12131 
12132   ins_cost(275); // XXX
12133   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12134             "movl    $dst, -1\n\t"
12135             "jl,s    done\n\t"
12136             "setne   $dst\n\t"
12137             "movzbl  $dst, $dst\n\t"
12138     "done:" %}
12139   ins_encode(cmpl3_flag(src1, src2, dst));
12140   ins_pipe(pipe_slow);
12141 %}
12142 
12143 //----------Max and Min--------------------------------------------------------
12144 // Min Instructions
12145 
12146 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12147 %{
        // Helper with no match rule: it is only instantiated from the minI_rReg
        // expand.  Conditional move (opcode 0F 4F, printed "cmovlgt") that reads
        // the flags and both registers and may overwrite dst with src.
12148   effect(USE_DEF dst, USE src, USE cr);
12149 
12150   format %{ "cmovlgt $dst, $src\t# min" %}
12151   opcode(0x0F, 0x4F);
12152   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12153   ins_pipe(pipe_cmov_reg);
12154 %}
12155 
12156 
12157 instruct minI_rReg(rRegI dst, rRegI src)
12158 %{
        // Matches MinI in two-address form (dst is both input and result) and
        // expands into a compare of dst with src followed by cmovI_reg_g, which
        // replaces dst with src when dst is greater.  cr is a temporary flags
        // operand local to the expansion.
12159   match(Set dst (MinI dst src));
12160 
12161   ins_cost(200);
12162   expand %{
12163     rFlagsReg cr;
12164     compI_rReg(cr, dst, src);
12165     cmovI_reg_g(dst, src, cr);
12166   %}
12167 %}
12168 
12169 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12170 %{
        // Helper with no match rule: it is only instantiated from the maxI_rReg
        // expand.  Conditional move (opcode 0F 4C, printed "cmovllt") that reads
        // the flags and both registers and may overwrite dst with src.
12171   effect(USE_DEF dst, USE src, USE cr);
12172 
12173   format %{ "cmovllt $dst, $src\t# max" %}
12174   opcode(0x0F, 0x4C);
12175   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12176   ins_pipe(pipe_cmov_reg);
12177 %}
12178 
12179 
12180 instruct maxI_rReg(rRegI dst, rRegI src)
12181 %{
        // Matches MaxI in two-address form (dst is both input and result) and
        // expands into a compare of dst with src followed by cmovI_reg_l, which
        // replaces dst with src when dst is less.  cr is a temporary flags operand
        // local to the expansion.
12182   match(Set dst (MaxI dst src));
12183 
12184   ins_cost(200);
12185   expand %{
12186     rFlagsReg cr;
12187     compI_rReg(cr, dst, src);
12188     cmovI_reg_l(dst, src, cr);
12189   %}
12190 %}
12191 
12192 // ============================================================================
12193 // Branch Instructions
12194 
12195 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional branch: matches Goto, opcode 0xE9, declared
      // size 5 bytes.  jmpDir_short is the 2-byte replacement for near targets.
12196 instruct jmpDir(label labl)
12197 %{
12198   match(Goto);
12199   effect(USE labl);
12200 
12201   ins_cost(300);
12202   format %{ "jmp     $labl" %}
12203   size(5);
12204   opcode(0xE9);
12205   ins_encode(OpcP, Lbl(labl));
12206   ins_pipe(pipe_jmp);
12207   ins_pc_relative(1);
12208 %}
12209 
12210 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the signed conditional branch: matches If on (cmpOp,
      // rFlagsReg).  Opcode base 0F 80 with the condition supplied by cop through
      // the Jcc enc_class; declared size 6 bytes.
12211 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12212 %{
12213   match(If cop cr);
12214   effect(USE labl);
12215 
12216   ins_cost(300);
12217   format %{ "j$cop     $labl" %}
12218   size(6);
12219   opcode(0x0F, 0x80);
12220   ins_encode(Jcc(cop, labl));
12221   ins_pipe(pipe_jcc);
12222   ins_pc_relative(1);
12223 %}
12224 
12225 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matches CountedLoopEnd instead of If (the
      // back-branch of a counted loop, per the "loop end" format annotation).
12226 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12227 %{
12228   match(CountedLoopEnd cop cr);
12229   effect(USE labl);
12230 
12231   ins_cost(300);
12232   format %{ "j$cop     $labl\t# loop end" %}
12233   size(6);
12234   opcode(0x0F, 0x80);
12235   ins_encode(Jcc(cop, labl));
12236   ins_pipe(pipe_jcc);
12237   ins_pc_relative(1);
12238 %}
12239 
12240 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd variant for unsigned conditions: takes cmpOpU and
      // rFlagsRegU operands; encoding is otherwise the same 6-byte Jcc.
12241 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12242   match(CountedLoopEnd cop cmp);
12243   effect(USE labl);
12244 
12245   ins_cost(300);
12246   format %{ "j$cop,u   $labl\t# loop end" %}
12247   size(6);
12248   opcode(0x0F, 0x80);
12249   ins_encode(Jcc(cop, labl));
12250   ins_pipe(pipe_jcc);
12251   ins_pc_relative(1);
12252 %}
12253 
12254 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd variant for the cmpOpUCF / rFlagsRegUCF operand pair
        // (defined elsewhere in this file).  Same 6-byte Jcc encoding as
        // jmpLoopEndU, but with a lower cost (200 vs. 300).
12255   match(CountedLoopEnd cop cmp);
12256   effect(USE labl);
12257 
12258   ins_cost(200);
12259   format %{ "j$cop,u   $labl\t# loop end" %}
12260   size(6);
12261   opcode(0x0F, 0x80);
12262   ins_encode(Jcc(cop, labl));
12263   ins_pipe(pipe_jcc);
12264   ins_pc_relative(1);
12265 %}
12266 
12267 // Jump Direct Conditional - using unsigned comparison
      // Long form: matches If on (cmpOpU, rFlagsRegU); 6-byte Jcc with opcode base
      // 0F 80.
12268 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12269   match(If cop cmp);
12270   effect(USE labl);
12271 
12272   ins_cost(300);
12273   format %{ "j$cop,u  $labl" %}
12274   size(6);
12275   opcode(0x0F, 0x80);
12276   ins_encode(Jcc(cop, labl));
12277   ins_pipe(pipe_jcc);
12278   ins_pc_relative(1);
12279 %}
12280 
12281 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If variant for the cmpOpUCF / rFlagsRegUCF operand pair (defined
        // elsewhere in this file).  Same 6-byte Jcc encoding as jmpConU, but with
        // a lower cost (200 vs. 300).
12282   match(If cop cmp);
12283   effect(USE labl);
12284 
12285   ins_cost(200);
12286   format %{ "j$cop,u  $labl" %}
12287   size(6);
12288   opcode(0x0F, 0x80);
12289   ins_encode(Jcc(cop, labl));
12290   ins_pipe(pipe_jcc);
12291   ins_pc_relative(1);
12292 %}
12293 
12294 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // If variant for cmpOpUCF2 conditions, which need an extra parity check.
        // Two 6-byte conditional jumps are always emitted (hence size(12)): first
        // a jump on parity, then the jump on $cop.  Only equal and notEqual are
        // accepted; anything else hits ShouldNotReachHere().
12295   match(If cop cmp);
12296   effect(USE labl);
12297 
12298   ins_cost(200);
12299   format %{ $$template
12300     if ($cop$$cmpcode == Assembler::notEqual) {
12301       $$emit$$"jp,u   $labl\n\t"
12302       $$emit$$"j$cop,u   $labl"
12303     } else {
12304       $$emit$$"jp,u   done\n\t"
12305       $$emit$$"j$cop,u   $labl\n\t"
12306       $$emit$$"done:"
12307     }
12308   %}
12309   size(12);
12310   opcode(0x0F, 0x80);
12311   ins_encode %{
12312     Label* l = $labl$$label;
          // First jump: primary opcode byte, then the condition byte for parity.
12313     $$$emit8$primary;
12314     emit_cc(cbuf, $secondary, Assembler::parity);
12315     int parity_disp = -1;
12316     if ($cop$$cmpcode == Assembler::notEqual) {
12317        // the two jumps 6 bytes apart so the jump distances are too
          // notEqual: the parity jump goes to the same label as the main jump.
          // The "+ 4" accounts for the 32-bit displacement about to be emitted.
12318        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12319     } else if ($cop$$cmpcode == Assembler::equal) {
          // equal: the parity jump skips the 6-byte second jump ("done" in the
          // format).
12320        parity_disp = 6;
12321     } else {
12322        ShouldNotReachHere();
12323     }
12324     emit_d32(cbuf, parity_disp);
          // Second jump: same opcode byte with the real condition, targeting labl.
12325     $$$emit8$primary;
12326     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12327     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12328     emit_d32(cbuf, disp);
12329   %}
12330   ins_pipe(pipe_jcc);
12331   ins_pc_relative(1);
12332 %}
12333 
12334 // ============================================================================
12335 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12336 // superklass array for an instance of the superklass.  Set a hidden
12337 // internal cache on a hit (cache is checked with exposed code in
12338 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12339 // encoding ALSO sets flags.
12340 
12341 instruct partialSubtypeCheck(rdi_RegP result,
12342                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12343                              rFlagsReg cr)
12344 %{
        // Value-producing form: matches PartialSubtypeCheck with the result in the
        // rdi operand class; rcx and the flags are KILLed.  The format describes a
        // "repne scasq" scan of the secondary supers array followed by a cache
        // update on a hit.  It shares enc_PartialSubtypeCheck with
        // partialSubtypeCheck_vs_Zero; the opcode() value is what differs.
12345   match(Set result (PartialSubtypeCheck sub super));
12346   effect(KILL rcx, KILL cr);
12347 
12348   ins_cost(1100);  // slightly larger than the next version
12349   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12350             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12351             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12352             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12353             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12354             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12355             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
12356     "miss:\t" %}
12357 
12358   opcode(0x1); // Force a XOR of RDI
12359   ins_encode(enc_PartialSubtypeCheck());
12360   ins_pipe(pipe_slow);
12361 %}
12362 
12363 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12364                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12365                                      immP0 zero,
12366                                      rdi_RegP result)
12367 %{
        // Flags-producing form: matches a PartialSubtypeCheck whose result is
        // compared against null, so only the flags are defined; rcx and the rdi
        // "result" operand are merely KILLed.  It uses the same
        // enc_PartialSubtypeCheck as partialSubtypeCheck, but with opcode 0x0 so
        // the final XOR of RDI is skipped, and a slightly lower cost (1000).
12368   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12369   effect(KILL rcx, KILL result);
12370 
12371   ins_cost(1000);
12372   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12373             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12374             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12375             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12376             "jne,s   miss\t\t# Missed: flags nz\n\t"
12377             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12378     "miss:\t" %}
12379 
12380   opcode(0x0); // No need to XOR RDI
12381   ins_encode(enc_PartialSubtypeCheck());
12382   ins_pipe(pipe_slow);
12383 %}
12384 
12385 // ============================================================================
12386 // Branch Instructions -- short offset versions
12387 //
12388 // These instructions are used to replace jumps of a long offset (the default
12389 // match) with jumps of a shorter offset.  These instructions are all tagged
12390 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12391 // match rules in general matching.  Instead, the ADLC generates a conversion
12392 // method in the MachNode which can be used to do in-place replacement of the
12393 // long variant with the shorter variant.  The compiler will determine if a
12394 // branch can be taken by the is_short_branch_offset() predicate in the machine
12395 // specific code section of the file.
12396 
12397 // Jump Direct - Label defines a relative address from JMP+1
      // Short form of jmpDir: opcode 0xEB, declared size 2 bytes.  Tagged with
      // ins_short_branch(1), so (per the section comment above) it is used as an
      // in-place replacement for the long form, not in general matching.
12398 instruct jmpDir_short(label labl) %{
12399   match(Goto);
12400   effect(USE labl);
12401 
12402   ins_cost(300);
12403   format %{ "jmp,s   $labl" %}
12404   size(2);
12405   opcode(0xEB);
12406   ins_encode(OpcP, LblShort(labl));
12407   ins_pipe(pipe_jmp);
12408   ins_pc_relative(1);
12409   ins_short_branch(1);
12410 %}
12411 
12412 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpCon: opcode base 0x70 through the JccShort enc_class,
      // declared size 2 bytes, tagged ins_short_branch(1).
12413 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12414   match(If cop cr);
12415   effect(USE labl);
12416 
12417   ins_cost(300);
12418   format %{ "j$cop,s   $labl" %}
12419   size(2);
12420   opcode(0x70);
12421   ins_encode(JccShort(cop, labl));
12422   ins_pipe(pipe_jcc);
12423   ins_pc_relative(1);
12424   ins_short_branch(1);
12425 %}
12426 
12427 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpLoopEnd (CountedLoopEnd): opcode base 0x70 through
      // JccShort, declared size 2 bytes, tagged ins_short_branch(1).
12428 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12429   match(CountedLoopEnd cop cr);
12430   effect(USE labl);
12431 
12432   ins_cost(300);
12433   format %{ "j$cop,s   $labl\t# loop end" %}
12434   size(2);
12435   opcode(0x70);
12436   ins_encode(JccShort(cop, labl));
12437   ins_pipe(pipe_jcc);
12438   ins_pc_relative(1);
12439   ins_short_branch(1);
12440 %}
12441 
12442 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpLoopEndU (unsigned CountedLoopEnd): opcode base 0x70
      // through JccShort, declared size 2 bytes, tagged ins_short_branch(1).
12443 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12444   match(CountedLoopEnd cop cmp);
12445   effect(USE labl);
12446 
12447   ins_cost(300);
12448   format %{ "j$cop,us  $labl\t# loop end" %}
12449   size(2);
12450   opcode(0x70);
12451   ins_encode(JccShort(cop, labl));
12452   ins_pipe(pipe_jcc);
12453   ins_pc_relative(1);
12454   ins_short_branch(1);
12455 %}
12456 
12457 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpLoopEndUCF: opcode base 0x70 through JccShort, declared
        // size 2 bytes, tagged ins_short_branch(1).  Note the cost here is 300,
        // whereas the long form declares 200.
12458   match(CountedLoopEnd cop cmp);
12459   effect(USE labl);
12460 
12461   ins_cost(300);
12462   format %{ "j$cop,us  $labl\t# loop end" %}
12463   size(2);
12464   opcode(0x70);
12465   ins_encode(JccShort(cop, labl));
12466   ins_pipe(pipe_jcc);
12467   ins_pc_relative(1);
12468   ins_short_branch(1);
12469 %}
12470 
12471 // Jump Direct Conditional - using unsigned comparison
      // Short form of jmpConU: opcode base 0x70 through JccShort, declared size
      // 2 bytes, tagged ins_short_branch(1).
12472 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12473   match(If cop cmp);
12474   effect(USE labl);
12475 
12476   ins_cost(300);
12477   format %{ "j$cop,us  $labl" %}
12478   size(2);
12479   opcode(0x70);
12480   ins_encode(JccShort(cop, labl));
12481   ins_pipe(pipe_jcc);
12482   ins_pc_relative(1);
12483   ins_short_branch(1);
12484 %}
12485 
12486 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF: opcode base 0x70 through JccShort, declared
        // size 2 bytes, tagged ins_short_branch(1).  Note the cost here is 300,
        // whereas the long form declares 200.
12487   match(If cop cmp);
12488   effect(USE labl);
12489 
12490   ins_cost(300);
12491   format %{ "j$cop,us  $labl" %}
12492   size(2);
12493   opcode(0x70);
12494   ins_encode(JccShort(cop, labl));
12495   ins_pipe(pipe_jcc);
12496   ins_pc_relative(1);
12497   ins_short_branch(1);
12498 %}
12499 
12500 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF2: two 2-byte conditional jumps (hence size(4)),
        // first on parity and then on $cop, each with an 8-bit displacement.  Only
        // equal and notEqual are accepted; anything else hits
        // ShouldNotReachHere().
12501   match(If cop cmp);
12502   effect(USE labl);
12503 
12504   ins_cost(300);
12505   format %{ $$template
12506     if ($cop$$cmpcode == Assembler::notEqual) {
12507       $$emit$$"jp,u,s   $labl\n\t"
12508       $$emit$$"j$cop,u,s   $labl"
12509     } else {
12510       $$emit$$"jp,u,s   done\n\t"
12511       $$emit$$"j$cop,u,s  $labl\n\t"
12512       $$emit$$"done:"
12513     }
12514   %}
12515   size(4);
12516   opcode(0x70);
12517   ins_encode %{
12518     Label* l = $labl$$label;
          // First jump: condition byte for parity.
12519     emit_cc(cbuf, $primary, Assembler::parity);
12520     int parity_disp = -1;
12521     if ($cop$$cmpcode == Assembler::notEqual) {
          // notEqual: the parity jump goes to the same label as the main jump.
          // The "+ 1" accounts for the 8-bit displacement about to be emitted.
12522       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12523     } else if ($cop$$cmpcode == Assembler::equal) {
          // equal: the parity jump skips the 2-byte second jump ("done" in the
          // format).
12524       parity_disp = 2;
12525     } else {
12526       ShouldNotReachHere();
12527     }
12528     emit_d8(cbuf, parity_disp);
          // Second jump: the real condition, targeting labl.
12529     emit_cc(cbuf, $primary, $cop$$cmpcode);
12530     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12531     emit_d8(cbuf, disp);
          // Range checks run after both displacements have already been emitted.
12532     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12533     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12534   %}
12535   ins_pipe(pipe_jcc);
12536   ins_pc_relative(1);
12537   ins_short_branch(1);
12538 %}
12539 
12540 // ============================================================================
12541 // inlined locking and unlocking
12542 
12543 instruct cmpFastLock(rFlagsReg cr,
12544                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12545 %{
        // Inlined locking: matches FastLock(object, box) and defines the flags
        // register cr as the result.
        // NOTE(review): which flag state signals success is decided by the
        // Fast_Lock encoding class, which is defined elsewhere in this file.
12546   match(Set cr (FastLock object box));
12547   effect(TEMP tmp, TEMP scr); // tmp (an rax_RegI operand) and scr are temporaries for the encoding
12548 
12549   ins_cost(300);
12550   format %{ "fastlock $object,$box,$tmp,$scr" %}
12551   ins_encode(Fast_Lock(object, box, tmp, scr));
12552   ins_pipe(pipe_slow);
12553   ins_pc_relative(1);
12554 %}
12555 
12556 instruct cmpFastUnlock(rFlagsReg cr,
12557                        rRegP object, rax_RegP box, rRegP tmp)
12558 %{
        // Inlined unlocking: matches FastUnlock(object, box) and defines the
        // flags register cr as the result. Here box is an rax_RegP operand.
        // NOTE(review): which flag state signals success is decided by the
        // Fast_Unlock encoding class, which is defined elsewhere in this file.
12559   match(Set cr (FastUnlock object box));
12560   effect(TEMP tmp); // tmp is a temporary for the encoding
12561 
12562   ins_cost(300);
12563   format %{ "fastunlock $object, $box, $tmp" %}
12564   ins_encode(Fast_Unlock(object, box, tmp));
12565   ins_pipe(pipe_slow);
12566   ins_pc_relative(1);
12567 %}
12568 
12569 
12570 // ============================================================================
12571 // Safepoint Instructions
12572 instruct safePoint_poll(rFlagsReg cr)
12573 %{
        // Safepoint poll. Per the format string this is a testl of rax against
        // the polling page, addressed RIP-relative. The only declared effect is
        // that the flags register is killed.
12574   match(SafePoint);
12575   effect(KILL cr);
12576 
12577   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12578             "# Safepoint: poll for GC" %}
12579   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12580   ins_cost(125);
12581   ins_encode(enc_safepoint_poll);
12582   ins_pipe(ialu_reg_mem);
12583 %}
12584 
12585 // ============================================================================
12586 // Procedure Call/Return Instructions
12587 // Call Java Static Instruction
12588 // Note: If this code changes, the corresponding ret_addr_offset() and
12589 //       compute_padding() functions will have to be adjusted.
      // Selected for CallStaticJava nodes that are NOT method handle invokes
      // (see the predicate); the method handle case is CallStaticJavaHandle.
12590 instruct CallStaticJavaDirect(method meth) %{
12591   match(CallStaticJava);
12592   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12593   effect(USE meth);
12594 
12595   ins_cost(300);
12596   format %{ "call,static " %}
12597   opcode(0xE8); /* E8 cd */
12598   ins_encode(Java_Static_Call(meth), call_epilog);
12599   ins_pipe(pipe_slow);
12600   ins_pc_relative(1);
12601   ins_alignment(4); // alignment attribute of 4; see the compute_padding() note above
12602 %}
12603 
12604 // Call Java Static Instruction (method handle version)
12605 // Note: If this code changes, the corresponding ret_addr_offset() and
12606 //       compute_padding() functions will have to be adjusted.
      // Selected for CallStaticJava nodes that ARE method handle invokes (see
      // the predicate). The call is bracketed by preserve_SP / restore_SP.
12607 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp) %{
12608   match(CallStaticJava);
12609   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12610   effect(USE meth);
12611   // RBP is saved by all callees (for interpreter stack correction).
12612   // We use it here for a similar purpose, in {preserve,restore}_SP.
        // NOTE(review): the rbp operand is not named in effect() or in the
        // ins_encode argument lists; presumably declaring it is what reserves
        // RBP for preserve_SP / restore_SP -- confirm.
12613 
12614   ins_cost(300);
12615   format %{ "call,static/MethodHandle " %}
12616   opcode(0xE8); /* E8 cd */
12617   ins_encode(preserve_SP,
12618              Java_Static_Call(meth),
12619              restore_SP,
12620              call_epilog);
12621   ins_pipe(pipe_slow);
12622   ins_pc_relative(1);
12623   ins_alignment(4); // alignment attribute of 4; see the compute_padding() note above
12624 %}
12625 
12626 // Call Java Dynamic Instruction
12627 // Note: If this code changes, the corresponding ret_addr_offset() and
12628 //       compute_padding() functions will have to be adjusted.
      // Per the format string, the call is preceded by a movq that loads
      // Universe::non_oop_word() into rax.
12629 instruct CallDynamicJavaDirect(method meth)
12630 %{
12631   match(CallDynamicJava);
12632   effect(USE meth);
12633 
12634   ins_cost(300);
12635   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12636             "call,dynamic " %}
12637   opcode(0xE8); /* E8 cd */
12638   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12639   ins_pipe(pipe_slow);
12640   ins_pc_relative(1);
12641   ins_alignment(4); // alignment attribute of 4; see the compute_padding() note above
12642 %}
12643 
12644 // Call Runtime Instruction
      // Matches CallRuntime nodes and encodes them with Java_To_Runtime. Unlike
      // the Java call instructs above, no call_epilog or ins_alignment is listed.
12645 instruct CallRuntimeDirect(method meth)
12646 %{
12647   match(CallRuntime);
12648   effect(USE meth);
12649 
12650   ins_cost(300);
12651   format %{ "call,runtime " %}
12652   opcode(0xE8); /* E8 cd */
12653   ins_encode(Java_To_Runtime(meth));
12654   ins_pipe(pipe_slow);
12655   ins_pc_relative(1);
12656 %}
12657 
12658 // Call runtime without safepoint
      // Matches CallLeaf nodes; uses the same Java_To_Runtime encoding as
      // CallRuntimeDirect.
12659 instruct CallLeafDirect(method meth)
12660 %{
12661   match(CallLeaf);
12662   effect(USE meth);
12663 
12664   ins_cost(300);
12665   format %{ "call_leaf,runtime " %}
12666   opcode(0xE8); /* E8 cd */
12667   ins_encode(Java_To_Runtime(meth));
12668   ins_pipe(pipe_slow);
12669   ins_pc_relative(1);
12670 %}
12671 
12672 // Call runtime without safepoint
      // Matches CallLeafNoFP nodes; apart from the matched node and the format
      // text this is identical to CallLeafDirect.
12673 instruct CallLeafNoFPDirect(method meth)
12674 %{
12675   match(CallLeafNoFP);
12676   effect(USE meth);
12677 
12678   ins_cost(300);
12679   format %{ "call_leaf_nofp,runtime " %}
12680   opcode(0xE8); /* E8 cd */
12681   ins_encode(Java_To_Runtime(meth));
12682   ins_pipe(pipe_slow);
12683   ins_pc_relative(1);
12684 %}
12685 
12686 // Return Instruction
12687 // Remove the return address & jump to it.
12688 // Notice: We always emit a nop after a ret to make sure there is room
12689 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with opcode 0xC3, so
      // no nop is emitted by this instruct itself; the notice above may be
      // stale -- confirm.
12690 instruct Ret()
12691 %{
12692   match(Return);
12693 
12694   format %{ "ret" %}
12695   opcode(0xC3); // x86 near-return opcode
12696   ins_encode(OpcP);
12697   ins_pipe(pipe_jmp);
12698 %}
12699 
12700 // Tail Call; Jump from runtime stub to Java code.
12701 // Also known as an 'interprocedural jump'.
12702 // Target of jump will eventually return to caller.
12703 // TailJump below removes the return address.
      // The jump target is taken from a no_rbp_RegP register; method_oop is an
      // rbx_RegP operand (the format notes that rbx holds the method oop).
12704 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12705 %{
12706   match(TailCall jump_target method_oop);
12707 
12708   ins_cost(300);
12709   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12710   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Encoding pieces in order: REX_reg, OpcP, reg_opc -- presumably a REX
        // prefix when the register needs one, the 0xFF opcode, then the ModRM
        // byte carrying the /4 extension and the register.
12711   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12712   ins_pipe(pipe_jmp);
12713 %}
12714 
12715 // Tail Jump; remove the return address; jump to target.
12716 // TailCall above leaves the return address around.
      // ex_oop is an rax_RegP operand. The return address is popped into rdx
      // before the indirect jump (see the format string and Opcode(0x5a)).
      // NOTE(review): rdx is overwritten by the pop but no effect() declares
      // it; presumably acceptable because control leaves this method -- confirm.
12717 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12718 %{
12719   match(TailJump jump_target ex_oop);
12720 
12721   ins_cost(300);
12722   format %{ "popq    rdx\t# pop return address\n\t"
12723             "jmp     $jump_target" %}
12724   opcode(0xFF, 0x4); /* Opcode FF /4 */
12725   ins_encode(Opcode(0x5a), // popq rdx
12726              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12727   ins_pipe(pipe_jmp);
12728 %}
12729 
12730 // Create exception oop: created by stack-crawling runtime code.
12731 // Created exception is now available to this handler, and is setup
12732 // just prior to jumping to this handler.  No code emitted.
      // The result operand is an rax_RegP, so this instruct only tells the
      // matcher that the exception oop lives in rax.
12733 instruct CreateException(rax_RegP ex_oop)
12734 %{
12735   match(Set ex_oop (CreateEx));
12736 
12737   size(0); // zero bytes: the encoding below is empty
12738   // use the following format syntax
12739   format %{ "# exception oop is in rax; no code emitted" %}
12740   ins_encode();
12741   ins_pipe(empty);
12742 %}
12743 
12744 // Rethrow exception:
12745 // The exception oop will come in the first argument position.
12746 // Then JUMP (not call) to the rethrow stub code.
      // Takes no operands; the jump itself is produced by the enc_rethrow
      // encoding class, which is defined elsewhere in this file.
12747 instruct RethrowException()
12748 %{
12749   match(Rethrow);
12750 
12751   // use the following format syntax
12752   format %{ "jmp     rethrow_stub" %}
12753   ins_encode(enc_rethrow);
12754   ins_pipe(pipe_jmp);
12755 %}
12756 
12757 
12758 //----------PEEPHOLE RULES-----------------------------------------------------
12759 // These must follow all instruction definitions as they use the names
12760 // defined in the instruction definitions.
12761 //
12762 // peepmatch ( root_instr_name [preceding_instruction]* );
12763 //
12764 // peepconstraint (
12765 //   instruction_number.operand_name relational_op instruction_number.operand_name
12766 //  [, ...] );
12767 // // instruction numbers are zero-based using left to right order in peepmatch
12768 //
12769 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12770 // // provide an instruction_number.operand_name for each operand that appears
12771 // // in the replacement instruction's match rule
12772 //
12773 // ---------VM FLAGS---------------------------------------------------------
12774 //
12775 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12776 //
12777 // Each peephole rule is given an identifying number starting with zero and
12778 // increasing by one in the order seen by the parser.  An individual peephole
12779 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12780 // on the command-line.
12781 //
12782 // ---------CURRENT LIMITATIONS----------------------------------------------
12783 //
12784 // Only match adjacent instructions in same basic block
12785 // Only equality constraints
12786 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12787 // Only one replacement instruction
12788 //
12789 // ---------EXAMPLE----------------------------------------------------------
12790 //
12791 // // pertinent parts of existing instructions in architecture description
12792 // instruct movI(rRegI dst, rRegI src)
12793 // %{
12794 //   match(Set dst (CopyI src));
12795 // %}
12796 //
12797 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12798 // %{
12799 //   match(Set dst (AddI dst src));
12800 //   effect(KILL cr);
12801 // %}
12802 //
12803 // // Change (inc mov) to lea
12804 // peephole %{
12805 //   // increment preceded by register-register move
12806 //   peepmatch ( incI_rReg movI );
12807 //   // require that the destination register of the increment
12808 //   // match the destination register of the move
12809 //   peepconstraint ( 0.dst == 1.dst );
12810 //   // construct a replacement instruction that sets
12811 //   // the destination to ( move's source register + one )
12812 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12813 // %}
12814 //
12815 
12816 // Implementation no longer uses movX instructions since
12817 // machine-independent system no longer uses CopyX nodes.
12818 //
12819 // peephole
12820 // %{
12821 //   peepmatch (incI_rReg movI);
12822 //   peepconstraint (0.dst == 1.dst);
12823 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12824 // %}
12825 
12826 // peephole
12827 // %{
12828 //   peepmatch (decI_rReg movI);
12829 //   peepconstraint (0.dst == 1.dst);
12830 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12831 // %}
12832 
12833 // peephole
12834 // %{
12835 //   peepmatch (addI_rReg_imm movI);
12836 //   peepconstraint (0.dst == 1.dst);
12837 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12838 // %}
12839 
12840 // peephole
12841 // %{
12842 //   peepmatch (incL_rReg movL);
12843 //   peepconstraint (0.dst == 1.dst);
12844 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12845 // %}
12846 
12847 // peephole
12848 // %{
12849 //   peepmatch (decL_rReg movL);
12850 //   peepconstraint (0.dst == 1.dst);
12851 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12852 // %}
12853 
12854 // peephole
12855 // %{
12856 //   peepmatch (addL_rReg_imm movL);
12857 //   peepconstraint (0.dst == 1.dst);
12858 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12859 // %}
12860 
12861 // peephole
12862 // %{
12863 //   peepmatch (addP_rReg_imm movP);
12864 //   peepconstraint (0.dst == 1.dst);
12865 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12866 // %}
12867 
12868 // // Change load of spilled value to only a spill
12869 // instruct storeI(memory mem, rRegI src)
12870 // %{
12871 //   match(Set mem (StoreI mem src));
12872 // %}
12873 //
12874 // instruct loadI(rRegI dst, memory mem)
12875 // %{
12876 //   match(Set dst (LoadI mem));
12877 // %}
12878 //
12879 
12880 peephole
12881 %{
        // Removes an int load that immediately follows a store of the same
        // register to the same memory operand. Per the rule syntax documented
        // above, instruction 0 is the root (loadI) and instruction 1 is the
        // instruction preceding it (storeI).
12882   peepmatch (loadI storeI);
        // The load's destination must be the register just stored, and both
        // instructions must use the same memory operand.
12883   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
12884   peepreplace (storeI(1.mem 1.mem 1.src));
12885 %}
12886 
12887 peephole
12888 %{
        // Long counterpart of the loadI/storeI rule: removes a long load that
        // immediately follows a store of the same register to the same memory
        // operand. Instruction 0 is the root (loadL), instruction 1 the
        // preceding storeL.
12889   peepmatch (loadL storeL);
        // The load's destination must be the register just stored, and both
        // instructions must use the same memory operand.
12890   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
12891   peepreplace (storeL(1.mem 1.mem 1.src));
12892 %}
12893 
12894 //----------SMARTSPILL RULES---------------------------------------------------
12895 // These must follow all instruction definitions as they use the names
12896 // defined in the instruction definitions.