1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short names for the Assembler relocation-format constants.  RELOC_DISP32 is
// passed as the `format` argument to emit_d32_reloc() in this block.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so a MacroAssembler
// named _masm must be in scope wherever it is used.
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
#ifndef PRODUCT
// Non-PRODUCT builds only: writes the text "INT3" for a breakpoint node to
// the given stream.  The register allocator argument is unused.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("INT3");
}
#endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
 650 // EMIT_D16()
 651 void emit_d16(CodeBuffer &cbuf, int d16) {
 652   cbuf.insts()->emit_int16(d16);
 653 }
 654 
 655 // EMIT_D32()
 656 void emit_d32(CodeBuffer &cbuf, int d32) {
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // EMIT_D64()
 661 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 662   cbuf.insts()->emit_int64(d64);
 663 }
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739 
 740   // There is no index & no scale, use form without SIB byte
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   if (dstenc != srcenc) {
 803     if (dstenc < 8) {
 804       if (srcenc >= 8) {
 805         emit_opcode(cbuf, Assembler::REX_B);
 806         srcenc -= 8;
 807       }
 808     } else {
 809       if (srcenc < 8) {
 810         emit_opcode(cbuf, Assembler::REX_R);
 811       } else {
 812         emit_opcode(cbuf, Assembler::REX_RB);
 813         srcenc -= 8;
 814       }
 815       dstenc -= 8;
 816     }
 817 
 818     emit_opcode(cbuf, 0x8B);
 819     emit_rm(cbuf, 0x3, dstenc, srcenc);
 820   }
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   if( dst_encoding == src_encoding ) {
 825     // reg-reg copy, use an empty encoding
 826   } else {
 827     MacroAssembler _masm(&cbuf);
 828 
 829     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 830   }
 831 }
 832 
 833 // This could be in MacroAssembler but it's fairly C2 specific
 834 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 835   Label exit;
 836   __ jccb(Assembler::noParity, exit);
 837   __ pushf();
 838   __ andq(Address(rsp, 0), 0xffffff2b);
 839   __ popf();
 840   __ bind(exit);
 841   __ nop(); // (target for branch to avoid branch to branch)
 842 }
 843 
 844 
 845 //=============================================================================
 846 const bool Matcher::constant_table_absolute_addressing = true;
 847 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 848 
 849 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 850   // Empty encoding
 851 }
 852 
 853 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 854   return 0;
 855 }
 856 
 857 #ifndef PRODUCT
 858 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 859   st->print("# MachConstantBaseNode (empty encoding)");
 860 }
 861 #endif
 862 
 863 
 864 //=============================================================================
 865 #ifndef PRODUCT
 866 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 867 {
 868   Compile* C = ra_->C;
 869 
 870   int framesize = C->frame_slots() << LogBytesPerInt;
 871   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 872   // Remove wordSize for return adr already pushed
 873   // and another for the RBP we are going to save
 874   framesize -= 2*wordSize;
 875   bool need_nop = true;
 876 
 877   // Calls to C2R adapters often do not accept exceptional returns.
 878   // We require that their callers must bang for them.  But be
 879   // careful, because some VM calls (such as call site linkage) can
 880   // use several kilobytes of stack.  But the stack safety zone should
 881   // account for that.  See bugs 4446381, 4468289, 4497237.
 882   if (C->need_stack_bang(framesize)) {
 883     st->print_cr("# stack bang"); st->print("\t");
 884     need_nop = false;
 885   }
 886   st->print_cr("pushq   rbp"); st->print("\t");
 887 
 888   if (VerifyStackAtCalls) {
 889     // Majik cookie to verify stack depth
 890     st->print_cr("pushq   0xffffffffbadb100d"
 891                   "\t# Majik cookie for stack depth check");
 892     st->print("\t");
 893     framesize -= wordSize; // Remove 2 for cookie
 894     need_nop = false;
 895   }
 896 
 897   if (framesize) {
 898     st->print("subq    rsp, #%d\t# Create frame", framesize);
 899     if (framesize < 0x80 && need_nop) {
 900       st->print("\n\tnop\t# nop for patch_verified_entry");
 901     }
 902   }
 903 }
 904 #endif
 905 
 906 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 907 {
 908   Compile* C = ra_->C;
 909 
 910   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 911   // NativeJump::patch_verified_entry will be able to patch out the entry
 912   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 913   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 914   // 6 bytes. So if we don't do the fldcw or the push then we must
 915   // use the 6 byte frame allocation even if we have no frame. :-(
 916   // If method sets FPU control word do it now
 917 
 918   int framesize = C->frame_slots() << LogBytesPerInt;
 919   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 920   // Remove wordSize for return adr already pushed
 921   // and another for the RBP we are going to save
 922   framesize -= 2*wordSize;
 923   bool need_nop = true;
 924 
 925   // Calls to C2R adapters often do not accept exceptional returns.
 926   // We require that their callers must bang for them.  But be
 927   // careful, because some VM calls (such as call site linkage) can
 928   // use several kilobytes of stack.  But the stack safety zone should
 929   // account for that.  See bugs 4446381, 4468289, 4497237.
 930   if (C->need_stack_bang(framesize)) {
 931     MacroAssembler masm(&cbuf);
 932     masm.generate_stack_overflow_check(framesize);
 933     need_nop = false;
 934   }
 935 
 936   // We always push rbp so that on return to interpreter rbp will be
 937   // restored correctly and we can correct the stack.
 938   emit_opcode(cbuf, 0x50 | RBP_enc);
 939 
 940   if (VerifyStackAtCalls) {
 941     // Majik cookie to verify stack depth
 942     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 943     emit_d32(cbuf, 0xbadb100d);
 944     framesize -= wordSize; // Remove 2 for cookie
 945     need_nop = false;
 946   }
 947 
 948   if (framesize) {
 949     emit_opcode(cbuf, Assembler::REX_W);
 950     if (framesize < 0x80) {
 951       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 952       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 953       emit_d8(cbuf, framesize);
 954       if (need_nop) {
 955         emit_opcode(cbuf, 0x90); // nop
 956       }
 957     } else {
 958       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 959       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 960       emit_d32(cbuf, framesize);
 961     }
 962   }
 963 
 964   C->set_frame_complete(cbuf.insts_size());
 965 
 966 #ifdef ASSERT
 967   if (VerifyStackAtCalls) {
 968     Label L;
 969     MacroAssembler masm(&cbuf);
 970     masm.push(rax);
 971     masm.mov(rax, rsp);
 972     masm.andptr(rax, StackAlignmentInBytes-1);
 973     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 974     masm.pop(rax);
 975     masm.jcc(Assembler::equal, L);
 976     masm.stop("Stack is not properly aligned!");
 977     masm.bind(L);
 978   }
 979 #endif
 980 }
 981 
 982 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 983 {
 984   return MachNode::size(ra_); // too many variables; just compute it
 985                               // the hard way
 986 }
 987 
 988 int MachPrologNode::reloc() const
 989 {
 990   return 0; // a large enough number
 991 }
 992 
 993 //=============================================================================
 994 #ifndef PRODUCT
 995 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 996 {
 997   Compile* C = ra_->C;
 998   int framesize = C->frame_slots() << LogBytesPerInt;
 999   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1000   // Remove word for return adr already pushed
1001   // and RBP
1002   framesize -= 2*wordSize;
1003 
1004   if (framesize) {
1005     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
1006     st->print("\t");
1007   }
1008 
1009   st->print_cr("popq   rbp");
1010   if (do_polling() && C->is_method_compilation()) {
1011     st->print("\t");
1012     if (Assembler::is_polling_page_far()) {
1013       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
1014                    "testl  rax, [rscratch1]\t"
1015                    "# Safepoint: poll for GC");
1016     } else {
1017       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
1018                    "# Safepoint: poll for GC");
1019     }
1020   }
1021 }
1022 #endif
1023 
1024 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1025 {
1026   Compile* C = ra_->C;
1027   int framesize = C->frame_slots() << LogBytesPerInt;
1028   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1029   // Remove word for return adr already pushed
1030   // and RBP
1031   framesize -= 2*wordSize;
1032 
1033   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1034 
1035   if (framesize) {
1036     emit_opcode(cbuf, Assembler::REX_W);
1037     if (framesize < 0x80) {
1038       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1039       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1040       emit_d8(cbuf, framesize);
1041     } else {
1042       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1043       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1044       emit_d32(cbuf, framesize);
1045     }
1046   }
1047 
1048   // popq rbp
1049   emit_opcode(cbuf, 0x58 | RBP_enc);
1050 
1051   if (do_polling() && C->is_method_compilation()) {
1052     MacroAssembler _masm(&cbuf);
1053     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1054     if (Assembler::is_polling_page_far()) {
1055       __ lea(rscratch1, polling_page);
1056       __ relocate(relocInfo::poll_return_type);
1057       __ testl(rax, Address(rscratch1, 0));
1058     } else {
1059       __ testl(rax, polling_page);
1060     }
1061   }
1062 }
1063 
1064 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1065 {
1066   return MachNode::size(ra_); // too many variables; just compute it
1067                               // the hard way
1068 }
1069 
1070 int MachEpilogNode::reloc() const
1071 {
1072   return 2; // a large enough number
1073 }
1074 
1075 const Pipeline* MachEpilogNode::pipeline() const
1076 {
1077   return MachNode::pipeline_class();
1078 }
1079 
1080 int MachEpilogNode::safepoint_offset() const
1081 {
1082   return 0;
1083 }
1084 
1085 //=============================================================================
1086 
// Register classes used when deciding how to copy a value between two
// locations.
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general-purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1093 
1094 static enum RC rc_class(OptoReg::Name reg)
1095 {
1096   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1097 
1098   if (OptoReg::is_stack(reg)) return rc_stack;
1099 
1100   VMReg r = OptoReg::as_VMReg(reg);
1101 
1102   if (r->is_Register()) return rc_int;
1103 
1104   assert(r->is_XMMRegister(), "must be");
1105   return rc_float;
1106 }
1107 
1108 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1109                                        PhaseRegAlloc* ra_,
1110                                        bool do_size,
1111                                        outputStream* st) const
1112 {
1113 
1114   // Get registers to move
1115   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1116   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1117   OptoReg::Name dst_second = ra_->get_reg_second(this);
1118   OptoReg::Name dst_first = ra_->get_reg_first(this);
1119 
1120   enum RC src_second_rc = rc_class(src_second);
1121   enum RC src_first_rc = rc_class(src_first);
1122   enum RC dst_second_rc = rc_class(dst_second);
1123   enum RC dst_first_rc = rc_class(dst_first);
1124 
1125   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1126          "must move at least 1 register" );
1127 
1128   if (src_first == dst_first && src_second == dst_second) {
1129     // Self copy, no move
1130     return 0;
1131   } else if (src_first_rc == rc_stack) {
1132     // mem ->
1133     if (dst_first_rc == rc_stack) {
1134       // mem -> mem
1135       assert(src_second != dst_first, "overlap");
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int src_offset = ra_->reg2offset(src_first);
1140         int dst_offset = ra_->reg2offset(dst_first);
1141         if (cbuf) {
1142           emit_opcode(*cbuf, 0xFF);
1143           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1144 
1145           emit_opcode(*cbuf, 0x8F);
1146           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1147 
1148 #ifndef PRODUCT
1149         } else if (!do_size) {
1150           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1151                      "popq    [rsp + #%d]",
1152                      src_offset,
1153                      dst_offset);
1154 #endif
1155         }
1156         return
1157           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1158           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1159       } else {
1160         // 32-bit
1161         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1162         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1163         // No pushl/popl, so:
1164         int src_offset = ra_->reg2offset(src_first);
1165         int dst_offset = ra_->reg2offset(dst_first);
1166         if (cbuf) {
1167           emit_opcode(*cbuf, Assembler::REX_W);
1168           emit_opcode(*cbuf, 0x89);
1169           emit_opcode(*cbuf, 0x44);
1170           emit_opcode(*cbuf, 0x24);
1171           emit_opcode(*cbuf, 0xF8);
1172 
1173           emit_opcode(*cbuf, 0x8B);
1174           encode_RegMem(*cbuf,
1175                         RAX_enc,
1176                         RSP_enc, 0x4, 0, src_offset,
1177                         false);
1178 
1179           emit_opcode(*cbuf, 0x89);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, dst_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, Assembler::REX_W);
1186           emit_opcode(*cbuf, 0x8B);
1187           emit_opcode(*cbuf, 0x44);
1188           emit_opcode(*cbuf, 0x24);
1189           emit_opcode(*cbuf, 0xF8);
1190 
1191 #ifndef PRODUCT
1192         } else if (!do_size) {
1193           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1194                      "movl    rax, [rsp + #%d]\n\t"
1195                      "movl    [rsp + #%d], rax\n\t"
1196                      "movq    rax, [rsp - #8]",
1197                      src_offset,
1198                      dst_offset);
1199 #endif
1200         }
1201         return
1202           5 + // movq
1203           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1204           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1205           5; // movq
1206       }
1207     } else if (dst_first_rc == rc_int) {
1208       // mem -> gpr
1209       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1210           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1211         // 64-bit
1212         int offset = ra_->reg2offset(src_first);
1213         if (cbuf) {
1214           if (Matcher::_regEncode[dst_first] < 8) {
1215             emit_opcode(*cbuf, Assembler::REX_W);
1216           } else {
1217             emit_opcode(*cbuf, Assembler::REX_WR);
1218           }
1219           emit_opcode(*cbuf, 0x8B);
1220           encode_RegMem(*cbuf,
1221                         Matcher::_regEncode[dst_first],
1222                         RSP_enc, 0x4, 0, offset,
1223                         false);
1224 #ifndef PRODUCT
1225         } else if (!do_size) {
1226           st->print("movq    %s, [rsp + #%d]\t# spill",
1227                      Matcher::regName[dst_first],
1228                      offset);
1229 #endif
1230         }
1231         return
1232           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1233       } else {
1234         // 32-bit
1235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1237         int offset = ra_->reg2offset(src_first);
1238         if (cbuf) {
1239           if (Matcher::_regEncode[dst_first] >= 8) {
1240             emit_opcode(*cbuf, Assembler::REX_R);
1241           }
1242           emit_opcode(*cbuf, 0x8B);
1243           encode_RegMem(*cbuf,
1244                         Matcher::_regEncode[dst_first],
1245                         RSP_enc, 0x4, 0, offset,
1246                         false);
1247 #ifndef PRODUCT
1248         } else if (!do_size) {
1249           st->print("movl    %s, [rsp + #%d]\t# spill",
1250                      Matcher::regName[dst_first],
1251                      offset);
1252 #endif
1253         }
1254         return
1255           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1256           ((Matcher::_regEncode[dst_first] < 8)
1257            ? 3
1258            : 4); // REX
1259       }
1260     } else if (dst_first_rc == rc_float) {
1261       // mem-> xmm
1262       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1263           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1264         // 64-bit
1265         int offset = ra_->reg2offset(src_first);
1266         if (cbuf) {
1267           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1268           if (Matcher::_regEncode[dst_first] >= 8) {
1269             emit_opcode(*cbuf, Assembler::REX_R);
1270           }
1271           emit_opcode(*cbuf, 0x0F);
1272           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1273           encode_RegMem(*cbuf,
1274                         Matcher::_regEncode[dst_first],
1275                         RSP_enc, 0x4, 0, offset,
1276                         false);
1277 #ifndef PRODUCT
1278         } else if (!do_size) {
1279           st->print("%s  %s, [rsp + #%d]\t# spill",
1280                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1281                      Matcher::regName[dst_first],
1282                      offset);
1283 #endif
1284         }
1285         return
1286           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1287           ((Matcher::_regEncode[dst_first] < 8)
1288            ? 5
1289            : 6); // REX
1290       } else {
1291         // 32-bit
1292         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1293         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1294         int offset = ra_->reg2offset(src_first);
1295         if (cbuf) {
1296           emit_opcode(*cbuf, 0xF3);
1297           if (Matcher::_regEncode[dst_first] >= 8) {
1298             emit_opcode(*cbuf, Assembler::REX_R);
1299           }
1300           emit_opcode(*cbuf, 0x0F);
1301           emit_opcode(*cbuf, 0x10);
1302           encode_RegMem(*cbuf,
1303                         Matcher::_regEncode[dst_first],
1304                         RSP_enc, 0x4, 0, offset,
1305                         false);
1306 #ifndef PRODUCT
1307         } else if (!do_size) {
1308           st->print("movss   %s, [rsp + #%d]\t# spill",
1309                      Matcher::regName[dst_first],
1310                      offset);
1311 #endif
1312         }
1313         return
1314           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1315           ((Matcher::_regEncode[dst_first] < 8)
1316            ? 5
1317            : 6); // REX
1318       }
1319     }
1320   } else if (src_first_rc == rc_int) {
1321     // gpr ->
1322     if (dst_first_rc == rc_stack) {
1323       // gpr -> mem
1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1326         // 64-bit
1327         int offset = ra_->reg2offset(dst_first);
1328         if (cbuf) {
1329           if (Matcher::_regEncode[src_first] < 8) {
1330             emit_opcode(*cbuf, Assembler::REX_W);
1331           } else {
1332             emit_opcode(*cbuf, Assembler::REX_WR);
1333           }
1334           emit_opcode(*cbuf, 0x89);
1335           encode_RegMem(*cbuf,
1336                         Matcher::_regEncode[src_first],
1337                         RSP_enc, 0x4, 0, offset,
1338                         false);
1339 #ifndef PRODUCT
1340         } else if (!do_size) {
1341           st->print("movq    [rsp + #%d], %s\t# spill",
1342                      offset,
1343                      Matcher::regName[src_first]);
1344 #endif
1345         }
1346         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1347       } else {
1348         // 32-bit
1349         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1350         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1351         int offset = ra_->reg2offset(dst_first);
1352         if (cbuf) {
1353           if (Matcher::_regEncode[src_first] >= 8) {
1354             emit_opcode(*cbuf, Assembler::REX_R);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movl    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return
1369           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1370           ((Matcher::_regEncode[src_first] < 8)
1371            ? 3
1372            : 4); // REX
1373       }
1374     } else if (dst_first_rc == rc_int) {
1375       // gpr -> gpr
1376       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1377           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1378         // 64-bit
1379         if (cbuf) {
1380           if (Matcher::_regEncode[dst_first] < 8) {
1381             if (Matcher::_regEncode[src_first] < 8) {
1382               emit_opcode(*cbuf, Assembler::REX_W);
1383             } else {
1384               emit_opcode(*cbuf, Assembler::REX_WB);
1385             }
1386           } else {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_WR);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WRB);
1391             }
1392           }
1393           emit_opcode(*cbuf, 0x8B);
1394           emit_rm(*cbuf, 0x3,
1395                   Matcher::_regEncode[dst_first] & 7,
1396                   Matcher::_regEncode[src_first] & 7);
1397 #ifndef PRODUCT
1398         } else if (!do_size) {
1399           st->print("movq    %s, %s\t# spill",
1400                      Matcher::regName[dst_first],
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404         return 3; // REX
1405       } else {
1406         // 32-bit
1407         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1408         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1409         if (cbuf) {
1410           if (Matcher::_regEncode[dst_first] < 8) {
1411             if (Matcher::_regEncode[src_first] >= 8) {
1412               emit_opcode(*cbuf, Assembler::REX_B);
1413             }
1414           } else {
1415             if (Matcher::_regEncode[src_first] < 8) {
1416               emit_opcode(*cbuf, Assembler::REX_R);
1417             } else {
1418               emit_opcode(*cbuf, Assembler::REX_RB);
1419             }
1420           }
1421           emit_opcode(*cbuf, 0x8B);
1422           emit_rm(*cbuf, 0x3,
1423                   Matcher::_regEncode[dst_first] & 7,
1424                   Matcher::_regEncode[src_first] & 7);
1425 #ifndef PRODUCT
1426         } else if (!do_size) {
1427           st->print("movl    %s, %s\t# spill",
1428                      Matcher::regName[dst_first],
1429                      Matcher::regName[src_first]);
1430 #endif
1431         }
1432         return
1433           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1434           ? 2
1435           : 3; // REX
1436       }
1437     } else if (dst_first_rc == rc_float) {
1438       // gpr -> xmm
1439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1441         // 64-bit
1442         if (cbuf) {
1443           emit_opcode(*cbuf, 0x66);
1444           if (Matcher::_regEncode[dst_first] < 8) {
1445             if (Matcher::_regEncode[src_first] < 8) {
1446               emit_opcode(*cbuf, Assembler::REX_W);
1447             } else {
1448               emit_opcode(*cbuf, Assembler::REX_WB);
1449             }
1450           } else {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_WR);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WRB);
1455             }
1456           }
1457           emit_opcode(*cbuf, 0x0F);
1458           emit_opcode(*cbuf, 0x6E);
1459           emit_rm(*cbuf, 0x3,
1460                   Matcher::_regEncode[dst_first] & 7,
1461                   Matcher::_regEncode[src_first] & 7);
1462 #ifndef PRODUCT
1463         } else if (!do_size) {
1464           st->print("movdq   %s, %s\t# spill",
1465                      Matcher::regName[dst_first],
1466                      Matcher::regName[src_first]);
1467 #endif
1468         }
1469         return 5; // REX
1470       } else {
1471         // 32-bit
1472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1474         if (cbuf) {
1475           emit_opcode(*cbuf, 0x66);
1476           if (Matcher::_regEncode[dst_first] < 8) {
1477             if (Matcher::_regEncode[src_first] >= 8) {
1478               emit_opcode(*cbuf, Assembler::REX_B);
1479             }
1480           } else {
1481             if (Matcher::_regEncode[src_first] < 8) {
1482               emit_opcode(*cbuf, Assembler::REX_R);
1483             } else {
1484               emit_opcode(*cbuf, Assembler::REX_RB);
1485             }
1486           }
1487           emit_opcode(*cbuf, 0x0F);
1488           emit_opcode(*cbuf, 0x6E);
1489           emit_rm(*cbuf, 0x3,
1490                   Matcher::_regEncode[dst_first] & 7,
1491                   Matcher::_regEncode[src_first] & 7);
1492 #ifndef PRODUCT
1493         } else if (!do_size) {
1494           st->print("movdl   %s, %s\t# spill",
1495                      Matcher::regName[dst_first],
1496                      Matcher::regName[src_first]);
1497 #endif
1498         }
1499         return
1500           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1501           ? 4
1502           : 5; // REX
1503       }
1504     }
1505   } else if (src_first_rc == rc_float) {
1506     // xmm ->
1507     if (dst_first_rc == rc_stack) {
1508       // xmm -> mem
1509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1511         // 64-bit
1512         int offset = ra_->reg2offset(dst_first);
1513         if (cbuf) {
1514           emit_opcode(*cbuf, 0xF2);
1515           if (Matcher::_regEncode[src_first] >= 8) {
1516               emit_opcode(*cbuf, Assembler::REX_R);
1517           }
1518           emit_opcode(*cbuf, 0x0F);
1519           emit_opcode(*cbuf, 0x11);
1520           encode_RegMem(*cbuf,
1521                         Matcher::_regEncode[src_first],
1522                         RSP_enc, 0x4, 0, offset,
1523                         false);
1524 #ifndef PRODUCT
1525         } else if (!do_size) {
1526           st->print("movsd   [rsp + #%d], %s\t# spill",
1527                      offset,
1528                      Matcher::regName[src_first]);
1529 #endif
1530         }
1531         return
1532           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1533           ((Matcher::_regEncode[src_first] < 8)
1534            ? 5
1535            : 6); // REX
1536       } else {
1537         // 32-bit
1538         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1539         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1540         int offset = ra_->reg2offset(dst_first);
1541         if (cbuf) {
1542           emit_opcode(*cbuf, 0xF3);
1543           if (Matcher::_regEncode[src_first] >= 8) {
1544               emit_opcode(*cbuf, Assembler::REX_R);
1545           }
1546           emit_opcode(*cbuf, 0x0F);
1547           emit_opcode(*cbuf, 0x11);
1548           encode_RegMem(*cbuf,
1549                         Matcher::_regEncode[src_first],
1550                         RSP_enc, 0x4, 0, offset,
1551                         false);
1552 #ifndef PRODUCT
1553         } else if (!do_size) {
1554           st->print("movss   [rsp + #%d], %s\t# spill",
1555                      offset,
1556                      Matcher::regName[src_first]);
1557 #endif
1558         }
1559         return
1560           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1561           ((Matcher::_regEncode[src_first] < 8)
1562            ? 5
1563            : 6); // REX
1564       }
1565     } else if (dst_first_rc == rc_int) {
1566       // xmm -> gpr
1567       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1568           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1569         // 64-bit
1570         if (cbuf) {
1571           emit_opcode(*cbuf, 0x66);
1572           if (Matcher::_regEncode[dst_first] < 8) {
1573             if (Matcher::_regEncode[src_first] < 8) {
1574               emit_opcode(*cbuf, Assembler::REX_W);
1575             } else {
1576               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1577             }
1578           } else {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WRB);
1583             }
1584           }
1585           emit_opcode(*cbuf, 0x0F);
1586           emit_opcode(*cbuf, 0x7E);
1587           emit_rm(*cbuf, 0x3,
1588                   Matcher::_regEncode[src_first] & 7,
1589                   Matcher::_regEncode[dst_first] & 7);
1590 #ifndef PRODUCT
1591         } else if (!do_size) {
1592           st->print("movdq   %s, %s\t# spill",
1593                      Matcher::regName[dst_first],
1594                      Matcher::regName[src_first]);
1595 #endif
1596         }
1597         return 5; // REX
1598       } else {
1599         // 32-bit
1600         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1601         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1602         if (cbuf) {
1603           emit_opcode(*cbuf, 0x66);
1604           if (Matcher::_regEncode[dst_first] < 8) {
1605             if (Matcher::_regEncode[src_first] >= 8) {
1606               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1607             }
1608           } else {
1609             if (Matcher::_regEncode[src_first] < 8) {
1610               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1611             } else {
1612               emit_opcode(*cbuf, Assembler::REX_RB);
1613             }
1614           }
1615           emit_opcode(*cbuf, 0x0F);
1616           emit_opcode(*cbuf, 0x7E);
1617           emit_rm(*cbuf, 0x3,
1618                   Matcher::_regEncode[src_first] & 7,
1619                   Matcher::_regEncode[dst_first] & 7);
1620 #ifndef PRODUCT
1621         } else if (!do_size) {
1622           st->print("movdl   %s, %s\t# spill",
1623                      Matcher::regName[dst_first],
1624                      Matcher::regName[src_first]);
1625 #endif
1626         }
1627         return
1628           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1629           ? 4
1630           : 5; // REX
1631       }
1632     } else if (dst_first_rc == rc_float) {
1633       // xmm -> xmm
1634       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1635           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1636         // 64-bit
1637         if (cbuf) {
1638           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1639           if (Matcher::_regEncode[dst_first] < 8) {
1640             if (Matcher::_regEncode[src_first] >= 8) {
1641               emit_opcode(*cbuf, Assembler::REX_B);
1642             }
1643           } else {
1644             if (Matcher::_regEncode[src_first] < 8) {
1645               emit_opcode(*cbuf, Assembler::REX_R);
1646             } else {
1647               emit_opcode(*cbuf, Assembler::REX_RB);
1648             }
1649           }
1650           emit_opcode(*cbuf, 0x0F);
1651           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1652           emit_rm(*cbuf, 0x3,
1653                   Matcher::_regEncode[dst_first] & 7,
1654                   Matcher::_regEncode[src_first] & 7);
1655 #ifndef PRODUCT
1656         } else if (!do_size) {
1657           st->print("%s  %s, %s\t# spill",
1658                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1659                      Matcher::regName[dst_first],
1660                      Matcher::regName[src_first]);
1661 #endif
1662         }
1663         return
1664           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1665           ? 4
1666           : 5; // REX
1667       } else {
1668         // 32-bit
1669         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1670         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1671         if (cbuf) {
1672           if (!UseXmmRegToRegMoveAll)
1673             emit_opcode(*cbuf, 0xF3);
1674           if (Matcher::_regEncode[dst_first] < 8) {
1675             if (Matcher::_regEncode[src_first] >= 8) {
1676               emit_opcode(*cbuf, Assembler::REX_B);
1677             }
1678           } else {
1679             if (Matcher::_regEncode[src_first] < 8) {
1680               emit_opcode(*cbuf, Assembler::REX_R);
1681             } else {
1682               emit_opcode(*cbuf, Assembler::REX_RB);
1683             }
1684           }
1685           emit_opcode(*cbuf, 0x0F);
1686           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1687           emit_rm(*cbuf, 0x3,
1688                   Matcher::_regEncode[dst_first] & 7,
1689                   Matcher::_regEncode[src_first] & 7);
1690 #ifndef PRODUCT
1691         } else if (!do_size) {
1692           st->print("%s  %s, %s\t# spill",
1693                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1694                      Matcher::regName[dst_first],
1695                      Matcher::regName[src_first]);
1696 #endif
1697         }
1698         return
1699           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1700           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1701           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1702       }
1703     }
1704   }
1705 
1706   assert(0," foo ");
1707   Unimplemented();
1708 
1709   return 0;
1710 }
1711 
1712 #ifndef PRODUCT
1713 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1714 {
1715   implementation(NULL, ra_, false, st);
1716 }
1717 #endif
1718 
1719 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1720 {
1721   implementation(&cbuf, ra_, false, NULL);
1722 }
1723 
1724 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1725 {
1726   return implementation(NULL, ra_, true, NULL);
1727 }
1728 
1729 //=============================================================================
1730 #ifndef PRODUCT
1731 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1732 {
1733   st->print("nop \t# %d bytes pad for loops and calls", _count);
1734 }
1735 #endif
1736 
1737 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1738 {
1739   MacroAssembler _masm(&cbuf);
1740   __ nop(_count);
1741 }
1742 
1743 uint MachNopNode::size(PhaseRegAlloc*) const
1744 {
1745   return _count;
1746 }
1747 
1748 
1749 //=============================================================================
1750 #ifndef PRODUCT
1751 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1752 {
1753   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1754   int reg = ra_->get_reg_first(this);
1755   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1756             Matcher::regName[reg], offset);
1757 }
1758 #endif
1759 
1760 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1761 {
1762   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1763   int reg = ra_->get_encode(this);
1764   if (offset >= 0x80) {
1765     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1766     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1767     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1768     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1769     emit_d32(cbuf, offset);
1770   } else {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d8(cbuf, offset);
1776   }
1777 }
1778 
1779 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1780 {
1781   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1782   return (offset < 0x80) ? 5 : 8; // REX
1783 }
1784 
1785 //=============================================================================
1786 
1787 // emit call stub, compiled java to interpreter
1788 void emit_java_to_interp(CodeBuffer& cbuf)
1789 {
1790   // Stub is fixed up when the corresponding call is converted from
1791   // calling compiled code to calling interpreted code.
1792   // movq rbx, 0
1793   // jmp -5 # to self
1794 
1795   address mark = cbuf.insts_mark();  // get mark within main instrs section
1796 
1797   // Note that the code buffer's insts_mark is always relative to insts.
1798   // That's why we must use the macroassembler to generate a stub.
1799   MacroAssembler _masm(&cbuf);
1800 
1801   address base =
1802   __ start_a_stub(Compile::MAX_stubs_size);
1803   if (base == NULL)  return;  // CodeBuffer::expand failed
1804   // static stub relocation stores the instruction address of the call
1805   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1806   // static stub relocation also tags the methodOop in the code-stream.
1807   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1808   // This is recognized as unresolved by relocs/nativeinst/ic code
1809   __ jump(RuntimeAddress(__ pc()));
1810 
1811   // Update current stubs pointer and restore insts_end.
1812   __ end_a_stub();
1813 }
1814 
1815 // size of call stub, compiled java to interpretor
1816 uint size_java_to_interp()
1817 {
1818   return 15;  // movq (1+1+8); jmp (1+4)
1819 }
1820 
1821 // relocation entries for call stub, compiled java to interpretor
1822 uint reloc_java_to_interp()
1823 {
1824   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1825 }
1826 
1827 //=============================================================================
1828 #ifndef PRODUCT
1829 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1830 {
1831   if (UseCompressedOops) {
1832     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1833     if (Universe::narrow_oop_shift() != 0) {
1834       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1835     }
1836     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1837   } else {
1838     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1839                  "# Inline cache check");
1840   }
1841   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1842   st->print_cr("\tnop\t# nops to align entry point");
1843 }
1844 #endif
1845 
1846 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1847 {
1848   MacroAssembler masm(&cbuf);
1849   uint insts_size = cbuf.insts_size();
1850   if (UseCompressedOops) {
1851     masm.load_klass(rscratch1, j_rarg0);
1852     masm.cmpptr(rax, rscratch1);
1853   } else {
1854     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1855   }
1856 
1857   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1858 
1859   /* WARNING these NOPs are critical so that verified entry point is properly
1860      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1861   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1862   if (OptoBreakpoint) {
1863     // Leave space for int3
1864     nops_cnt -= 1;
1865   }
1866   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1867   if (nops_cnt > 0)
1868     masm.nop(nops_cnt);
1869 }
1870 
1871 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1872 {
1873   return MachNode::size(ra_); // too many variables; just compute it
1874                               // the hard way
1875 }
1876 
1877 
1878 //=============================================================================
1879 uint size_exception_handler()
1880 {
1881   // NativeCall instruction size is the same as NativeJump.
1882   // Note that this value is also credited (in output.cpp) to
1883   // the size of the code section.
1884   return NativeJump::instruction_size;
1885 }
1886 
1887 // Emit exception handler code.
1888 int emit_exception_handler(CodeBuffer& cbuf)
1889 {
1890 
1891   // Note that the code buffer's insts_mark is always relative to insts.
1892   // That's why we must use the macroassembler to generate a handler.
1893   MacroAssembler _masm(&cbuf);
1894   address base =
1895   __ start_a_stub(size_exception_handler());
1896   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1897   int offset = __ offset();
1898   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1899   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1900   __ end_a_stub();
1901   return offset;
1902 }
1903 
1904 uint size_deopt_handler()
1905 {
1906   // three 5 byte instructions
1907   return 15;
1908 }
1909 
1910 // Emit deopt handler code.
1911 int emit_deopt_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's insts_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_deopt_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   address the_pc = (address) __ pc();
1922   Label next;
1923   // push a "the_pc" on the stack without destroying any registers
1924   // as they all may be live.
1925 
1926   // push address of "next"
1927   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1928   __ bind(next);
1929   // adjust it so it matches "the_pc"
1930   __ subptr(Address(rsp, 0), __ offset() - offset);
1931   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1932   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1933   __ end_a_stub();
1934   return offset;
1935 }
1936 
1937 
1938 const bool Matcher::match_rule_supported(int opcode) {
1939   if (!has_match_rule(opcode))
1940     return false;
1941 
1942   return true;  // Per default match rules are supported.
1943 }
1944 
1945 int Matcher::regnum_to_fpu_offset(int regnum)
1946 {
1947   return regnum - 32; // The FP registers are in the second chunk
1948 }
1949 
1950 // This is UltraSparc specific, true just means we have fast l2f conversion
1951 const bool Matcher::convL2FSupported(void) {
1952   return true;
1953 }
1954 
1955 // Vector width in bytes
1956 const uint Matcher::vector_width_in_bytes(void) {
1957   return 8;
1958 }
1959 
1960 // Vector ideal reg
1961 const uint Matcher::vector_ideal_reg(void) {
1962   return Op_RegD;
1963 }
1964 
1965 // Is this branch offset short enough that a short branch can be used?
1966 //
1967 // NOTE: If the platform does not provide any short branch variants, then
1968 //       this method should return false for offset 0.
1969 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1970   // The passed offset is relative to address of the branch.
1971   // On 86 a branch displacement is calculated relative to address
1972   // of a next instruction.
1973   offset -= br_size;
1974 
1975   // the short version of jmpConUCF2 contains multiple branches,
1976   // making the reach slightly less
1977   if (rule == jmpConUCF2_rule)
1978     return (-126 <= offset && offset <= 125);
1979   return (-128 <= offset && offset <= 127);
1980 }
1981 
1982 const bool Matcher::isSimpleConstant64(jlong value) {
1983   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1984   //return value == (int) value;  // Cf. storeImmL and immL32.
1985 
1986   // Probably always true, even if a temp register is required.
1987   return true;
1988 }
1989 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// so the count is not a byte count.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, expressed in bytes as eight times
// BytesPerLong.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
1995 
1996 // No additional cost for CMOVL.
1997 const int Matcher::long_cmove_cost() { return 0; }
1998 
1999 // No CMOVF/CMOVD with SSE2
2000 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
2001 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?  Here no explicit
// mask is requested.
const bool Matcher::need_masked_shift_count = false;
2010 
2011 bool Matcher::narrow_oop_use_complex_address() {
2012   assert(UseCompressedOops, "only for compressed oops code");
2013   return (LogMinObjAlignmentInBytes <= 3);
2014 }
2015 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store misaligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2028 
2029 // No-op on amd64
2030 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2031 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  This description advertises true.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
2035 
2036 // Are floats conerted to double when stored to stack during deoptimization?
2037 // On x64 it is stored without convertion so we can use normal access.
2038 bool Matcher::float_in_double() { return false; }
2039 
// Do ints take an entire long register or just half?  Here: the entire
// register.
const bool Matcher::int_in_long = true;
2042 
2043 // Return whether or not this register is ever used as an argument.
2044 // This function is used on startup to build the trampoline stubs in
2045 // generateOptoStub.  Registers not mentioned will be killed by the VM
2046 // call in the trampoline, and arguments in those registers not be
2047 // available to the callee.
2048 bool Matcher::can_be_java_arg(int reg)
2049 {
2050   return
2051     reg ==  RDI_num || reg ==  RDI_H_num ||
2052     reg ==  RSI_num || reg ==  RSI_H_num ||
2053     reg ==  RDX_num || reg ==  RDX_H_num ||
2054     reg ==  RCX_num || reg ==  RCX_H_num ||
2055     reg ==   R8_num || reg ==   R8_H_num ||
2056     reg ==   R9_num || reg ==   R9_H_num ||
2057     reg ==  R12_num || reg ==  R12_H_num ||
2058     reg == XMM0_num || reg == XMM0_H_num ||
2059     reg == XMM1_num || reg == XMM1_H_num ||
2060     reg == XMM2_num || reg == XMM2_H_num ||
2061     reg == XMM3_num || reg == XMM3_H_num ||
2062     reg == XMM4_num || reg == XMM4_H_num ||
2063     reg == XMM5_num || reg == XMM5_H_num ||
2064     reg == XMM6_num || reg == XMM6_H_num ||
2065     reg == XMM7_num || reg == XMM7_H_num;
2066 }
2067 
2068 bool Matcher::is_spillable_arg(int reg)
2069 {
2070   return can_be_java_arg(reg);
2071 }
2072 
2073 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2074   // In 64 bit mode a code which use multiply when
2075   // devisor is constant is faster than hardware
2076   // DIV instruction (it uses MulHiL).
2077   return false;
2078 }
2079 
2080 // Register for DIVI projection of divmodI
2081 RegMask Matcher::divI_proj_mask() {
2082   return INT_RAX_REG_mask;
2083 }
2084 
2085 // Register for MODI projection of divmodI
2086 RegMask Matcher::modI_proj_mask() {
2087   return INT_RDX_REG_mask;
2088 }
2089 
2090 // Register for DIVL projection of divmodL
2091 RegMask Matcher::divL_proj_mask() {
2092   return LONG_RAX_REG_mask;
2093 }
2094 
2095 // Register for MODL projection of divmodL
2096 RegMask Matcher::modL_proj_mask() {
2097   return LONG_RDX_REG_mask;
2098 }
2099 
2100 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2101   return PTR_RBP_REG_mask;
2102 }
2103 
2104 static Address build_address(int b, int i, int s, int d) {
2105   Register index = as_Register(i);
2106   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2107   if (index == rsp) {
2108     index = noreg;
2109     scale = Address::no_scale;
2110   }
2111   Address addr(as_Register(b), index, scale, d);
2112   return addr;
2113 }
2114 
2115 %}
2116 
2117 //----------ENCODING BLOCK-----------------------------------------------------
2118 // This block specifies the encoding classes used by the compiler to
2119 // output byte streams.  Encoding classes are parameterized macros
2120 // used by Machine Instruction Nodes in order to generate the bit
2121 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2124 // COND_INTER.  REG_INTER causes an operand to generate a function
2125 // which returns its register number when queried.  CONST_INTER causes
2126 // an operand to generate a function which returns the value of the
2127 // constant when queried.  MEMORY_INTER causes an operand to generate
2128 // four functions which return the Base Register, the Index Register,
2129 // the Scale Value, and the Offset Value of the operand when queried.
2130 // COND_INTER causes an operand to generate six functions which return
2131 // the encoding code (ie - encoding bits for the instruction)
2132 // associated with each basic boolean condition for a conditional
2133 // instruction.
2134 //
2135 // Instructions specify two basic values for encoding.  Again, a
2136 // function is available to check if the constant displacement is an
2137 // oop. They use the ins_encode keyword to specify their encoding
2138 // classes (which must be a sequence of enc_class names, and their
2139 // parameters, specified in the encoding block), and they use the
2140 // opcode keyword to specify, in order, their primary, secondary, and
2141 // tertiary opcode.  Only the opcode sections which a particular
2142 // instruction needs for encoding need to be specified.
2143 encode %{
2144   // Build emit functions for each basic byte or larger field in the
2145   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2146   // from C++ code in the enc_class source block.  Emit functions will
2147   // live in the main source block for now.  In future, we can
2148   // generalize this by adding a syntax that specifies the sizes of
2149   // fields in an order, so that the adlc can build the emit functions
2150   // automagically
2151 
2152   // Emit primary opcode
2153   enc_class OpcP
2154   %{
2155     emit_opcode(cbuf, $primary);
2156   %}
2157 
2158   // Emit secondary opcode
2159   enc_class OpcS
2160   %{
2161     emit_opcode(cbuf, $secondary);
2162   %}
2163 
2164   // Emit tertiary opcode
2165   enc_class OpcT
2166   %{
2167     emit_opcode(cbuf, $tertiary);
2168   %}
2169 
2170   // Emit opcode directly
2171   enc_class Opcode(immI d8)
2172   %{
2173     emit_opcode(cbuf, $d8$$constant);
2174   %}
2175 
2176   // Emit size prefix
2177   enc_class SizePrefix
2178   %{
2179     emit_opcode(cbuf, 0x66);
2180   %}
2181 
2182   enc_class reg(rRegI reg)
2183   %{
2184     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2185   %}
2186 
2187   enc_class reg_reg(rRegI dst, rRegI src)
2188   %{
2189     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2190   %}
2191 
2192   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2193   %{
2194     emit_opcode(cbuf, $opcode$$constant);
2195     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2196   %}
2197 
2198   enc_class cmpfp_fixup() %{
2199       MacroAssembler _masm(&cbuf);
2200       emit_cmpfp_fixup(_masm);
2201   %}
2202 
2203   enc_class cmpfp3(rRegI dst)
2204   %{
2205     int dstenc = $dst$$reg;
2206 
2207     // movl $dst, -1
2208     if (dstenc >= 8) {
2209       emit_opcode(cbuf, Assembler::REX_B);
2210     }
2211     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2212     emit_d32(cbuf, -1);
2213 
2214     // jp,s done
2215     emit_opcode(cbuf, 0x7A);
2216     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2217 
2218     // jb,s done
2219     emit_opcode(cbuf, 0x72);
2220     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2221 
2222     // setne $dst
2223     if (dstenc >= 4) {
2224       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2225     }
2226     emit_opcode(cbuf, 0x0F);
2227     emit_opcode(cbuf, 0x95);
2228     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2229 
2230     // movzbl $dst, $dst
2231     if (dstenc >= 4) {
2232       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2233     }
2234     emit_opcode(cbuf, 0x0F);
2235     emit_opcode(cbuf, 0xB6);
2236     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2237   %}
2238 
2239   enc_class cdql_enc(no_rax_rdx_RegI div)
2240   %{
2241     // Full implementation of Java idiv and irem; checks for
2242     // special case as described in JVM spec., p.243 & p.271.
2243     //
2244     //         normal case                           special case
2245     //
2246     // input : rax: dividend                         min_int
2247     //         reg: divisor                          -1
2248     //
2249     // output: rax: quotient  (= rax idiv reg)       min_int
2250     //         rdx: remainder (= rax irem reg)       0
2251     //
2252     //  Code sequnce:
2253     //
2254     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2255     //    5:   75 07/08                jne    e <normal>
2256     //    7:   33 d2                   xor    %edx,%edx
2257     //  [div >= 8 -> offset + 1]
2258     //  [REX_B]
2259     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2260     //    c:   74 03/04                je     11 <done>
2261     // 000000000000000e <normal>:
2262     //    e:   99                      cltd
2263     //  [div >= 8 -> offset + 1]
2264     //  [REX_B]
2265     //    f:   f7 f9                   idiv   $div
2266     // 0000000000000011 <done>:
2267 
2268     // cmp    $0x80000000,%eax
2269     emit_opcode(cbuf, 0x3d);
2270     emit_d8(cbuf, 0x00);
2271     emit_d8(cbuf, 0x00);
2272     emit_d8(cbuf, 0x00);
2273     emit_d8(cbuf, 0x80);
2274 
2275     // jne    e <normal>
2276     emit_opcode(cbuf, 0x75);
2277     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2278 
2279     // xor    %edx,%edx
2280     emit_opcode(cbuf, 0x33);
2281     emit_d8(cbuf, 0xD2);
2282 
2283     // cmp    $0xffffffffffffffff,%ecx
2284     if ($div$$reg >= 8) {
2285       emit_opcode(cbuf, Assembler::REX_B);
2286     }
2287     emit_opcode(cbuf, 0x83);
2288     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2289     emit_d8(cbuf, 0xFF);
2290 
2291     // je     11 <done>
2292     emit_opcode(cbuf, 0x74);
2293     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2294 
2295     // <normal>
2296     // cltd
2297     emit_opcode(cbuf, 0x99);
2298 
2299     // idivl (note: must be emitted by the user of this rule)
2300     // <done>
2301   %}
2302 
2303   enc_class cdqq_enc(no_rax_rdx_RegL div)
2304   %{
2305     // Full implementation of Java ldiv and lrem; checks for
2306     // special case as described in JVM spec., p.243 & p.271.
2307     //
2308     //         normal case                           special case
2309     //
2310     // input : rax: dividend                         min_long
2311     //         reg: divisor                          -1
2312     //
2313     // output: rax: quotient  (= rax idiv reg)       min_long
2314     //         rdx: remainder (= rax irem reg)       0
2315     //
2316     //  Code sequnce:
2317     //
2318     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2319     //    7:   00 00 80
2320     //    a:   48 39 d0                cmp    %rdx,%rax
2321     //    d:   75 08                   jne    17 <normal>
2322     //    f:   33 d2                   xor    %edx,%edx
2323     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2324     //   15:   74 05                   je     1c <done>
2325     // 0000000000000017 <normal>:
2326     //   17:   48 99                   cqto
2327     //   19:   48 f7 f9                idiv   $div
2328     // 000000000000001c <done>:
2329 
2330     // mov    $0x8000000000000000,%rdx
2331     emit_opcode(cbuf, Assembler::REX_W);
2332     emit_opcode(cbuf, 0xBA);
2333     emit_d8(cbuf, 0x00);
2334     emit_d8(cbuf, 0x00);
2335     emit_d8(cbuf, 0x00);
2336     emit_d8(cbuf, 0x00);
2337     emit_d8(cbuf, 0x00);
2338     emit_d8(cbuf, 0x00);
2339     emit_d8(cbuf, 0x00);
2340     emit_d8(cbuf, 0x80);
2341 
2342     // cmp    %rdx,%rax
2343     emit_opcode(cbuf, Assembler::REX_W);
2344     emit_opcode(cbuf, 0x39);
2345     emit_d8(cbuf, 0xD0);
2346 
2347     // jne    17 <normal>
2348     emit_opcode(cbuf, 0x75);
2349     emit_d8(cbuf, 0x08);
2350 
2351     // xor    %edx,%edx
2352     emit_opcode(cbuf, 0x33);
2353     emit_d8(cbuf, 0xD2);
2354 
2355     // cmp    $0xffffffffffffffff,$div
2356     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2357     emit_opcode(cbuf, 0x83);
2358     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2359     emit_d8(cbuf, 0xFF);
2360 
2361     // je     1e <done>
2362     emit_opcode(cbuf, 0x74);
2363     emit_d8(cbuf, 0x05);
2364 
2365     // <normal>
2366     // cqto
2367     emit_opcode(cbuf, Assembler::REX_W);
2368     emit_opcode(cbuf, 0x99);
2369 
2370     // idivq (note: must be emitted by the user of this rule)
2371     // <done>
2372   %}
2373 
2374   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2375   enc_class OpcSE(immI imm)
2376   %{
2377     // Emit primary opcode and set sign-extend bit
2378     // Check for 8-bit immediate, and set sign extend bit in opcode
2379     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2380       emit_opcode(cbuf, $primary | 0x02);
2381     } else {
2382       // 32-bit immediate
2383       emit_opcode(cbuf, $primary);
2384     }
2385   %}
2386 
2387   enc_class OpcSErm(rRegI dst, immI imm)
2388   %{
2389     // OpcSEr/m
2390     int dstenc = $dst$$reg;
2391     if (dstenc >= 8) {
2392       emit_opcode(cbuf, Assembler::REX_B);
2393       dstenc -= 8;
2394     }
2395     // Emit primary opcode and set sign-extend bit
2396     // Check for 8-bit immediate, and set sign extend bit in opcode
2397     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2398       emit_opcode(cbuf, $primary | 0x02);
2399     } else {
2400       // 32-bit immediate
2401       emit_opcode(cbuf, $primary);
2402     }
2403     // Emit r/m byte with secondary opcode, after primary opcode.
2404     emit_rm(cbuf, 0x3, $secondary, dstenc);
2405   %}
2406 
2407   enc_class OpcSErm_wide(rRegL dst, immI imm)
2408   %{
2409     // OpcSEr/m
2410     int dstenc = $dst$$reg;
2411     if (dstenc < 8) {
2412       emit_opcode(cbuf, Assembler::REX_W);
2413     } else {
2414       emit_opcode(cbuf, Assembler::REX_WB);
2415       dstenc -= 8;
2416     }
2417     // Emit primary opcode and set sign-extend bit
2418     // Check for 8-bit immediate, and set sign extend bit in opcode
2419     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2420       emit_opcode(cbuf, $primary | 0x02);
2421     } else {
2422       // 32-bit immediate
2423       emit_opcode(cbuf, $primary);
2424     }
2425     // Emit r/m byte with secondary opcode, after primary opcode.
2426     emit_rm(cbuf, 0x3, $secondary, dstenc);
2427   %}
2428 
2429   enc_class Con8or32(immI imm)
2430   %{
2431     // Check for 8-bit immediate, and set sign extend bit in opcode
2432     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2433       $$$emit8$imm$$constant;
2434     } else {
2435       // 32-bit immediate
2436       $$$emit32$imm$$constant;
2437     }
2438   %}
2439 
2440   enc_class opc2_reg(rRegI dst)
2441   %{
2442     // BSWAP
2443     emit_cc(cbuf, $secondary, $dst$$reg);
2444   %}
2445 
2446   enc_class opc3_reg(rRegI dst)
2447   %{
2448     // BSWAP
2449     emit_cc(cbuf, $tertiary, $dst$$reg);
2450   %}
2451 
2452   enc_class reg_opc(rRegI div)
2453   %{
2454     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2455     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2456   %}
2457 
2458   enc_class enc_cmov(cmpOp cop)
2459   %{
2460     // CMOV
2461     $$$emit8$primary;
2462     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2463   %}
2464 
2465   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2466   %{
2467     // Invert sense of branch from sense of cmov
2468     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2469     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2470                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2471                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2472     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2473     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2474     if ($dst$$reg < 8) {
2475       if ($src$$reg >= 8) {
2476         emit_opcode(cbuf, Assembler::REX_B);
2477       }
2478     } else {
2479       if ($src$$reg < 8) {
2480         emit_opcode(cbuf, Assembler::REX_R);
2481       } else {
2482         emit_opcode(cbuf, Assembler::REX_RB);
2483       }
2484     }
2485     emit_opcode(cbuf, 0x0F);
2486     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2487     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2488   %}
2489 
2490   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2491   %{
         // Double conditional move built from a short conditional jump followed
         // by an XMM register-to-register move.  The jump uses the condition
         // code of cop with its low bit flipped; its one-byte displacement must
         // equal the size of the move emitted after it.
2492     // Invert sense of branch from sense of cmov
2493     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
         // Displacement: prefix byte + 0x0F + opcode + R/M byte (4), plus one
         // REX byte when either register encoding is 8 or above.
2494     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2495
2496     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
         // A prefix byte is always emitted here (0x66 or 0xF2), unlike the
         // float variant where the prefix is conditional.
2497     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
         // REX selection: REX_R when only dst is 8 or above, REX_B when only src
         // is, REX_RB when both are; nothing when both are below 8.
2498     if ($dst$$reg < 8) {
2499       if ($src$$reg >= 8) {
2500         emit_opcode(cbuf, Assembler::REX_B);
2501       }
2502     } else {
2503       if ($src$$reg < 8) {
2504         emit_opcode(cbuf, Assembler::REX_R);
2505       } else {
2506         emit_opcode(cbuf, Assembler::REX_RB);
2507       }
2508     }
2509     emit_opcode(cbuf, 0x0F);
2510     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
         // Only the low three bits of each encoding go into the R/M byte.
2511     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2512   %}
2513 
2514   enc_class enc_PartialSubtypeCheck()
2515   %{
         // Emits the slow-path subtype check through the macro assembler using
         // fixed registers: RSI holds the sub class, RAX the super class, RCX is
         // a killed temporary and RDI receives the result.  The failure label is
         // bound at the end of this encoding; no success label is supplied
         // (NULL).  When the primary opcode is non-zero, RDI is cleared on the
         // fall-through path just before that label.
2516     Register Rrdi = as_Register(RDI_enc); // result register
2517     Register Rrax = as_Register(RAX_enc); // super class
2518     Register Rrcx = as_Register(RCX_enc); // killed
2519     Register Rrsi = as_Register(RSI_enc); // sub class
2520     Label miss;
2521     const bool set_cond_codes = true;
2522
2523     MacroAssembler _masm(&cbuf);
         // Pass the named constant rather than a bare literal so the local
         // declared above is actually used.
2524     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2525                                      NULL, &miss,
                                          set_cond_codes);
2527     if ($primary) {
2528       __ xorptr(Rrdi, Rrdi);
2529     }
2530     __ bind(miss);
2531   %}
2532 
2533   enc_class Java_To_Interpreter(method meth)
2534   %{
         // Direct CALL into the runtime.  The instruction mark is set at the
         // opcode byte so relocation info refers to the start of the call.
2537     cbuf.set_insts_mark();
2538     $$$emit8$primary;
         // 32-bit displacement measured from the end of the 4-byte field that
         // is about to be written.
         const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4);
         emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
2544   %}
2545 
2546   enc_class preserve_SP %{
         // Copies RSP into rbp_mh_SP_save ahead of a call whose callee might
         // change the stack pointer.  Under debug_only/assert the number of
         // bytes emitted is compared with preserve_SP_size().
         debug_only(int size_before = cbuf.insts_size());
         MacroAssembler masm(&cbuf);
         masm.movptr(rbp_mh_SP_save, rsp);
         debug_only(int size_after = cbuf.insts_size());
         assert(size_after - size_before == preserve_SP_size(), "correct size prediction");
2554   %}
2555 
2556   enc_class restore_SP %{
         // Counterpart of preserve_SP: copies rbp_mh_SP_save back into RSP.
         MacroAssembler masm(&cbuf);
         masm.movptr(rsp, rbp_mh_SP_save);
2559   %}
2560 
2561   enc_class Java_Static_Call(method meth)
2562   %{
         // Java static call.  The call targets a fixup routine; the relocation
         // attached to the 32-bit displacement depends on whether a method is
         // known and whether the call is an optimized virtual call.  When a
         // method is known, the java-to-interpreter stub is emitted after the
         // call.
2566     cbuf.set_insts_mark();
2567     $$$emit8$primary;
2568
         // Displacement measured from the end of the 4-byte field to be written.
         const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4);
         if (_method) {
           if (_optimized_virtual) {
             emit_d32_reloc(cbuf, rel32, opt_virtual_call_Relocation::spec(), RELOC_DISP32);
           } else {
             emit_d32_reloc(cbuf, rel32, static_call_Relocation::spec(), RELOC_DISP32);
           }
           // Emit stub for static call
           emit_java_to_interp(cbuf);
         } else {
           // No method: plain runtime call, no stub.
           emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
         }
2589   %}
2590 
2591   enc_class Java_Dynamic_Call(method meth)
2592   %{
         // Java dynamic call: a 64-bit immediate load of
         // Universe::non_oop_word() into RAX, tagged with an oop relocation,
         // followed by a call whose virtual_call relocation records the address
         // of that load.
2597     cbuf.set_insts_mark();

         // REX.W, then 0xB8 combined with the RAX encoding, then the imm64.
         emit_opcode(cbuf, Assembler::REX_W);
         emit_opcode(cbuf, 0xB8 | RAX_enc);
         emit_d64_reloc(cbuf, (int64_t) Universe::non_oop_word(),
                        oop_Relocation::spec_for_immediate(), RELOC_IMM64);
         // The mark set above still points at the start of the immediate load.
         address oop_load_pc = cbuf.insts_mark();

         // Re-mark at the call itself; the fixup routine uses ScopeDesc info to
         // determine the intended callee.
         cbuf.set_insts_mark();
         $$$emit8$primary;
         const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4);
         emit_d32_reloc(cbuf, rel32, virtual_call_Relocation::spec(oop_load_pc), RELOC_DISP32);
2614   %}
2615 
2616   enc_class Java_Compiled_Call(method meth)
2617   %{
         // Java compiled call: an indirect call through RAX plus the offset
         // returned by methodOopDesc::from_compiled_offset().  The primary
         // opcode is followed by an R/M byte carrying the secondary opcode and
         // either an 8-bit or a 32-bit displacement.
2618     // JAVA COMPILED CALL
2619     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2620
2621     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2622     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2623
2624     // callq *disp(%rax)
2625     cbuf.set_insts_mark();
2626     $$$emit8$primary;
         // An 8-bit displacement is sign-extended by the processor, so the short
         // form is only valid for -0x80 <= disp < 0x80.  The lower bound was
         // previously unchecked; anything outside the range now takes the
         // 32-bit form.
         if (-0x80 <= disp && disp < 0x80) {
2628       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2629       emit_d8(cbuf, disp); // Displacement
2630     } else {
2631       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2632       emit_d32(cbuf, disp); // Displacement
2633     }
2634   %}
2635 
2636   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2637   %{
         // 32-bit shift by immediate (SAL, SAR, SHR): optional REX_B, primary
         // opcode, R/M byte carrying the secondary opcode, then the 8-bit count.
         const int enc = $dst$$reg;
         if (enc > 7) {
           // Encodings 8 and above need REX_B; only the low three bits go into
           // the R/M byte.
           emit_opcode(cbuf, Assembler::REX_B);
         }
2644     $$$emit8$primary;
         emit_rm(cbuf, 0x3, $secondary, enc & 7);
2646     $$$emit8$shift$$constant;
2647   %}
2648 
2649   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2650   %{
         // 64-bit shift by immediate (SAL, SAR, SHR): a REX prefix is always
         // emitted (REX_W, or REX_WB for encodings 8 and above), then the primary
         // opcode, the R/M byte with the secondary opcode, and the 8-bit count.
         const int enc = $dst$$reg;
         emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2659     $$$emit8$primary;
         emit_rm(cbuf, 0x3, $secondary, enc & 7);
2661     $$$emit8$shift$$constant;
2662   %}
2663 
2664   enc_class load_immI(rRegI dst, immI src)
2665   %{
         // Load a 32-bit immediate: optional REX_B, opcode 0xB8 combined with
         // the low three bits of the register encoding, then the imm32.
         const int enc = $dst$$reg;
         if (enc > 7) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
         emit_opcode(cbuf, 0xB8 | (enc & 7));
2672     $$$emit32$src$$constant;
2673   %}
2674 
2675   enc_class load_immL(rRegL dst, immL src)
2676   %{
         // Load a 64-bit immediate: REX_W (REX_WB for encodings 8 and above),
         // opcode 0xB8 combined with the low three bits of the register
         // encoding, then the imm64.
         const int enc = $dst$$reg;
         emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
         emit_opcode(cbuf, 0xB8 | (enc & 7));
2685     emit_d64(cbuf, $src$$constant);
2686   %}
2687 
2688   enc_class load_immUL32(rRegL dst, immUL32 src)
2689   %{
         // Same byte sequence as load_immI (no REX.W); here the zeroes left in
         // the high word by the 32-bit move are relied upon.
         const int enc = $dst$$reg;
         if (enc > 7) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
         emit_opcode(cbuf, 0xB8 | (enc & 7));
2697     $$$emit32$src$$constant;
2698   %}
2699 
2700   enc_class load_immL32(rRegL dst, immL32 src)
2701   %{
         // Load a 32-bit immediate into a 64-bit register with the 0xC7 form:
         // REX_W (REX_WB for encodings 8 and above), 0xC7, an R/M byte with
         // 0x00 in the middle field, then the imm32.
         const int enc = $dst$$reg;
         emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
         emit_opcode(cbuf, 0xC7);
         emit_rm(cbuf, 0x03, 0x00, enc & 7);
2711     $$$emit32$src$$constant;
2712   %}
2713 
2714   enc_class load_immP31(rRegP dst, immP32 src)
2715   %{
         // Same byte sequence as load_immI (no REX.W); here the zeroes left in
         // the high word by the 32-bit move are relied upon.
         const int enc = $dst$$reg;
         if (enc > 7) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
         emit_opcode(cbuf, 0xB8 | (enc & 7));
2723     $$$emit32$src$$constant;
2724   %}
2725 
2726   enc_class load_immP(rRegP dst, immP src)
2727   %{
         // Load a 64-bit pointer immediate: REX_W (REX_WB for encodings 8 and
         // above), opcode 0xB8 combined with the low three bits of the register
         // encoding, then the imm64.  An oop constant is written with an oop
         // relocation; any other constant is written raw.
         const int enc = $dst$$reg;
         emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
         emit_opcode(cbuf, 0xB8 | (enc & 7));
2736     // This next line should be generated from ADLC
         const bool is_oop = $src->constant_is_oop();
         if (!is_oop) {
           emit_d64(cbuf, $src$$constant);
         } else {
           emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
         }
2742   %}
2743 
2744   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2745   enc_class enc_copy(rRegI dst, rRegI src)
2746   %{
         // Delegates to encode_copy with the two register encodings.
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;
         encode_copy(cbuf, dst_enc, src_enc);
2748   %}
2749 
2750   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2751   enc_class enc_CopyXD( RegD dst, RegD src ) %{
         // Delegates to encode_CopyXD with the two XMM register encodings.
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;
         encode_CopyXD(cbuf, dst_enc, src_enc);
2753   %}
2754 
2755   enc_class enc_copy_always(rRegI dst, rRegI src)
2756   %{
         // Unconditional 32-bit register copy (opcode 0x8B), emitted even when
         // dst and src are the same register.
         const int src_enc = $src$$reg;
         const int dst_enc = $dst$$reg;
         const bool src_high = src_enc >= 8;
         const bool dst_high = dst_enc >= 8;

         // REX_R covers a high dst, REX_B a high src, REX_RB both; no prefix when
         // both encodings are below 8.
         if (dst_high) {
           emit_opcode(cbuf, src_high ? Assembler::REX_RB : Assembler::REX_R);
         } else if (src_high) {
           emit_opcode(cbuf, Assembler::REX_B);
         }

2775     emit_opcode(cbuf, 0x8B);
         emit_rm(cbuf, 0x3, dst_enc & 7, src_enc & 7);
2777   %}
2778 
2779   enc_class enc_copy_wide(rRegL dst, rRegL src)
2780   %{
         // 64-bit register copy (opcode 0x8B with a REX.W-family prefix).
         // Nothing is emitted when dst and src have the same encoding.
         const int src_enc = $src$$reg;
         const int dst_enc = $dst$$reg;

         if (src_enc != dst_enc) {
           const bool src_high = src_enc >= 8;
           const bool dst_high = dst_enc >= 8;
           // The prefix always carries W; R is added for a high dst, B for a
           // high src.
           emit_opcode(cbuf, dst_high
                             ? (src_high ? Assembler::REX_WRB : Assembler::REX_WR)
                             : (src_high ? Assembler::REX_WB  : Assembler::REX_W));
           emit_opcode(cbuf, 0x8B);
           emit_rm(cbuf, 0x3, dst_enc & 7, src_enc & 7);
         }
2804   %}
2805 
2806   enc_class Con32(immI src)
2807   %{
         // Appends the constant of src to the instruction as a 32-bit value,
         // using the ADLC emit32 replacement form.
2808     // Output immediate
2809     $$$emit32$src$$constant;
2810   %}
2811 
2812   enc_class Con64(immL src)
2813   %{
         // Appends the constant of src to the instruction as a 64-bit value.
         // The code buffer is passed explicitly, matching every other emit_d64
         // call in this file; the call previously omitted it.
2814     // Output immediate
         emit_d64(cbuf, $src$$constant);
2816   %}
2817 
2818   enc_class Con32F_as_bits(immF src)
2819   %{
         // Appends the float constant of src to the instruction as its 32-bit
         // pattern obtained through jint_cast.
         const jfloat value = $src$$constant;
         emit_d32(cbuf, jint_cast(value));
2824   %}
2825 
2826   enc_class Con16(immI src)
2827   %{
         // Appends the constant of src to the instruction as a 16-bit value,
         // using the ADLC emit16 replacement form.
2828     // Output immediate
2829     $$$emit16$src$$constant;
2830   %}
2831 
2832   // How is this different from Con32??? XXX
2833   enc_class Con_d32(immI src)
2834   %{
         // Appends the constant of src as a 32-bit value by calling emit_d32
         // directly rather than through the ADLC emit32 form.
         const int imm = $src$$constant;
         emit_d32(cbuf, imm);
2836   %}
2837 
2838   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
         // Immediate memory reference: an R/M byte with 0x00 in the first field,
         // the encoding of t1 in the second and 0x05 in the third, followed by a
         // zero 32-bit displacement.
         const int reg_field = $t1$$reg;
         emit_rm(cbuf, 0x00, reg_field, 0x05);
         emit_d32(cbuf, 0x00);
2842   %}
2843 
2844   enc_class lock_prefix()
2845   %{
         // Emits the LOCK prefix byte (0xF0), but only when os::is_MP() reports
         // true.
         const bool needs_lock = os::is_MP();
         if (needs_lock) {
           emit_opcode(cbuf, 0xF0); // lock
         }
2849   %}
2850 
2851   enc_class REX_mem(memory mem)
2852   %{
         // Optional REX prefix for a memory operand: B when the base encoding is
         // 8 or above, X when the index encoding is, XB for both, nothing when
         // both are below 8.
         const bool base_high  = $mem$$base >= 8;
         const bool index_high = $mem$$index >= 8;
         if (base_high) {
           emit_opcode(cbuf, index_high ? Assembler::REX_XB : Assembler::REX_B);
         } else if (index_high) {
           emit_opcode(cbuf, Assembler::REX_X);
         }
2864   %}
2865 
2866   enc_class REX_mem_wide(memory mem)
2867   %{
         // REX.W-family prefix for a memory operand; always emitted.  B is added
         // when the base encoding is 8 or above, X when the index encoding is.
         const bool base_high  = $mem$$base >= 8;
         const bool index_high = $mem$$index >= 8;
         emit_opcode(cbuf, base_high
                           ? (index_high ? Assembler::REX_WXB : Assembler::REX_WB)
                           : (index_high ? Assembler::REX_WX  : Assembler::REX_W));
2881   %}
2882 
2883   // for byte regs
2884   enc_class REX_breg(rRegI reg)
2885   %{
         // Prefix for a byte-register operand: REX_B for encodings 8 and above,
         // a bare REX for encodings 4 to 7, nothing for encodings 0 to 3.
         const int enc = $reg$$reg;
         if (enc >= 8) {
           emit_opcode(cbuf, Assembler::REX_B);
         } else if (enc >= 4) {
           emit_opcode(cbuf, Assembler::REX);
         }
2889   %}
2890 
2891   // for byte regs
2892   enc_class REX_reg_breg(rRegI dst, rRegI src)
2893   %{
         // Prefix for a register destination with a byte-register source.  A
         // high dst selects REX_R or REX_RB; with a low dst the source alone
         // decides: REX_B for 8 and above, bare REX for 4 to 7, nothing below 4.
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;
         if (dst_enc >= 8) {
           emit_opcode(cbuf, src_enc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
         } else if (src_enc >= 8) {
           emit_opcode(cbuf, Assembler::REX_B);
         } else if (src_enc >= 4) {
           emit_opcode(cbuf, Assembler::REX);
         }
2905   %}
2906 
2907   // for byte regs
2908   enc_class REX_breg_mem(rRegI reg, memory mem)
2909   %{
         // Prefix for a byte register combined with a memory operand.  R, X and
         // B are set for a register, index or base encoding of 8 and above.
         // When none of them is needed, a bare REX is still emitted for
         // register encodings 4 to 7.
         const int  reg_enc    = $reg$$reg;
         const bool reg_high   = reg_enc >= 8;
         const bool base_high  = $mem$$base >= 8;
         const bool index_high = $mem$$index >= 8;

         if (reg_high) {
           emit_opcode(cbuf, base_high
                             ? (index_high ? Assembler::REX_RXB : Assembler::REX_RB)
                             : (index_high ? Assembler::REX_RX  : Assembler::REX_R));
         } else if (base_high) {
           emit_opcode(cbuf, index_high ? Assembler::REX_XB : Assembler::REX_B);
         } else if (index_high) {
           emit_opcode(cbuf, Assembler::REX_X);
         } else if (reg_enc >= 4) {
           emit_opcode(cbuf, Assembler::REX);
         }
2939   %}
2940 
2941   enc_class REX_reg(rRegI reg)
2942   %{
         // REX_B is emitted only for register encodings of 8 and above.
         const bool reg_high = $reg$$reg >= 8;
         if (reg_high) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
2946   %}
2947 
2948   enc_class REX_reg_wide(rRegI reg)
2949   %{
         // Always emits a prefix: REX_W for encodings below 8, REX_WB otherwise.
         const bool reg_high = $reg$$reg >= 8;
         emit_opcode(cbuf, reg_high ? Assembler::REX_WB : Assembler::REX_W);
2955   %}
2956 
2957   enc_class REX_reg_reg(rRegI dst, rRegI src)
2958   %{
         // Optional prefix for a register pair: REX_R for a high dst, REX_B for
         // a high src, REX_RB for both, nothing when both are below 8.
         const bool dst_high = $dst$$reg >= 8;
         const bool src_high = $src$$reg >= 8;
         if (dst_high) {
           emit_opcode(cbuf, src_high ? Assembler::REX_RB : Assembler::REX_R);
         } else if (src_high) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
2970   %}
2971 
2972   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2973   %{
         // REX.W-family prefix for a register pair; always emitted.  R is added
         // for a high dst, B for a high src.
         const bool dst_high = $dst$$reg >= 8;
         const bool src_high = $src$$reg >= 8;
         emit_opcode(cbuf, dst_high
                           ? (src_high ? Assembler::REX_WRB : Assembler::REX_WR)
                           : (src_high ? Assembler::REX_WB  : Assembler::REX_W));
2987   %}
2988 
2989   enc_class REX_reg_mem(rRegI reg, memory mem)
2990   %{
         // Optional prefix for a register combined with a memory operand.  R, X
         // and B are set for a register, index or base encoding of 8 and above;
         // nothing is emitted when all three are below 8.
         const bool reg_high   = $reg$$reg >= 8;
         const bool base_high  = $mem$$base >= 8;
         const bool index_high = $mem$$index >= 8;

         if (reg_high) {
           emit_opcode(cbuf, base_high
                             ? (index_high ? Assembler::REX_RXB : Assembler::REX_RB)
                             : (index_high ? Assembler::REX_RX  : Assembler::REX_R));
         } else if (base_high) {
           emit_opcode(cbuf, index_high ? Assembler::REX_XB : Assembler::REX_B);
         } else if (index_high) {
           emit_opcode(cbuf, Assembler::REX_X);
         }
3018   %}
3019 
3020   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3021   %{
         // REX.W-family prefix for a register combined with a memory operand;
         // always emitted.  R, X and B are added for a register, index or base
         // encoding of 8 and above.
         const bool reg_high   = $reg$$reg >= 8;
         const bool base_high  = $mem$$base >= 8;
         const bool index_high = $mem$$index >= 8;

         if (reg_high) {
           emit_opcode(cbuf, base_high
                             ? (index_high ? Assembler::REX_WRXB : Assembler::REX_WRB)
                             : (index_high ? Assembler::REX_WRX  : Assembler::REX_WR));
         } else {
           emit_opcode(cbuf, base_high
                             ? (index_high ? Assembler::REX_WXB : Assembler::REX_WB)
                             : (index_high ? Assembler::REX_WX  : Assembler::REX_W));
         }
3051   %}
3052 
3053   enc_class reg_mem(rRegI ereg, memory mem)
3054   %{
         // Forwards the register encoding and the components of the memory
         // operand (base, index, scale, displacement, oop flag) to
         // encode_RegMem, which handles the high registers.
         encode_RegMem(cbuf,
                       $ereg$$reg,
                       $mem$$base,
                       $mem$$index,
                       $mem$$scale,
                       $mem$$disp,
                       $mem->disp_is_oop());
3064   %}
3065 
3066   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3067   %{
         // Like reg_mem, but the value passed in the register position of
         // encode_RegMem is the constant of rm_opcode.  The displacement may be
         // an oop when working with static globals.
         const int opcode_ext = $rm_opcode$$constant;
         encode_RegMem(cbuf,
                       opcode_ext,
                       $mem$$base,
                       $mem$$index,
                       $mem$$scale,
                       $mem$$disp,
                       $mem->disp_is_oop());
3081   %}
3082 
3083   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3084   %{
         // Address form made of a base register plus a constant displacement:
         // index 0x04 means no index, scale 0x00 means no scale, and the
         // displacement is never an oop.
         const int dst_enc  = $dst$$reg;
         const int base_enc = $src0$$reg;
         const int offset   = $src1$$constant;
         encode_RegMem(cbuf, dst_enc, base_enc, 0x04, 0x00, offset, false);
3093   %}
3094 
3095   enc_class neg_reg(rRegI dst)
3096   %{
         // 32-bit NEG: optional REX_B, opcode 0xF7, R/M byte with 0x03 in the
         // middle field and the low three bits of the register encoding.
         const int enc = $dst$$reg;
         if (enc > 7) {
           emit_opcode(cbuf, Assembler::REX_B);
         }
         emit_opcode(cbuf, 0xF7);
         emit_rm(cbuf, 0x3, 0x03, enc & 7);
3105   %}
3106 
3107   enc_class neg_reg_wide(rRegI dst)
3108   %{
         // 64-bit NEG: REX_W (REX_WB for encodings 8 and above), opcode 0xF7,
         // R/M byte with 0x03 in the middle field and the low three bits of the
         // register encoding.
         const int enc = $dst$$reg;
         emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WB);
         emit_opcode(cbuf, 0xF7);
         emit_rm(cbuf, 0x3, 0x03, enc & 7);
3119   %}
3120 
3121   enc_class setLT_reg(rRegI dst)
3122   %{
         // SETLT on a byte register: 0x0F 0x9C plus an R/M byte.  REX_B is
         // emitted for encodings 8 and above, a bare REX for encodings 4 to 7.
         const int enc = $dst$$reg;
         if (enc >= 8) {
           emit_opcode(cbuf, Assembler::REX_B);
         } else if (enc >= 4) {
           emit_opcode(cbuf, Assembler::REX);
         }
         emit_opcode(cbuf, 0x0F);
         emit_opcode(cbuf, 0x9C);
         emit_rm(cbuf, 0x3, 0x0, enc & 7);
3134   %}
3135 
3136   enc_class setNZ_reg(rRegI dst)
3137   %{
         // SETNZ on a byte register: 0x0F 0x95 plus an R/M byte.  REX_B is
         // emitted for encodings 8 and above, a bare REX for encodings 4 to 7.
         const int enc = $dst$$reg;
         if (enc >= 8) {
           emit_opcode(cbuf, Assembler::REX_B);
         } else if (enc >= 4) {
           emit_opcode(cbuf, Assembler::REX);
         }
         emit_opcode(cbuf, 0x0F);
         emit_opcode(cbuf, 0x95);
         emit_rm(cbuf, 0x3, 0x0, enc & 7);
3149   %}
3150 
3151 
3152   // Compare the longs and set -1, 0, or 1 into dst
3153   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3154   %{
         // Three-way long compare emitted as raw bytes: a 64-bit compare, a load
         // of -1 into dst, a short jump on "less" over the remaining code, then
         // a setne on the low byte of dst and a zero-extension of that byte.
3155     int src1enc = $src1$$reg;
3156     int src2enc = $src2$$reg;
3157     int dstenc = $dst$$reg;
3158
3159     // cmpq $src1, $src2
         // REX.W is always present; R is added for a high src1, B for a high
         // src2.
3160     if (src1enc < 8) {
3161       if (src2enc < 8) {
3162         emit_opcode(cbuf, Assembler::REX_W);
3163       } else {
3164         emit_opcode(cbuf, Assembler::REX_WB);
3165       }
3166     } else {
3167       if (src2enc < 8) {
3168         emit_opcode(cbuf, Assembler::REX_WR);
3169       } else {
3170         emit_opcode(cbuf, Assembler::REX_WRB);
3171       }
3172     }
3173     emit_opcode(cbuf, 0x3B);
3174     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3175
3176     // movl $dst, -1
3177     if (dstenc >= 8) {
3178       emit_opcode(cbuf, Assembler::REX_B);
3179     }
3180     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3181     emit_d32(cbuf, -1);
3182
3183     // jl,s done
         // The displacement skips the setne and movzbl below: three bytes each,
         // plus one prefix byte each when the encoding of dst is 4 or above.
3184     emit_opcode(cbuf, 0x7C);
3185     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3186
3187     // setne $dst
         // Byte-register access: bare REX for encodings 4 to 7, REX_B for 8 and
         // above.
3188     if (dstenc >= 4) {
3189       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3190     }
3191     emit_opcode(cbuf, 0x0F);
3192     emit_opcode(cbuf, 0x95);
3193     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3194
3195     // movzbl $dst, $dst
         // dst is both operands here, so a high encoding needs REX_RB.
3196     if (dstenc >= 4) {
3197       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3198     }
3199     emit_opcode(cbuf, 0x0F);
3200     emit_opcode(cbuf, 0xB6);
3201     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3202   %}
3203 
3204   enc_class Push_ResultXD(regD dst) %{
         // Moves an x87 result into XMM register dst through the stack slot at
         // [RSP]: store with FSTP, load into dst, then add 8 to RSP.
3205     int dstenc = $dst$$reg;
3206
3207     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3208
3209     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3210     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
         // REX_R is needed only when the encoding of dst is 8 or above.
3211     if (dstenc >= 8) {
3212       emit_opcode(cbuf, Assembler::REX_R);
3213     }
3214     emit_opcode  (cbuf, 0x0F );
3215     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3216     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3217
3218     // add rsp,8
3219     emit_opcode(cbuf, Assembler::REX_W);
3220     emit_opcode(cbuf,0x83);
3221     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3222     emit_d8(cbuf,0x08);
3223   %}
3224 
3225   enc_class Push_SrcXD(regD src) %{
         // Pushes XMM register src onto the x87 stack through memory: subtract 8
         // from RSP, store src at [RSP] with movsd, then load [RSP] with fld.
         // RSP is not restored here.
3226     int srcenc = $src$$reg;
3227
3228     // subq rsp,#8
3229     emit_opcode(cbuf, Assembler::REX_W);
3230     emit_opcode(cbuf, 0x83);
3231     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3232     emit_d8(cbuf, 0x8);
3233
3234     // movsd [rsp],src
3235     emit_opcode(cbuf, 0xF2);
         // REX_R is needed only when the encoding of src is 8 or above.
3236     if (srcenc >= 8) {
3237       emit_opcode(cbuf, Assembler::REX_R);
3238     }
3239     emit_opcode(cbuf, 0x0F);
3240     emit_opcode(cbuf, 0x11);
3241     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3242
3243     // fldd [rsp]
         // NOTE(review): the 0x66 byte ahead of 0xDD looks like a redundant
         // operand-size prefix -- confirm before removing it, since emitted
         // instruction sizes may be predicted elsewhere.
3244     emit_opcode(cbuf, 0x66);
3245     emit_opcode(cbuf, 0xDD);
3246     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3247   %}
3248 
3249 
3250   enc_class movq_ld(regD dst, memory mem) %{
         // movq from the memory operand into the XMM register dst.
         MacroAssembler masm(&cbuf);
         masm.movq($dst$$XMMRegister, $mem$$Address);
3253   %}
3254 
3255   enc_class movq_st(memory mem, regD src) %{
         // movq from the XMM register src into the memory operand.
         MacroAssembler masm(&cbuf);
         masm.movq($mem$$Address, $src$$XMMRegister);
3258   %}
3259 
3260   enc_class pshufd_8x8(regF dst, regF src) %{
         // Copies src into dst with encode_CopyXD, then applies punpcklbw of dst
         // with itself followed by pshuflw of dst with itself and mode 0x00.
         MacroAssembler masm(&cbuf);
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;

         encode_CopyXD(cbuf, dst_enc, src_enc);
         masm.punpcklbw(as_XMMRegister(dst_enc), as_XMMRegister(dst_enc));
         masm.pshuflw(as_XMMRegister(dst_enc), as_XMMRegister(dst_enc), 0x00);
3266   %}
3267 
3268   enc_class pshufd_4x16(regF dst, regF src) %{
         // pshuflw from src into dst with mode 0x00.
         MacroAssembler masm(&cbuf);
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;

         masm.pshuflw(as_XMMRegister(dst_enc), as_XMMRegister(src_enc), 0x00);
3272   %}
3273 
3274   enc_class pshufd(regD dst, regD src, int mode) %{
         // pshufd from src into dst with the caller-supplied mode.
         MacroAssembler masm(&cbuf);
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;

         masm.pshufd(as_XMMRegister(dst_enc), as_XMMRegister(src_enc), $mode);
3278   %}
3279 
3280   enc_class pxor(regD dst, regD src) %{
         // pxor of the XMM registers dst and src.
         MacroAssembler masm(&cbuf);
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;

         masm.pxor(as_XMMRegister(dst_enc), as_XMMRegister(src_enc));
3284   %}
3285 
3286   enc_class mov_i2x(regD dst, rRegI src) %{
         // movdl from the general register src into the XMM register dst.
         MacroAssembler masm(&cbuf);
         const int dst_enc = $dst$$reg;
         const int src_enc = $src$$reg;

         masm.movdl(as_XMMRegister(dst_enc), as_Register(src_enc));
3290   %}
3291 
3292   // obj: object to lock
3293   // box: box address (header location) -- killed
3294   // tmp: rax -- killed
3295   // scr: rbx -- killed
3296   //
3297   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3298   // from i486.ad.  See that file for comments.
3299   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3300   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3301 
3302 
3303   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3304   %{
         // Inline monitor-enter sequence.  obj is the object to lock, box the
         // address of the on-stack lock record, tmp (declared as rax) and scr
         // are killed temporaries.  One of three code shapes is emitted,
         // selected by the EmitSync flag bits 1 and 2; the default shape (both
         // bits clear) distinguishes an inflated mark word from the
         // stack-locked / neutral / biased cases.
3305     Register objReg = as_Register((int)$obj$$reg);
3306     Register boxReg = as_Register((int)$box$$reg);
3307     Register tmpReg = as_Register($tmp$$reg);
3308     Register scrReg = as_Register($scr$$reg);
3309     MacroAssembler masm(&cbuf);
3310
3311     // Verify uniqueness of register assignments -- necessary but not sufficient
3312     assert (objReg != boxReg && objReg != tmpReg &&
3313             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3314
         // Optional statistics: count every entry when counters are attached.
3315     if (_counters != NULL) {
3316       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3317     }
         // Shape 1 (EmitSync bit 1): store the unused mark into the box and
         // compare rsp with NULL_WORD; no locking attempt is emitted.
3318     if (EmitSync & 1) {
3319         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3320         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3321         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
3322     } else
         // Shape 2 (EmitSync bit 2): optional biased-locking entry, then a
         // compare-and-exchange of the box address into the object header,
         // with a recursive-lock check when the exchange fails.
3323     if (EmitSync & 2) {
3324         Label DONE_LABEL;
3325         if (UseBiasedLocking) {
3326            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3327           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3328         }
3329         // QQQ was movl...
             // tmpReg = header word with bit 0 set; this value is saved in the
             // box before the exchange.
3330         masm.movptr(tmpReg, 0x1);
3331         masm.orptr(tmpReg, Address(objReg, 0));
3332         masm.movptr(Address(boxReg, 0), tmpReg);
3333         if (os::is_MP()) {
3334           masm.lock();
3335         }
3336         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3337         masm.jcc(Assembler::equal, DONE_LABEL);
3338
3339         // Recursive locking
             // tmpReg = (header - rsp) masked with 7 - page size; the result is
             // stored into the box.
3340         masm.subptr(tmpReg, rsp);
3341         masm.andptr(tmpReg, 7 - os::vm_page_size());
3342         masm.movptr(Address(boxReg, 0), tmpReg);
3343
3344         masm.bind(DONE_LABEL);
3345         masm.nop(); // avoid branch to branch
3346     } else {
         // Shape 3 (default).
3347         Label DONE_LABEL, IsInflated, Egress;
3348
             // Load the header word and branch to IsInflated when bit 0x02 is
             // set.
3349         masm.movptr(tmpReg, Address(objReg, 0)) ;
3350         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3351         masm.jcc   (Assembler::notZero, IsInflated) ;
3352
3353         // it's stack-locked, biased or neutral
3354         // TODO: optimize markword triage order to reduce the number of
3355         // conditional branches in the most common cases.
3356         // Beware -- there's a subtle invariant that fetch of the markword
3357         // at [FETCH], below, will never observe a biased encoding (*101b).
3358         // If this invariant is not held we'll suffer exclusion (safety) failure.
3359
3360         if (UseBiasedLocking && !UseOptoBiasInlining) {
3361           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3362           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3363         }
3364
3365         // was q will it destroy high?
             // Set bit 0 in the fetched header, save it in the box, then try to
             // exchange the box address into the object header.
3366         masm.orl   (tmpReg, 1) ;
3367         masm.movptr(Address(boxReg, 0), tmpReg) ;
3368         if (os::is_MP()) { masm.lock(); }
3369         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3370         if (_counters != NULL) {
3371            masm.cond_inc32(Assembler::equal,
3372                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3373         }
3374         masm.jcc   (Assembler::equal, DONE_LABEL);
3375
3376         // Recursive locking
             // Same subtraction and mask as in the EmitSync bit 2 shape; the
             // fast-path counter is bumped again when the result compares equal.
3377         masm.subptr(tmpReg, rsp);
3378         masm.andptr(tmpReg, 7 - os::vm_page_size());
3379         masm.movptr(Address(boxReg, 0), tmpReg);
3380         if (_counters != NULL) {
3381            masm.cond_inc32(Assembler::equal,
3382                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3383         }
3384         masm.jmp   (DONE_LABEL) ;
3385
3386         masm.bind  (IsInflated) ;
3387         // It's inflated
3388
3389         // TODO: someday avoid the ST-before-CAS penalty by
3390         // relocating (deferring) the following ST.
3391         // We should also think about trying a CAS without having
3392         // fetched _owner.  If the CAS is successful we may
3393         // avoid an RTO->RTS upgrade on the $line.
3394         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3395         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3396
             // boxReg takes over the header value so tmpReg can receive the
             // owner field; the field offsets are applied with -2 relative to
             // that header value.  A non-zero owner ends the sequence.
3397         masm.mov    (boxReg, tmpReg) ;
3398         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3399         masm.testptr(tmpReg, tmpReg) ;
3400         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
3401
3402         // It's inflated and appears unlocked
             // Try to install r15_thread as the owner.
3403         if (os::is_MP()) { masm.lock(); }
3404         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3405         // Intentional fall-through into DONE_LABEL ...
3406
3407         masm.bind  (DONE_LABEL) ;
3408         masm.nop   () ;                 // avoid jmp to jmp
3409     }
3410   %}
3411 
3412   // obj: object to unlock
3413   // box: box address (displaced header location), killed
3414   // RBX: killed tmp; cannot be obj nor box
3415   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3416   %{
3417     // Emits the inline monitor-exit sequence.  EmitSync bits pick the variant: 4 = compare only, 8 = simple exit, otherwise the full path.
3418     Register objReg = as_Register($obj$$reg);
3419     Register boxReg = as_Register($box$$reg);
3420     Register tmpReg = as_Register($tmp$$reg);
3421     MacroAssembler masm(&cbuf);
3422     // The outcome is left in ZF: the LSuccess/LGoSlowPath code below uses ZF=1 for success and ZF=0 for failure.
3423     if (EmitSync & 4) {
3424        masm.cmpptr(rsp, 0) ;  // flag-setting compare only; presumably yields ZF=0 because RSP is nonzero -- confirm
3425     } else
3426     if (EmitSync & 8) {
3427        Label DONE_LABEL;
3428        if (UseBiasedLocking) {
3429          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3430        }
3431 
3432        // Check whether the displaced header is 0
3433        //(=> recursive unlock)
3434        masm.movptr(tmpReg, Address(boxReg, 0));
3435        masm.testptr(tmpReg, tmpReg);
3436        masm.jcc(Assembler::zero, DONE_LABEL);
3437 
3438        // If not recursive lock, reset the header to displaced header
3439        if (os::is_MP()) {
3440          masm.lock();
3441        }
3442        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3443        masm.bind(DONE_LABEL);
3444        masm.nop(); // avoid branch to branch
3445     } else {
3446        Label DONE_LABEL, Stacked, CheckSucc ;
3447 
3448        if (UseBiasedLocking && !UseOptoBiasInlining) {
3449          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3450        }
3451 
3452        masm.movptr(tmpReg, Address(objReg, 0)) ;  // tmp = object header word at obj+0
3453        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;  // displaced header == 0 => recursive unlock
3454        masm.jcc   (Assembler::zero, DONE_LABEL) ;
3455        masm.testl (tmpReg, 0x02) ;  // 0x02 bit clear => not inflated, take the Stacked path
3456        masm.jcc   (Assembler::zero, Stacked) ;
3457 
3458        // It's inflated.  tmpReg still carries the 0x02 bit tested above, which is presumably why each field offset below is biased by -2.
3459        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3460        masm.xorptr(boxReg, r15_thread) ;
3461        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3462        masm.jcc   (Assembler::notZero, DONE_LABEL) ;  // owner != this thread or recursions != 0: exit with ZF=0
3463        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3464        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3465        masm.jcc   (Assembler::notZero, CheckSucc) ;  // cxq or EntryList is non-null
3466        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;  // both null: clear owner
3467        masm.jmp   (DONE_LABEL) ;  // ZF is still 1 from the orptr above (mov does not modify flags)
3468 
3469        if ((EmitSync & 65536) == 0) {
3470          Label LSuccess, LGoSlowPath ;
3471          masm.bind  (CheckSucc) ;
3472          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3473          masm.jcc   (Assembler::zero, LGoSlowPath) ;  // succ == NULL: go to the slow path
3474 
3475          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3476          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3477          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3478          // are all faster when the write buffer is populated.
3479          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3480          if (os::is_MP()) {
3481             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3482          }
3483          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3484          masm.jcc   (Assembler::notZero, LSuccess) ;  // succ != NULL after the locked add: success
3485          // succ is NULL: try to take ownership back by CASing owner from NULL (in RAX) to this thread.
3486          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX, the implicit cmpxchg comparand
3487          if (os::is_MP()) { masm.lock(); }
3488          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3489          masm.jcc   (Assembler::notEqual, LSuccess) ;  // CAS failed (owner was not NULL): treated as success
3490          // Intentional fall-through into slow-path
3491 
3492          masm.bind  (LGoSlowPath) ;
3493          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3494          masm.jmp   (DONE_LABEL) ;
3495 
3496          masm.bind  (LSuccess) ;
3497          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3498          masm.jmp   (DONE_LABEL) ;
3499        }
3500        // Not inflated: CAS the displaced header (re-read from the box) back into the object header.
3501        masm.bind  (Stacked) ;
3502        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3503        if (os::is_MP()) { masm.lock(); }
3504        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3505 
3506        if (EmitSync & 65536) {
3507           masm.bind (CheckSucc) ;  // CheckSucc was not bound above in this mode, so bind it at the exit
3508        }
3509        masm.bind(DONE_LABEL);
3510        if (EmitSync & 32768) {
3511           masm.nop();                      // avoid branch to branch
3512        }
3513     }
3514   %}
3515 
3516   // Emits opcode 0xE9 (jmp rel32) targeting OptoRuntime::rethrow_stub(), recorded with a runtime-call relocation.
3517   enc_class enc_rethrow()
3518   %{
3519     cbuf.set_insts_mark();
3520     emit_opcode(cbuf, 0xE9); // jmp entry
3521     emit_d32_reloc(cbuf,
3522                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),  // rel32 is relative to the end of the 4-byte displacement
3523                    runtime_call_Relocation::spec(),
3524                    RELOC_DISP32);
3525   %}
3526   // Float abs: emits [REX.R] 0F 54 /r (ANDPS per the Intel SDM) of dst with the word at StubRoutines::x86::float_sign_mask().
3527   enc_class absF_encoding(regF dst)
3528   %{
3529     int dstenc = $dst$$reg;
3530     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3531 
3532     cbuf.set_insts_mark();
3533     if (dstenc >= 8) {
3534       emit_opcode(cbuf, Assembler::REX_R);  // registers 8-15 need REX.R; only the low 3 bits go into ModRM
3535       dstenc -= 8;
3536     }
3537     // XXX reg_mem doesn't support RIP-relative addressing yet
3538     emit_opcode(cbuf, 0x0F);
3539     emit_opcode(cbuf, 0x54);
3540     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3541     emit_d32_reloc(cbuf, signmask_address);
3542   %}
3543   // Double abs: emits 66 [REX.R] 0F 54 /r (ANDPD per the Intel SDM) of dst with the word at StubRoutines::x86::double_sign_mask().
3544   enc_class absD_encoding(regD dst)
3545   %{
3546     int dstenc = $dst$$reg;
3547     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3548 
3549     cbuf.set_insts_mark();
3550     emit_opcode(cbuf, 0x66);  // operand-size prefix is emitted before any REX byte
3551     if (dstenc >= 8) {
3552       emit_opcode(cbuf, Assembler::REX_R);
3553       dstenc -= 8;
3554     }
3555     // XXX reg_mem doesn't support RIP-relative addressing yet
3556     emit_opcode(cbuf, 0x0F);
3557     emit_opcode(cbuf, 0x54);
3558     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3559     emit_d32_reloc(cbuf, signmask_address);
3560   %}
3561   // Float negate: emits [REX.R] 0F 57 /r (XORPS per the Intel SDM) of dst with the word at StubRoutines::x86::float_sign_flip().
3562   enc_class negF_encoding(regF dst)
3563   %{
3564     int dstenc = $dst$$reg;
3565     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3566 
3567     cbuf.set_insts_mark();
3568     if (dstenc >= 8) {
3569       emit_opcode(cbuf, Assembler::REX_R);  // registers 8-15 need REX.R; only the low 3 bits go into ModRM
3570       dstenc -= 8;
3571     }
3572     // XXX reg_mem doesn't support RIP-relative addressing yet
3573     emit_opcode(cbuf, 0x0F);
3574     emit_opcode(cbuf, 0x57);
3575     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3576     emit_d32_reloc(cbuf, signflip_address);
3577   %}
3578   // Double negate: emits 66 [REX.R] 0F 57 /r (XORPD per the Intel SDM) of dst with the word at StubRoutines::x86::double_sign_flip().
3579   enc_class negD_encoding(regD dst)
3580   %{
3581     int dstenc = $dst$$reg;
3582     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3583 
3584     cbuf.set_insts_mark();
3585     emit_opcode(cbuf, 0x66);  // operand-size prefix is emitted before any REX byte
3586     if (dstenc >= 8) {
3587       emit_opcode(cbuf, Assembler::REX_R);
3588       dstenc -= 8;
3589     }
3590     // XXX reg_mem doesn't support RIP-relative addressing yet
3591     emit_opcode(cbuf, 0x0F);
3592     emit_opcode(cbuf, 0x57);
3593     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3594     emit_d32_reloc(cbuf, signflip_address);
3595   %}
3596   // If dst holds 0x80000000, stores src on the stack, calls the f2i_fixup stub and pops the result into dst; otherwise skips all of that.
3597   enc_class f2i_fixup(rRegI dst, regF src)
3598   %{
3599     int dstenc = $dst$$reg;
3600     int srcenc = $src$$reg;
3601 
3602     // cmpl $dst, #0x80000000
3603     if (dstenc >= 8) {
3604       emit_opcode(cbuf, Assembler::REX_B);
3605     }
3606     emit_opcode(cbuf, 0x81);
3607     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3608     emit_d32(cbuf, 0x80000000);
3609     // The rel8 below is the byte size of the code up to done: 4 (subq) + 5 (movss) + 5 (call) + 1 (popq), plus 1 per REX prefix.
3610     // jne,s done
3611     emit_opcode(cbuf, 0x75);
3612     if (srcenc < 8 && dstenc < 8) {
3613       emit_d8(cbuf, 0xF);
3614     } else if (srcenc >= 8 && dstenc >= 8) {
3615       emit_d8(cbuf, 0x11);
3616     } else {
3617       emit_d8(cbuf, 0x10);
3618     }
3619 
3620     // subq rsp, #8
3621     emit_opcode(cbuf, Assembler::REX_W);
3622     emit_opcode(cbuf, 0x83);
3623     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3624     emit_d8(cbuf, 8);
3625 
3626     // movss [rsp], $src
3627     emit_opcode(cbuf, 0xF3);
3628     if (srcenc >= 8) {
3629       emit_opcode(cbuf, Assembler::REX_R);
3630     }
3631     emit_opcode(cbuf, 0x0F);
3632     emit_opcode(cbuf, 0x11);
3633     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3634 
3635     // call f2i_fixup
3636     cbuf.set_insts_mark();
3637     emit_opcode(cbuf, 0xE8);
3638     emit_d32_reloc(cbuf,
3639                    (int)
3640                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3641                    runtime_call_Relocation::spec(),
3642                    RELOC_DISP32);
3643 
3644     // popq $dst
3645     if (dstenc >= 8) {
3646       emit_opcode(cbuf, Assembler::REX_B);
3647     }
3648     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3649 
3650     // done:
3651   %}
3652   // If dst equals the 64-bit word at double_sign_flip(), stores src on the stack, calls the f2l_fixup stub and pops the result into dst.
3653   enc_class f2l_fixup(rRegL dst, regF src)
3654   %{
3655     int dstenc = $dst$$reg;
3656     int srcenc = $src$$reg;
3657     address const_address = (address) StubRoutines::x86::double_sign_flip();
3658 
3659     // cmpq $dst, [0x8000000000000000]
3660     cbuf.set_insts_mark();
3661     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3662     emit_opcode(cbuf, 0x39);
3663     // XXX reg_mem doesn't support RIP-relative addressing yet
3664     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3665     emit_d32_reloc(cbuf, const_address);
3666 
3667     // The rel8 below is the byte size of the code up to done: 4 (subq) + 5 (movss) + 5 (call) + 1 (popq), plus 1 per REX prefix.
3668     // jne,s done
3669     emit_opcode(cbuf, 0x75);
3670     if (srcenc < 8 && dstenc < 8) {
3671       emit_d8(cbuf, 0xF);
3672     } else if (srcenc >= 8 && dstenc >= 8) {
3673       emit_d8(cbuf, 0x11);
3674     } else {
3675       emit_d8(cbuf, 0x10);
3676     }
3677 
3678     // subq rsp, #8
3679     emit_opcode(cbuf, Assembler::REX_W);
3680     emit_opcode(cbuf, 0x83);
3681     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3682     emit_d8(cbuf, 8);
3683 
3684     // movss [rsp], $src
3685     emit_opcode(cbuf, 0xF3);
3686     if (srcenc >= 8) {
3687       emit_opcode(cbuf, Assembler::REX_R);
3688     }
3689     emit_opcode(cbuf, 0x0F);
3690     emit_opcode(cbuf, 0x11);
3691     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3692 
3693     // call f2l_fixup
3694     cbuf.set_insts_mark();
3695     emit_opcode(cbuf, 0xE8);
3696     emit_d32_reloc(cbuf,
3697                    (int)
3698                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3699                    runtime_call_Relocation::spec(),
3700                    RELOC_DISP32);
3701 
3702     // popq $dst
3703     if (dstenc >= 8) {
3704       emit_opcode(cbuf, Assembler::REX_B);
3705     }
3706     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3707 
3708     // done:
3709   %}
3710   // If dst holds 0x80000000, stores src on the stack, calls the d2i_fixup stub and pops the result into dst; otherwise skips all of that.
3711   enc_class d2i_fixup(rRegI dst, regD src)
3712   %{
3713     int dstenc = $dst$$reg;
3714     int srcenc = $src$$reg;
3715 
3716     // cmpl $dst, #0x80000000
3717     if (dstenc >= 8) {
3718       emit_opcode(cbuf, Assembler::REX_B);
3719     }
3720     emit_opcode(cbuf, 0x81);
3721     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3722     emit_d32(cbuf, 0x80000000);
3723     // The rel8 below is the byte size of the code up to done: 4 (subq) + 5 (movsd) + 5 (call) + 1 (popq), plus 1 per REX prefix.
3724     // jne,s done
3725     emit_opcode(cbuf, 0x75);
3726     if (srcenc < 8 && dstenc < 8) {
3727       emit_d8(cbuf, 0xF);
3728     } else if (srcenc >= 8 && dstenc >= 8) {
3729       emit_d8(cbuf, 0x11);
3730     } else {
3731       emit_d8(cbuf, 0x10);
3732     }
3733 
3734     // subq rsp, #8
3735     emit_opcode(cbuf, Assembler::REX_W);
3736     emit_opcode(cbuf, 0x83);
3737     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3738     emit_d8(cbuf, 8);
3739 
3740     // movsd [rsp], $src
3741     emit_opcode(cbuf, 0xF2);
3742     if (srcenc >= 8) {
3743       emit_opcode(cbuf, Assembler::REX_R);
3744     }
3745     emit_opcode(cbuf, 0x0F);
3746     emit_opcode(cbuf, 0x11);
3747     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3748 
3749     // call d2i_fixup
3750     cbuf.set_insts_mark();
3751     emit_opcode(cbuf, 0xE8);
3752     emit_d32_reloc(cbuf,
3753                    (int)
3754                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3755                    runtime_call_Relocation::spec(),
3756                    RELOC_DISP32);
3757 
3758     // popq $dst
3759     if (dstenc >= 8) {
3760       emit_opcode(cbuf, Assembler::REX_B);
3761     }
3762     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3763 
3764     // done:
3765   %}
3766   // If dst equals the 64-bit word at double_sign_flip(), stores src on the stack, calls the d2l_fixup stub and pops the result into dst.
3767   enc_class d2l_fixup(rRegL dst, regD src)
3768   %{
3769     int dstenc = $dst$$reg;
3770     int srcenc = $src$$reg;
3771     address const_address = (address) StubRoutines::x86::double_sign_flip();
3772 
3773     // cmpq $dst, [0x8000000000000000]
3774     cbuf.set_insts_mark();
3775     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3776     emit_opcode(cbuf, 0x39);
3777     // XXX reg_mem doesn't support RIP-relative addressing yet
3778     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3779     emit_d32_reloc(cbuf, const_address);
3780 
3781     // The rel8 below is the byte size of the code up to done: 4 (subq) + 5 (movsd) + 5 (call) + 1 (popq), plus 1 per REX prefix.
3782     // jne,s done
3783     emit_opcode(cbuf, 0x75);
3784     if (srcenc < 8 && dstenc < 8) {
3785       emit_d8(cbuf, 0xF);
3786     } else if (srcenc >= 8 && dstenc >= 8) {
3787       emit_d8(cbuf, 0x11);
3788     } else {
3789       emit_d8(cbuf, 0x10);
3790     }
3791 
3792     // subq rsp, #8
3793     emit_opcode(cbuf, Assembler::REX_W);
3794     emit_opcode(cbuf, 0x83);
3795     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3796     emit_d8(cbuf, 8);
3797 
3798     // movsd [rsp], $src
3799     emit_opcode(cbuf, 0xF2);
3800     if (srcenc >= 8) {
3801       emit_opcode(cbuf, Assembler::REX_R);
3802     }
3803     emit_opcode(cbuf, 0x0F);
3804     emit_opcode(cbuf, 0x11);
3805     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3806 
3807     // call d2l_fixup
3808     cbuf.set_insts_mark();
3809     emit_opcode(cbuf, 0xE8);
3810     emit_d32_reloc(cbuf,
3811                    (int)
3812                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3813                    runtime_call_Relocation::spec(),
3814                    RELOC_DISP32);
3815 
3816     // popq $dst
3817     if (dstenc >= 8) {
3818       emit_opcode(cbuf, Assembler::REX_B);
3819     }
3820     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3821 
3822     // done:
3823   %}
3824 %}
3825 
3826 
3827 
3828 //----------FRAME--------------------------------------------------------------
3829 // Definition of frame structure and management information.
3830 //
3831 //  S T A C K   L A Y O U T    Allocators stack-slot number
3832 //                             |   (to get allocators register number
3833 //  G  Owned by    |        |  v    add OptoReg::stack0())
3834 //  r   CALLER     |        |
3835 //  o     |        +--------+      pad to even-align allocators stack-slot
3836 //  w     V        |  pad0  |        numbers; owned by CALLER
3837 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3838 //  h     ^        |   in   |  5
3839 //        |        |  args  |  4   Holes in incoming args owned by SELF
3840 //  |     |        |        |  3
3841 //  |     |        +--------+
3842 //  V     |        | old out|      Empty on Intel, window on Sparc
3843 //        |    old |preserve|      Must be even aligned.
3844 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3845 //        |        |   in   |  3   area for Intel ret address
3846 //     Owned by    |preserve|      Empty on Sparc.
3847 //       SELF      +--------+
3848 //        |        |  pad2  |  2   pad to align old SP
3849 //        |        +--------+  1
3850 //        |        | locks  |  0
3851 //        |        +--------+----> OptoReg::stack0(), even aligned
3852 //        |        |  pad1  | 11   pad to align new SP
3853 //        |        +--------+
3854 //        |        |        | 10
3855 //        |        | spills |  9   spills
3856 //        V        |        |  8   (pad0 slot for callee)
3857 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3858 //        ^        |  out   |  7
3859 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3860 //     Owned by    +--------+
3861 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3862 //        |    new |preserve|      Must be even-aligned.
3863 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3864 //        |        |        |
3865 //
3866 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3867 //         known from SELF's arguments and the Java calling convention.
3868 //         Region 6-7 is determined per call site.
3869 // Note 2: If the calling convention leaves holes in the incoming argument
3870 //         area, those holes are owned by SELF.  Holes in the outgoing area
3871 //         are owned by the CALLEE.  Holes should not be necessary in the
3872 //         incoming area, as the Java calling convention is completely under
3873 //         the control of the AD file.  Doubles can be sorted and packed to
3874 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3875 //         varargs C calling conventions.
3876 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3877 //         even aligned with pad0 as needed.
3878 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3879 //         region 6-11 is even aligned; it may be padded out more so that
3880 //         the region from SP to FP meets the minimum stack alignment.
3881 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3882 //         alignment.  Region 11, pad1, may be dynamically extended so that
3883 //         SP meets the minimum alignment.
3884 
3885 frame
3886 %{
3887   // What direction does stack grow in (assumed to be same for C & Java)
3888   stack_direction(TOWARDS_LOW);
3889 
3890   // These registers define part of the calling convention
3891   // between compiled code and the interpreter.
3892   inline_cache_reg(RAX);                // Inline Cache Register
3893   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3894                                         // calling interpreter
3895 
3896   // Optional: name the operand used by cisc-spilling to access
3897   // [stack_pointer + offset]
3898   cisc_spilling_operand_name(indOffset32);
3899 
3900   // Number of stack slots consumed by locking an object
3901   sync_stack_slots(2);
3902 
3903   // Compiled code's Frame Pointer
3904   frame_pointer(RSP);
3905 
3906   // Interpreter stores its frame pointer in a register which is
3907   // stored to the stack by I2CAdaptors.
3908   // I2CAdaptors convert from interpreted java to compiled java.
3909   interpreter_frame_pointer(RBP);
3910 
3911   // Stack alignment requirement
3912   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3913 
3914   // Number of stack slots between incoming argument block and the start of
3915   // a new frame.  The PROLOG must add this many slots to the stack.  The
3916   // EPILOG must remove this many slots.  amd64 needs two slots for
3917   // return address.  NOTE(review): the base value below is 4 slots, not 2 -- confirm what the other two hold.
3918   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3919 
3920   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3921   // for calls to C.  Supports the var-args backing area for register parms.
3922   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3923 
3924   // The after-PROLOG location of the return address.  Location of
3925   // return address specifies a type (REG or STACK) and a number
3926   // representing the register number (i.e. - use a register name) or
3927   // stack slot.
3928   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3929   // Otherwise, it is above the locks and verification slot and alignment word
3930   return_addr(STACK - 2 +
3931               round_to(2 + 2 * VerifyStackAtCalls +
3932                        Compile::current()->fixed_slots(),
3933                        WordsPerLong * 2));
3934 
3935   // Body of function which returns an integer array locating
3936   // arguments either in registers or in stack slots.  Passed an array
3937   // of ideal registers called "sig" and a "length" count.  Stack-slot
3938   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3939   // arguments for a CALLEE.  Incoming stack arguments are
3940   // automatically biased by the preserve_stack_slots field above.
3941 
3942   calling_convention
3943   %{
3944     // No difference between ingoing/outgoing just pass false
3945     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3946   %}
3947 
3948   c_calling_convention
3949   %{
3950     // This is obviously always outgoing
3951     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3952   %}
3953 
3954   // Location of compiled Java return values.  Same as C for now.
3955   return_value
3956   %{
3957     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3958            "only return normal values");
3959     // Both tables are indexed by ideal_reg: lo gives the low half, hi the high half (OptoReg::Bad where the labels show a 32-bit type).
3960     static const int lo[Op_RegL + 1] = {
3961       0,
3962       0,
3963       RAX_num,  // Op_RegN
3964       RAX_num,  // Op_RegI
3965       RAX_num,  // Op_RegP
3966       XMM0_num, // Op_RegF
3967       XMM0_num, // Op_RegD
3968       RAX_num   // Op_RegL
3969     };
3970     static const int hi[Op_RegL + 1] = {
3971       0,
3972       0,
3973       OptoReg::Bad, // Op_RegN
3974       OptoReg::Bad, // Op_RegI
3975       RAX_H_num,    // Op_RegP
3976       OptoReg::Bad, // Op_RegF
3977       XMM0_H_num,   // Op_RegD
3978       RAX_H_num     // Op_RegL
3979     };
3980     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");  // guards against a new ideal register type missing from the tables
3981     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
3982   %}
3983 %}
3984 
3985 //----------ATTRIBUTES---------------------------------------------------------
3986 //----------Operand Attributes-------------------------------------------------
3987 op_attrib op_cost(0);        // Required cost attribute, declared here with value 0
3988 
3989 //----------Instruction Attributes---------------------------------------------
3990 ins_attrib ins_cost(100);       // Required cost attribute, declared here with value 100
3991 ins_attrib ins_size(8);         // Required size attribute (in bits)
3992 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3993                                 // a non-matching short branch variant
3994                                 // of some long branch?
3995 ins_attrib ins_alignment(1);    // Required alignment attribute (must
3996                                 // be a power of 2) specifies the
3997                                 // alignment that some part of the
3998                                 // instruction (not necessarily the
3999                                 // start) requires.  If > 1, a
4000                                 // compute_padding() function must be
4001                                 // provided for the instruction
4002 
4003 //----------OPERANDS-----------------------------------------------------------
4004 // Operand definitions must precede instruction definitions for correct parsing
4005 // in the ADLC because operands constitute user defined types which are used in
4006 // instruction definitions.
4007 
4008 //----------Simple Operands----------------------------------------------------
4009 // Immediate Operands
4010 // Integer Immediate: matches any ConI (no predicate)
4011 operand immI()
4012 %{
4013   match(ConI);
4014 
4015   op_cost(10);
4016   format %{ %}
4017   interface(CONST_INTER);
4018 %}
4019 
4020 // Constant for test vs zero: ConI whose value is 0
4021 operand immI0()
4022 %{
4023   predicate(n->get_int() == 0);
4024   match(ConI);
4025 
4026   op_cost(0);
4027   format %{ %}
4028   interface(CONST_INTER);
4029 %}
4030 
4031 // Constant for increment: ConI whose value is 1
4032 operand immI1()
4033 %{
4034   predicate(n->get_int() == 1);
4035   match(ConI);
4036 
4037   op_cost(0);
4038   format %{ %}
4039   interface(CONST_INTER);
4040 %}
4041 
4042 // Constant for decrement: ConI whose value is -1
4043 operand immI_M1()
4044 %{
4045   predicate(n->get_int() == -1);
4046   match(ConI);
4047 
4048   op_cost(0);
4049   format %{ %}
4050   interface(CONST_INTER);
4051 %}
4052 
4053 // Valid scale values for addressing modes: ConI in the range 0..3 inclusive
4054 operand immI2()
4055 %{
4056   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4057   match(ConI);
4058 
4059   format %{ %}
4060   interface(CONST_INTER);
4061 %}
4062 // Integer immediate in the signed 8-bit range: -0x80 <= value < 0x80
4063 operand immI8()
4064 %{
4065   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4066   match(ConI);
4067 
4068   op_cost(5);
4069   format %{ %}
4070   interface(CONST_INTER);
4071 %}
4072 // Integer immediate in the signed 16-bit range: -32768 <= value <= 32767
4073 operand immI16()
4074 %{
4075   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4076   match(ConI);
4077 
4078   op_cost(10);
4079   format %{ %}
4080   interface(CONST_INTER);
4081 %}
4082 
4083 // Constant for long shifts: ConI whose value is 32
4084 operand immI_32()
4085 %{
4086   predicate( n->get_int() == 32 );
4087   match(ConI);
4088 
4089   op_cost(0);
4090   format %{ %}
4091   interface(CONST_INTER);
4092 %}
4093 
4094 // Constant for long shifts: ConI whose value is 64
4095 operand immI_64()
4096 %{
4097   predicate( n->get_int() == 64 );
4098   match(ConI);
4099 
4100   op_cost(0);
4101   format %{ %}
4102   interface(CONST_INTER);
4103 %}
4104 
4105 // Pointer Immediate: matches any ConP (no predicate)
4106 operand immP()
4107 %{
4108   match(ConP);
4109 
4110   op_cost(10);
4111   format %{ %}
4112   interface(CONST_INTER);
4113 %}
4114 
4115 // NULL Pointer Immediate: ConP whose get_ptr() is 0
4116 operand immP0()
4117 %{
4118   predicate(n->get_ptr() == 0);
4119   match(ConP);
4120 
4121   op_cost(5);
4122   format %{ %}
4123   interface(CONST_INTER);
4124 %}
4125 // Pointer immediate that is non-null and equal to the address returned by os::get_polling_page()
4126 operand immP_poll() %{
4127   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
4128   match(ConP);
4129 
4130   // formats are generated automatically for constants and base registers
4131   format %{ %}
4132   interface(CONST_INTER);
4133 %}
4134 
4135 // Narrow Pointer Immediate: matches any ConN (no predicate)
4136 operand immN() %{
4137   match(ConN);
4138 
4139   op_cost(10);
4140   format %{ %}
4141   interface(CONST_INTER);
4142 %}
4143 
4144 // NULL Narrow Pointer Immediate: ConN whose get_narrowcon() is 0
4145 operand immN0() %{
4146   predicate(n->get_narrowcon() == 0);
4147   match(ConN);
4148 
4149   op_cost(5);
4150   format %{ %}
4151   interface(CONST_INTER);
4152 %}
4153 // Pointer immediate that is not an oop pointer and whose value has no bits set at or above bit 31
4154 operand immP31()
4155 %{
4156   predicate(!n->as_Type()->type()->isa_oopptr()
4157             && (n->get_ptr() >> 31) == 0);
4158   match(ConP);
4159 
4160   op_cost(5);
4161   format %{ %}
4162   interface(CONST_INTER);
4163 %}
4164 
4165 
4166 // Long Immediate: matches any ConL (no predicate)
4167 operand immL()
4168 %{
4169   match(ConL);
4170 
4171   op_cost(20);
4172   format %{ %}
4173   interface(CONST_INTER);
4174 %}
4175 
4176 // Long Immediate 8-bit: ConL with -0x80 <= value < 0x80
4177 operand immL8()
4178 %{
4179   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4180   match(ConL);
4181 
4182   op_cost(5);
4183   format %{ %}
4184   interface(CONST_INTER);
4185 %}
4186 
4187 // Long Immediate 32-bit unsigned: ConL whose value is unchanged by a cast to unsigned int
4188 operand immUL32()
4189 %{
4190   predicate(n->get_long() == (unsigned int) (n->get_long()));
4191   match(ConL);
4192 
4193   op_cost(10);
4194   format %{ %}
4195   interface(CONST_INTER);
4196 %}
4197 
4198 // Long Immediate 32-bit signed: ConL whose value is unchanged by a cast to int
4199 operand immL32()
4200 %{
4201   predicate(n->get_long() == (int) (n->get_long()));
4202   match(ConL);
4203 
4204   op_cost(15);
4205   format %{ %}
4206   interface(CONST_INTER);
4207 %}
4208 
4209 // Long Immediate zero: ConL whose value is 0
4210 operand immL0()
4211 %{
4212   predicate(n->get_long() == 0L);
4213   match(ConL);
4214 
4215   op_cost(10);
4216   format %{ %}
4217   interface(CONST_INTER);
4218 %}
4219 
4220 // Constant for increment: ConL whose value is 1
4221 operand immL1()
4222 %{
4223   predicate(n->get_long() == 1);
4224   match(ConL);
4225 
4226   format %{ %}
4227   interface(CONST_INTER);
4228 %}
4229 
4230 // Constant for decrement: ConL whose value is -1
4231 operand immL_M1()
4232 %{
4233   predicate(n->get_long() == -1);
4234   match(ConL);
4235 
4236   format %{ %}
4237   interface(CONST_INTER);
4238 %}
4239 
4240 // Long Immediate: the value 10 (ConL whose value is exactly 10)
4241 operand immL10()
4242 %{
4243   predicate(n->get_long() == 10);
4244   match(ConL);
4245 
4246   format %{ %}
4247   interface(CONST_INTER);
4248 %}
4249 
4250 // Long immediate from 0 to 127 inclusive (0 <= value < 0x80).
4251 // Used for a shorter form of long mul by 10.
4252 operand immL_127()
4253 %{
4254   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4255   match(ConL);
4256 
4257   op_cost(10);
4258   format %{ %}
4259   interface(CONST_INTER);
4260 %}
4261 
4262 // Long Immediate: low 32-bit mask (ConL whose value is 0xFFFFFFFF)
4263 operand immL_32bits()
4264 %{
4265   predicate(n->get_long() == 0xFFFFFFFFL);
4266   match(ConL);
4267   op_cost(20);
4268 
4269   format %{ %}
4270   interface(CONST_INTER);
4271 %}
4272 
4273 // Float Immediate zero: ConF whose jint_cast is 0 (presumably a raw-bits test, which would exclude -0.0f -- confirm)
4274 operand immF0()
4275 %{
4276   predicate(jint_cast(n->getf()) == 0);
4277   match(ConF);
4278 
4279   op_cost(5);
4280   format %{ %}
4281   interface(CONST_INTER);
4282 %}
4283 
4284 // Float Immediate: matches any ConF (no predicate)
4285 operand immF()
4286 %{
4287   match(ConF);
4288 
4289   op_cost(15);
4290   format %{ %}
4291   interface(CONST_INTER);
4292 %}
4293 
4294 // Double Immediate zero: ConD whose jlong_cast is 0 (presumably a raw-bits test, which would exclude -0.0 -- confirm)
4295 operand immD0()
4296 %{
4297   predicate(jlong_cast(n->getd()) == 0);
4298   match(ConD);
4299 
4300   op_cost(5);
4301   format %{ %}
4302   interface(CONST_INTER);
4303 %}
4304 
4305 // Double Immediate: matches any ConD (no predicate)
4306 operand immD()
4307 %{
4308   match(ConD);
4309 
4310   op_cost(15);
4311   format %{ %}
4312   interface(CONST_INTER);
4313 %}
4314 
4315 // Immediates for special shifts (sign extend)
4316 
4317 // Constant 16: ConI whose value is 16 (one of the special shift immediates)
4318 operand immI_16()
4319 %{
4320   predicate(n->get_int() == 16);
4321   match(ConI);
4322 
4323   format %{ %}
4324   interface(CONST_INTER);
4325 %}
4326 // Constant 24: ConI whose value is 24 (one of the special shift immediates)
4327 operand immI_24()
4328 %{
4329   predicate(n->get_int() == 24);
4330   match(ConI);
4331 
4332   format %{ %}
4333   interface(CONST_INTER);
4334 %}
4335 
4336 // Constant for byte-wide masking: ConI whose value is 255
4337 operand immI_255()
4338 %{
4339   predicate(n->get_int() == 255);
4340   match(ConI);
4341 
4342   format %{ %}
4343   interface(CONST_INTER);
4344 %}
4345 
4346 // Constant for short-wide masking: ConI whose value is 65535
4347 operand immI_65535()
4348 %{
4349   predicate(n->get_int() == 65535);
4350   match(ConI);
4351 
4352   format %{ %}
4353   interface(CONST_INTER);
4354 %}
4355 
4356 // Constant for byte-wide masking: ConL whose value is 255
4357 operand immL_255()
4358 %{
4359   predicate(n->get_long() == 255);
4360   match(ConL);
4361 
4362   format %{ %}
4363   interface(CONST_INTER);
4364 %}
4365 
4366 // Constant for short-wide masking: ConL whose value is 65535
4367 operand immL_65535()
4368 %{
4369   predicate(n->get_long() == 65535);
4370   match(ConL);
4371 
4372   format %{ %}
4373   interface(CONST_INTER);
4374 %}
4375 
4376 // Register Operands
4377 // Integer Register: RegI allocated from int_reg; also matches the fixed-register operands listed below
4378 operand rRegI()
4379 %{
4380   constraint(ALLOC_IN_RC(int_reg));
4381   match(RegI);
4382 
4383   match(rax_RegI);
4384   match(rbx_RegI);
4385   match(rcx_RegI);
4386   match(rdx_RegI);
4387   match(rdi_RegI);
4388 
4389   format %{ %}
4390   interface(REG_INTER);
4391 %}
4392 
4393 // Special Registers: integer operand constrained to the int_rax_reg class
4394 operand rax_RegI()
4395 %{
4396   constraint(ALLOC_IN_RC(int_rax_reg));
4397   match(RegI);
4398   match(rRegI);
4399 
4400   format %{ "RAX" %}
4401   interface(REG_INTER);
4402 %}
4403 
4404 // Special Registers: integer operand constrained to the int_rbx_reg class
4405 operand rbx_RegI()
4406 %{
4407   constraint(ALLOC_IN_RC(int_rbx_reg));
4408   match(RegI);
4409   match(rRegI);
4410 
4411   format %{ "RBX" %}
4412   interface(REG_INTER);
4413 %}
4414 // Integer operand constrained to the int_rcx_reg class
4415 operand rcx_RegI()
4416 %{
4417   constraint(ALLOC_IN_RC(int_rcx_reg));
4418   match(RegI);
4419   match(rRegI);
4420 
4421   format %{ "RCX" %}
4422   interface(REG_INTER);
4423 %}
4424 // Integer operand constrained to the int_rdx_reg class
4425 operand rdx_RegI()
4426 %{
4427   constraint(ALLOC_IN_RC(int_rdx_reg));
4428   match(RegI);
4429   match(rRegI);
4430 
4431   format %{ "RDX" %}
4432   interface(REG_INTER);
4433 %}
4434 // Integer operand constrained to the int_rdi_reg class
4435 operand rdi_RegI()
4436 %{
4437   constraint(ALLOC_IN_RC(int_rdi_reg));
4438   match(RegI);
4439   match(rRegI);
4440 
4441   format %{ "RDI" %}
4442   interface(REG_INTER);
4443 %}
4444 // Integer operand allocated from int_no_rcx_reg; matches the fixed-register operands other than rcx_RegI
4445 operand no_rcx_RegI()
4446 %{
4447   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4448   match(RegI);
4449   match(rax_RegI);
4450   match(rbx_RegI);
4451   match(rdx_RegI);
4452   match(rdi_RegI);
4453 
4454   format %{ %}
4455   interface(REG_INTER);
4456 %}
4457 // Integer operand allocated from int_no_rax_rdx_reg; matches the fixed-register operands other than rax_RegI and rdx_RegI
4458 operand no_rax_rdx_RegI()
4459 %{
4460   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4461   match(RegI);
4462   match(rbx_RegI);
4463   match(rcx_RegI);
4464   match(rdi_RegI);
4465 
4466   format %{ %}
4467   interface(REG_INTER);
4468 %}
4469 
4470 // Pointer Register: RegP allocated from any_reg; matches rRegP and every fixed pointer-register operand listed below
4471 operand any_RegP()
4472 %{
4473   constraint(ALLOC_IN_RC(any_reg));
4474   match(RegP);
4475   match(rax_RegP);
4476   match(rbx_RegP);
4477   match(rdi_RegP);
4478   match(rsi_RegP);
4479   match(rbp_RegP);
4480   match(r15_RegP);
4481   match(rRegP);
4482 
4483   format %{ %}
4484   interface(REG_INTER);
4485 %}
4486 // General pointer register: RegP allocated from ptr_reg
4487 operand rRegP()
4488 %{
4489   constraint(ALLOC_IN_RC(ptr_reg));
4490   match(RegP);
4491   match(rax_RegP);
4492   match(rbx_RegP);
4493   match(rdi_RegP);
4494   match(rsi_RegP);
4495   match(rbp_RegP);
4496   match(r15_RegP);  // See Q&A below about r15_RegP.
4497 
4498   format %{ %}
4499   interface(REG_INTER);
4500 %}
4501 // Narrow pointer register: RegN allocated from the same int_reg class that rRegI uses
4502 operand rRegN() %{
4503   constraint(ALLOC_IN_RC(int_reg));
4504   match(RegN);
4505 
4506   format %{ %}
4507   interface(REG_INTER);
4508 %}
4509 
4510 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4511 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4512 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4513 // The output of an instruction is controlled by the allocator, which respects
4514 // register class masks, not match rules.  Unless an instruction mentions
4515 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4516 // by the allocator as an input.
4517 
4518 operand no_rax_RegP()
4519 %{
4520   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4521   match(RegP);
4522   match(rbx_RegP);
4523   match(rsi_RegP);
4524   match(rdi_RegP);
4525 
4526   format %{ %}
4527   interface(REG_INTER);
4528 %}
4529 
4530 operand no_rbp_RegP()
4531 %{
4532   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4533   match(RegP);
4534   match(rbx_RegP);
4535   match(rsi_RegP);
4536   match(rdi_RegP);
4537 
4538   format %{ %}
4539   interface(REG_INTER);
4540 %}
4541 
4542 operand no_rax_rbx_RegP()
4543 %{
4544   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4545   match(RegP);
4546   match(rsi_RegP);
4547   match(rdi_RegP);
4548 
4549   format %{ %}
4550   interface(REG_INTER);
4551 %}
4552 
4553 // Special Registers
4554 // Return a pointer value
4555 operand rax_RegP()
4556 %{
4557   constraint(ALLOC_IN_RC(ptr_rax_reg));
4558   match(RegP);
4559   match(rRegP);
4560 
4561   format %{ %}
4562   interface(REG_INTER);
4563 %}
4564 
4565 // Special Registers
4566 // Return a compressed pointer value
4567 operand rax_RegN()
4568 %{
4569   constraint(ALLOC_IN_RC(int_rax_reg));
4570   match(RegN);
4571   match(rRegN);
4572 
4573   format %{ %}
4574   interface(REG_INTER);
4575 %}
4576 
4577 // Used in AtomicAdd
4578 operand rbx_RegP()
4579 %{
4580   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4581   match(RegP);
4582   match(rRegP);
4583 
4584   format %{ %}
4585   interface(REG_INTER);
4586 %}
4587 
4588 operand rsi_RegP()
4589 %{
4590   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4591   match(RegP);
4592   match(rRegP);
4593 
4594   format %{ %}
4595   interface(REG_INTER);
4596 %}
4597 
4598 // Used in rep stosq
4599 operand rdi_RegP()
4600 %{
4601   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4602   match(RegP);
4603   match(rRegP);
4604 
4605   format %{ %}
4606   interface(REG_INTER);
4607 %}
4608 
4609 operand rbp_RegP()
4610 %{
4611   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4612   match(RegP);
4613   match(rRegP);
4614 
4615   format %{ %}
4616   interface(REG_INTER);
4617 %}
4618 
4619 operand r15_RegP()
4620 %{
4621   constraint(ALLOC_IN_RC(ptr_r15_reg));
4622   match(RegP);
4623   match(rRegP);
4624 
4625   format %{ %}
4626   interface(REG_INTER);
4627 %}
4628 
4629 operand rRegL()
4630 %{
4631   constraint(ALLOC_IN_RC(long_reg));
4632   match(RegL);
4633   match(rax_RegL);
4634   match(rdx_RegL);
4635 
4636   format %{ %}
4637   interface(REG_INTER);
4638 %}
4639 
4640 // Special Registers
4641 operand no_rax_rdx_RegL()
4642 %{
4643   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4644   match(RegL);
4645   match(rRegL);
4646 
4647   format %{ %}
4648   interface(REG_INTER);
4649 %}
4650 
4651 operand no_rax_RegL()
4652 %{
4653   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL; the class name suggests RDX is excluded too although rdx_RegL is matched below -- confirm this is intentional
4654   match(RegL);
4655   match(rRegL);
4656   match(rdx_RegL);
4657 
4658   format %{ %}
4659   interface(REG_INTER);
4660 %}
4661 
4662 operand no_rcx_RegL()
4663 %{
4664   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4665   match(RegL);
4666   match(rRegL);
4667 
4668   format %{ %}
4669   interface(REG_INTER);
4670 %}
4671 
4672 operand rax_RegL()
4673 %{
4674   constraint(ALLOC_IN_RC(long_rax_reg));
4675   match(RegL);
4676   match(rRegL);
4677 
4678   format %{ "RAX" %}
4679   interface(REG_INTER);
4680 %}
4681 
4682 operand rcx_RegL()
4683 %{
4684   constraint(ALLOC_IN_RC(long_rcx_reg));
4685   match(RegL);
4686   match(rRegL);
4687 
4688   format %{ %}
4689   interface(REG_INTER);
4690 %}
4691 
4692 operand rdx_RegL()
4693 %{
4694   constraint(ALLOC_IN_RC(long_rdx_reg));
4695   match(RegL);
4696   match(rRegL);
4697 
4698   format %{ %}
4699   interface(REG_INTER);
4700 %}
4701 
4702 // Flags register, used as output of compare instructions
4703 operand rFlagsReg()
4704 %{
4705   constraint(ALLOC_IN_RC(int_flags));
4706   match(RegFlags);
4707 
4708   format %{ "RFLAGS" %}
4709   interface(REG_INTER);
4710 %}
4711 
4712 // Flags register, used as output of FLOATING POINT compare instructions
4713 operand rFlagsRegU()
4714 %{
4715   constraint(ALLOC_IN_RC(int_flags));
4716   match(RegFlags);
4717 
4718   format %{ "RFLAGS_U" %}
4719   interface(REG_INTER);
4720 %}
4721 
4722 operand rFlagsRegUCF() %{
4723   constraint(ALLOC_IN_RC(int_flags));
4724   match(RegFlags);
4725   predicate(false);  // constant-false predicate: presumably keeps the matcher from selecting this operand for a plain RegFlags on its own -- confirm
4726 
4727   format %{ "RFLAGS_U_CF" %}
4728   interface(REG_INTER);
4729 %}
4730 
4731 // Float register operands
4732 operand regF()
4733 %{
4734   constraint(ALLOC_IN_RC(float_reg));
4735   match(RegF);
4736 
4737   format %{ %}
4738   interface(REG_INTER);
4739 %}
4740 
4741 // Double register operands
4742 operand regD()
4743 %{
4744   constraint(ALLOC_IN_RC(double_reg));
4745   match(RegD);
4746 
4747   format %{ %}
4748   interface(REG_INTER);
4749 %}
4750 
4751 
4752 //----------Memory Operands----------------------------------------------------
4753 // Direct Memory Operand
4754 // operand direct(immP addr)
4755 // %{
4756 //   match(addr);
4757 
4758 //   format %{ "[$addr]" %}
4759 //   interface(MEMORY_INTER) %{
4760 //     base(0xFFFFFFFF);
4761 //     index(0x4);
4762 //     scale(0x0);
4763 //     disp($addr);
4764 //   %}
4765 // %}
4766 
4767 // Indirect Memory Operand
4768 operand indirect(any_RegP reg)
4769 %{
4770   constraint(ALLOC_IN_RC(ptr_reg));
4771   match(reg);
4772 
4773   format %{ "[$reg]" %}
4774   interface(MEMORY_INTER) %{
4775     base($reg);
4776     index(0x4);
4777     scale(0x0);
4778     disp(0x0);
4779   %}
4780 %}
4781 
4782 // Indirect Memory Plus Short Offset Operand
4783 operand indOffset8(any_RegP reg, immL8 off)
4784 %{
4785   constraint(ALLOC_IN_RC(ptr_reg));
4786   match(AddP reg off);
4787 
4788   format %{ "[$reg + $off (8-bit)]" %}
4789   interface(MEMORY_INTER) %{
4790     base($reg);
4791     index(0x4);
4792     scale(0x0);
4793     disp($off);
4794   %}
4795 %}
4796 
4797 // Indirect Memory Plus Long Offset Operand
4798 operand indOffset32(any_RegP reg, immL32 off)
4799 %{
4800   constraint(ALLOC_IN_RC(ptr_reg));
4801   match(AddP reg off);
4802 
4803   format %{ "[$reg + $off (32-bit)]" %}
4804   interface(MEMORY_INTER) %{
4805     base($reg);
4806     index(0x4);
4807     scale(0x0);
4808     disp($off);
4809   %}
4810 %}
4811 
4812 // Indirect Memory Plus Index Register Plus Offset Operand
4813 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4814 %{
4815   constraint(ALLOC_IN_RC(ptr_reg));
4816   match(AddP (AddP reg lreg) off);
4817 
4818   op_cost(10);
4819   format %{"[$reg + $off + $lreg]" %}
4820   interface(MEMORY_INTER) %{
4821     base($reg);
4822     index($lreg);
4823     scale(0x0);
4824     disp($off);
4825   %}
4826 %}
4827 
4828 // Indirect Memory Plus Index Register Operand (no scale, no offset)
4829 operand indIndex(any_RegP reg, rRegL lreg)
4830 %{
4831   constraint(ALLOC_IN_RC(ptr_reg));
4832   match(AddP reg lreg);
4833 
4834   op_cost(10);
4835   format %{"[$reg + $lreg]" %}
4836   interface(MEMORY_INTER) %{
4837     base($reg);
4838     index($lreg);
4839     scale(0x0);
4840     disp(0x0);
4841   %}
4842 %}
4843 
4844 // Indirect Memory Plus Scaled Index Register Operand
4845 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4846 %{
4847   constraint(ALLOC_IN_RC(ptr_reg));
4848   match(AddP reg (LShiftL lreg scale));
4849 
4850   op_cost(10);
4851   format %{"[$reg + $lreg << $scale]" %}
4852   interface(MEMORY_INTER) %{
4853     base($reg);
4854     index($lreg);
4855     scale($scale);
4856     disp(0x0);
4857   %}
4858 %}
4859 
4860 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4861 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4862 %{
4863   constraint(ALLOC_IN_RC(ptr_reg));
4864   match(AddP (AddP reg (LShiftL lreg scale)) off);
4865 
4866   op_cost(10);
4867   format %{"[$reg + $off + $lreg << $scale]" %}
4868   interface(MEMORY_INTER) %{
4869     base($reg);
4870     index($lreg);
4871     scale($scale);
4872     disp($off);
4873   %}
4874 %}
4875 
4876 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
4877 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4878 %{
4879   constraint(ALLOC_IN_RC(ptr_reg));
4880   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // requires a non-negative lower bound on the long type of the node reached here (presumably the ConvI2L of idx -- confirm)
4881   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4882 
4883   op_cost(10);
4884   format %{"[$reg + $off + $idx << $scale]" %}
4885   interface(MEMORY_INTER) %{
4886     base($reg);
4887     index($idx);
4888     scale($scale);
4889     disp($off);
4890   %}
4891 %}
4892 
4893 // Indirect Narrow Oop Plus Offset Operand
4894 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
4895 // so we can't free r12 even when Universe::narrow_oop_base() == NULL.
4896 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4897   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
4898   constraint(ALLOC_IN_RC(ptr_reg));
4899   match(AddP (DecodeN reg) off);
4900 
4901   op_cost(10);
4902   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4903   interface(MEMORY_INTER) %{
4904     base(0xc); // R12
4905     index($reg);
4906     scale(0x3);
4907     disp($off);
4908   %}
4909 %}
4910 
4911 // Indirect Narrow Oop Operand (address is DecodeN of reg; applies only when the narrow oop shift is 0)
4912 operand indirectNarrow(rRegN reg)
4913 %{
4914   predicate(Universe::narrow_oop_shift() == 0);
4915   constraint(ALLOC_IN_RC(ptr_reg));
4916   match(DecodeN reg);
4917 
4918   format %{ "[$reg]" %}
4919   interface(MEMORY_INTER) %{
4920     base($reg);
4921     index(0x4);
4922     scale(0x0);
4923     disp(0x0);
4924   %}
4925 %}
4926 
4927 // Indirect Narrow Oop Plus Short Offset Operand (applies only when the narrow oop shift is 0)
4928 operand indOffset8Narrow(rRegN reg, immL8 off)
4929 %{
4930   predicate(Universe::narrow_oop_shift() == 0);
4931   constraint(ALLOC_IN_RC(ptr_reg));
4932   match(AddP (DecodeN reg) off);
4933 
4934   format %{ "[$reg + $off (8-bit)]" %}
4935   interface(MEMORY_INTER) %{
4936     base($reg);
4937     index(0x4);
4938     scale(0x0);
4939     disp($off);
4940   %}
4941 %}
4942 
4943 // Indirect Narrow Oop Plus Long Offset Operand (applies only when the narrow oop shift is 0)
4944 operand indOffset32Narrow(rRegN reg, immL32 off)
4945 %{
4946   predicate(Universe::narrow_oop_shift() == 0);
4947   constraint(ALLOC_IN_RC(ptr_reg));
4948   match(AddP (DecodeN reg) off);
4949 
4950   format %{ "[$reg + $off (32-bit)]" %}
4951   interface(MEMORY_INTER) %{
4952     base($reg);
4953     index(0x4);
4954     scale(0x0);
4955     disp($off);
4956   %}
4957 %}
4958 
4959 // Indirect Narrow Oop Plus Index Register Plus Offset Operand (applies only when the narrow oop shift is 0)
4960 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4961 %{
4962   predicate(Universe::narrow_oop_shift() == 0);
4963   constraint(ALLOC_IN_RC(ptr_reg));
4964   match(AddP (AddP (DecodeN reg) lreg) off);
4965 
4966   op_cost(10);
4967   format %{"[$reg + $off + $lreg]" %}
4968   interface(MEMORY_INTER) %{
4969     base($reg);
4970     index($lreg);
4971     scale(0x0);
4972     disp($off);
4973   %}
4974 %}
4975 
4976 // Indirect Narrow Oop Plus Index Register Operand (no scale, no offset; applies only when the narrow oop shift is 0)
4977 operand indIndexNarrow(rRegN reg, rRegL lreg)
4978 %{
4979   predicate(Universe::narrow_oop_shift() == 0);
4980   constraint(ALLOC_IN_RC(ptr_reg));
4981   match(AddP (DecodeN reg) lreg);
4982 
4983   op_cost(10);
4984   format %{"[$reg + $lreg]" %}
4985   interface(MEMORY_INTER) %{
4986     base($reg);
4987     index($lreg);
4988     scale(0x0);
4989     disp(0x0);
4990   %}
4991 %}
4992 
4993 // Indirect Narrow Oop Plus Scaled Index Register Operand (applies only when the narrow oop shift is 0)
4994 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4995 %{
4996   predicate(Universe::narrow_oop_shift() == 0);
4997   constraint(ALLOC_IN_RC(ptr_reg));
4998   match(AddP (DecodeN reg) (LShiftL lreg scale));
4999 
5000   op_cost(10);
5001   format %{"[$reg + $lreg << $scale]" %}
5002   interface(MEMORY_INTER) %{
5003     base($reg);
5004     index($lreg);
5005     scale($scale);
5006     disp(0x0);
5007   %}
5008 %}
5009 
5010 // Indirect Narrow Oop Plus Scaled Index Register Plus Offset Operand (applies only when the narrow oop shift is 0)
5011 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5012 %{
5013   predicate(Universe::narrow_oop_shift() == 0);
5014   constraint(ALLOC_IN_RC(ptr_reg));
5015   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5016 
5017   op_cost(10);
5018   format %{"[$reg + $off + $lreg << $scale]" %}
5019   interface(MEMORY_INTER) %{
5020     base($reg);
5021     index($lreg);
5022     scale($scale);
5023     disp($off);
5024   %}
5025 %}
5026 
5027 // Indirect Narrow Oop Plus Scaled Positive Index Register Plus Offset Operand
5028 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5029 %{
5030   constraint(ALLOC_IN_RC(ptr_reg));
5031   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5032   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5033 
5034   op_cost(10);
5035   format %{"[$reg + $off + $idx << $scale]" %}
5036   interface(MEMORY_INTER) %{
5037     base($reg);
5038     index($idx);
5039     scale($scale);
5040     disp($off);
5041   %}
5042 %}
5043 
5044 
5045 //----------Special Memory Operands--------------------------------------------
5046 // Stack Slot Operand - This operand is used for loading and storing temporary
5047 //                      values on the stack where a match requires a value to
5048 //                      flow through memory.
5049 operand stackSlotP(sRegP reg)
5050 %{
5051   constraint(ALLOC_IN_RC(stack_slots));
5052   // No match rule because this operand is only generated in matching
5053 
5054   format %{ "[$reg]" %}
5055   interface(MEMORY_INTER) %{
5056     base(0x4);   // RSP
5057     index(0x4);  // No Index
5058     scale(0x0);  // No Scale
5059     disp($reg);  // Stack Offset
5060   %}
5061 %}
5062 
5063 operand stackSlotI(sRegI reg)
5064 %{
5065   constraint(ALLOC_IN_RC(stack_slots));
5066   // No match rule because this operand is only generated in matching
5067 
5068   format %{ "[$reg]" %}
5069   interface(MEMORY_INTER) %{
5070     base(0x4);   // RSP
5071     index(0x4);  // No Index
5072     scale(0x0);  // No Scale
5073     disp($reg);  // Stack Offset
5074   %}
5075 %}
5076 
5077 operand stackSlotF(sRegF reg)
5078 %{
5079   constraint(ALLOC_IN_RC(stack_slots));
5080   // No match rule because this operand is only generated in matching
5081 
5082   format %{ "[$reg]" %}
5083   interface(MEMORY_INTER) %{
5084     base(0x4);   // RSP
5085     index(0x4);  // No Index
5086     scale(0x0);  // No Scale
5087     disp($reg);  // Stack Offset
5088   %}
5089 %}
5090 
5091 operand stackSlotD(sRegD reg)
5092 %{
5093   constraint(ALLOC_IN_RC(stack_slots));
5094   // No match rule because this operand is only generated in matching
5095 
5096   format %{ "[$reg]" %}
5097   interface(MEMORY_INTER) %{
5098     base(0x4);   // RSP
5099     index(0x4);  // No Index
5100     scale(0x0);  // No Scale
5101     disp($reg);  // Stack Offset
5102   %}
5103 %}
5104 operand stackSlotL(sRegL reg)
5105 %{
5106   constraint(ALLOC_IN_RC(stack_slots));
5107   // No match rule because this operand is only generated in matching
5108 
5109   format %{ "[$reg]" %}
5110   interface(MEMORY_INTER) %{
5111     base(0x4);   // RSP
5112     index(0x4);  // No Index
5113     scale(0x0);  // No Scale
5114     disp($reg);  // Stack Offset
5115   %}
5116 %}
5117 
5118 //----------Conditional Branch Operands----------------------------------------
5119 // Comparison Op  - This is the operation of the comparison, and is limited to
5120 //                  the following set of codes:
5121 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5122 //
5123 // Other attributes of the comparison, such as unsignedness, are specified
5124 // by the comparison instruction that sets a condition code flags register.
5125 // That result is represented by a flags operand whose subtype is appropriate
5126 // to the unsignedness (etc.) of the comparison.
5127 //
5128 // Later, the instruction which matches both the Comparison Op (a Bool) and
5129 // the flags (produced by the Cmp) specifies the coding of the comparison op
5130 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5131 
5132 // Comparison Code
5133 operand cmpOp()
5134 %{
5135   match(Bool);
5136 
5137   format %{ "" %}
5138   interface(COND_INTER) %{
5139     equal(0x4, "e");
5140     not_equal(0x5, "ne");
5141     less(0xC, "l");
5142     greater_equal(0xD, "ge");
5143     less_equal(0xE, "le");
5144     greater(0xF, "g");
5145   %}
5146 %}
5147 
5148 // Comparison Code, unsigned compare.  Used by FP also, with
5149 // C2 (unordered) turned into GT or LT already.  The other bits
5150 // C0 and C3 are turned into Carry & Zero flags.
5151 operand cmpOpU()
5152 %{
5153   match(Bool);
5154 
5155   format %{ "" %}
5156   interface(COND_INTER) %{
5157     equal(0x4, "e");
5158     not_equal(0x5, "ne");
5159     less(0x2, "b");
5160     greater_equal(0x3, "nb");
5161     less_equal(0x6, "be");
5162     greater(0x7, "nbe");
5163   %}
5164 %}
5165 
5166 
5167 // Floating comparisons that don't require any fixup for the unordered case
5168 operand cmpOpUCF() %{
5169   match(Bool);
5170   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5171             n->as_Bool()->_test._test == BoolTest::ge ||
5172             n->as_Bool()->_test._test == BoolTest::le ||
5173             n->as_Bool()->_test._test == BoolTest::gt);
5174   format %{ "" %}
5175   interface(COND_INTER) %{
5176     equal(0x4, "e");
5177     not_equal(0x5, "ne");
5178     less(0x2, "b");
5179     greater_equal(0x3, "nb");
5180     less_equal(0x6, "be");
5181     greater(0x7, "nbe");
5182   %}
5183 %}
5184 
5185 
5186 // Floating comparisons that can be fixed up with extra conditional jumps
5187 operand cmpOpUCF2() %{
5188   match(Bool);
5189   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5190             n->as_Bool()->_test._test == BoolTest::eq);
5191   format %{ "" %}
5192   interface(COND_INTER) %{
5193     equal(0x4, "e");
5194     not_equal(0x5, "ne");
5195     less(0x2, "b");
5196     greater_equal(0x3, "nb");
5197     less_equal(0x6, "be");
5198     greater(0x7, "nbe");
5199   %}
5200 %}
5201 
5202 
5203 //----------OPERAND CLASSES----------------------------------------------------
5204 // Operand Classes are groups of operands that are used to simplify
5205 // instruction definitions by not requiring the AD writer to specify separate
5206 // instructions for every form of operand when the instruction accepts
5207 // multiple operand types with the same basic encoding and format.  The classic
5208 // case of this is memory operands.
5209 
5210 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5211                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5212                indCompressedOopOffset,
5213                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5214                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5215                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5216 
5217 //----------PIPELINE-----------------------------------------------------------
5218 // Rules which define the behavior of the target architectures pipeline.
5219 pipeline %{
5220 
5221 //----------ATTRIBUTES---------------------------------------------------------
5222 attributes %{
5223   variable_size_instructions;        // Variable size instructions
5224   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5225   instruction_unit_size = 1;         // Instruction unit size is 1 byte
5226   instruction_fetch_unit_size = 16;  // The processor fetches one line
5227   instruction_fetch_units = 1;       // of 16 bytes
5228 
5229   // List of nop instructions
5230   nops( MachNop );
5231 %}
5232 
5233 //----------RESOURCES----------------------------------------------------------
5234 // Resources are the functional units available to the machine
5235 
5236 // Generic P2/P3 pipeline
5237 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5238 // 3 instructions decoded per cycle.
5239 // 2 load/store ops per cycle (NOTE(review): three MEM units MS0-MS2 are declared below -- confirm), 1 branch, 1 FPU,
5240 // 3 ALU ops, only ALU0 handles mul instructions.
5241 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5242            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5243            BR, FPU,
5244            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5245 
5246 //----------PIPELINE DESCRIPTION-----------------------------------------------
5247 // Pipeline Description specifies the stages in the machine's pipeline
5248 
5249 // Generic P2/P3 pipeline
5250 pipe_desc(S0, S1, S2, S3, S4, S5);
5251 
5252 //----------PIPELINE CLASSES---------------------------------------------------
5253 // Pipeline Classes describe the stages in which input and output are
5254 // referenced by the hardware pipeline.
5255 
5256 // Naming convention: ialu or fpu
5257 // Then: _reg
5258 // Then: _reg if there is a 2nd register
5259 // Then: _long if it's a pair of instructions implementing a long
5260 // Then: _fat if it requires the big decoder
5261 //   Or: _mem if it requires the big decoder and a memory unit.
5262 
5263 // Integer ALU reg operation
5264 pipe_class ialu_reg(rRegI dst)
5265 %{
5266     single_instruction;
5267     dst    : S4(write);
5268     dst    : S3(read);
5269     DECODE : S0;        // any decoder
5270     ALU    : S3;        // any alu
5271 %}
5272 
5273 // Long ALU reg operation
5274 pipe_class ialu_reg_long(rRegL dst)
5275 %{
5276     instruction_count(2);
5277     dst    : S4(write);
5278     dst    : S3(read);
5279     DECODE : S0(2);     // any 2 decoders
5280     ALU    : S3(2);     // any 2 alus
5281 %}
5282 
5283 // Integer ALU reg operation using big decoder
5284 pipe_class ialu_reg_fat(rRegI dst)
5285 %{
5286     single_instruction;
5287     dst    : S4(write);
5288     dst    : S3(read);
5289     D0     : S0;        // big decoder only
5290     ALU    : S3;        // any alu
5291 %}
5292 
5293 // Long ALU reg operation using big decoder
5294 pipe_class ialu_reg_long_fat(rRegL dst)
5295 %{
5296     instruction_count(2);
5297     dst    : S4(write);
5298     dst    : S3(read);
5299     D0     : S0(2);     // big decoder only; twice
5300     ALU    : S3(2);     // any 2 alus
5301 %}
5302 
5303 // Integer ALU reg-reg operation
5304 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5305 %{
5306     single_instruction;
5307     dst    : S4(write);
5308     src    : S3(read);
5309     DECODE : S0;        // any decoder
5310     ALU    : S3;        // any alu
5311 %}
5312 
5313 // Long ALU reg-reg operation
5314 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5315 %{
5316     instruction_count(2);
5317     dst    : S4(write);
5318     src    : S3(read);
5319     DECODE : S0(2);     // any 2 decoders
5320     ALU    : S3(2);     // any 2 alus
5321 %}
5322 
5323 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
5324 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5325 %{
5326     single_instruction;
5327     dst    : S4(write);
5328     src    : S3(read);
5329     D0     : S0;        // big decoder only
5330     ALU    : S3;        // any alu
5331 %}
5332 
5333 // Long ALU reg-reg operation using big decoder
5334 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5335 %{
5336     instruction_count(2);
5337     dst    : S4(write);
5338     src    : S3(read);
5339     D0     : S0(2);     // big decoder only; twice
5340     ALU    : S3(2);     // any 2 alus
5341 %}
5342 
5343 // Integer ALU reg-mem operation
5344 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5345 %{
5346     single_instruction;
5347     dst    : S5(write);
5348     mem    : S3(read);
5349     D0     : S0;        // big decoder only
5350     ALU    : S4;        // any alu
5351     MEM    : S3;        // any mem
5352 %}
5353 
5354 // Integer mem operation (prefetch)
5355 pipe_class ialu_mem(memory mem)
5356 %{
5357     single_instruction;
5358     mem    : S3(read);
5359     D0     : S0;        // big decoder only
5360     MEM    : S3;        // any mem
5361 %}
5362 
5363 // Integer Store to Memory
5364 pipe_class ialu_mem_reg(memory mem, rRegI src)
5365 %{
5366     single_instruction;
5367     mem    : S3(read);
5368     src    : S5(read);
5369     D0     : S0;        // big decoder only
5370     ALU    : S4;        // any alu
5371     MEM    : S3;
5372 %}
5373 
5374 // // Long Store to Memory
5375 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5376 // %{
5377 //     instruction_count(2);
5378 //     mem    : S3(read);
5379 //     src    : S5(read);
5380 //     D0     : S0(2);          // big decoder only; twice
5381 //     ALU    : S4(2);     // any 2 alus
5382 //     MEM    : S3(2);  // Both mems
5383 // %}
5384 
5385 // Integer Store to Memory with no register source (presumably an immediate, per the _imm suffix)
5386 pipe_class ialu_mem_imm(memory mem)
5387 %{
5388     single_instruction;
5389     mem    : S3(read);
5390     D0     : S0;        // big decoder only
5391     ALU    : S4;        // any alu
5392     MEM    : S3;
5393 %}
5394 
5395 // Integer ALU0 reg-reg operation
5396 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5397 %{
5398     single_instruction;
5399     dst    : S4(write);
5400     src    : S3(read);
5401     D0     : S0;        // Big decoder only
5402     ALU0   : S3;        // only alu0
5403 %}
5404 
5405 // Integer ALU0 reg-mem operation
5406 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5407 %{
5408     single_instruction;
5409     dst    : S5(write);
5410     mem    : S3(read);
5411     D0     : S0;        // big decoder only
5412     ALU0   : S4;        // ALU0 only
5413     MEM    : S3;        // any mem
5414 %}
5415 
5416 // Integer ALU reg-reg operation that writes the flags register
5417 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5418 %{
5419     single_instruction;
5420     cr     : S4(write);
5421     src1   : S3(read);
5422     src2   : S3(read);
5423     DECODE : S0;        // any decoder
5424     ALU    : S3;        // any alu
5425 %}
5426 
5427 // Integer ALU reg-imm operation that writes the flags register
5428 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5429 %{
5430     single_instruction;
5431     cr     : S4(write);
5432     src1   : S3(read);
5433     DECODE : S0;        // any decoder
5434     ALU    : S3;        // any alu
5435 %}
5436 
5437 // Integer ALU reg-mem operation that writes the flags register
5438 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5439 %{
5440     single_instruction;
5441     cr     : S4(write);
5442     src1   : S3(read);
5443     src2   : S3(read);
5444     D0     : S0;        // big decoder only
5445     ALU    : S4;        // any alu
5446     MEM    : S3;
5447 %}
5448 
5449 // Four-instruction sequence that reads p, q and y (presumably a compare-less-than idiom, per the name -- confirm)
5450 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5451 %{
5452     instruction_count(4);
5453     y      : S4(read);
5454     q      : S3(read);
5455     p      : S3(read);
5456     DECODE : S0(4);     // 4 decode slots, any decoder
5457 %}
5458 
5459 // Conditional move reg-reg
5460 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5461 %{
5462     single_instruction;
5463     dst    : S4(write);
5464     src    : S3(read);
5465     cr     : S3(read);
5466     DECODE : S0;        // any decoder
5467 %}
5468 
5469 // Conditional move reg-mem
5470 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5471 %{
5472     single_instruction;
5473     dst    : S4(write);
5474     src    : S3(read);
5475     cr     : S3(read);
5476     DECODE : S0;        // any decoder
5477     MEM    : S3;
5478 %}
5479 
5480 // Conditional move reg-reg long
5481 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5482 %{
5483     single_instruction;
5484     dst    : S4(write);
5485     src    : S3(read);
5486     cr     : S3(read);
5487     DECODE : S0(2);     // any 2 decoders
5488 %}
5489 
5490 // XXX
5491 // // Conditional move double reg-reg
5492 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5493 // %{
5494 //     single_instruction;
5495 //     dst    : S4(write);
5496 //     src    : S3(read);
5497 //     cr     : S3(read);
5498 //     DECODE : S0;     // any decoder
5499 // %}
5500 
5501 // Float reg operation
5502 pipe_class fpu_reg(regD dst)
5503 %{
5504     instruction_count(2);
5505     dst    : S3(read);
5506     DECODE : S0(2);     // any 2 decoders
5507     FPU    : S3;
5508 %}
5509 
5510 // Float reg-reg operation
5511 pipe_class fpu_reg_reg(regD dst, regD src)
5512 %{
5513     instruction_count(2);
5514     dst    : S4(write);
5515     src    : S3(read);
5516     DECODE : S0(2);     // any 2 decoders
5517     FPU    : S3;
5518 %}
5519 
5520 // Float reg-reg-reg operation
5521 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5522 %{
5523     instruction_count(3);
5524     dst    : S4(write);
5525     src1   : S3(read);
5526     src2   : S3(read);
5527     DECODE : S0(3);     // any 3 decoders
5528     FPU    : S3(2);
5529 %}
5530 
5531 // Float reg-reg-reg-reg operation
5532 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5533 %{
5534     instruction_count(4);
5535     dst    : S4(write);
5536     src1   : S3(read);
5537     src2   : S3(read);
5538     src3   : S3(read);
5539     DECODE : S0(4);     // 4 decode slots, any decoder
5540     FPU    : S3(2);
5541 %}
5542 
5543 // Float reg-mem-reg-reg operation
5544 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5545 %{
5546     instruction_count(4);
5547     dst    : S4(write);
5548     src1   : S3(read);
5549     src2   : S3(read);
5550     src3   : S3(read);
5551     DECODE : S1(3);     // any 3 decoders
5552     D0     : S0;        // Big decoder only
5553     FPU    : S3(2);
5554     MEM    : S3;
5555 %}
5556 
5557 // Float reg-mem operation
5558 pipe_class fpu_reg_mem(regD dst, memory mem)
5559 %{
5560     instruction_count(2);
5561     dst    : S5(write);
5562     mem    : S3(read);
5563     D0     : S0;        // big decoder only
5564     DECODE : S1;        // any decoder for FPU POP
5565     FPU    : S4;
5566     MEM    : S3;        // any mem
5567 %}
5568 
5569 // Float reg-reg-mem operation
5570 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5571 %{
5572     instruction_count(3);
5573     dst    : S5(write);
5574     src1   : S3(read);
5575     mem    : S3(read);
5576     D0     : S0;        // big decoder only
5577     DECODE : S1(2);     // any decoder for FPU POP
5578     FPU    : S4;
5579     MEM    : S3;        // any mem
5580 %}
5581 
5582 // Float mem-reg operation
5583 pipe_class fpu_mem_reg(memory mem, regD src)
5584 %{
5585     instruction_count(2);
5586     src    : S5(read);
5587     mem    : S3(read);
5588     DECODE : S0;        // any decoder for FPU PUSH
5589     D0     : S1;        // big decoder only
5590     FPU    : S4;
5591     MEM    : S3;        // any mem
5592 %}
5593 
5594 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5595 %{
5596     instruction_count(3);  // Float mem-reg-reg operation
5597     src1   : S3(read);
5598     src2   : S3(read);
5599     mem    : S3(read);
5600     DECODE : S0(2);     // any decoder for FPU PUSH
5601     D0     : S1;        // big decoder only
5602     FPU    : S4;
5603     MEM    : S3;        // any mem
5604 %}
5605 
5606 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5607 %{
5608     instruction_count(3);  // Float mem-reg-mem operation
5609     src1   : S3(read);
5610     src2   : S3(read);
5611     mem    : S4(read);
5612     DECODE : S0;        // any decoder for FPU PUSH
5613     D0     : S0(2);     // big decoder only
5614     FPU    : S4;
5615     MEM    : S3(2);     // any mem
5616 %}
5617 
5618 pipe_class fpu_mem_mem(memory dst, memory src1)
5619 %{
5620     instruction_count(2);  // mem-mem operation; uses two MEM slots and no FPU resource
5621     src1   : S3(read);
5622     dst    : S4(read);
5623     D0     : S0(2);     // big decoder only
5624     MEM    : S3(2);     // any mem
5625 %}
5626 
5627 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5628 %{
5629     instruction_count(3);  // Float mem-mem-mem operation
5630     src1   : S3(read);
5631     src2   : S3(read);
5632     dst    : S4(read);
5633     D0     : S0(3);     // big decoder only
5634     FPU    : S4;
5635     MEM    : S3(3);     // any mem
5636 %}
5637 
5638 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5639 %{
         // Float mem-reg-constant operation, counted as three instructions.
         // Both src1 and mem are read at S4.
5640     instruction_count(3);
5641     src1   : S4(read);
5642     mem    : S4(read);
5643     DECODE : S0;        // any decoder for FPU PUSH
5644     D0     : S0(2);     // big decoder only
5645     FPU    : S4;
5646     MEM    : S3(2);     // any mem
5647 %}
5648 
5649 // Float load constant
     // Pipeline class counted as two instructions; dst is written at stage S5.
     // Claims D0 at S0, DECODE at S1, FPU at S4 and MEM at S3.
5650 pipe_class fpu_reg_con(regD dst)
5651 %{
5652     instruction_count(2);
5653     dst    : S5(write);
5654     D0     : S0;        // big decoder only for the load
5655     DECODE : S1;        // any decoder for FPU POP
5656     FPU    : S4;
5657     MEM    : S3;        // any mem
5658 %}
5659 
5660 // Float reg-constant operation (register source combined with a loaded constant)
     // Pipeline class counted as three instructions: dst is written at S5 and
     // src is read at S3.
5661 pipe_class fpu_reg_reg_con(regD dst, regD src)
5662 %{
5663     instruction_count(3);
5664     dst    : S5(write);
5665     src    : S3(read);
5666     D0     : S0;        // big decoder only for the load
5667     DECODE : S1(2);     // any decoder for FPU POP
5668     FPU    : S4;
5669     MEM    : S3;        // any mem
5670 %}
5671 
5672 // Unconditional branch
     // Single instruction that claims the BR resource at stage S3; the label
     // operand has no stage declaration.
5673 pipe_class pipe_jmp(label labl)
5674 %{
5675     single_instruction;
5676     BR   : S3;
5677 %}
5678 
5679 // Conditional branch
     // Single instruction that reads the flags operand cr at stage S1 and
     // claims the BR resource at S3.
5680 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5681 %{
5682     single_instruction;
5683     cr    : S1(read);
5684     BR    : S3;
5685 %}
5686 
5687 // Allocation idiom
     // One serializing instruction with a fixed latency of 6.  heap_ptr is
     // read at S3 and dst is written at S5; the class claims DECODE, D0, MEM,
     // ALU and BR resources at the stages listed below.
5688 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5689 %{
5690     instruction_count(1); force_serialization;
5691     fixed_latency(6);
5692     heap_ptr : S3(read);
5693     DECODE   : S0(3);
5694     D0       : S2;
5695     MEM      : S3;
5696     ALU      : S3(2);
5697     dst      : S5(write);
5698     BR       : S5;
5699 %}
5700 
5701 // Generic big/slow expanded idiom
     // Catch-all class: counted as 10 instructions spanning multiple bundles,
     // serializing, with a fixed latency of 100.  It has no operands and only
     // claims D0 at S0 and MEM at S3.
5702 pipe_class pipe_slow()
5703 %{
5704     instruction_count(10); multiple_bundles; force_serialization;
5705     fixed_latency(100);
5706     D0  : S0(2);
5707     MEM : S3(2);
5708 %}
5709 
5710 // The real do-nothing guy
     // Pipeline class with an instruction count of zero and no operands or
     // resources; it is the class assigned to MachNop.
5711 pipe_class empty()
5712 %{
5713     instruction_count(0);
5714 %}
5715 
5716 // Define the class for the Nop node
     // MachNop is bound to the 'empty' pipe class.
5717 define
5718 %{
5719    MachNop = empty;
5720 %}
5721 
5722 %}
5723 
5724 //----------INSTRUCTIONS-------------------------------------------------------
5725 //
5726 // match      -- States which machine-independent subtree may be replaced
5727 //               by this instruction.
5728 // ins_cost   -- The estimated cost of this instruction is used by instruction
5729 //               selection to identify a minimum cost tree of machine
5730 //               instructions that matches a tree of machine-independent
5731 //               instructions.
5732 // format     -- A string providing the disassembly for this instruction.
5733 //               The value of an instruction's operand may be inserted
5734 //               by referring to it with a '$' prefix.
5735 // opcode     -- Three instruction opcodes may be provided.  These are referred
5736 //               to within an encode class as $primary, $secondary, and $tertiary
5737 //               respectively.  The primary opcode is commonly used to
5738 //               indicate the type of machine instruction, while secondary
5739 //               and tertiary are often used for prefix options or addressing
5740 //               modes.
5741 // ins_encode -- A list of encode classes with parameters. The encode class
5742 //               name must have been defined in an 'enc_class' specification
5743 //               in the encode section of the architecture description.
5744 
5745 
5746 //----------Load/Store/Move Instructions---------------------------------------
5747 //----------Load Instructions--------------------------------------------------
5748 
5749 // Load Byte (8 bit signed)
     // Matches (LoadB mem) and emits movsbl from $mem into the int register $dst.
5750 instruct loadB(rRegI dst, memory mem)
5751 %{
5752   match(Set dst (LoadB mem));
5753
5754   ins_cost(125);
5755   format %{ "movsbl  $dst, $mem\t# byte" %}
5756
5757   ins_encode %{
5758     __ movsbl($dst$$Register, $mem$$Address);
5759   %}
5760
5761   ins_pipe(ialu_reg_mem);
5762 %}
5763 
5764 // Load Byte (8 bit signed) into Long Register
     // Folds the ConvI2L into the load: (ConvI2L (LoadB mem)) becomes one movsbq.
5765 instruct loadB2L(rRegL dst, memory mem)
5766 %{
5767   match(Set dst (ConvI2L (LoadB mem)));
5768
5769   ins_cost(125);
5770   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5771
5772   ins_encode %{
5773     __ movsbq($dst$$Register, $mem$$Address);
5774   %}
5775
5776   ins_pipe(ialu_reg_mem);
5777 %}
5778 
5779 // Load Unsigned Byte (8 bit UNsigned)
     // Matches (LoadUB mem) and emits movzbl from $mem into the int register $dst.
5780 instruct loadUB(rRegI dst, memory mem)
5781 %{
5782   match(Set dst (LoadUB mem));
5783
5784   ins_cost(125);
5785   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5786
5787   ins_encode %{
5788     __ movzbl($dst$$Register, $mem$$Address);
5789   %}
5790
5791   ins_pipe(ialu_reg_mem);
5792 %}
5793 
5794 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds the ConvI2L into the load: (ConvI2L (LoadUB mem)) becomes one movzbq.
5795 instruct loadUB2L(rRegL dst, memory mem)
5796 %{
5797   match(Set dst (ConvI2L (LoadUB mem)));
5798
5799   ins_cost(125);
5800   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5801
5802   ins_encode %{
5803     __ movzbq($dst$$Register, $mem$$Address);
5804   %}
5805
5806   ins_pipe(ialu_reg_mem);
5807 %}
5808 
5809 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register
     // Emits two instructions: movzbq to load the byte, then andl with the
     // constant mask.  The andl modifies the flags, hence KILL cr.  No explicit
     // ins_cost is given for this rule.
5810 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5811   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5812   effect(KILL cr);
5813
5814   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5815             "andl    $dst, $mask" %}
5816   ins_encode %{
5817     Register Rdst = $dst$$Register;
5818     __ movzbq(Rdst, $mem$$Address);
5819     __ andl(Rdst, $mask$$constant);
5820   %}
5821   ins_pipe(ialu_reg_mem);
5822 %}
5823 
5824 // Load Short (16 bit signed)
     // Matches (LoadS mem) and emits movswl from $mem into the int register $dst.
5825 instruct loadS(rRegI dst, memory mem)
5826 %{
5827   match(Set dst (LoadS mem));
5828
5829   ins_cost(125);
5830   format %{ "movswl $dst, $mem\t# short" %}
5831
5832   ins_encode %{
5833     __ movswl($dst$$Register, $mem$$Address);
5834   %}
5835
5836   ins_pipe(ialu_reg_mem);
5837 %}
5838 
5839 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a loaded short and
     // replaces it with a single movsbl from the same address.
5840 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5841   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5842
5843   ins_cost(125);
5844   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5845   ins_encode %{
5846     __ movsbl($dst$$Register, $mem$$Address);
5847   %}
5848   ins_pipe(ialu_reg_mem);
5849 %}
5850 
5851 // Load Short (16 bit signed) into Long Register
     // Folds the ConvI2L into the load: (ConvI2L (LoadS mem)) becomes one movswq.
5852 instruct loadS2L(rRegL dst, memory mem)
5853 %{
5854   match(Set dst (ConvI2L (LoadS mem)));
5855
5856   ins_cost(125);
5857   format %{ "movswq $dst, $mem\t# short -> long" %}
5858
5859   ins_encode %{
5860     __ movswq($dst$$Register, $mem$$Address);
5861   %}
5862
5863   ins_pipe(ialu_reg_mem);
5864 %}
5865 
5866 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches (LoadUS mem) and emits movzwl from $mem into the int register $dst.
5867 instruct loadUS(rRegI dst, memory mem)
5868 %{
5869   match(Set dst (LoadUS mem));
5870
5871   ins_cost(125);
5872   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5873
5874   ins_encode %{
5875     __ movzwl($dst$$Register, $mem$$Address);
5876   %}
5877
5878   ins_pipe(ialu_reg_mem);
5879 %}
5880 
5881 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a loaded char and
     // replaces it with a single movsbl from the same address.
5882 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5883   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5884
5885   ins_cost(125);
5886   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5887   ins_encode %{
5888     __ movsbl($dst$$Register, $mem$$Address);
5889   %}
5890   ins_pipe(ialu_reg_mem);
5891 %}
5892 
5893 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds the ConvI2L into the load: (ConvI2L (LoadUS mem)) becomes one movzwq.
5894 instruct loadUS2L(rRegL dst, memory mem)
5895 %{
5896   match(Set dst (ConvI2L (LoadUS mem)));
5897
5898   ins_cost(125);
5899   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5900
5901   ins_encode %{
5902     __ movzwq($dst$$Register, $mem$$Address);
5903   %}
5904
5905   ins_pipe(ialu_reg_mem);
5906 %}
5907 
5908 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // The 0xFF mask keeps only the low byte, so the load, mask and conversion
     // collapse into a single movzbq.  No explicit ins_cost is given.
5909 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5910   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5911
5912   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5913   ins_encode %{
5914     __ movzbq($dst$$Register, $mem$$Address);
5915   %}
5916   ins_pipe(ialu_reg_mem);
5917 %}
5918 
5919 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Emits two instructions: movzwq to load the char, then andl with the
     // constant mask.  The andl modifies the flags, hence KILL cr.  No explicit
     // ins_cost is given for this rule.
5920 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5921   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5922   effect(KILL cr);
5923
5924   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5925             "andl    $dst, $mask" %}
5926   ins_encode %{
5927     Register Rdst = $dst$$Register;
5928     __ movzwq(Rdst, $mem$$Address);
5929     __ andl(Rdst, $mask$$constant);
5930   %}
5931   ins_pipe(ialu_reg_mem);
5932 %}
5933 
5934 // Load Integer
     // Matches (LoadI mem) and emits movl from $mem into the int register $dst.
5935 instruct loadI(rRegI dst, memory mem)
5936 %{
5937   match(Set dst (LoadI mem));
5938
5939   ins_cost(125);
5940   format %{ "movl    $dst, $mem\t# int" %}
5941
5942   ins_encode %{
5943     __ movl($dst$$Register, $mem$$Address);
5944   %}
5945
5946   ins_pipe(ialu_reg_mem);
5947 %}
5948 
5949 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a loaded int and
     // replaces it with a single movsbl from the same address.
5950 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5951   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5952
5953   ins_cost(125);
5954   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5955   ins_encode %{
5956     __ movsbl($dst$$Register, $mem$$Address);
5957   %}
5958   ins_pipe(ialu_reg_mem);
5959 %}
5960 
5961 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Matches a loaded int masked with the immI_255 constant and replaces the
     // load + and with a single movzbl from the same address.
5962 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5963   match(Set dst (AndI (LoadI mem) mask));
5964
5965   ins_cost(125);
5966   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5967   ins_encode %{
5968     __ movzbl($dst$$Register, $mem$$Address);
5969   %}
5970   ins_pipe(ialu_reg_mem);
5971 %}
5972 
5973 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Matches the (x << 16) >> 16 narrowing idiom applied to a loaded int and
     // replaces it with a single movswl from the same address.
5974 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5975   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5976
5977   ins_cost(125);
5978   format %{ "movswl  $dst, $mem\t# int -> short" %}
5979   ins_encode %{
5980     __ movswl($dst$$Register, $mem$$Address);
5981   %}
5982   ins_pipe(ialu_reg_mem);
5983 %}
5984 
5985 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Matches a loaded int masked with the immI_65535 constant and replaces
     // the load + and with a single movzwl from the same address.
5986 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5987   match(Set dst (AndI (LoadI mem) mask));
5988
5989   ins_cost(125);
5990   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5991   ins_encode %{
5992     __ movzwl($dst$$Register, $mem$$Address);
5993   %}
5994   ins_pipe(ialu_reg_mem);
5995 %}
5996 
5997 // Load Integer into Long Register
     // Folds the ConvI2L into the load: (ConvI2L (LoadI mem)) becomes one movslq.
5998 instruct loadI2L(rRegL dst, memory mem)
5999 %{
6000   match(Set dst (ConvI2L (LoadI mem)));
6001
6002   ins_cost(125);
6003   format %{ "movslq  $dst, $mem\t# int -> long" %}
6004
6005   ins_encode %{
6006     __ movslq($dst$$Register, $mem$$Address);
6007   %}
6008
6009   ins_pipe(ialu_reg_mem);
6010 %}
6011 
6012 // Load Integer with mask 0xFF into Long Register
     // The 0xFF mask keeps only the low byte, so the load, mask and conversion
     // collapse into a single movzbq.  No explicit ins_cost is given.
6013 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6014   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6015
6016   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6017   ins_encode %{
6018     __ movzbq($dst$$Register, $mem$$Address);
6019   %}
6020   ins_pipe(ialu_reg_mem);
6021 %}
6022 
6023 // Load Integer with mask 0xFFFF into Long Register
     // The 0xFFFF mask keeps only the low 16 bits, so the load, mask and
     // conversion collapse into a single movzwq.  No explicit ins_cost is given.
6024 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6025   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6026
6027   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6028   ins_encode %{
6029     __ movzwq($dst$$Register, $mem$$Address);
6030   %}
6031   ins_pipe(ialu_reg_mem);
6032 %}
6033 
6034 // Load Integer with a non-negative 32-bit mask into Long Register
     // Emits movl followed by andl, which leaves the upper 32 bits of $dst
     // zero.  That equals the sign-extending ConvI2L result only when the AndI
     // result cannot be negative.  A mask with the sign bit set can produce a
     // negative int from a negative memory value, so the predicate restricts
     // this rule to masks >= 0; other masks are left to the remaining rules.
     // In the predicate, n is the ConvI2L node, n->in(1) the AndI and
     // n->in(1)->in(2) the constant mask.  The andl modifies flags (KILL cr).
6035 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6036   predicate(n->in(1)->in(2)->get_int() >= 0);
6037   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6038   effect(KILL cr);
6039   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6040             "andl    $dst, $mask" %}
6041   ins_encode %{
6042     Register Rdst = $dst$$Register;
6043     __ movl(Rdst, $mem$$Address);
6044     __ andl(Rdst, $mask$$constant);
6045   %}
6046   ins_pipe(ialu_reg_mem);
6047 %}
6048 
6049 // Load Unsigned Integer into Long Register
     // Matches the LoadUI2L ideal node and emits a plain movl into the long
     // register $dst.
6050 instruct loadUI2L(rRegL dst, memory mem)
6051 %{
6052   match(Set dst (LoadUI2L mem));
6053
6054   ins_cost(125);
6055   format %{ "movl    $dst, $mem\t# uint -> long" %}
6056
6057   ins_encode %{
6058     __ movl($dst$$Register, $mem$$Address);
6059   %}
6060
6061   ins_pipe(ialu_reg_mem);
6062 %}
6063 
6064 // Load Long
     // Matches (LoadL mem) and emits movq from $mem into the long register $dst.
6065 instruct loadL(rRegL dst, memory mem)
6066 %{
6067   match(Set dst (LoadL mem));
6068
6069   ins_cost(125);
6070   format %{ "movq    $dst, $mem\t# long" %}
6071
6072   ins_encode %{
6073     __ movq($dst$$Register, $mem$$Address);
6074   %}
6075
6076   ins_pipe(ialu_reg_mem); // XXX
6077 %}
6078 
6079 // Load Range
     // Matches (LoadRange mem) into an int register.  Encoded from enc_classes:
     // REX prefix, primary opcode 0x8B (printed as movl), then the reg/mem operands.
6080 instruct loadRange(rRegI dst, memory mem)
6081 %{
6082   match(Set dst (LoadRange mem));
6083
6084   ins_cost(125); // XXX
6085   format %{ "movl    $dst, $mem\t# range" %}
6086   opcode(0x8B);
6087   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6088   ins_pipe(ialu_reg_mem);
6089 %}
6090 
6091 // Load Pointer
     // Matches (LoadP mem) into a pointer register.  Encoded from enc_classes:
     // REX_reg_mem_wide prefix, primary opcode 0x8B (printed as movq), operands.
6092 instruct loadP(rRegP dst, memory mem)
6093 %{
6094   match(Set dst (LoadP mem));
6095
6096   ins_cost(125); // XXX
6097   format %{ "movq    $dst, $mem\t# ptr" %}
6098   opcode(0x8B);
6099   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6100   ins_pipe(ialu_reg_mem); // XXX
6101 %}
6102 
6103 // Load Compressed Pointer
     // Matches (LoadN mem) and emits a 32-bit movl into the narrow-oop
     // register $dst.
6104 instruct loadN(rRegN dst, memory mem)
6105 %{
6106    match(Set dst (LoadN mem));
6107
6108    ins_cost(125); // XXX
6109    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6110    ins_encode %{
6111      __ movl($dst$$Register, $mem$$Address);
6112    %}
6113    ins_pipe(ialu_reg_mem); // XXX
6114 %}
6115 
6116 
6117 // Load Klass Pointer
     // Matches (LoadKlass mem) into a pointer register; same encoding as loadP
     // (REX_reg_mem_wide prefix, opcode 0x8B, printed as movq).
6118 instruct loadKlass(rRegP dst, memory mem)
6119 %{
6120   match(Set dst (LoadKlass mem));
6121
6122   ins_cost(125); // XXX
6123   format %{ "movq    $dst, $mem\t# class" %}
6124   opcode(0x8B);
6125   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6126   ins_pipe(ialu_reg_mem); // XXX
6127 %}
6128 
6129 // Load narrow Klass Pointer
     // Matches (LoadNKlass mem) and emits a 32-bit movl into the narrow
     // register $dst.
6130 instruct loadNKlass(rRegN dst, memory mem)
6131 %{
6132   match(Set dst (LoadNKlass mem));
6133
6134   ins_cost(125); // XXX
6135   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6136   ins_encode %{
6137     __ movl($dst$$Register, $mem$$Address);
6138   %}
6139   ins_pipe(ialu_reg_mem); // XXX
6140 %}
6141 
6142 // Load Float
     // Matches (LoadF mem) into a float register.  Encoded as opcode bytes
     // 0xF3 0x0F 0x10 (printed as movss) with the REX prefix emitted after the
     // first opcode byte.
6143 instruct loadF(regF dst, memory mem)
6144 %{
6145   match(Set dst (LoadF mem));
6146
6147   ins_cost(145); // XXX
6148   format %{ "movss   $dst, $mem\t# float" %}
6149   opcode(0xF3, 0x0F, 0x10);
6150   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6151   ins_pipe(pipe_slow); // XXX
6152 %}
6153 
6154 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is false: matches
     // (LoadD mem) and encodes opcode bytes 0x66 0x0F 0x12 (printed as movlpd).
6155 instruct loadD_partial(regD dst, memory mem)
6156 %{
6157   predicate(!UseXmmLoadAndClearUpper);
6158   match(Set dst (LoadD mem));
6159
6160   ins_cost(145); // XXX
6161   format %{ "movlpd  $dst, $mem\t# double" %}
6162   opcode(0x66, 0x0F, 0x12);
6163   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6164   ins_pipe(pipe_slow); // XXX
6165 %}
6166 
6167 instruct loadD(regD dst, memory mem)
6168 %{
       // Load Double, variant selected when UseXmmLoadAndClearUpper is true:
       // matches (LoadD mem) and encodes opcode bytes 0xF2 0x0F 0x10 (printed
       // as movsd).
6169   predicate(UseXmmLoadAndClearUpper);
6170   match(Set dst (LoadD mem));
6171
6172   ins_cost(145); // XXX
6173   format %{ "movsd   $dst, $mem\t# double" %}
6174   opcode(0xF2, 0x0F, 0x10);
6175   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6176   ins_pipe(pipe_slow); // XXX
6177 %}
6178 
6179 // Load Aligned Packed Byte to XMM register
     // Matches (Load8B mem) into a regD operand using the movq_ld enc_class.
6180 instruct loadA8B(regD dst, memory mem) %{
6181   match(Set dst (Load8B mem));
6182   ins_cost(125);
6183   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6184   ins_encode( movq_ld(dst, mem));
6185   ins_pipe( pipe_slow );
6186 %}
6187 
6188 // Load Aligned Packed Short to XMM register
     // Matches (Load4S mem) into a regD operand using the movq_ld enc_class.
6189 instruct loadA4S(regD dst, memory mem) %{
6190   match(Set dst (Load4S mem));
6191   ins_cost(125);
6192   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6193   ins_encode( movq_ld(dst, mem));
6194   ins_pipe( pipe_slow );
6195 %}
6196 
6197 // Load Aligned Packed Char to XMM register
     // Matches (Load4C mem) into a regD operand using the movq_ld enc_class.
6198 instruct loadA4C(regD dst, memory mem) %{
6199   match(Set dst (Load4C mem));
6200   ins_cost(125);
6201   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6202   ins_encode( movq_ld(dst, mem));
6203   ins_pipe( pipe_slow );
6204 %}
6205 
6206 // Load Aligned Packed Integer to XMM register
     // Matches (Load2I mem) into a regD operand using the movq_ld enc_class.
     // NOTE(review): the name does not follow the loadA* pattern of the
     // neighbouring packed loads; kept as-is because it is the rule's identifier.
6207 instruct load2IU(regD dst, memory mem) %{
6208   match(Set dst (Load2I mem));
6209   ins_cost(125);
6210   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6211   ins_encode( movq_ld(dst, mem));
6212   ins_pipe( pipe_slow );
6213 %}
6214 
6215 // Load Aligned Packed Single to XMM
     // Matches (Load2F mem) into a regD operand using the movq_ld enc_class;
     // costed at 145 rather than the 125 used by the integer packed loads.
6216 instruct loadA2F(regD dst, memory mem) %{
6217   match(Set dst (Load2F mem));
6218   ins_cost(145);
6219   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6220   ins_encode( movq_ld(dst, mem));
6221   ins_pipe( pipe_slow );
6222 %}
6223 
6224 // Load Effective Address
     // Matches an indOffset8 address operand used as a value and computes it
     // into $dst: REX_reg_mem_wide prefix, opcode 0x8D (printed as leaq).
6225 instruct leaP8(rRegP dst, indOffset8 mem)
6226 %{
6227   match(Set dst mem);
6228
6229   ins_cost(110); // XXX
6230   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6231   opcode(0x8D);
6232   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6233   ins_pipe(ialu_reg_reg_fat);
6234 %}
6235 
6236 instruct leaP32(rRegP dst, indOffset32 mem)
6237 %{
       // Load Effective Address for an indOffset32 operand: same leaq encoding
       // (opcode 0x8D with REX_reg_mem_wide) as the other lea rules.
6238   match(Set dst mem);
6239
6240   ins_cost(110);
6241   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6242   opcode(0x8D);
6243   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6244   ins_pipe(ialu_reg_reg_fat);
6245 %}
6246 
6247 // instruct leaPIdx(rRegP dst, indIndex mem)
6248 // %{
6249 //   match(Set dst mem);
6250 
6251 //   ins_cost(110);
6252 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6253 //   opcode(0x8D);
6254 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6255 //   ins_pipe(ialu_reg_reg_fat);
6256 // %}
6257 
6258 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6259 %{
       // Load Effective Address for an indIndexOffset operand, emitted as leaq
       // (opcode 0x8D with REX_reg_mem_wide).
6260   match(Set dst mem);
6261
6262   ins_cost(110);
6263   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6264   opcode(0x8D);
6265   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6266   ins_pipe(ialu_reg_reg_fat);
6267 %}
6268 
6269 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6270 %{
       // Load Effective Address for an indIndexScale operand, emitted as leaq
       // (opcode 0x8D with REX_reg_mem_wide).
6271   match(Set dst mem);
6272
6273   ins_cost(110);
6274   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6275   opcode(0x8D);
6276   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6277   ins_pipe(ialu_reg_reg_fat);
6278 %}
6279 
6280 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6281 %{
       // Load Effective Address for an indIndexScaleOffset operand, emitted as
       // leaq (opcode 0x8D with REX_reg_mem_wide).
6282   match(Set dst mem);
6283
6284   ins_cost(110);
6285   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6286   opcode(0x8D);
6287   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6288   ins_pipe(ialu_reg_reg_fat);
6289 %}
6290 
6291 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6292 %{
       // Load Effective Address for an indPosIndexScaleOffset operand, emitted
       // as leaq (opcode 0x8D with REX_reg_mem_wide).
6293   match(Set dst mem);
6294
6295   ins_cost(110);
6296   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6297   opcode(0x8D);
6298   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6299   ins_pipe(ialu_reg_reg_fat);
6300 %}
6301 
6302 // Load Effective Address which uses Narrow (32-bits) oop
     // Only applies when compressed oops are enabled with a non-zero
     // narrow_oop_shift; the indCompressedOopOffset operand is emitted as leaq.
6303 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6304 %{
6305   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6306   match(Set dst mem);
6307
6308   ins_cost(110);
6309   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6310   opcode(0x8D);
6311   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6312   ins_pipe(ialu_reg_reg_fat);
6313 %}
6314 
6315 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6316 %{
       // Load Effective Address for an indOffset8Narrow operand.  Restricted
       // by the predicate to the case where narrow_oop_shift() is zero.
6317   predicate(Universe::narrow_oop_shift() == 0);
6318   match(Set dst mem);
6319
6320   ins_cost(110); // XXX
6321   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6322   opcode(0x8D);
6323   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6324   ins_pipe(ialu_reg_reg_fat);
6325 %}
6326 
6327 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6328 %{
       // Load Effective Address for an indOffset32Narrow operand.  Restricted
       // by the predicate to the case where narrow_oop_shift() is zero.
6329   predicate(Universe::narrow_oop_shift() == 0);
6330   match(Set dst mem);
6331
6332   ins_cost(110);
6333   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6334   opcode(0x8D);
6335   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6336   ins_pipe(ialu_reg_reg_fat);
6337 %}
6338 
6339 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6340 %{
       // Load Effective Address for an indIndexOffsetNarrow operand.  Restricted
       // by the predicate to the case where narrow_oop_shift() is zero.
6341   predicate(Universe::narrow_oop_shift() == 0);
6342   match(Set dst mem);
6343
6344   ins_cost(110);
6345   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6346   opcode(0x8D);
6347   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6348   ins_pipe(ialu_reg_reg_fat);
6349 %}
6350 
6351 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6352 %{
       // Load Effective Address for an indIndexScaleNarrow operand.  Restricted
       // by the predicate to the case where narrow_oop_shift() is zero.
6353   predicate(Universe::narrow_oop_shift() == 0);
6354   match(Set dst mem);
6355
6356   ins_cost(110);
6357   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6358   opcode(0x8D);
6359   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6360   ins_pipe(ialu_reg_reg_fat);
6361 %}
6362 
6363 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6364 %{
       // Load Effective Address for an indIndexScaleOffsetNarrow operand.
       // Restricted by the predicate to the case where narrow_oop_shift() is zero.
6365   predicate(Universe::narrow_oop_shift() == 0);
6366   match(Set dst mem);
6367
6368   ins_cost(110);
6369   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6370   opcode(0x8D);
6371   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6372   ins_pipe(ialu_reg_reg_fat);
6373 %}
6374 
6375 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6376 %{
       // Load Effective Address for an indPosIndexScaleOffsetNarrow operand.
       // Restricted by the predicate to the case where narrow_oop_shift() is zero.
6377   predicate(Universe::narrow_oop_shift() == 0);
6378   match(Set dst mem);
6379
6380   ins_cost(110);
6381   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6382   opcode(0x8D);
6383   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6384   ins_pipe(ialu_reg_reg_fat);
6385 %}
6386 
6387 instruct loadConI(rRegI dst, immI src)
6388 %{
       // Load an arbitrary int constant into $dst through the load_immI
       // enc_class (printed as movl).  No explicit ins_cost is given.
6389   match(Set dst src);
6390
6391   format %{ "movl    $dst, $src\t# int" %}
6392   ins_encode(load_immI(dst, src));
6393   ins_pipe(ialu_reg_fat); // XXX
6394 %}
6395 
6396 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6397 %{
       // Load int constant zero by xor-ing $dst with itself (opcode 0x33,
       // printed as xorl).  The xor modifies the flags, hence KILL cr; the cost
       // of 50 is lower than the generic loadConI rule's default.
6398   match(Set dst src);
6399   effect(KILL cr);
6400
6401   ins_cost(50);
6402   format %{ "xorl    $dst, $dst\t# int" %}
6403   opcode(0x33); /* + rd */
6404   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6405   ins_pipe(ialu_reg);
6406 %}
6407 
6408 instruct loadConL(rRegL dst, immL src)
6409 %{
       // Load an arbitrary long constant through the load_immL enc_class
       // (printed as movq).  Cost 150 is the highest of the long-constant
       // rules in this group (loadConL0: 50, loadConUL32: 60, loadConL32: 70).
6410   match(Set dst src);
6411
6412   ins_cost(150);
6413   format %{ "movq    $dst, $src\t# long" %}
6414   ins_encode(load_immL(dst, src));
6415   ins_pipe(ialu_reg);
6416 %}
6417 
6418 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6419 %{
       // Load long constant zero by xor-ing $dst with itself.  The encoding
       // uses the non-wide REX_reg_reg prefix with opcode 0x33, matching the
       // xorl shown in the format.  The xor modifies the flags, hence KILL cr.
6420   match(Set dst src);
6421   effect(KILL cr);
6422
6423   ins_cost(50);
6424   format %{ "xorl    $dst, $dst\t# long" %}
6425   opcode(0x33); /* + rd */
6426   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6427   ins_pipe(ialu_reg); // XXX
6428 %}
6429 
6430 instruct loadConUL32(rRegL dst, immUL32 src)
6431 %{
       // Load a long constant that fits the immUL32 operand (unsigned 32-bit
       // per the format comment) through the load_immUL32 enc_class, printed
       // as movl.
6432   match(Set dst src);
6433
6434   ins_cost(60);
6435   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6436   ins_encode(load_immUL32(dst, src));
6437   ins_pipe(ialu_reg);
6438 %}
6439 
6440 instruct loadConL32(rRegL dst, immL32 src)
6441 %{
       // Load a long constant that fits the immL32 operand (32-bit per the
       // format comment) through the load_immL32 enc_class, printed as movq.
6442   match(Set dst src);
6443
6444   ins_cost(70);
6445   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6446   ins_encode(load_immL32(dst, src));
6447   ins_pipe(ialu_reg);
6448 %}
6449 
6450 instruct loadConP(rRegP dst, immP con) %{
       // Load an arbitrary pointer constant through the load_immP enc_class
       // (printed as movq).  No explicit ins_cost is given.
6451   match(Set dst con);
6452
6453   format %{ "movq    $dst, $con\t# ptr" %}
6454   ins_encode(load_immP(dst, con));
6455   ins_pipe(ialu_reg_fat); // XXX
6456 %}
6457 
6458 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6459 %{
       // Load the immP0 pointer constant by xor-ing $dst with itself (opcode
       // 0x33 with the non-wide REX_reg_reg prefix, printed as xorl).  The xor
       // modifies the flags, hence KILL cr.
6460   match(Set dst src);
6461   effect(KILL cr);
6462
6463   ins_cost(50);
6464   format %{ "xorl    $dst, $dst\t# ptr" %}
6465   opcode(0x33); /* + rd */
6466   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6467   ins_pipe(ialu_reg);
6468 %}
6469 
6470 instruct loadConP_poll(rRegP dst, immP_poll src) %{
       // Load the polling page address.  The address comes from
       // os::get_polling_page() and is wrapped in an AddressLiteral carrying a
       // relocInfo::poll_type relocation.  The format prints movq although the
       // encoding calls the assembler's lea().
6471   match(Set dst src);
6472   format %{ "movq    $dst, $src\t!ptr" %}
6473   ins_encode %{
6474     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
6475     __ lea($dst$$Register, polling_page);
6476   %}
6477   ins_pipe(ialu_reg_fat);
6478 %}
6479 
6480 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6481 %{
       // Load a pointer constant that fits the immP31 operand (positive 32-bit
       // per the format comment) through the load_immP31 enc_class, printed as
       // movl.
       // NOTE(review): cr is declared killed although the format shows only a
       // movl; confirm against load_immP31 whether the flags are really touched.
6482   match(Set dst src);
6483   effect(KILL cr);
6484
6485   ins_cost(60);
6486   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6487   ins_encode(load_immP31(dst, src));
6488   ins_pipe(ialu_reg);
6489 %}
6490 
6491 instruct loadConF(regF dst, immF con) %{
       // Load a float constant from the constant table: $constantaddress($con)
       // yields the table slot, which movflt loads into the XMM register $dst.
6492   match(Set dst con);
6493   ins_cost(125);
6494   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6495   ins_encode %{
6496     __ movflt($dst$$XMMRegister, $constantaddress($con));
6497   %}
6498   ins_pipe(pipe_slow);
6499 %}
6500 
6501 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL constant by xor-ing $dst with itself.  The
       // xor modifies the flags, hence KILL cr.  The format prints
       // "$dst, $dst" to match the instruction actually emitted below and the
       // sibling loadCon*0 rules (it previously printed $src as the second
       // operand).
6502   match(Set dst src);
6503   effect(KILL cr);
6504   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6505   ins_encode %{
6506     __ xorq($dst$$Register, $dst$$Register);
6507   %}
6508   ins_pipe(ialu_reg);
6509 %}
6510 
6511 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-NULL compressed pointer constant via set_narrow_oop().
       // A NULL constant is treated as unreachable here (ShouldNotReachHere).
6512   match(Set dst src);
6513
6514   ins_cost(125);
6515   format %{ "movl    $dst, $src\t# compressed ptr" %}
6516   ins_encode %{
6517     address con = (address)$src$$constant;
6518     if (con == NULL) {
6519       ShouldNotReachHere();
6520     } else {
6521       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6522     }
6523   %}
6524   ins_pipe(ialu_reg_fat); // XXX
6525 %}
6526 
6527 instruct loadConF0(regF dst, immF0 src)
6528 %{
       // Load float constant 0.0 by xor-ing the register with itself: opcode
       // bytes 0x0F 0x57 (printed as xorps).  Cost 100 is lower than the 125 of
       // the constant-table load in loadConF.
6529   match(Set dst src);
6530   ins_cost(100);
6531
6532   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6533   opcode(0x0F, 0x57);
6534   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6535   ins_pipe(pipe_slow);
6536 %}
6537 
6538 // Use the same format since predicate() can not be used here.
     // Load a double constant from the constant table: $constantaddress($con)
     // yields the table slot, which movdbl loads into the XMM register $dst.
     // The format always prints movsd.
6539 instruct loadConD(regD dst, immD con) %{
6540   match(Set dst con);
6541   ins_cost(125);
6542   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6543   ins_encode %{
6544     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6545   %}
6546   ins_pipe(pipe_slow);
6547 %}
6548 
6549 instruct loadConD0(regD dst, immD0 src)
6550 %{
       // Load double constant 0.0 by xor-ing the register with itself: opcode
       // bytes 0x66 0x0F 0x57 (printed as xorpd), with the REX prefix emitted
       // after the first opcode byte.
6551   match(Set dst src);
6552   ins_cost(100);
6553
6554   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6555   opcode(0x66, 0x0F, 0x57);
6556   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6557   ins_pipe(pipe_slow);
6558 %}
6559 
6560 instruct loadSSI(rRegI dst, stackSlotI src)
6561 %{
       // Load an int from a stack slot into a register: REX prefix, opcode
       // 0x8B (printed as movl), with the stack slot as the memory operand.
6562   match(Set dst src);
6563
6564   ins_cost(125);
6565   format %{ "movl    $dst, $src\t# int stk" %}
6566   opcode(0x8B);
6567   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6568   ins_pipe(ialu_reg_mem);
6569 %}
6570 
6571 instruct loadSSL(rRegL dst, stackSlotL src)
6572 %{
       // Load a long from a stack slot into a register: REX_reg_mem_wide
       // prefix, opcode 0x8B (printed as movq).
6573   match(Set dst src);
6574
6575   ins_cost(125);
6576   format %{ "movq    $dst, $src\t# long stk" %}
6577   opcode(0x8B);
6578   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6579   ins_pipe(ialu_reg_mem);
6580 %}
6581 
6582 instruct loadSSP(rRegP dst, stackSlotP src)
6583 %{
       // Load a pointer from a stack slot into a register: REX_reg_mem_wide
       // prefix, opcode 0x8B (printed as movq).
6584   match(Set dst src);
6585
6586   ins_cost(125);
6587   format %{ "movq    $dst, $src\t# ptr stk" %}
6588   opcode(0x8B);
6589   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6590   ins_pipe(ialu_reg_mem);
6591 %}
6592 
6593 instruct loadSSF(regF dst, stackSlotF src)
6594 %{
       // Load a float from a stack slot into a float register: opcode bytes
       // 0xF3 0x0F 0x10 (printed as movss), the same encoding shape as loadF.
6595   match(Set dst src);
6596
6597   ins_cost(125);
6598   format %{ "movss   $dst, $src\t# float stk" %}
6599   opcode(0xF3, 0x0F, 0x10);
6600   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6601   ins_pipe(pipe_slow); // XXX
6602 %}
6603 
6604 // Use the same format since predicate() can not be used here.
     // Load a double from a stack slot: movdbl reads from rsp plus the slot's
     // displacement into the XMM register $dst.  The format always prints movsd.
6605 instruct loadSSD(regD dst, stackSlotD src)
6606 %{
6607   match(Set dst src);
6608
6609   ins_cost(125);
6610   format %{ "movsd   $dst, $src\t# double stk" %}
6611   ins_encode  %{
6612     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6613   %}
6614   ins_pipe(pipe_slow); // XXX
6615 %}
6616 
6617 // Prefetch instructions.
6618 // Must be safe to execute with invalid address (cannot fault).
6619 
6620 instruct prefetchr( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 3; emits the
       // assembler's prefetchr() on $mem.
6621   predicate(ReadPrefetchInstr==3);
6622   match(PrefetchRead mem);
6623   ins_cost(125);
6624
6625   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6626   ins_encode %{
6627     __ prefetchr($mem$$Address);
6628   %}
6629   ins_pipe(ialu_mem);
6630 %}
6631 
6632 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 0; emits
       // prefetchnta on $mem.
6633   predicate(ReadPrefetchInstr==0);
6634   match(PrefetchRead mem);
6635   ins_cost(125);
6636
6637   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6638   ins_encode %{
6639     __ prefetchnta($mem$$Address);
6640   %}
6641   ins_pipe(ialu_mem);
6642 %}
6643 
6644 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 1; emits
       // prefetcht0 on $mem.
6645   predicate(ReadPrefetchInstr==1);
6646   match(PrefetchRead mem);
6647   ins_cost(125);
6648
6649   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6650   ins_encode %{
6651     __ prefetcht0($mem$$Address);
6652   %}
6653   ins_pipe(ialu_mem);
6654 %}
6655 
6656 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 2; emits
       // prefetcht2 on $mem.
6657   predicate(ReadPrefetchInstr==2);
6658   match(PrefetchRead mem);
6659   ins_cost(125);
6660
6661   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6662   ins_encode %{
6663     __ prefetcht2($mem$$Address);
6664   %}
6665   ins_pipe(ialu_mem);
6666 %}
6667 
6668 instruct prefetchwNTA( memory mem ) %{
       // The only PrefetchWrite rule in this group; it has no predicate and
       // always emits prefetchnta on $mem.
6669   match(PrefetchWrite mem);
6670   ins_cost(125);
6671
6672   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6673   ins_encode %{
6674     __ prefetchnta($mem$$Address);
6675   %}
6676   ins_pipe(ialu_mem);
6677 %}
6678 
6679 // Prefetch instructions for allocation.
6680 
6681 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 3;
       // emits prefetchw on $mem.
6682   predicate(AllocatePrefetchInstr==3);
6683   match(PrefetchAllocation mem);
6684   ins_cost(125);
6685
6686   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
6687   ins_encode %{
6688     __ prefetchw($mem$$Address);
6689   %}
6690   ins_pipe(ialu_mem);
6691 %}
6692 
6693 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 0;
       // emits prefetchnta on $mem.
6694   predicate(AllocatePrefetchInstr==0);
6695   match(PrefetchAllocation mem);
6696   ins_cost(125);
6697
6698   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
6699   ins_encode %{
6700     __ prefetchnta($mem$$Address);
6701   %}
6702   ins_pipe(ialu_mem);
6703 %}
6704 
6705 instruct prefetchAllocT0( memory mem ) %{
       // PrefetchAllocation rule selected when AllocatePrefetchInstr==1; the
       // encoding calls prefetcht0 on the address of $mem.
6706   predicate(AllocatePrefetchInstr==1);
6707   match(PrefetchAllocation mem);
6708   ins_cost(125);
6709 
6710   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
6711   ins_encode %{
6712     __ prefetcht0($mem$$Address);
6713   %}
6714   ins_pipe(ialu_mem);
6715 %}
6716 
6717 instruct prefetchAllocT2( memory mem ) %{
       // PrefetchAllocation rule selected when AllocatePrefetchInstr==2; the
       // encoding calls prefetcht2 on the address of $mem.
6718   predicate(AllocatePrefetchInstr==2);
6719   match(PrefetchAllocation mem);
6720   ins_cost(125);
6721 
6722   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
6723   ins_encode %{
6724     __ prefetcht2($mem$$Address);
6725   %}
6726   ins_pipe(ialu_mem);
6727 %}
6728 
6729 //----------Store Instructions-------------------------------------------------
6730 
6731 // Store Byte
6732 instruct storeB(memory mem, rRegI src)
6733 %{
       // Matches StoreB of an int-class register to memory.  Encoded as the
       // REX_breg_mem prefix, primary opcode 0x88, then the reg/mem operand
       // bytes; printed as movb.
6734   match(Set mem (StoreB mem src));
6735 
6736   ins_cost(125); // XXX
6737   format %{ "movb    $mem, $src\t# byte" %}
6738   opcode(0x88);
6739   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6740   ins_pipe(ialu_mem_reg);
6741 %}
6742 
6743 // Store Char/Short
6744 instruct storeC(memory mem, rRegI src)
6745 %{
       // Matches StoreC of an int-class register to memory.  SizePrefix is
       // emitted ahead of the REX prefix and primary opcode 0x89; printed as movw.
6746   match(Set mem (StoreC mem src));
6747 
6748   ins_cost(125); // XXX
6749   format %{ "movw    $mem, $src\t# char/short" %}
6750   opcode(0x89);
6751   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6752   ins_pipe(ialu_mem_reg);
6753 %}
6754 
6755 // Store Integer
6756 instruct storeI(memory mem, rRegI src)
6757 %{
       // Matches StoreI of an int register to memory.  Encoded as REX_reg_mem,
       // primary opcode 0x89, then the reg/mem operand bytes; printed as movl.
6758   match(Set mem (StoreI mem src));
6759 
6760   ins_cost(125); // XXX
6761   format %{ "movl    $mem, $src\t# int" %}
6762   opcode(0x89);
6763   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6764   ins_pipe(ialu_mem_reg);
6765 %}
6766 
6767 // Store Long
6768 instruct storeL(memory mem, rRegL src)
6769 %{
       // Matches StoreL of a long register to memory.  Same opcode (0x89) as
       // storeI but with the wide REX prefix (REX_reg_mem_wide); printed as movq.
6770   match(Set mem (StoreL mem src));
6771 
6772   ins_cost(125); // XXX
6773   format %{ "movq    $mem, $src\t# long" %}
6774   opcode(0x89);
6775   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6776   ins_pipe(ialu_mem_reg); // XXX
6777 %}
6778 
6779 // Store Pointer
6780 instruct storeP(memory mem, any_RegP src)
6781 %{
       // Matches StoreP of a pointer register (any_RegP class) to memory.
       // Opcode 0x89 with the wide REX prefix; printed as movq.
6782   match(Set mem (StoreP mem src));
6783 
6784   ins_cost(125); // XXX
6785   format %{ "movq    $mem, $src\t# ptr" %}
6786   opcode(0x89);
6787   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6788   ins_pipe(ialu_mem_reg);
6789 %}
6790 
6791 instruct storeImmP0(memory mem, immP0 zero)
6792 %{
       // Stores a NULL pointer constant by writing r12 with movq instead of an
       // immediate.  Only enabled with compressed oops and a NULL narrow-oop
       // base; the format string notes R12_heapbase==0 in that configuration.
       // Its cost (125) is lower than storeImmP's (150).
6793   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6794   match(Set mem (StoreP mem zero));
6795 
6796   ins_cost(125); // XXX
6797   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6798   ins_encode %{
6799     __ movq($mem$$Address, r12);
6800   %}
6801   ins_pipe(ialu_mem_reg);
6802 %}
6803 
6804 // Store NULL Pointer, mark word, or other simple pointer constant.
6805 instruct storeImmP(memory mem, immP31 src)
6806 %{
       // Matches StoreP of an immP31 pointer constant.  Encoded as wide REX,
       // opcode 0xC7 with /0 in the ModRM reg field, followed by a 32-bit
       // immediate (Con32).
6807   match(Set mem (StoreP mem src));
6808 
6809   ins_cost(150); // XXX
6810   format %{ "movq    $mem, $src\t# ptr" %}
6811   opcode(0xC7); /* C7 /0 */
6812   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6813   ins_pipe(ialu_mem_imm);
6814 %}
6815 
6816 // Store Compressed Pointer
6817 instruct storeN(memory mem, rRegN src)
6818 %{
       // Matches StoreN of a narrow-oop register; encoded with a 32-bit movl
       // of $src to the address of $mem.
6819   match(Set mem (StoreN mem src));
6820 
6821   ins_cost(125); // XXX
6822   format %{ "movl    $mem, $src\t# compressed ptr" %}
6823   ins_encode %{
6824     __ movl($mem$$Address, $src$$Register);
6825   %}
6826   ins_pipe(ialu_mem_reg);
6827 %}
6828 
6829 instruct storeImmN0(memory mem, immN0 zero)
6830 %{
       // Stores a zero narrow oop by writing r12 with movl.  Enabled only when
       // the narrow-oop base is NULL; unlike the sibling *0 rules the predicate
       // does not test UseCompressedOops.
       // NOTE(review): presumably StoreN nodes only exist with compressed oops
       // enabled -- confirm.
6831   predicate(Universe::narrow_oop_base() == NULL);
6832   match(Set mem (StoreN mem zero));
6833 
6834   ins_cost(125); // XXX
6835   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6836   ins_encode %{
6837     __ movl($mem$$Address, r12);
6838   %}
6839   ins_pipe(ialu_mem_reg);
6840 %}
6841 
6842 instruct storeImmN(memory mem, immN src)
6843 %{
       // Matches StoreN of a narrow-oop constant.  A NULL constant is written
       // as a plain 32-bit zero; any other constant goes through
       // set_narrow_oop with the constant cast to jobject.
6844   match(Set mem (StoreN mem src));
6845 
6846   ins_cost(150); // XXX
6847   format %{ "movl    $mem, $src\t# compressed ptr" %}
6848   ins_encode %{
6849     address con = (address)$src$$constant;
6850     if (con == NULL) {
6851       __ movl($mem$$Address, (int32_t)0);
6852     } else {
6853       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6854     }
6855   %}
6856   ins_pipe(ialu_mem_imm);
6857 %}
6858 
6859 // Store Integer Immediate
6860 instruct storeImmI0(memory mem, immI0 zero)
6861 %{
       // Stores int zero by writing r12 with movl.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0).
       // Its cost (125) is lower than storeImmI's (150).
6862   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6863   match(Set mem (StoreI mem zero));
6864 
6865   ins_cost(125); // XXX
6866   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6867   ins_encode %{
6868     __ movl($mem$$Address, r12);
6869   %}
6870   ins_pipe(ialu_mem_reg);
6871 %}
6872 
6873 instruct storeImmI(memory mem, immI src)
6874 %{
       // Matches StoreI of an int constant.  Encoded as REX_mem, opcode 0xC7
       // with /0 in the ModRM reg field, followed by a 32-bit immediate.
6875   match(Set mem (StoreI mem src));
6876 
6877   ins_cost(150);
6878   format %{ "movl    $mem, $src\t# int" %}
6879   opcode(0xC7); /* C7 /0 */
6880   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6881   ins_pipe(ialu_mem_imm);
6882 %}
6883 
6884 // Store Long Immediate
6885 instruct storeImmL0(memory mem, immL0 zero)
6886 %{
       // Stores long zero by writing r12 with movq.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0).
       // Its cost (125) is lower than storeImmL's (150).
6887   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6888   match(Set mem (StoreL mem zero));
6889 
6890   ins_cost(125); // XXX
6891   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6892   ins_encode %{
6893     __ movq($mem$$Address, r12);
6894   %}
6895   ins_pipe(ialu_mem_reg);
6896 %}
6897 
6898 instruct storeImmL(memory mem, immL32 src)
6899 %{
       // Matches StoreL of an immL32 constant.  Encoded as wide REX, opcode
       // 0xC7 /0, followed by a 32-bit immediate (Con32).
6900   match(Set mem (StoreL mem src));
6901 
6902   ins_cost(150);
6903   format %{ "movq    $mem, $src\t# long" %}
6904   opcode(0xC7); /* C7 /0 */
6905   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6906   ins_pipe(ialu_mem_imm);
6907 %}
6908 
6909 // Store Short/Char Immediate
6910 instruct storeImmC0(memory mem, immI0 zero)
6911 %{
       // Stores a zero short/char by writing r12 with movw.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0).
6912   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6913   match(Set mem (StoreC mem zero));
6914 
6915   ins_cost(125); // XXX
6916   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6917   ins_encode %{
6918     __ movw($mem$$Address, r12);
6919   %}
6920   ins_pipe(ialu_mem_reg);
6921 %}
6922 
6923 instruct storeImmI16(memory mem, immI16 src)
6924 %{
       // Matches StoreC of a 16-bit constant, gated on the UseStoreImmI16 flag.
       // Reuses opcode 0xC7 /0 behind SizePrefix and emits a 16-bit immediate
       // (Con16).
6925   predicate(UseStoreImmI16);
6926   match(Set mem (StoreC mem src));
6927 
6928   ins_cost(150);
6929   format %{ "movw    $mem, $src\t# short/char" %}
6930   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6931   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6932   ins_pipe(ialu_mem_imm);
6933 %}
6934 
6935 // Store Byte Immediate
6936 instruct storeImmB0(memory mem, immI0 zero)
6937 %{
       // Stores a zero byte by writing r12 with movb.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0).
       // The format annotation says "byte": this rule matches StoreB, not the
       // short/char store handled by storeImmC0.
6938   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6939   match(Set mem (StoreB mem zero));
6940 
6941   ins_cost(125); // XXX
6942   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6943   ins_encode %{
6944     __ movb($mem$$Address, r12);
6945   %}
6946   ins_pipe(ialu_mem_reg);
6947 %}
6948 
6949 instruct storeImmB(memory mem, immI8 src)
6950 %{
       // Matches StoreB of an 8-bit constant.  Encoded as REX_mem, opcode 0xC6
       // /0, followed by the immediate produced by Con8or32.
6951   match(Set mem (StoreB mem src));
6952 
6953   ins_cost(150); // XXX
6954   format %{ "movb    $mem, $src\t# byte" %}
6955   opcode(0xC6); /* C6 /0 */
6956   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6957   ins_pipe(ialu_mem_imm);
6958 %}
6959 
6960 // Store Aligned Packed Byte XMM register to memory
6961 instruct storeA8B(memory mem, regD src) %{
       // Matches Store8B from a regD-class register; encoded by the movq_st
       // encoding class and printed as MOVQ.
6962   match(Set mem (Store8B mem src));
6963   ins_cost(145);
6964   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6965   ins_encode( movq_st(mem, src));
6966   ins_pipe( pipe_slow );
6967 %}
6968 
6969 // Store Aligned Packed Char/Short XMM register to memory
6970 instruct storeA4C(memory mem, regD src) %{
       // Matches Store4C from a regD-class register; encoded by the movq_st
       // encoding class and printed as MOVQ.
6971   match(Set mem (Store4C mem src));
6972   ins_cost(145);
6973   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6974   ins_encode( movq_st(mem, src));
6975   ins_pipe( pipe_slow );
6976 %}
6977 
6978 // Store Aligned Packed Integer XMM register to memory
6979 instruct storeA2I(memory mem, regD src) %{
       // Matches Store2I from a regD-class register; encoded by the movq_st
       // encoding class and printed as MOVQ.
6980   match(Set mem (Store2I mem src));
6981   ins_cost(145);
6982   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6983   ins_encode( movq_st(mem, src));
6984   ins_pipe( pipe_slow );
6985 %}
6986 
6987 // Store CMS card-mark Immediate
6988 instruct storeImmCM0_reg(memory mem, immI0 zero)
6989 %{
       // Card-mark store of zero (StoreCM) done by writing r12 with movb.
       // Enabled only with compressed oops and a NULL narrow-oop base; its cost
       // (125) is lower than storeImmCM0's (150).
6990   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6991   match(Set mem (StoreCM mem zero));
6992 
6993   ins_cost(125); // XXX
6994   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6995   ins_encode %{
6996     __ movb($mem$$Address, r12);
6997   %}
6998   ins_pipe(ialu_mem_reg);
6999 %}
7000 
7001 instruct storeImmCM0(memory mem, immI0 src)
7002 %{
       // Card-mark store of an immediate zero (StoreCM).  Encoded as REX_mem,
       // opcode 0xC6 /0, followed by the immediate produced by Con8or32.
       // No predicate, so it is available in every configuration.
7003   match(Set mem (StoreCM mem src));
7004 
7005   ins_cost(150); // XXX
7006   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7007   opcode(0xC6); /* C6 /0 */
7008   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7009   ins_pipe(ialu_mem_imm);
7010 %}
7011 
7012 // Store Aligned Packed Single Float XMM register to memory
7013 instruct storeA2F(memory mem, regD src) %{
       // Matches Store2F from a regD-class register; encoded by the movq_st
       // encoding class and printed as MOVQ.
7014   match(Set mem (Store2F mem src));
7015   ins_cost(145);
7016   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7017   ins_encode( movq_st(mem, src));
7018   ins_pipe( pipe_slow );
7019 %}
7020 
7021 // Store Float
7022 instruct storeF(memory mem, regF src)
7023 %{
       // Matches StoreF from a float register.  The three opcode bytes
       // (0xF3, 0x0F, 0x11) are emitted as OpcP, then the REX prefix, then
       // OpcS and OpcT, then the reg/mem operand bytes; printed as movss.
7024   match(Set mem (StoreF mem src));
7025 
7026   ins_cost(95); // XXX
7027   format %{ "movss   $mem, $src\t# float" %}
7028   opcode(0xF3, 0x0F, 0x11);
7029   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7030   ins_pipe(pipe_slow); // XXX
7031 %}
7032 
7033 // Store immediate Float value (it is faster than store from XMM register)
7034 instruct storeF0(memory mem, immF0 zero)
7035 %{
       // Stores float 0.0 by writing r12 with a 32-bit movl.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0).
       // Its cost (25) is lower than storeF_imm's (50).
7036   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7037   match(Set mem (StoreF mem zero));
7038 
7039   ins_cost(25); // XXX
7040   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7041   ins_encode %{
7042     __ movl($mem$$Address, r12);
7043   %}
7044   ins_pipe(ialu_mem_reg);
7045 %}
7046 
7047 instruct storeF_imm(memory mem, immF src)
7048 %{
       // Matches StoreF of a float constant and writes it with an integer
       // store: opcode 0xC7 /0 followed by the immediate produced by
       // Con32F_as_bits.  Printed as movl.
7049   match(Set mem (StoreF mem src));
7050 
7051   ins_cost(50);
7052   format %{ "movl    $mem, $src\t# float" %}
7053   opcode(0xC7); /* C7 /0 */
7054   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7055   ins_pipe(ialu_mem_imm);
7056 %}
7057 
7058 // Store Double
7059 instruct storeD(memory mem, regD src)
7060 %{
       // Matches StoreD from a double register.  The three opcode bytes
       // (0xF2, 0x0F, 0x11) are emitted as OpcP, then the REX prefix, then
       // OpcS and OpcT, then the reg/mem operand bytes; printed as movsd.
7061   match(Set mem (StoreD mem src));
7062 
7063   ins_cost(95); // XXX
7064   format %{ "movsd   $mem, $src\t# double" %}
7065   opcode(0xF2, 0x0F, 0x11);
7066   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7067   ins_pipe(pipe_slow); // XXX
7068 %}
7069 
7070 // Store immediate double 0.0 (it is faster than store from XMM register)
7071 instruct storeD0_imm(memory mem, immD0 src)
7072 %{
       // Stores double 0.0 as an immediate: wide REX, opcode 0xC7 /0, then the
       // immediate produced by Con32F_as_bits.  The predicate is the exact
       // negation of storeD0's, so the two rules never apply together.
7073   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7074   match(Set mem (StoreD mem src));
7075 
7076   ins_cost(50);
7077   format %{ "movq    $mem, $src\t# double 0." %}
7078   opcode(0xC7); /* C7 /0 */
7079   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7080   ins_pipe(ialu_mem_imm);
7081 %}
7082 
7083 instruct storeD0(memory mem, immD0 zero)
7084 %{
       // Stores double 0.0 by writing r12 with movq.  Enabled only with
       // compressed oops and a NULL narrow-oop base (format: R12_heapbase==0);
       // storeD0_imm covers the opposite configuration.
7085   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7086   match(Set mem (StoreD mem zero));
7087 
7088   ins_cost(25); // XXX
7089   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7090   ins_encode %{
7091     __ movq($mem$$Address, r12);
7092   %}
7093   ins_pipe(ialu_mem_reg);
7094 %}
7095 
7096 instruct storeSSI(stackSlotI dst, rRegI src)
7097 %{
       // Copies an int register into an int stack slot (a bare Set dst src
       // match).  Opcode 0x89 with REX_reg_mem; printed as movl.
7098   match(Set dst src);
7099 
7100   ins_cost(100);
7101   format %{ "movl    $dst, $src\t# int stk" %}
7102   opcode(0x89);
7103   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7104   ins_pipe( ialu_mem_reg );
7105 %}
7106 
7107 instruct storeSSL(stackSlotL dst, rRegL src)
7108 %{
       // Copies a long register into a long stack slot (a bare Set dst src
       // match).  Opcode 0x89 with the wide REX prefix; printed as movq.
7109   match(Set dst src);
7110 
7111   ins_cost(100);
7112   format %{ "movq    $dst, $src\t# long stk" %}
7113   opcode(0x89);
7114   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7115   ins_pipe(ialu_mem_reg);
7116 %}
7117 
7118 instruct storeSSP(stackSlotP dst, rRegP src)
7119 %{
       // Copies a pointer register into a pointer stack slot (a bare
       // Set dst src match).  Opcode 0x89 with the wide REX prefix; printed
       // as movq.
7120   match(Set dst src);
7121 
7122   ins_cost(100);
7123   format %{ "movq    $dst, $src\t# ptr stk" %}
7124   opcode(0x89);
7125   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7126   ins_pipe(ialu_mem_reg);
7127 %}
7128 
7129 instruct storeSSF(stackSlotF dst, regF src)
7130 %{
       // Copies a float register into a float stack slot.  Uses the same
       // opcode bytes and emission order as storeF (0xF3, 0x0F, 0x11);
       // printed as movss.
7131   match(Set dst src);
7132 
7133   ins_cost(95); // XXX
7134   format %{ "movss   $dst, $src\t# float stk" %}
7135   opcode(0xF3, 0x0F, 0x11);
7136   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7137   ins_pipe(pipe_slow); // XXX
7138 %}
7139 
7140 instruct storeSSD(stackSlotD dst, regD src)
7141 %{
       // Copies a double register into a double stack slot.  Uses the same
       // opcode bytes and emission order as storeD (0xF2, 0x0F, 0x11);
       // printed as movsd.
7142   match(Set dst src);
7143 
7144   ins_cost(95); // XXX
7145   format %{ "movsd   $dst, $src\t# double stk" %}
7146   opcode(0xF2, 0x0F, 0x11);
7147   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7148   ins_pipe(pipe_slow); // XXX
7149 %}
7150 
7151 //----------BSWAP Instructions-------------------------------------------------
7152 instruct bytes_reverse_int(rRegI dst) %{
       // In-place ReverseBytesI: $dst is both input and result.  Encoded as
       // REX_reg, primary opcode 0x0F, then opc2_reg (secondary opcode 0xC8
       // combined with the register); printed as bswapl.
7153   match(Set dst (ReverseBytesI dst));
7154 
7155   format %{ "bswapl  $dst" %}
7156   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7157   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7158   ins_pipe( ialu_reg );
7159 %}
7160 
7161 instruct bytes_reverse_long(rRegL dst) %{
       // In-place ReverseBytesL: same opcode bytes as bytes_reverse_int but
       // with the wide REX prefix (REX_reg_wide); printed as bswapq.
7162   match(Set dst (ReverseBytesL dst));
7163 
7164   format %{ "bswapq  $dst" %}
7165 
7166   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7167   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7168   ins_pipe( ialu_reg);
7169 %}
7170 
7171 instruct bytes_reverse_unsigned_short(rRegI dst) %{
       // In-place ReverseBytesUS: a 32-bit bswapl followed by shrl by 16,
       // which moves the swapped halfword down and fills the top with zeros.
7172   match(Set dst (ReverseBytesUS dst));
7173 
7174   format %{ "bswapl  $dst\n\t"
7175             "shrl    $dst,16\n\t" %}
7176   ins_encode %{
7177     __ bswapl($dst$$Register);
7178     __ shrl($dst$$Register, 16);
7179   %}
7180   ins_pipe( ialu_reg );
7181 %}
7182 
7183 instruct bytes_reverse_short(rRegI dst) %{
       // In-place ReverseBytesS: a 32-bit bswapl followed by sarl by 16, which
       // moves the swapped halfword down and sign-extends it.  The format
       // prints "sarl" to match the instruction the encoding emits.
7184   match(Set dst (ReverseBytesS dst));
7185 
7186   format %{ "bswapl  $dst\n\t"
7187             "sarl    $dst,16\n\t" %}
7188   ins_encode %{
7189     __ bswapl($dst$$Register);
7190     __ sarl($dst$$Register, 16);
7191   %}
7192   ins_pipe( ialu_reg );
7193 %}
7194 
7195 //---------- Zeros Count Instructions ------------------------------------------
7196 
7197 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI via a single lzcntl, used when
       // UseCountLeadingZerosInstruction is set.  The flags register is
       // declared killed.
7198   predicate(UseCountLeadingZerosInstruction);
7199   match(Set dst (CountLeadingZerosI src));
7200   effect(KILL cr);
7201 
7202   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7203   ins_encode %{
7204     __ lzcntl($dst$$Register, $src$$Register);
7205   %}
7206   ins_pipe(ialu_reg);
7207 %}
7208 
7209 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosI used when UseCountLeadingZerosInstruction
       // is off.  Computes (BitsPerInt - 1) - bsrl(src); when the jump on
       // notZero is not taken, dst is forced to -1 first so the final result
       // is BitsPerInt.
7210   predicate(!UseCountLeadingZerosInstruction);
7211   match(Set dst (CountLeadingZerosI src));
7212   effect(KILL cr);
7213 
7214   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7215             "jnz     skip\n\t"
7216             "movl    $dst, -1\n"
7217       "skip:\n\t"
7218             "negl    $dst\n\t"
7219             "addl    $dst, 31" %}
7220   ins_encode %{
7221     Register Rdst = $dst$$Register;
7222     Register Rsrc = $src$$Register;
7223     Label skip;
7224     __ bsrl(Rdst, Rsrc);
7225     __ jccb(Assembler::notZero, skip);
7226     __ movl(Rdst, -1);
7227     __ bind(skip);
7228     __ negl(Rdst);
7229     __ addl(Rdst, BitsPerInt - 1);
7230   %}
7231   ins_pipe(ialu_reg);
7232 %}
7233 
7234 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL via a single lzcntq, used when
       // UseCountLeadingZerosInstruction is set.  The source is a long
       // register and the result an int register; flags are declared killed.
7235   predicate(UseCountLeadingZerosInstruction);
7236   match(Set dst (CountLeadingZerosL src));
7237   effect(KILL cr);
7238 
7239   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7240   ins_encode %{
7241     __ lzcntq($dst$$Register, $src$$Register);
7242   %}
7243   ins_pipe(ialu_reg);
7244 %}
7245 
7246 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosL used when UseCountLeadingZerosInstruction
       // is off.  Computes (BitsPerLong - 1) - bsrq(src); when the jump on
       // notZero is not taken, dst is forced to -1 first so the final result
       // is BitsPerLong.  The negate/add run as 32-bit ops on the int result.
7247   predicate(!UseCountLeadingZerosInstruction);
7248   match(Set dst (CountLeadingZerosL src));
7249   effect(KILL cr);
7250 
7251   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7252             "jnz     skip\n\t"
7253             "movl    $dst, -1\n"
7254       "skip:\n\t"
7255             "negl    $dst\n\t"
7256             "addl    $dst, 63" %}
7257   ins_encode %{
7258     Register Rdst = $dst$$Register;
7259     Register Rsrc = $src$$Register;
7260     Label skip;
7261     __ bsrq(Rdst, Rsrc);
7262     __ jccb(Assembler::notZero, skip);
7263     __ movl(Rdst, -1);
7264     __ bind(skip);
7265     __ negl(Rdst);
7266     __ addl(Rdst, BitsPerLong - 1);
7267   %}
7268   ins_pipe(ialu_reg);
7269 %}
7270 
7271 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl.  When the jump on notZero is not taken,
       // dst is overwritten with BitsPerInt.  Flags are declared killed.
7272   match(Set dst (CountTrailingZerosI src));
7273   effect(KILL cr);
7274 
7275   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7276             "jnz     done\n\t"
7277             "movl    $dst, 32\n"
7278       "done:" %}
7279   ins_encode %{
7280     Register Rdst = $dst$$Register;
7281     Label done;
7282     __ bsfl(Rdst, $src$$Register);
7283     __ jccb(Assembler::notZero, done);
7284     __ movl(Rdst, BitsPerInt);
7285     __ bind(done);
7286   %}
7287   ins_pipe(ialu_reg);
7288 %}
7289 
7290 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq.  When the jump on notZero is not taken,
       // dst is overwritten with BitsPerLong.  Flags are declared killed.
7291   match(Set dst (CountTrailingZerosL src));
7292   effect(KILL cr);
7293 
7294   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7295             "jnz     done\n\t"
7296             "movl    $dst, 64\n"
7297       "done:" %}
7298   ins_encode %{
7299     Register Rdst = $dst$$Register;
7300     Label done;
7301     __ bsfq(Rdst, $src$$Register);
7302     __ jccb(Assembler::notZero, done);
7303     __ movl(Rdst, BitsPerLong);
7304     __ bind(done);
7305   %}
7306   ins_pipe(ialu_reg);
7307 %}
7308 
7309 
7310 //---------- Population Count Instructions -------------------------------------
7311 
7312 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI via popcntl, used when UsePopCountInstruction is set.
       // POPCNT writes the condition flags (it clears OF, SF, AF, CF and PF and
       // sets ZF from the source), so the flags register is declared killed to
       // keep a live condition-code value from being clobbered.
7313   predicate(UsePopCountInstruction);
7314   match(Set dst (PopCountI src));
       effect(KILL cr);
7315 
7316   format %{ "popcnt  $dst, $src" %}
7317   ins_encode %{
7318     __ popcntl($dst$$Register, $src$$Register);
7319   %}
7320   ins_pipe(ialu_reg);
7321 %}
7322 
7323 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI of a value loaded from memory (LoadI folded into the
       // instruction), used when UsePopCountInstruction is set.
       // POPCNT writes the condition flags (it clears OF, SF, AF, CF and PF and
       // sets ZF from the source), so the flags register is declared killed.
7324   predicate(UsePopCountInstruction);
7325   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7326 
7327   format %{ "popcnt  $dst, $mem" %}
7328   ins_encode %{
7329     __ popcntl($dst$$Register, $mem$$Address);
7330   %}
7331   ins_pipe(ialu_reg);
7332 %}
7333 
7334 // Note: Long.bitCount(long) returns an int.
7335 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL via popcntq, used when UsePopCountInstruction is set; the
       // source is a long register and the result an int register.
       // POPCNT writes the condition flags (it clears OF, SF, AF, CF and PF and
       // sets ZF from the source), so the flags register is declared killed.
7336   predicate(UsePopCountInstruction);
7337   match(Set dst (PopCountL src));
       effect(KILL cr);
7338 
7339   format %{ "popcnt  $dst, $src" %}
7340   ins_encode %{
7341     __ popcntq($dst$$Register, $src$$Register);
7342   %}
7343   ins_pipe(ialu_reg);
7344 %}
7345 
7346 // Note: Long.bitCount(long) returns an int.
7347 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL of a value loaded from memory (LoadL folded into the
       // instruction), used when UsePopCountInstruction is set.
       // POPCNT writes the condition flags (it clears OF, SF, AF, CF and PF and
       // sets ZF from the source), so the flags register is declared killed.
7348   predicate(UsePopCountInstruction);
7349   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7350 
7351   format %{ "popcnt  $dst, $mem" %}
7352   ins_encode %{
7353     __ popcntq($dst$$Register, $mem$$Address);
7354   %}
7355   ins_pipe(ialu_reg);
7356 %}
7357 
7358 
7359 //----------MemBar Instructions-----------------------------------------------
7360 // Memory barrier flavors
7361 
7362 instruct membar_acquire()
7363 %{
       // MemBarAcquire emits no code here: zero cost, size(0), and an empty
       // ins_encode.
7364   match(MemBarAcquire);
7365   ins_cost(0);
7366 
7367   size(0);
7368   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7369   ins_encode();
7370   ins_pipe(empty);
7371 %}
7372 
7373 instruct membar_acquire_lock()
7374 %{
       // MemBarAcquireLock emits no code: zero cost, size(0), empty ins_encode.
       // The format string gives the rationale (a prior CMPXCHG in FastLock).
7375   match(MemBarAcquireLock);
7376   ins_cost(0);
7377 
7378   size(0);
7379   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7380   ins_encode();
7381   ins_pipe(empty);
7382 %}
7383 
7384 instruct membar_release()
7385 %{
       // MemBarRelease emits no code here: zero cost, size(0), and an empty
       // ins_encode.
7386   match(MemBarRelease);
7387   ins_cost(0);
7388 
7389   size(0);
7390   format %{ "MEMBAR-release ! (empty encoding)" %}
7391   ins_encode();
7392   ins_pipe(empty);
7393 %}
7394 
7395 instruct membar_release_lock()
7396 %{
       // MemBarReleaseLock emits no code: zero cost, size(0), empty ins_encode.
       // The format string gives the rationale (a FastUnlock follows).
7397   match(MemBarReleaseLock);
7398   ins_cost(0);
7399 
7400   size(0);
7401   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7402   ins_encode();
7403   ins_pipe(empty);
7404 %}
7405 
7406 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile implemented by calling membar(Assembler::StoreLoad).
       // The flags register is declared killed.  The format template prints
       // either the "lock addl" form or an "empty encoding" note depending on
       // os::is_MP().
       // NOTE(review): the format implies membar() emits nothing on a
       // uniprocessor -- confirm in the assembler.
7407   match(MemBarVolatile);
7408   effect(KILL cr);
7409   ins_cost(400);
7410 
7411   format %{
7412     $$template
7413     if (os::is_MP()) {
7414       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7415     } else {
7416       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7417     }
7418   %}
7419   ins_encode %{
7420     __ membar(Assembler::StoreLoad);
7421   %}
7422   ins_pipe(pipe_slow);
7423 %}
7424 
7425 instruct unnecessary_membar_volatile()
7426 %{
       // Zero-cost, zero-size alternative to membar_volatile, selected when
       // Matcher::post_store_load_barrier(n) returns true for the node.
7427   match(MemBarVolatile);
7428   predicate(Matcher::post_store_load_barrier(n));
7429   ins_cost(0);
7430 
7431   size(0);
7432   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7433   ins_encode();
7434   ins_pipe(empty);
7435 %}
7436 
7437 //----------Move Instructions--------------------------------------------------
7438 
7439 instruct castX2P(rRegP dst, rRegL src)
7440 %{
       // CastX2P (long to pointer): a register copy emitted by the
       // enc_copy_wide encoding class; printed as movq.
7441   match(Set dst (CastX2P src));
7442 
7443   format %{ "movq    $dst, $src\t# long->ptr" %}
7444   ins_encode(enc_copy_wide(dst, src));
7445   ins_pipe(ialu_reg_reg); // XXX
7446 %}
7447 
7448 instruct castP2X(rRegL dst, rRegP src)
7449 %{
       // CastP2X (pointer to long): a register copy emitted by the
       // enc_copy_wide encoding class; printed as movq.
7450   match(Set dst (CastP2X src));
7451 
7452   format %{ "movq    $dst, $src\t# ptr -> long" %}
7453   ins_encode(enc_copy_wide(dst, src));
7454   ins_pipe(ialu_reg_reg); // XXX
7455 %}
7456 
7457 
7458 // Convert oop pointer into compressed form
7459 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for pointers whose type is not known to be NotNull.  Copies
       // src into dst when they are different registers, then encodes dst in
       // place with encode_heap_oop.  Flags are declared killed.
7460   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7461   match(Set dst (EncodeP src));
7462   effect(KILL cr);
7463   format %{ "encode_heap_oop $dst,$src" %}
7464   ins_encode %{
7465     Register s = $src$$Register;
7466     Register d = $dst$$Register;
7467     if (s != d) {
7468       __ movq(d, s);
7469     }
7470     __ encode_heap_oop(d);
7471   %}
7472   ins_pipe(ialu_reg_long);
7473 %}
7474 
7475 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for pointers typed NotNull (the complement of encodeHeapOop's
       // predicate).  Delegates to the two-register encode_heap_oop_not_null.
7476   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7477   match(Set dst (EncodeP src));
7478   effect(KILL cr);
7479   format %{ "encode_heap_oop_not_null $dst,$src" %}
7480   ins_encode %{
7481     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7482   %}
7483   ins_pipe(ialu_reg_long);
7484 %}
7485 
7486 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for results typed neither NotNull nor Constant.  Copies src
       // into dst when they are different registers, then decodes dst in place
       // with decode_heap_oop.  Flags are declared killed.
7487   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7488             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7489   match(Set dst (DecodeN src));
7490   effect(KILL cr);
7491   format %{ "decode_heap_oop $dst,$src" %}
7492   ins_encode %{
7493     Register s = $src$$Register;
7494     Register d = $dst$$Register;
7495     if (s != d) {
7496       __ movq(d, s);
7497     }
7498     __ decode_heap_oop(d);
7499   %}
7500   ins_pipe(ialu_reg_long);
7501 %}
7502 
7503 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for results typed NotNull or Constant (the complement of
       // decodeHeapOop's predicate).  Uses the two-register
       // decode_heap_oop_not_null when src and dst differ, and the
       // single-register in-place form when they are the same register.
7504   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7505             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7506   match(Set dst (DecodeN src));
7507   effect(KILL cr);
7508   format %{ "decode_heap_oop_not_null $dst,$src" %}
7509   ins_encode %{
7510     Register s = $src$$Register;
7511     Register d = $dst$$Register;
7512     if (s != d) {
7513       __ decode_heap_oop_not_null(d, s);
7514     } else {
7515       __ decode_heap_oop_not_null(d);
7516     }
7517   %}
7518   ins_pipe(ialu_reg_long);
7519 %}
7520 
7521 
7522 //----------Conditional Move---------------------------------------------------
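7523 // Jump (the table-jump instructs are listed here, ahead of the conditional moves)
7524 // Dummy instruction for generating temp registers; it is disabled by its predicate(false).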
7525 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Jump through a table indexed by (switch_val << shift).  The rule
       // carries predicate(false).  $dest is a TEMP: it is loaded with
       // $constantaddress via lea and then used as the base of the indirect
       // jmp, with switch_val as the index scaled by $shift.
7526   match(Jump (LShiftL switch_val shift));
7527   ins_cost(350);
7528   predicate(false);
7529   effect(TEMP dest);
7530 
7531   format %{ "leaq    $dest, [$constantaddress]\n\t"
7532             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7533   ins_encode %{
7534     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7535     // to do that and the compiler is using that register as one it can allocate.
7536     // So we build it all by hand.
7537     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7538     // ArrayAddress dispatch(table, index);
7539     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7540     __ lea($dest$$Register, $constantaddress);
7541     __ jmp(dispatch);
7542   %}
7543   ins_pipe(pipe_jmp);
7544 %}
7545 
7546 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump through a table indexed by (switch_val << shift) + offset.
       // $dest is a TEMP: it is loaded with $constantaddress via lea and then
       // used as the base of the indirect jmp, with switch_val as the scaled
       // index and $offset (cast to int) as the displacement.
7547   match(Jump (AddL (LShiftL switch_val shift) offset));
7548   ins_cost(350);
7549   effect(TEMP dest);
7550 
7551   format %{ "leaq    $dest, [$constantaddress]\n\t"
7552             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7553   ins_encode %{
7554     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7555     // to do that and the compiler is using that register as one it can allocate.
7556     // So we build it all by hand.
7557     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7558     // ArrayAddress dispatch(table, index);
7559     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7560     __ lea($dest$$Register, $constantaddress);
7561     __ jmp(dispatch);
7562   %}
7563   ins_pipe(pipe_jmp);
7564 %}
7565 
7566 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Jump through a table indexed directly by switch_val (scale times_1).
       // $dest is a TEMP: it is loaded with $constantaddress via lea and then
       // used as the base of the indirect jmp.
7567   match(Jump switch_val);
7568   ins_cost(350);
7569   effect(TEMP dest);
7570 
7571   format %{ "leaq    $dest, [$constantaddress]\n\t"
7572             "jmp     [$dest + $switch_val]\n\t" %}
7573   ins_encode %{
7574     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7575     // to do that and the compiler is using that register as one it can allocate.
7576     // So we build it all by hand.
7577     // Address index(noreg, switch_reg, Address::times_1);
7578     // ArrayAddress dispatch(table, index);
7579     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7580     __ lea($dest$$Register, $constantaddress);
7581     __ jmp(dispatch);
7582   %}
7583   ins_pipe(pipe_jmp);
7584 %}
7585 
7586 // Conditional move
7587 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7588 %{
       // CMoveI between int registers under a signed condition (cmpOp on
       // rFlagsReg).  dst appears on both sides of the match, so it is updated
       // in place.  Encoded as REX_reg_reg, the condition-specific opcode from
       // enc_cmov (base 0x0F 0x40), then the reg/reg operand byte.
7589   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7590 
7591   ins_cost(200); // XXX
7592   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7593   opcode(0x0F, 0x40);
7594   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7595   ins_pipe(pipe_cmov_reg);
7596 %}
7597 
7598 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // CMoveI between int registers under an unsigned condition (cmpOpU on
       // rFlagsRegU).  Same encoding sequence as cmovI_reg.
7599   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7600 
7601   ins_cost(200); // XXX
7602   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7603   opcode(0x0F, 0x40);
7604   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7605   ins_pipe(pipe_cmov_reg);
7606 %}
7607 
7608 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // CMoveI variant for the cmpOpUCF / rFlagsRegUCF operand pair; it has
       // no encoding of its own and expands to cmovI_regU.
7609   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7610   ins_cost(200);
7611   expand %{
7612     cmovI_regU(cop, cr, dst, src);
7613   %}
7614 %}
7615 
7616 // Conditional move
7617 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // CMoveI whose second value is a LoadI folded into the instruction,
       // under a signed condition.  Encoded as REX_reg_mem, enc_cmov, then the
       // reg/mem operand bytes.
7618   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7619 
7620   ins_cost(250); // XXX
7621   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7622   opcode(0x0F, 0x40);
7623   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7624   ins_pipe(pipe_cmov_mem);
7625 %}
7626 
7627 // Conditional move
7628 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7629 %{
       // CMoveI whose second value is a LoadI folded into the instruction,
       // under an unsigned condition.  Same encoding sequence as cmovI_mem.
7630   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7631 
7632   ins_cost(250); // XXX
7633   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7634   opcode(0x0F, 0x40);
7635   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7636   ins_pipe(pipe_cmov_mem);
7637 %}
7638 
7639 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Memory-operand CMoveI variant for the cmpOpUCF / rFlagsRegUCF pair;
       // it has no encoding of its own and expands to cmovI_memU.
7640   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7641   ins_cost(250);
7642   expand %{
7643     cmovI_memU(cop, cr, dst, src);
7644   %}
7645 %}
7646 
7647 // Conditional move
7648 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7649 %{
       // CMoveN between narrow-oop registers under a signed condition.  Uses
       // the same 32-bit encoding sequence as cmovI_reg; printed as cmovl.
7650   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7651 
7652   ins_cost(200); // XXX
7653   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7654   opcode(0x0F, 0x40);
7655   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7656   ins_pipe(pipe_cmov_reg);
7657 %}
7658 
7659 // Conditional move
7660 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7661 %{
       // CMoveN between narrow-oop registers under an unsigned condition.
       // Same encoding sequence as cmovN_reg.
7662   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7663 
7664   ins_cost(200); // XXX
7665   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7666   opcode(0x0F, 0x40);
7667   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7668   ins_pipe(pipe_cmov_reg);
7669 %}
7670 
7671 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // CMoveN variant for the cmpOpUCF / rFlagsRegUCF operand pair; it has
       // no encoding of its own and expands to cmovN_regU.
7672   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7673   ins_cost(200);
7674   expand %{
7675     cmovN_regU(cop, cr, dst, src);
7676   %}
7677 %}
7678 
7679 // Conditional move
7680 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7681 %{
       // CMoveP between pointer registers under a signed condition.  Uses the
       // wide REX prefix (REX_reg_reg_wide) ahead of enc_cmov; printed as cmovq.
7682   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7683 
7684   ins_cost(200); // XXX
7685   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7686   opcode(0x0F, 0x40);
7687   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7688   ins_pipe(pipe_cmov_reg);  // XXX
7689 %}
7690 
7691 // Conditional move
7692 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7693 %{
       // CMoveP between pointer registers under an unsigned condition.  Same
       // encoding sequence as cmovP_reg.
7694   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7695 
7696   ins_cost(200); // XXX
7697   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7698   opcode(0x0F, 0x40);
7699   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7700   ins_pipe(pipe_cmov_reg); // XXX
7701 %}
7702 
7703 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // CMoveP variant for the cmpOpUCF / rFlagsRegUCF operand pair; it has
       // no encoding of its own and expands to cmovP_regU.
7704   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7705   ins_cost(200);
7706   expand %{
7707     cmovP_regU(cop, cr, dst, src);
7708   %}
7709 %}
7710 
7711 // DISABLED: Requires the ADLC to emit a bottom_type call that
7712 // correctly meets the two pointer arguments; one is an incoming
7713 // register but the other is a memory operand.  ALSO appears to
7714 // be buggy with implicit null checks.
7715 //
7716 //// Conditional move
7717 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7718 //%{
7719 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7720 //  ins_cost(250);
7721 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7722 //  opcode(0x0F,0x40);
7723 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7724 //  ins_pipe( pipe_cmov_mem );
7725 //%}
7726 //
7727 //// Conditional move
7728 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7729 //%{
7730 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7731 //  ins_cost(250);
7732 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7733 //  opcode(0x0F,0x40);
7734 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7735 //  ins_pipe( pipe_cmov_mem );
7736 //%}
7737 
7738 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7739 %{
       // Matches CMoveL (long) with a register source under a cmpOp condition
       // read from rFlagsReg; formatted as "cmovq$cop $dst, $src".
7740   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7741 
7742   ins_cost(200); // XXX
7743   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7744   opcode(0x0F, 0x40);
7745   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7746   ins_pipe(pipe_cmov_reg);  // XXX
7747 %}
7748 
7749 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7750 %{
       // Matches CMoveL whose second value is a LoadL from memory operand src,
       // so the load is folded into the conditional move.  cmpOp / rFlagsReg
       // variant.
7751   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7752 
7753   ins_cost(200); // XXX
7754   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7755   opcode(0x0F, 0x40);
7756   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7757   ins_pipe(pipe_cmov_mem);  // XXX
7758 %}
7759 
7760 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7761 %{
       // Matches CMoveL (long) with a register source under a cmpOpU condition
       // read from rFlagsRegU.  Also the target of the expand in cmovL_regUCF.
7762   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7763 
7764   ins_cost(200); // XXX
7765   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7766   opcode(0x0F, 0x40);
7767   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7768   ins_pipe(pipe_cmov_reg); // XXX
7769 %}
7770 
7771 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
       // CMoveL (register source) under a cmpOpUCF condition.  No encoding of
       // its own: expands to cmovL_regU with the same operands.
7772   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7773   ins_cost(200);
7774   expand %{
7775     cmovL_regU(cop, cr, dst, src);
7776   %}
7777 %}
7778 
7779 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7780 %{
       // Matches CMoveL whose second value is a LoadL from memory operand src,
       // under a cmpOpU condition read from rFlagsRegU.  Also the target of the
       // expand in cmovL_memUCF.
7781   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7782 
7783   ins_cost(200); // XXX
7784   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7785   opcode(0x0F, 0x40);
7786   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7787   ins_pipe(pipe_cmov_mem); // XXX
7788 %}
7789 
7790 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
       // CMoveL (memory source via LoadL) under a cmpOpUCF condition.  No
       // encoding of its own: expands to cmovL_memU with the same operands.
7791   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7792   ins_cost(200);
7793   expand %{
7794     cmovL_memU(cop, cr, dst, src);
7795   %}
7796 %}
7797 
7798 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7799 %{
       // Matches CMoveF (float) under a cmpOp condition.  As the format string
       // shows, this is not a cmov opcode: a jump on the negated condition
       // skips over a movss from src to dst.  Scheduled as pipe_slow.
7800   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7801 
7802   ins_cost(200); // XXX
7803   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7804             "movss     $dst, $src\n"
7805     "skip:" %}
7806   ins_encode(enc_cmovf_branch(cop, dst, src));
7807   ins_pipe(pipe_slow);
7808 %}
7809 
7810 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7811 // %{
7812 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7813 
7814 //   ins_cost(200); // XXX
7815 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7816 //             "movss     $dst, $src\n"
7817 //     "skip:" %}
7818 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7819 //   ins_pipe(pipe_slow);
7820 // %}
7821 
7822 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7823 %{
       // Matches CMoveF (float) under a cmpOpU condition read from rFlagsRegU.
       // Same branch-over-movss shape and same enc_cmovf_branch encoding as
       // cmovF_reg.  Also the target of the expand in cmovF_regUCF.
7824   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7825 
7826   ins_cost(200); // XXX
7827   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7828             "movss     $dst, $src\n"
7829     "skip:" %}
7830   ins_encode(enc_cmovf_branch(cop, dst, src));
7831   ins_pipe(pipe_slow);
7832 %}
7833 
7834 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
       // CMoveF under a cmpOpUCF condition.  No encoding of its own: expands to
       // cmovF_regU with the same operands.
7835   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7836   ins_cost(200);
7837   expand %{
7838     cmovF_regU(cop, cr, dst, src);
7839   %}
7840 %}
7841 
7842 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7843 %{
       // Matches CMoveD (double) under a cmpOp condition.  As the format string
       // shows, a jump on the negated condition skips over a movsd from src to
       // dst.  Scheduled as pipe_slow.
7844   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7845 
7846   ins_cost(200); // XXX
7847   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7848             "movsd     $dst, $src\n"
7849     "skip:" %}
7850   ins_encode(enc_cmovd_branch(cop, dst, src));
7851   ins_pipe(pipe_slow);
7852 %}
7853 
7854 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7855 %{
       // Matches CMoveD (double) under a cmpOpU condition read from rFlagsRegU.
       // Same branch-over-movsd shape and same enc_cmovd_branch encoding as
       // cmovD_reg.  Also the target of the expand in cmovD_regUCF.
7856   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7857 
7858   ins_cost(200); // XXX
7859   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7860             "movsd     $dst, $src\n"
7861     "skip:" %}
7862   ins_encode(enc_cmovd_branch(cop, dst, src));
7863   ins_pipe(pipe_slow);
7864 %}
7865 
7866 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
       // CMoveD under a cmpOpUCF condition.  No encoding of its own: expands to
       // cmovD_regU with the same operands.
7867   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7868   ins_cost(200);
7869   expand %{
7870     cmovD_regU(cop, cr, dst, src);
7871   %}
7872 %}
7873 
7874 //----------Arithmetic Instructions--------------------------------------------
7875 //----------Addition Instructions----------------------------------------------
7876 
7877 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7878 %{
       // Int add, register += register: matches AddI dst src and is formatted
       // as "addl $dst, $src".  The flags register is declared killed.
7879   match(Set dst (AddI dst src));
7880   effect(KILL cr);
7881 
7882   format %{ "addl    $dst, $src\t# int" %}
7883   opcode(0x03);
7884   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7885   ins_pipe(ialu_reg_reg);
7886 %}
7887 
7888 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7889 %{
       // Int add, register += immediate: matches AddI dst src with src an immI
       // operand.  Opcode 81 /0; the immediate is emitted through Con8or32.
       // The flags register is declared killed.
7890   match(Set dst (AddI dst src));
7891   effect(KILL cr);
7892 
7893   format %{ "addl    $dst, $src\t# int" %}
7894   opcode(0x81, 0x00); /* /0 id */
7895   ins_encode(OpcSErm(dst, src), Con8or32(src));
7896   ins_pipe( ialu_reg );
7897 %}
7898 
7899 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7900 %{
       // Int add, register += memory: the LoadI of src is folded into the add.
       // The flags register is declared killed.
7901   match(Set dst (AddI dst (LoadI src)));
7902   effect(KILL cr);
7903 
7904   ins_cost(125); // XXX
7905   format %{ "addl    $dst, $src\t# int" %}
7906   opcode(0x03);
7907   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7908   ins_pipe(ialu_reg_mem);
7909 %}
7910 
7911 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7912 %{
       // Int read-modify-write add, memory += register: matches a StoreI to dst
       // of (LoadI dst) + src.  The encoding helpers take (src, dst), i.e. the
       // register operand first.  The flags register is declared killed.
7913   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7914   effect(KILL cr);
7915 
7916   ins_cost(150); // XXX
7917   format %{ "addl    $dst, $src\t# int" %}
7918   opcode(0x01); /* Opcode 01 /r */
7919   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7920   ins_pipe(ialu_mem_reg);
7921 %}
7922 
7923 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7924 %{
       // Int read-modify-write add, memory += immediate: matches a StoreI to
       // dst of (LoadI dst) + src with src an immI operand.  Opcode 81 /0; the
       // /0 extension is passed to RM_opc_mem.  Flags are declared killed.
7925   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7926   effect(KILL cr);
7927 
7928   ins_cost(125); // XXX
7929   format %{ "addl    $dst, $src\t# int" %}
7930   opcode(0x81); /* Opcode 81 /0 id */
7931   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7932   ins_pipe(ialu_mem_imm);
7933 %}
7934 
7935 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7936 %{
       // Int increment of a register: matches AddI of dst with an immI1 operand
       // and is formatted as "incl $dst" (opcode FF /0).  Only selectable when
       // UseIncDec is set.  The flags register is declared killed.
7937   predicate(UseIncDec);
7938   match(Set dst (AddI dst src));
7939   effect(KILL cr);
7940 
7941   format %{ "incl    $dst\t# int" %}
7942   opcode(0xFF, 0x00); // FF /0
7943   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7944   ins_pipe(ialu_reg);
7945 %}
7946 
7947 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7948 %{
       // Int increment of a memory location: matches a StoreI to dst of
       // (LoadI dst) + src with src an immI1 operand; formatted as "incl $dst"
       // (opcode FF /0).  Only selectable when UseIncDec is set.
7949   predicate(UseIncDec);
7950   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7951   effect(KILL cr);
7952 
7953   ins_cost(125); // XXX
7954   format %{ "incl    $dst\t# int" %}
7955   opcode(0xFF); /* Opcode FF /0 */
7956   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7957   ins_pipe(ialu_mem_imm);
7958 %}
7959 
7960 // XXX why does that use AddI
7961 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7962 %{
       // Int decrement of a register, expressed in the match rule as AddI of
       // dst with an immI_M1 operand (presumably the constant -1 -- confirm
       // against the operand definition).  Formatted as "decl $dst"
       // (opcode FF /1).  Only selectable when UseIncDec is set.
7963   predicate(UseIncDec);
7964   match(Set dst (AddI dst src));
7965   effect(KILL cr);
7966 
7967   format %{ "decl    $dst\t# int" %}
7968   opcode(0xFF, 0x01); // FF /1
7969   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7970   ins_pipe(ialu_reg);
7971 %}
7972 
7973 // XXX why does that use AddI
7974 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7975 %{
       // Int decrement of a memory location, expressed as a StoreI to dst of
       // (LoadI dst) + src with src an immI_M1 operand (presumably -1 --
       // confirm against the operand definition).  Formatted as "decl $dst"
       // (opcode FF /1).  Only selectable when UseIncDec is set.
7976   predicate(UseIncDec);
7977   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7978   effect(KILL cr);
7979 
7980   ins_cost(125); // XXX
7981   format %{ "decl    $dst\t# int" %}
7982   opcode(0xFF); /* Opcode FF /1 */
7983   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7984   ins_pipe(ialu_mem_imm);
7985 %}
7986 
7987 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7988 %{
       // Three-operand int add, dst = src0 + src1, formatted as an
       // address-size-prefixed leal (the encoding starts with Opcode(0x67)).
       // Unlike the addI rules there is no flags operand and no KILL effect.
7989   match(Set dst (AddI src0 src1));
7990 
7991   ins_cost(110);
7992   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7993   opcode(0x8D); /* 0x8D /r */
7994   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7995   ins_pipe(ialu_reg_reg);
7996 %}
7997 
7998 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7999 %{
       // Long add, register += register: matches AddL dst src and is formatted
       // as "addq $dst, $src".  The flags register is declared killed.
8000   match(Set dst (AddL dst src));
8001   effect(KILL cr);
8002 
8003   format %{ "addq    $dst, $src\t# long" %}
8004   opcode(0x03);
8005   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8006   ins_pipe(ialu_reg_reg);
8007 %}
8008 
8009 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8010 %{
       // Long add, register += immediate: matches AddL dst src with src an
       // immL32 operand.  Opcode 81 /0; the immediate is emitted through
       // Con8or32.  The flags register is declared killed.
8011   match(Set dst (AddL dst src));
8012   effect(KILL cr);
8013 
8014   format %{ "addq    $dst, $src\t# long" %}
8015   opcode(0x81, 0x00); /* /0 id */
8016   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8017   ins_pipe( ialu_reg );
8018 %}
8019 
8020 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8021 %{
       // Long add, register += memory: the LoadL of src is folded into the add.
       // The flags register is declared killed.
8022   match(Set dst (AddL dst (LoadL src)));
8023   effect(KILL cr);
8024 
8025   ins_cost(125); // XXX
8026   format %{ "addq    $dst, $src\t# long" %}
8027   opcode(0x03);
8028   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8029   ins_pipe(ialu_reg_mem);
8030 %}
8031 
8032 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8033 %{
       // Long read-modify-write add, memory += register: matches a StoreL to
       // dst of (LoadL dst) + src.  The encoding helpers take (src, dst), i.e.
       // the register operand first.  The flags register is declared killed.
8034   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8035   effect(KILL cr);
8036 
8037   ins_cost(150); // XXX
8038   format %{ "addq    $dst, $src\t# long" %}
8039   opcode(0x01); /* Opcode 01 /r */
8040   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8041   ins_pipe(ialu_mem_reg);
8042 %}
8043 
8044 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8045 %{
       // Long read-modify-write add, memory += immediate: matches a StoreL to
       // dst of (LoadL dst) + src with src an immL32 operand.  Opcode 81 /0;
       // the /0 extension is passed to RM_opc_mem.  Flags are declared killed.
8046   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8047   effect(KILL cr);
8048 
8049   ins_cost(125); // XXX
8050   format %{ "addq    $dst, $src\t# long" %}
8051   opcode(0x81); /* Opcode 81 /0 id */
8052   ins_encode(REX_mem_wide(dst),
8053              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8054   ins_pipe(ialu_mem_imm);
8055 %}
8056 
8057 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8058 %{
       // Long increment of a register: matches AddL of dst with an immL1
       // operand and is formatted as "incq $dst" (opcode FF /0).  Only
       // selectable when UseIncDec is set.  The flags register is declared
       // killed.
       //
       // dst is an rRegL: the rule consumes and produces the long value of an
       // AddL and uses the REX_reg_wide encoding, exactly like decL_rReg.  It
       // was previously declared rRegI, which does not agree with the AddL
       // result class.
8059   predicate(UseIncDec);
8060   match(Set dst (AddL dst src));
8061   effect(KILL cr);
8062 
8063   format %{ "incq    $dst\t# long" %}
8064   opcode(0xFF, 0x00); // FF /0
8065   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8066   ins_pipe(ialu_reg);
8067 %}
8068 
8069 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8070 %{
       // Long increment of a memory location: matches a StoreL to dst of
       // (LoadL dst) + src with src an immL1 operand; formatted as "incq $dst"
       // (opcode FF /0).  Only selectable when UseIncDec is set.
8071   predicate(UseIncDec);
8072   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8073   effect(KILL cr);
8074 
8075   ins_cost(125); // XXX
8076   format %{ "incq    $dst\t# long" %}
8077   opcode(0xFF); /* Opcode FF /0 */
8078   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8079   ins_pipe(ialu_mem_imm);
8080 %}
8081 
8082 // XXX why does that use AddL
8083 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8084 %{
       // Long decrement of a register, expressed in the match rule as AddL of
       // dst with an immL_M1 operand (presumably the constant -1 -- confirm
       // against the operand definition).  Formatted as "decq $dst"
       // (opcode FF /1).  Only selectable when UseIncDec is set.
8085   predicate(UseIncDec);
8086   match(Set dst (AddL dst src));
8087   effect(KILL cr);
8088 
8089   format %{ "decq    $dst\t# long" %}
8090   opcode(0xFF, 0x01); // FF /1
8091   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8092   ins_pipe(ialu_reg);
8093 %}
8094 
8095 // XXX why does that use AddL
8096 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8097 %{
       // Long decrement of a memory location, expressed as a StoreL to dst of
       // (LoadL dst) + src with src an immL_M1 operand (presumably -1 --
       // confirm against the operand definition).  Formatted as "decq $dst"
       // (opcode FF /1).  Only selectable when UseIncDec is set.
8098   predicate(UseIncDec);
8099   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8100   effect(KILL cr);
8101 
8102   ins_cost(125); // XXX
8103   format %{ "decq    $dst\t# long" %}
8104   opcode(0xFF); /* Opcode FF /1 */
8105   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8106   ins_pipe(ialu_mem_imm);
8107 %}
8108 
8109 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8110 %{
       // Three-operand long add, dst = src0 + src1 with src1 an immL32 operand,
       // formatted as leaq.  Unlike the addL rules there is no flags operand
       // and no KILL effect.
8111   match(Set dst (AddL src0 src1));
8112 
8113   ins_cost(110);
8114   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8115   opcode(0x8D); /* 0x8D /r */
8116   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8117   ins_pipe(ialu_reg_reg);
8118 %}
8119 
8120 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8121 %{
       // Pointer add, pointer register += long register: matches AddP dst src
       // and is formatted as "addq $dst, $src".  Flags are declared killed.
8122   match(Set dst (AddP dst src));
8123   effect(KILL cr);
8124 
8125   format %{ "addq    $dst, $src\t# ptr" %}
8126   opcode(0x03);
8127   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8128   ins_pipe(ialu_reg_reg);
8129 %}
8130 
8131 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8132 %{
       // Pointer add, pointer register += immediate: matches AddP dst src with
       // src an immL32 operand.  Opcode 81 /0; the immediate is emitted through
       // Con8or32.  The flags register is declared killed.
8133   match(Set dst (AddP dst src));
8134   effect(KILL cr);
8135 
8136   format %{ "addq    $dst, $src\t# ptr" %}
8137   opcode(0x81, 0x00); /* /0 id */
8138   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8139   ins_pipe( ialu_reg );
8140 %}
8141 
8142 // XXX addP mem ops ????
8143 
8144 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8145 %{
       // Three-operand pointer add, dst = src0 + src1 with src1 an immL32
       // operand, formatted as leaq.  No flags operand and no KILL effect.
8146   match(Set dst (AddP src0 src1));
8147 
8148   ins_cost(110);
8149   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8150   opcode(0x8D); /* 0x8D /r */
8151   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8152   ins_pipe(ialu_reg_reg);
8153 %}
8154 
8155 instruct checkCastPP(rRegP dst)
8156 %{
       // Matches CheckCastPP on a pointer register, in place (dst is both input
       // and result).  Declared size(0) with an empty encoding, so the rule
       // emits no bytes; the format string is only a listing annotation.
8157   match(Set dst (CheckCastPP dst));
8158 
8159   size(0);
8160   format %{ "# checkcastPP of $dst" %}
8161   ins_encode(/* empty encoding */);
8162   ins_pipe(empty);
8163 %}
8164 
8165 instruct castPP(rRegP dst)
8166 %{
       // Matches CastPP on a pointer register, in place.  Declared size(0)
       // with an empty encoding, so the rule emits no bytes.
8167   match(Set dst (CastPP dst));
8168 
8169   size(0);
8170   format %{ "# castPP of $dst" %}
8171   ins_encode(/* empty encoding */);
8172   ins_pipe(empty);
8173 %}
8174 
8175 instruct castII(rRegI dst)
8176 %{
       // Matches CastII on an int register, in place.  Declared size(0) with an
       // empty encoding and an explicit ins_cost(0), so the rule emits no bytes
       // and adds nothing to the match cost.
8177   match(Set dst (CastII dst));
8178 
8179   size(0);
8180   format %{ "# castII of $dst" %}
8181   ins_encode(/* empty encoding */);
8182   ins_cost(0);
8183   ins_pipe(empty);
8184 %}
8185 
8186 // LoadP-locked same as a regular LoadP when used with compare-swap
8187 instruct loadPLocked(rRegP dst, memory mem)
8188 %{
       // Matches LoadPLocked and is formatted as a plain "movq $dst, $mem"
       // (opcode 8B); no lock prefix appears in the encoding.
8189   match(Set dst (LoadPLocked mem));
8190 
8191   ins_cost(125); // XXX
8192   format %{ "movq    $dst, $mem\t# ptr locked" %}
8193   opcode(0x8B);
8194   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8195   ins_pipe(ialu_reg_mem); // XXX
8196 %}
8197 
8198 // LoadL-locked - same as a regular LoadL when used with compare-swap
8199 instruct loadLLocked(rRegL dst, memory mem)
8200 %{
       // Matches LoadLLocked and is formatted as a plain "movq $dst, $mem"
       // (opcode 8B); no lock prefix appears in the encoding.
8201   match(Set dst (LoadLLocked mem));
8202 
8203   ins_cost(125); // XXX
8204   format %{ "movq    $dst, $mem\t# long locked" %}
8205   opcode(0x8B);
8206   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8207   ins_pipe(ialu_reg_mem); // XXX
8208 %}
8209 
8210 // Conditional-store of the updated heap-top.
8211 // Used during allocation of the shared heap.
8212 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8213 
8214 instruct storePConditional(memory heap_top_ptr,
8215                            rax_RegP oldval, rRegP newval,
8216                            rFlagsReg cr)
8217 %{
       // Matches StorePConditional and produces the flags register cr as its
       // result.  oldval is constrained to the rax_RegP class.  The encoding is
       // lock_prefix followed by opcode 0F B1 (cmpxchgq per the format string).
       // NOTE(review): unlike storeIConditional and storeLConditional, this
       // rule declares no KILL oldval effect -- confirm that is intentional.
8218   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8219 
8220   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8221             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8222   opcode(0x0F, 0xB1);
8223   ins_encode(lock_prefix,
8224              REX_reg_mem_wide(newval, heap_top_ptr),
8225              OpcP, OpcS,
8226              reg_mem(newval, heap_top_ptr));
8227   ins_pipe(pipe_cmpxchg);
8228 %}
8229 
8230 // Conditional-store of an int value.
8231 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8232 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8233 %{
       // Matches StoreIConditional and produces the flags register cr as its
       // result.  oldval is constrained to the rax_RegI class and is declared
       // killed.  Encoding: lock_prefix, then opcode 0F B1 (cmpxchgl per the
       // format string).
8234   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8235   effect(KILL oldval);
8236 
8237   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8238   opcode(0x0F, 0xB1);
8239   ins_encode(lock_prefix,
8240              REX_reg_mem(newval, mem),
8241              OpcP, OpcS,
8242              reg_mem(newval, mem));
8243   ins_pipe(pipe_cmpxchg);
8244 %}
8245 
8246 // Conditional-store of a long value.
8247 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8248 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8249 %{
       // Matches StoreLConditional and produces the flags register cr as its
       // result.  oldval is constrained to the rax_RegL class and is declared
       // killed.  Encoding: lock_prefix, then opcode 0F B1 (cmpxchgq per the
       // format string).
8250   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8251   effect(KILL oldval);
8252 
8253   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8254   opcode(0x0F, 0xB1);
8255   ins_encode(lock_prefix,
8256              REX_reg_mem_wide(newval, mem),
8257              OpcP, OpcS,
8258              reg_mem(newval, mem));
8259   ins_pipe(pipe_cmpxchg);
8260 %}
8261 
8262 
8263 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8264 instruct compareAndSwapP(rRegI res,
8265                          memory mem_ptr,
8266                          rax_RegP oldval, rRegP newval,
8267                          rFlagsReg cr)
8268 %{
       // Matches CompareAndSwapP and delivers its outcome in the int register
       // res.  Per the format string the sequence is a lock-prefixed cmpxchgq,
       // then sete on res, then movzbl res, res.  Both the flags register and
       // oldval (rax_RegP class) are declared killed.
8269   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8270   effect(KILL cr, KILL oldval);
8271 
8272   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8273             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8274             "sete    $res\n\t"
8275             "movzbl  $res, $res" %}
8276   opcode(0x0F, 0xB1);
8277   ins_encode(lock_prefix,
8278              REX_reg_mem_wide(newval, mem_ptr),
8279              OpcP, OpcS,
8280              reg_mem(newval, mem_ptr),
8281              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8282              REX_reg_breg(res, res), // movzbl
8283              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8284   ins_pipe( pipe_cmpxchg );
8285 %}
8286 
8287 instruct compareAndSwapL(rRegI res,
8288                          memory mem_ptr,
8289                          rax_RegL oldval, rRegL newval,
8290                          rFlagsReg cr)
8291 %{
       // Matches CompareAndSwapL and delivers its outcome in the int register
       // res.  Per the format string the sequence is a lock-prefixed cmpxchgq,
       // then sete on res, then movzbl res, res.  Both the flags register and
       // oldval (rax_RegL class) are declared killed.
8292   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8293   effect(KILL cr, KILL oldval);
8294 
8295   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8296             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8297             "sete    $res\n\t"
8298             "movzbl  $res, $res" %}
8299   opcode(0x0F, 0xB1);
8300   ins_encode(lock_prefix,
8301              REX_reg_mem_wide(newval, mem_ptr),
8302              OpcP, OpcS,
8303              reg_mem(newval, mem_ptr),
8304              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8305              REX_reg_breg(res, res), // movzbl
8306              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8307   ins_pipe( pipe_cmpxchg );
8308 %}
8309 
8310 instruct compareAndSwapI(rRegI res,
8311                          memory mem_ptr,
8312                          rax_RegI oldval, rRegI newval,
8313                          rFlagsReg cr)
8314 %{
       // Matches CompareAndSwapI and delivers its outcome in the int register
       // res.  Per the format string the sequence is a lock-prefixed cmpxchgl,
       // then sete on res, then movzbl res, res.  Both the flags register and
       // oldval (rax_RegI class) are declared killed.
8315   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8316   effect(KILL cr, KILL oldval);
8317 
8318   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8319             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8320             "sete    $res\n\t"
8321             "movzbl  $res, $res" %}
8322   opcode(0x0F, 0xB1);
8323   ins_encode(lock_prefix,
8324              REX_reg_mem(newval, mem_ptr),
8325              OpcP, OpcS,
8326              reg_mem(newval, mem_ptr),
8327              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8328              REX_reg_breg(res, res), // movzbl
8329              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8330   ins_pipe( pipe_cmpxchg );
8331 %}
8332 
8333 
8334 instruct compareAndSwapN(rRegI res,
8335                           memory mem_ptr,
8336                           rax_RegN oldval, rRegN newval,
8337                           rFlagsReg cr) %{
       // Matches CompareAndSwapN (compressed pointer) and delivers its outcome
       // in the int register res.  Per the format string the sequence is a
       // lock-prefixed cmpxchgl, then sete on res, then movzbl res, res.  Both
       // the flags register and oldval (rax_RegN class) are declared killed.
8338   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8339   effect(KILL cr, KILL oldval);
8340 
8341   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8342             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8343             "sete    $res\n\t"
8344             "movzbl  $res, $res" %}
8345   opcode(0x0F, 0xB1);
8346   ins_encode(lock_prefix,
8347              REX_reg_mem(newval, mem_ptr),
8348              OpcP, OpcS,
8349              reg_mem(newval, mem_ptr),
8350              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8351              REX_reg_breg(res, res), // movzbl
8352              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8353   ins_pipe( pipe_cmpxchg );
8354 %}
8355 
8356 //----------Subtraction Instructions-------------------------------------------
8357 
8358 // Integer Subtraction Instructions
8359 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8360 %{
       // Int subtract, register -= register: matches SubI dst src and is
       // formatted as "subl $dst, $src".  The flags register is declared killed.
8361   match(Set dst (SubI dst src));
8362   effect(KILL cr);
8363 
8364   format %{ "subl    $dst, $src\t# int" %}
8365   opcode(0x2B);
8366   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8367   ins_pipe(ialu_reg_reg);
8368 %}
8369 
8370 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8371 %{
       // Int subtract, register -= immediate: matches SubI dst src with src an
       // immI operand.  Opcode 81 /5; the immediate is emitted through
       // Con8or32.  The flags register is declared killed.
8372   match(Set dst (SubI dst src));
8373   effect(KILL cr);
8374 
8375   format %{ "subl    $dst, $src\t# int" %}
8376   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8377   ins_encode(OpcSErm(dst, src), Con8or32(src));
8378   ins_pipe(ialu_reg);
8379 %}
8380 
8381 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8382 %{
       // Int subtract, register -= memory: the LoadI of src is folded into the
       // subtract.  The flags register is declared killed.
8383   match(Set dst (SubI dst (LoadI src)));
8384   effect(KILL cr);
8385 
8386   ins_cost(125);
8387   format %{ "subl    $dst, $src\t# int" %}
8388   opcode(0x2B);
8389   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8390   ins_pipe(ialu_reg_mem);
8391 %}
8392 
8393 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8394 %{
       // Int read-modify-write subtract, memory -= register: matches a StoreI
       // to dst of (LoadI dst) - src.  The encoding helpers take (src, dst),
       // i.e. the register operand first.  Flags are declared killed.
8395   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8396   effect(KILL cr);
8397 
8398   ins_cost(150);
8399   format %{ "subl    $dst, $src\t# int" %}
8400   opcode(0x29); /* Opcode 29 /r */
8401   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8402   ins_pipe(ialu_mem_reg);
8403 %}
8404 
8405 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8406 %{
       // Int read-modify-write subtract, memory -= immediate: matches a StoreI
       // to dst of (LoadI dst) - src with src an immI operand.  Opcode 81 /5;
       // the /5 extension is passed to RM_opc_mem.  Flags are declared killed.
8407   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8408   effect(KILL cr);
8409 
8410   ins_cost(125); // XXX
8411   format %{ "subl    $dst, $src\t# int" %}
8412   opcode(0x81); /* Opcode 81 /5 id */
8413   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8414   ins_pipe(ialu_mem_imm);
8415 %}
8416 
8417 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8418 %{
       // Long subtract, register -= register: matches SubL dst src and is
       // formatted as "subq $dst, $src".  The flags register is declared killed.
8419   match(Set dst (SubL dst src));
8420   effect(KILL cr);
8421 
8422   format %{ "subq    $dst, $src\t# long" %}
8423   opcode(0x2B);
8424   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8425   ins_pipe(ialu_reg_reg);
8426 %}
8427 
8428 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8429 %{
       // Long subtract, register -= immediate: matches SubL dst src with src an
       // immL32 operand and is formatted as "subq $dst, $src".  Opcode 81 /5;
       // the immediate is emitted through Con8or32.  The flags register is
       // declared killed.
       //
       // dst is an rRegL: the rule consumes and produces the long value of a
       // SubL and uses the OpcSErm_wide encoding, matching subL_rReg and
       // addL_rReg_imm.  It was previously declared rRegI, which does not
       // agree with the SubL result class.
8430   match(Set dst (SubL dst src));
8431   effect(KILL cr);
8432 
8433   format %{ "subq    $dst, $src\t# long" %}
8434   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8435   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8436   ins_pipe(ialu_reg);
8437 %}
8438 
8439 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8440 %{
       // Long subtract, register -= memory: the LoadL of src is folded into the
       // subtract.  The flags register is declared killed.
8441   match(Set dst (SubL dst (LoadL src)));
8442   effect(KILL cr);
8443 
8444   ins_cost(125);
8445   format %{ "subq    $dst, $src\t# long" %}
8446   opcode(0x2B);
8447   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8448   ins_pipe(ialu_reg_mem);
8449 %}
8450 
8451 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8452 %{
       // Long read-modify-write subtract, memory -= register: matches a StoreL
       // to dst of (LoadL dst) - src.  The encoding helpers take (src, dst),
       // i.e. the register operand first.  Flags are declared killed.
8453   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8454   effect(KILL cr);
8455 
8456   ins_cost(150);
8457   format %{ "subq    $dst, $src\t# long" %}
8458   opcode(0x29); /* Opcode 29 /r */
8459   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8460   ins_pipe(ialu_mem_reg);
8461 %}
8462 
8463 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8464 %{
       // Long read-modify-write subtract, memory -= immediate: matches a StoreL
       // to dst of (LoadL dst) - src with src an immL32 operand.  Opcode 81 /5;
       // the /5 extension is passed to RM_opc_mem.  Flags are declared killed.
8465   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8466   effect(KILL cr);
8467 
8468   ins_cost(125); // XXX
8469   format %{ "subq    $dst, $src\t# long" %}
8470   opcode(0x81); /* Opcode 81 /5 id */
8471   ins_encode(REX_mem_wide(dst),
8472              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8473   ins_pipe(ialu_mem_imm);
8474 %}
8475 
8476 // Subtract from a pointer
8477 // XXX hmpf???
8478 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8479 %{
       // Matches AddP of dst with (SubI zero src), where zero is an immI0
       // operand, and is formatted as "subq $dst, $src" ("ptr - int").
       // NOTE(review): src is an rRegI but the encoding uses the _wide REX
       // helper and a q-suffixed mnemonic -- confirm how the upper half of src
       // is expected to be defined, and whether this pattern can occur at all.
8480   match(Set dst (AddP dst (SubI zero src)));
8481   effect(KILL cr);
8482 
8483   format %{ "subq    $dst, $src\t# ptr - int" %}
8484   opcode(0x2B);
8485   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8486   ins_pipe(ialu_reg_reg);
8487 %}
8488 
8489 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8490 %{
       // Int negate of a register: matches SubI zero dst with zero an immI0
       // operand and is formatted as "negl $dst" (opcode F7 /3).  The flags
       // register is declared killed.
8491   match(Set dst (SubI zero dst));
8492   effect(KILL cr);
8493 
8494   format %{ "negl    $dst\t# int" %}
8495   opcode(0xF7, 0x03);  // Opcode F7 /3
8496   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8497   ins_pipe(ialu_reg);
8498 %}
8499 
8500 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8501 %{
       // Int negate of a memory location: matches a StoreI to dst of
       // zero - (LoadI dst) and is formatted as "negl $dst" (opcode F7 /3; the
       // /3 extension reaches RM_opc_mem through `secondary`).
       // NOTE(review): the pipe class is ialu_reg although dst is a memory
       // operand; the other memory read-modify-write rules in this section use
       // ialu_mem_imm or ialu_mem_reg -- confirm.
8502   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8503   effect(KILL cr);
8504 
8505   format %{ "negl    $dst\t# int" %}
8506   opcode(0xF7, 0x03);  // Opcode F7 /3
8507   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8508   ins_pipe(ialu_reg);
8509 %}
8510 
8511 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8512 %{
       // Long negate of a register: matches SubL zero dst with zero an immL0
       // operand and is formatted as "negq $dst" (opcode F7 /3).  The flags
       // register is declared killed.
8513   match(Set dst (SubL zero dst));
8514   effect(KILL cr);
8515 
8516   format %{ "negq    $dst\t# long" %}
8517   opcode(0xF7, 0x03);  // Opcode F7 /3
8518   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8519   ins_pipe(ialu_reg);
8520 %}
8521 
8522 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8523 %{
       // Long negate of a memory location: matches a StoreL to dst of
       // zero - (LoadL dst) and is formatted as "negq $dst" (opcode F7 /3; the
       // /3 extension reaches RM_opc_mem through `secondary`).
       // NOTE(review): the pipe class is ialu_reg although dst is a memory
       // operand; the other memory read-modify-write rules in this section use
       // ialu_mem_imm or ialu_mem_reg -- confirm.
8524   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8525   effect(KILL cr);
8526 
8527   format %{ "negq    $dst\t# long" %}
8528   opcode(0xF7, 0x03);  // Opcode F7 /3
8529   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8530   ins_pipe(ialu_reg);
8531 %}
8532 
8533 
8534 //----------Multiplication/Division Instructions-------------------------------
8535 // Integer Multiplication Instructions
8536 // Multiply Register
8537 
8538 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8539 %{
       // Int multiply, register *= register: matches MulI dst src and is
       // formatted as two-operand "imull $dst, $src" (opcode 0F AF).  The flags
       // register is declared killed.
8540   match(Set dst (MulI dst src));
8541   effect(KILL cr);
8542 
8543   ins_cost(300);
8544   format %{ "imull   $dst, $src\t# int" %}
8545   opcode(0x0F, 0xAF);
8546   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8547   ins_pipe(ialu_reg_reg_alu0);
8548 %}
8549 
8550 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8551 %{
       // Int multiply by an immediate in three-operand form, dst = src * imm:
       // dst is not an input of the match rule.  Opcode 69 /r; the immediate is
       // emitted through Con8or32.  The flags register is declared killed.
8552   match(Set dst (MulI src imm));
8553   effect(KILL cr);
8554 
8555   ins_cost(300);
8556   format %{ "imull   $dst, $src, $imm\t# int" %}
8557   opcode(0x69); /* 69 /r id */
8558   ins_encode(REX_reg_reg(dst, src),
8559              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8560   ins_pipe(ialu_reg_reg_alu0);
8561 %}
8562 
8563 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8564 %{
       // Int multiply, register *= memory: the LoadI of src is folded into the
       // multiply (opcode 0F AF).  The flags register is declared killed.
8565   match(Set dst (MulI dst (LoadI src)));
8566   effect(KILL cr);
8567 
8568   ins_cost(350);
8569   format %{ "imull   $dst, $src\t# int" %}
8570   opcode(0x0F, 0xAF);
8571   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8572   ins_pipe(ialu_reg_mem_alu0);
8573 %}
8574 
8575 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8576 %{
       // Int multiply of a memory operand by an immediate in three-operand
       // form, dst = [src] * imm: the LoadI of src is folded in and dst is not
       // an input.  Opcode 69 /r.  The flags register is declared killed.
8577   match(Set dst (MulI (LoadI src) imm));
8578   effect(KILL cr);
8579 
8580   ins_cost(300);
8581   format %{ "imull   $dst, $src, $imm\t# int" %}
8582   opcode(0x69); /* 69 /r id */
8583   ins_encode(REX_reg_mem(dst, src),
8584              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8585   ins_pipe(ialu_reg_mem_alu0);
8586 %}
8587 
8588 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8589 %{
       // Long multiply, register *= register: matches MulL dst src and is
       // formatted as two-operand "imulq $dst, $src" (opcode 0F AF).  The flags
       // register is declared killed.
8590   match(Set dst (MulL dst src));
8591   effect(KILL cr);
8592 
8593   ins_cost(300);
8594   format %{ "imulq   $dst, $src\t# long" %}
8595   opcode(0x0F, 0xAF);
8596   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8597   ins_pipe(ialu_reg_reg_alu0);
8598 %}
8599 
8600 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8601 %{
       // Long multiply by an immediate in three-operand form, dst = src * imm
       // with imm an immL32 operand: dst is not an input of the match rule.
       // Opcode 69 /r.  The flags register is declared killed.
8602   match(Set dst (MulL src imm));
8603   effect(KILL cr);
8604 
8605   ins_cost(300);
8606   format %{ "imulq   $dst, $src, $imm\t# long" %}
8607   opcode(0x69); /* 69 /r id */
8608   ins_encode(REX_reg_reg_wide(dst, src),
8609              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8610   ins_pipe(ialu_reg_reg_alu0);
8611 %}
8612 
8613 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8614 %{
       // Long multiply, register *= memory: the LoadL of src is folded into the
       // multiply (opcode 0F AF).  The flags register is declared killed.
8615   match(Set dst (MulL dst (LoadL src)));
8616   effect(KILL cr);
8617 
8618   ins_cost(350);
8619   format %{ "imulq   $dst, $src\t# long" %}
8620   opcode(0x0F, 0xAF);
8621   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8622   ins_pipe(ialu_reg_mem_alu0);
8623 %}
8624 
8625 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8626 %{
       // Long multiply of a memory operand by an immediate in three-operand
       // form, dst = [src] * imm: the LoadL of src is folded in and dst is not
       // an input.  Opcode 69 /r.  The flags register is declared killed.
8627   match(Set dst (MulL (LoadL src) imm));
8628   effect(KILL cr);
8629 
8630   ins_cost(300);
8631   format %{ "imulq   $dst, $src, $imm\t# long" %}
8632   opcode(0x69); /* 69 /r id */
8633   ins_encode(REX_reg_mem_wide(dst, src),
8634              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8635   ins_pipe(ialu_reg_mem_alu0);
8636 %}
8637 
8638 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8639 %{
       // Matches MulHiL src rax.  The result operand is in the rdx_RegL class,
       // one input is in the rax_RegL class (declared USE_KILL), and src is
       // restricted to the no_rax_RegL class.  Formatted as the one-operand
       // "imulq RDX:RAX, RAX, $src" (opcode F7 /5), encoded on src only.
8640   match(Set dst (MulHiL src rax));
8641   effect(USE_KILL rax, KILL cr);
8642 
8643   ins_cost(300);
8644   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8645   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8646   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8647   ins_pipe(ialu_reg_reg_alu0);
8648 %}
8649 
8650 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8651                    rFlagsReg cr)
8652 %{
       // Int divide: matches DivI rax div with the result in the rax operand;
       // rdx and the flags register are declared killed, and the divisor is
       // restricted to the no_rax_rdx_RegI class.
       // The format string documents a guard ahead of the divide: when rax is
       // 0x80000000 and $div is -1, rdx is zeroed and the idivl is skipped;
       // otherwise cdql and idivl (opcode F7 /7) run.  cdql_enc(div) precedes
       // the F7 /7 bytes in the encoding and presumably emits that guard plus
       // the cdql -- confirm against its definition.
8653   match(Set rax (DivI rax div));
8654   effect(KILL rdx, KILL cr);
8655 
8656   ins_cost(30*100+10*100); // XXX
8657   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8658             "jne,s   normal\n\t"
8659             "xorl    rdx, rdx\n\t"
8660             "cmpl    $div, -1\n\t"
8661             "je,s    done\n"
8662     "normal: cdql\n\t"
8663             "idivl   $div\n"
8664     "done:"        %}
8665   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8666   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8667   ins_pipe(ialu_reg_reg_alu0);
8668 %}
8669 
8670 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8671                    rFlagsReg cr)
8672 %{
       // Long divide: matches DivL rax div with the result in the rax operand;
       // rdx and the flags register are declared killed, and the divisor is
       // restricted to the no_rax_rdx_RegL class.
       // The format string documents a guard ahead of the divide: rdx is
       // loaded with 0x8000000000000000 and compared with rax; when they are
       // equal and $div is -1, rdx is zeroed and the idivq is skipped;
       // otherwise cdqq and idivq (opcode F7 /7) run.  cdqq_enc(div) precedes
       // the F7 /7 bytes in the encoding and presumably emits that guard plus
       // the cdqq -- confirm against its definition.
8673   match(Set rax (DivL rax div));
8674   effect(KILL rdx, KILL cr);
8675 
8676   ins_cost(30*100+10*100); // XXX
8677   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8678             "cmpq    rax, rdx\n\t"
8679             "jne,s   normal\n\t"
8680             "xorl    rdx, rdx\n\t"
8681             "cmpq    $div, -1\n\t"
8682             "je,s    done\n"
8683     "normal: cdqq\n\t"
8684             "idivq   $div\n"
8685     "done:"        %}
8686   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8687   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8688   ins_pipe(ialu_reg_reg_alu0);
8689 %}
8690 
8691 // Integer DIVMOD with Register, both quotient and mod results
8692 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8693                              rFlagsReg cr)
8694 %{
8695   match(DivModI rax div);
8696   effect(KILL cr);
8697   // Matches DivModI directly (no Set); only the flags are declared killed, rdx is not.
8698   ins_cost(30*100+10*100); // XXX
8699   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8700             "jne,s   normal\n\t"
8701             "xorl    rdx, rdx\n\t"
8702             "cmpl    $div, -1\n\t"
8703             "je,s    done\n"
8704     "normal: cdql\n\t"
8705             "idivl   $div\n"
8706     "done:"        %}
8707   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8708   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8709   ins_pipe(pipe_slow);
8710 %}
8711 
8712 // Long DIVMOD with Register, both quotient and mod results
8713 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8714                              rFlagsReg cr)
8715 %{
8716   match(DivModL rax div);
8717   effect(KILL cr);
8718   // Matches DivModL directly (no Set); only the flags are declared killed, rdx is not.
8719   ins_cost(30*100+10*100); // XXX
8720   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8721             "cmpq    rax, rdx\n\t"
8722             "jne,s   normal\n\t"
8723             "xorl    rdx, rdx\n\t"
8724             "cmpq    $div, -1\n\t"
8725             "je,s    done\n"
8726     "normal: cdqq\n\t"
8727             "idivq   $div\n"
8728     "done:"        %}
8729   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8730   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8731   ins_pipe(pipe_slow);
8732 %}
8733 
8734 //----------- DivL-By-Constant-Expansions--------------------------------------
8735 // DivI cases are handled by the compiler
8736 
8737 // Magic constant, reciprocal of 10 (loaded for the divL_10 expansion)
8738 instruct loadConL_0x6666666666666667(rRegL dst)
8739 %{
8740   effect(DEF dst);
8741   // No match rule: instantiated only from expand blocks. Format shows the same 16-digit constant that is encoded.
8742   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8743   ins_encode(load_immL(dst, 0x6666666666666667));
8744   ins_pipe(ialu_reg);
8745 %}
8746 
8747 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8748 %{
8749   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8750   // No match rule: instantiated only from expand blocks (e.g. divL_10).
8751   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8752   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8753   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8754   ins_pipe(ialu_reg_reg_alu0);
8755 %}
8756 
8757 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8758 %{
8759   effect(USE_DEF dst, KILL cr);
8760   // No match rule: expand-only helper; the count is the literal 0x3F handed to the encoder.
8761   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8762   opcode(0xC1, 0x7); /* C1 /7 ib */
8763   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8764   ins_pipe(ialu_reg);
8765 %}
8766 
8767 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8768 %{
8769   effect(USE_DEF dst, KILL cr);
8770   // No match rule: expand-only helper; the count is the literal 0x2 handed to the encoder.
8771   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8772   opcode(0xC1, 0x7); /* C1 /7 ib */
8773   ins_encode(reg_opc_imm_wide(dst, 0x2));
8774   ins_pipe(ialu_reg);
8775 %}
8776 
8777 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8778 %{
8779   match(Set dst (DivL src div));
8780   // Long division by the immL10 constant, expanded into a multiply-high, two shifts and a subtract.
8781   ins_cost((5+8)*100);
8782   expand %{
8783     rax_RegL rax;                     // Killed temp
8784     rFlagsReg cr;                     // Killed
8785     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8786     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8787     sarL_rReg_63(src, cr);            // sarq  src, 63 (src is overwritten in place)
8788     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8789     subL_rReg(dst, src, cr);          // subq  rdx, src (long subtract)
8790   %}
8791 %}
8792 
8793 //-----------------------------------------------------------------------------
8794 
8795 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8796                    rFlagsReg cr)
8797 %{
8798   match(Set rdx (ModI rax div));
8799   effect(KILL rax, KILL cr);
8800   // rdx = rax % $div. Per the format, for rax == 0x80000000 and $div == -1 idivl is skipped and rdx stays zeroed.
8801   ins_cost(300); // XXX
8802   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8803             "jne,s   normal\n\t"
8804             "xorl    rdx, rdx\n\t"
8805             "cmpl    $div, -1\n\t"
8806             "je,s    done\n"
8807     "normal: cdql\n\t"
8808             "idivl   $div\n"
8809     "done:"        %}
8810   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8811   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8812   ins_pipe(ialu_reg_reg_alu0);
8813 %}
8814 
8815 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8816                    rFlagsReg cr)
8817 %{
8818   match(Set rdx (ModL rax div));
8819   effect(KILL rax, KILL cr);
8820   // rdx = rax % $div. Per the format, for rax == 0x8000000000000000 and $div == -1 idivq is skipped and rdx stays zeroed.
8821   ins_cost(300); // XXX
8822   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8823             "cmpq    rax, rdx\n\t"
8824             "jne,s   normal\n\t"
8825             "xorl    rdx, rdx\n\t"
8826             "cmpq    $div, -1\n\t"
8827             "je,s    done\n"
8828     "normal: cdqq\n\t"
8829             "idivq   $div\n"
8830     "done:"        %}
8831   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8832   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8833   ins_pipe(ialu_reg_reg_alu0);
8834 %}
8835 
8836 // Integer Shift Instructions
8837 // Shift Left by one (int, register)
8838 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8839 %{
8840   match(Set dst (LShiftI dst shift));
8841   effect(KILL cr);
8842   // The immI1 count is matched but not passed to ins_encode; only opcode and register are encoded.
8843   format %{ "sall    $dst, $shift" %}
8844   opcode(0xD1, 0x4); /* D1 /4 */
8845   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8846   ins_pipe(ialu_reg);
8847 %}
8848 
8849 // Shift Left by one (int, in memory)
8850 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8851 %{
8852   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8853   effect(KILL cr);
8854   // Read-modify-write: the load and the store in the match use the same memory operand.
8855   format %{ "sall    $dst, $shift\t" %}
8856   opcode(0xD1, 0x4); /* D1 /4 */
8857   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8858   ins_pipe(ialu_mem_imm);
8859 %}
8860 
8861 // Shift Left by 8-bit immediate (int, register)
8862 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8863 %{
8864   match(Set dst (LShiftI dst shift));
8865   effect(KILL cr);
8866   // dst is both input and result; the count is handed to the reg_opc_imm encoding.
8867   format %{ "sall    $dst, $shift" %}
8868   opcode(0xC1, 0x4); /* C1 /4 ib */
8869   ins_encode(reg_opc_imm(dst, shift));
8870   ins_pipe(ialu_reg);
8871 %}
8872 
8873 // Shift Left by 8-bit immediate (int, in memory)
8874 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8875 %{
8876   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8877   effect(KILL cr);
8878   // Read-modify-write of the int at $dst; the count is appended via Con8or32(shift).
8879   format %{ "sall    $dst, $shift" %}
8880   opcode(0xC1, 0x4); /* C1 /4 ib */
8881   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8882   ins_pipe(ialu_mem_imm);
8883 %}
8884 
8885 // Shift Left by variable (int, register; count in an rcx_RegI operand)
8886 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8887 %{
8888   match(Set dst (LShiftI dst shift));
8889   effect(KILL cr);
8890   // The count register is matched but not passed to ins_encode.
8891   format %{ "sall    $dst, $shift" %}
8892   opcode(0xD3, 0x4); /* D3 /4 */
8893   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8894   ins_pipe(ialu_reg_reg);
8895 %}
8896 
8897 // Shift Left by variable (int, in memory; count in an rcx_RegI operand)
8898 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8899 %{
8900   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8901   effect(KILL cr);
8902   // Read-modify-write of the int at $dst; the count register is not passed to ins_encode.
8903   format %{ "sall    $dst, $shift" %}
8904   opcode(0xD3, 0x4); /* D3 /4 */
8905   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8906   ins_pipe(ialu_mem_reg);
8907 %}
8908 
8909 // Arithmetic shift right by one (int, register)
8910 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8911 %{
8912   match(Set dst (RShiftI dst shift));
8913   effect(KILL cr);
8914   // The immI1 count is matched but not passed to ins_encode; only opcode and register are encoded.
8915   format %{ "sarl    $dst, $shift" %}
8916   opcode(0xD1, 0x7); /* D1 /7 */
8917   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8918   ins_pipe(ialu_reg);
8919 %}
8920 
8921 // Arithmetic shift right by one (int, in memory)
8922 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8923 %{
8924   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8925   effect(KILL cr);
8926   // Read-modify-write: the load and the store in the match use the same memory operand.
8927   format %{ "sarl    $dst, $shift" %}
8928   opcode(0xD1, 0x7); /* D1 /7 */
8929   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8930   ins_pipe(ialu_mem_imm);
8931 %}
8932 
8933 // Arithmetic Shift Right by 8-bit immediate (int, register)
8934 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8935 %{
8936   match(Set dst (RShiftI dst shift));
8937   effect(KILL cr);
8938   // Register-only operation: uses the ialu_reg pipe class like salI_rReg_imm and shrI_rReg_imm.
8939   format %{ "sarl    $dst, $shift" %}
8940   opcode(0xC1, 0x7); /* C1 /7 ib */
8941   ins_encode(reg_opc_imm(dst, shift));
8942   ins_pipe(ialu_reg);
8943 %}
8944 
8945 // Arithmetic Shift Right by 8-bit immediate (int, in memory)
8946 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8947 %{
8948   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8949   effect(KILL cr);
8950   // Read-modify-write of the int at $dst; the count is appended via Con8or32(shift).
8951   format %{ "sarl    $dst, $shift" %}
8952   opcode(0xC1, 0x7); /* C1 /7 ib */
8953   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8954   ins_pipe(ialu_mem_imm);
8955 %}
8956 
8957 // Arithmetic Shift Right by variable (int, register; count in an rcx_RegI operand)
8958 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8959 %{
8960   match(Set dst (RShiftI dst shift));
8961   effect(KILL cr);
8962   // The count register is matched but not passed to ins_encode.
8963   format %{ "sarl    $dst, $shift" %}
8964   opcode(0xD3, 0x7); /* D3 /7 */
8965   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8966   ins_pipe(ialu_reg_reg);
8967 %}
8968 
8969 // Arithmetic Shift Right by variable (int, in memory; count in an rcx_RegI operand)
8970 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8971 %{
8972   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8973   effect(KILL cr);
8974   // Read-modify-write of the int at $dst; the count register is not passed to ins_encode.
8975   format %{ "sarl    $dst, $shift" %}
8976   opcode(0xD3, 0x7); /* D3 /7 */
8977   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8978   ins_pipe(ialu_mem_reg);
8979 %}
8980 
8981 // Logical shift right by one (int, register)
8982 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8983 %{
8984   match(Set dst (URShiftI dst shift));
8985   effect(KILL cr);
8986   // The immI1 count is matched but not passed to ins_encode; only opcode and register are encoded.
8987   format %{ "shrl    $dst, $shift" %}
8988   opcode(0xD1, 0x5); /* D1 /5 */
8989   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8990   ins_pipe(ialu_reg);
8991 %}
8992 
8993 // Logical shift right by one (int, in memory)
8994 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8995 %{
8996   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8997   effect(KILL cr);
8998   // Read-modify-write: the load and the store in the match use the same memory operand.
8999   format %{ "shrl    $dst, $shift" %}
9000   opcode(0xD1, 0x5); /* D1 /5 */
9001   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9002   ins_pipe(ialu_mem_imm);
9003 %}
9004 
9005 // Logical Shift Right by 8-bit immediate (int, register)
9006 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9007 %{
9008   match(Set dst (URShiftI dst shift));
9009   effect(KILL cr);
9010   // dst is both input and result; the count is handed to the reg_opc_imm encoding.
9011   format %{ "shrl    $dst, $shift" %}
9012   opcode(0xC1, 0x5); /* C1 /5 ib */
9013   ins_encode(reg_opc_imm(dst, shift));
9014   ins_pipe(ialu_reg);
9015 %}
9016 
9017 // Logical Shift Right by 8-bit immediate (int, in memory)
9018 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9019 %{
9020   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9021   effect(KILL cr);
9022   // Read-modify-write of the int at $dst; the count is appended via Con8or32(shift).
9023   format %{ "shrl    $dst, $shift" %}
9024   opcode(0xC1, 0x5); /* C1 /5 ib */
9025   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9026   ins_pipe(ialu_mem_imm);
9027 %}
9028 
9029 // Logical Shift Right by variable (int, register; count in an rcx_RegI operand)
9030 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9031 %{
9032   match(Set dst (URShiftI dst shift));
9033   effect(KILL cr);
9034   // The count register is matched but not passed to ins_encode.
9035   format %{ "shrl    $dst, $shift" %}
9036   opcode(0xD3, 0x5); /* D3 /5 */
9037   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9038   ins_pipe(ialu_reg_reg);
9039 %}
9040 
9041 // Logical Shift Right by variable (int, in memory; count in an rcx_RegI operand)
9042 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9043 %{
9044   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9045   effect(KILL cr);
9046   // Read-modify-write of the int at $dst; the count register is not passed to ins_encode.
9047   format %{ "shrl    $dst, $shift" %}
9048   opcode(0xD3, 0x5); /* D3 /5 */
9049   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9050   ins_pipe(ialu_mem_reg);
9051 %}
9052 
9053 // Long Shift Instructions
9054 // Shift Left by one (long, register)
9055 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9056 %{
9057   match(Set dst (LShiftL dst shift));
9058   effect(KILL cr);
9059   // The immI1 count is matched but not passed to ins_encode; the _wide REX encoder is used for the long operand.
9060   format %{ "salq    $dst, $shift" %}
9061   opcode(0xD1, 0x4); /* D1 /4 */
9062   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9063   ins_pipe(ialu_reg);
9064 %}
9065 
9066 // Shift Left by one (long, in memory)
9067 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9068 %{
9069   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9070   effect(KILL cr);
9071   // Read-modify-write: the load and the store in the match use the same memory operand.
9072   format %{ "salq    $dst, $shift" %}
9073   opcode(0xD1, 0x4); /* D1 /4 */
9074   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9075   ins_pipe(ialu_mem_imm);
9076 %}
9077 
9078 // Shift Left by 8-bit immediate (long, register)
9079 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9080 %{
9081   match(Set dst (LShiftL dst shift));
9082   effect(KILL cr);
9083   // dst is both input and result; the count is handed to the reg_opc_imm_wide encoding.
9084   format %{ "salq    $dst, $shift" %}
9085   opcode(0xC1, 0x4); /* C1 /4 ib */
9086   ins_encode(reg_opc_imm_wide(dst, shift));
9087   ins_pipe(ialu_reg);
9088 %}
9089 
9090 // Shift Left by 8-bit immediate (long, in memory)
9091 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9092 %{
9093   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9094   effect(KILL cr);
9095   // Read-modify-write of the long at $dst; the count is appended via Con8or32(shift).
9096   format %{ "salq    $dst, $shift" %}
9097   opcode(0xC1, 0x4); /* C1 /4 ib */
9098   ins_encode(REX_mem_wide(dst), OpcP,
9099              RM_opc_mem(secondary, dst), Con8or32(shift));
9100   ins_pipe(ialu_mem_imm);
9101 %}
9102 
9103 // Shift Left by variable (long, register; count in an rcx_RegI operand)
9104 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9105 %{
9106   match(Set dst (LShiftL dst shift));
9107   effect(KILL cr);
9108   // The count register is matched but not passed to ins_encode.
9109   format %{ "salq    $dst, $shift" %}
9110   opcode(0xD3, 0x4); /* D3 /4 */
9111   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9112   ins_pipe(ialu_reg_reg);
9113 %}
9114 
9115 // Shift Left by variable (long, in memory; count in an rcx_RegI operand)
9116 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9117 %{
9118   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9119   effect(KILL cr);
9120   // Read-modify-write of the long at $dst; the count register is not passed to ins_encode.
9121   format %{ "salq    $dst, $shift" %}
9122   opcode(0xD3, 0x4); /* D3 /4 */
9123   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9124   ins_pipe(ialu_mem_reg);
9125 %}
9126 
9127 // Arithmetic shift right by one (long, register)
9128 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9129 %{
9130   match(Set dst (RShiftL dst shift));
9131   effect(KILL cr);
9132   // The immI1 count is matched but not passed to ins_encode; the _wide REX encoder is used for the long operand.
9133   format %{ "sarq    $dst, $shift" %}
9134   opcode(0xD1, 0x7); /* D1 /7 */
9135   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9136   ins_pipe(ialu_reg);
9137 %}
9138 
9139 // Arithmetic shift right by one (long, in memory)
9140 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9141 %{
9142   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9143   effect(KILL cr);
9144   // Read-modify-write: the load and the store in the match use the same memory operand.
9145   format %{ "sarq    $dst, $shift" %}
9146   opcode(0xD1, 0x7); /* D1 /7 */
9147   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9148   ins_pipe(ialu_mem_imm);
9149 %}
9150 
9151 // Arithmetic Shift Right by 8-bit immediate (long, register)
9152 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9153 %{
9154   match(Set dst (RShiftL dst shift));
9155   effect(KILL cr);
9156   // Register-only operation: uses the ialu_reg pipe class like salL_rReg_imm and shrL_rReg_imm.
9157   format %{ "sarq    $dst, $shift" %}
9158   opcode(0xC1, 0x7); /* C1 /7 ib */
9159   ins_encode(reg_opc_imm_wide(dst, shift));
9160   ins_pipe(ialu_reg);
9161 %}
9162 
9163 // Arithmetic Shift Right by 8-bit immediate (long, in memory)
9164 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9165 %{
9166   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9167   effect(KILL cr);
9168   // Read-modify-write of the long at $dst; the count is appended via Con8or32(shift).
9169   format %{ "sarq    $dst, $shift" %}
9170   opcode(0xC1, 0x7); /* C1 /7 ib */
9171   ins_encode(REX_mem_wide(dst), OpcP,
9172              RM_opc_mem(secondary, dst), Con8or32(shift));
9173   ins_pipe(ialu_mem_imm);
9174 %}
9175 
9176 // Arithmetic Shift Right by variable (long, register; count in an rcx_RegI operand)
9177 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9178 %{
9179   match(Set dst (RShiftL dst shift));
9180   effect(KILL cr);
9181   // The count register is matched but not passed to ins_encode.
9182   format %{ "sarq    $dst, $shift" %}
9183   opcode(0xD3, 0x7); /* D3 /7 */
9184   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9185   ins_pipe(ialu_reg_reg);
9186 %}
9187 
9188 // Arithmetic Shift Right by variable (long, in memory; count in an rcx_RegI operand)
9189 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9190 %{
9191   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9192   effect(KILL cr);
9193   // Read-modify-write of the long at $dst; the count register is not passed to ins_encode.
9194   format %{ "sarq    $dst, $shift" %}
9195   opcode(0xD3, 0x7); /* D3 /7 */
9196   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9197   ins_pipe(ialu_mem_reg);
9198 %}
9199 
9200 // Logical shift right by one (long, register)
9201 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9202 %{
9203   match(Set dst (URShiftL dst shift));
9204   effect(KILL cr);
9205   // The immI1 count is matched but not passed to ins_encode; the _wide REX encoder is used for the long operand.
9206   format %{ "shrq    $dst, $shift" %}
9207   opcode(0xD1, 0x5); /* D1 /5 */
9208   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9209   ins_pipe(ialu_reg);
9210 %}
9211 
9212 // Logical shift right by one (long, in memory)
9213 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9214 %{
9215   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9216   effect(KILL cr);
9217   // Read-modify-write: the load and the store in the match use the same memory operand.
9218   format %{ "shrq    $dst, $shift" %}
9219   opcode(0xD1, 0x5); /* D1 /5 */
9220   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9221   ins_pipe(ialu_mem_imm);
9222 %}
9223 
9224 // Logical Shift Right by 8-bit immediate (long, register)
9225 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9226 %{
9227   match(Set dst (URShiftL dst shift));
9228   effect(KILL cr);
9229   // dst is both input and result; the count is handed to the reg_opc_imm_wide encoding.
9230   format %{ "shrq    $dst, $shift" %}
9231   opcode(0xC1, 0x5); /* C1 /5 ib */
9232   ins_encode(reg_opc_imm_wide(dst, shift));
9233   ins_pipe(ialu_reg);
9234 %}
9235 
9236 
9237 // Logical Shift Right by 8-bit immediate (long, in memory)
9238 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9239 %{
9240   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9241   effect(KILL cr);
9242   // Read-modify-write of the long at $dst; the count is appended via Con8or32(shift).
9243   format %{ "shrq    $dst, $shift" %}
9244   opcode(0xC1, 0x5); /* C1 /5 ib */
9245   ins_encode(REX_mem_wide(dst), OpcP,
9246              RM_opc_mem(secondary, dst), Con8or32(shift));
9247   ins_pipe(ialu_mem_imm);
9248 %}
9249 
9250 // Logical Shift Right by variable (long, register; count in an rcx_RegI operand)
9251 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9252 %{
9253   match(Set dst (URShiftL dst shift));
9254   effect(KILL cr);
9255   // The count register is matched but not passed to ins_encode.
9256   format %{ "shrq    $dst, $shift" %}
9257   opcode(0xD3, 0x5); /* D3 /5 */
9258   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9259   ins_pipe(ialu_reg_reg);
9260 %}
9261 
9262 // Logical Shift Right by variable (long, in memory; count in an rcx_RegI operand)
9263 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9264 %{
9265   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9266   effect(KILL cr);
9267   // Read-modify-write of the long at $dst; the count register is not passed to ins_encode.
9268   format %{ "shrq    $dst, $shift" %}
9269   opcode(0xD3, 0x5); /* D3 /5 */
9270   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9271   ins_pipe(ialu_mem_reg);
9272 %}
9273 
9274 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9275 // This idiom is used by the compiler for the i2b bytecode.
9276 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9277 %{
9278   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9279   // The shift pair is replaced by one byte-to-int move (movsbl per the format); no flags effect is declared.
9280   format %{ "movsbl  $dst, $src\t# i2b" %}
9281   opcode(0x0F, 0xBE);
9282   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9283   ins_pipe(ialu_reg_reg);
9284 %}
9285 
9286 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9287 // This idiom is used by the compiler for the i2s bytecode.
9288 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9289 %{
9290   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9291   // The shift pair is replaced by one short-to-int move (movswl per the format); no flags effect is declared.
9292   format %{ "movswl  $dst, $src\t# i2s" %}
9293   opcode(0x0F, 0xBF);
9294   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9295   ins_pipe(ialu_reg_reg);
9296 %}
9297 
9298 // ROL/ROR instructions
9299 
9300 // ROL expand
9301 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9302   effect(KILL cr, USE_DEF dst);
9303   // No match rule: expand-only helper, instantiated by rolI_rReg_i1.
9304   format %{ "roll    $dst" %}
9305   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9306   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9307   ins_pipe(ialu_reg);
9308 %}
9309 
9310 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9311   effect(USE_DEF dst, USE shift, KILL cr);
9312   // No match rule: expand-only helper, instantiated by rolI_rReg_i8.
9313   format %{ "roll    $dst, $shift" %}
9314   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9315   ins_encode( reg_opc_imm(dst, shift) );
9316   ins_pipe(ialu_reg);
9317 %}
9318 
9319 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9320 %{
9321   effect(USE_DEF dst, USE shift, KILL cr);
9322   // No match rule: expand-only helper, instantiated by rolI_rReg_Var_C0 and rolI_rReg_Var_C32.
9323   format %{ "roll    $dst, $shift" %}
9324   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9325   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9326   ins_pipe(ialu_reg_reg);
9327 %}
9328 // end of ROL expand
9329 
9330 // Rotate Left by one (int): matches (dst << lshift) | (dst >>> rshift)
9331 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9332 %{
9333   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9334   // rshift is an immI_M1 operand (presumably the constant -1 - confirm); both constants are dropped in the expansion.
9335   expand %{
9336     rolI_rReg_imm1(dst, cr);
9337   %}
9338 %}
9339 
9340 // Rotate Left by 8-bit immediate (int)
9341 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9342 %{
9343   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9344   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9345   // Predicate: the two shift counts must sum to 0 modulo 32; the expansion then rotates left by lshift only.
9346   expand %{
9347     rolI_rReg_imm8(dst, lshift, cr);
9348   %}
9349 %}
9350 
9351 // Rotate Left by variable (int); right-shift count written as (zero - shift)
9352 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9353 %{
9354   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9355   // The whole shift/or tree collapses into a single rolI_rReg_CL on the rcx-class count.
9356   expand %{
9357     rolI_rReg_CL(dst, shift, cr);
9358   %}
9359 %}
9360 
9361 // Rotate Left by variable (int); right-shift count written as (c32 - shift)
9362 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9363 %{
9364   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9365   // The whole shift/or tree collapses into a single rolI_rReg_CL on the rcx-class count.
9366   expand %{
9367     rolI_rReg_CL(dst, shift, cr);
9368   %}
9369 %}
9370 
9371 // ROR expand
9372 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9373 %{
9374   effect(USE_DEF dst, KILL cr);
9375   // No match rule: expand-only helper, instantiated by rorI_rReg_i1.
9376   format %{ "rorl    $dst" %}
9377   opcode(0xD1, 0x1); /* D1 /1 */
9378   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9379   ins_pipe(ialu_reg);
9380 %}
9381 
9382 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9383 %{
9384   effect(USE_DEF dst, USE shift, KILL cr);
9385   // No match rule: expand-only helper, instantiated by rorI_rReg_i8.
9386   format %{ "rorl    $dst, $shift" %}
9387   opcode(0xC1, 0x1); /* C1 /1 ib */
9388   ins_encode(reg_opc_imm(dst, shift));
9389   ins_pipe(ialu_reg);
9390 %}
9391 
9392 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9393 %{
9394   effect(USE_DEF dst, USE shift, KILL cr);
9395   // No match rule: expand-only helper, instantiated by rorI_rReg_Var_C0 and rorI_rReg_Var_C32.
9396   format %{ "rorl    $dst, $shift" %}
9397   opcode(0xD3, 0x1); /* D3 /1 */
9398   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9399   ins_pipe(ialu_reg_reg);
9400 %}
9401 // end of ROR expand
9402 
9403 // Rotate Right by one (int): matches (dst >>> rshift) | (dst << lshift)
9404 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9405 %{
9406   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9407   // lshift is an immI_M1 operand (presumably the constant -1 - confirm); both constants are dropped in the expansion.
9408   expand %{
9409     rorI_rReg_imm1(dst, cr);
9410   %}
9411 %}
9412 
9413 // Rotate Right by 8-bit immediate (int)
9414 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9415 %{
9416   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9417   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9418   // Predicate: the two shift counts must sum to 0 modulo 32; the expansion then rotates right by rshift only.
9419   expand %{
9420     rorI_rReg_imm8(dst, rshift, cr);
9421   %}
9422 %}
9423 
9424 // Rotate Right by variable (int); left-shift count written as (zero - shift)
9425 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9426 %{
9427   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9428   // The whole shift/or tree collapses into a single rorI_rReg_CL on the rcx-class count.
9429   expand %{
9430     rorI_rReg_CL(dst, shift, cr);
9431   %}
9432 %}
9433 
9434 // Rotate Right by variable (int); left-shift count written as (c32 - shift)
9435 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9436 %{
9437   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9438   // The whole shift/or tree collapses into a single rorI_rReg_CL on the rcx-class count.
9439   expand %{
9440     rorI_rReg_CL(dst, shift, cr);
9441   %}
9442 %}
9443 
9444 // for long rotate
9445 // ROL expand
9446 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9447   effect(USE_DEF dst, KILL cr);
9448   // No match rule: expand-only helper, instantiated by rolL_rReg_i1.
9449   format %{ "rolq    $dst" %}
9450   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9451   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9452   ins_pipe(ialu_reg);
9453 %}
9454 
9455 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9456   effect(USE_DEF dst, USE shift, KILL cr);
9457   // No match rule: expand-only helper, instantiated by rolL_rReg_i8.
9458   format %{ "rolq    $dst, $shift" %}
9459   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9460   ins_encode( reg_opc_imm_wide(dst, shift) );
9461   ins_pipe(ialu_reg);
9462 %}
9463 
9464 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9465 %{
9466   effect(USE_DEF dst, USE shift, KILL cr);
9467   // No match rule: expand-only helper, instantiated by rolL_rReg_Var_C0 and rolL_rReg_Var_C64.
9468   format %{ "rolq    $dst, $shift" %}
9469   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9470   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9471   ins_pipe(ialu_reg_reg);
9472 %}
9473 // end of ROL expand
9474 
9475 // Rotate Left by one (long): matches (dst << lshift) | (dst >>> rshift)
9476 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9477 %{
9478   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9479   // rshift is an immI_M1 operand (presumably the constant -1 - confirm); both constants are dropped in the expansion.
9480   expand %{
9481     rolL_rReg_imm1(dst, cr);
9482   %}
9483 %}
9484 
9485 // Rotate Left by 8-bit immediate (long)
9486 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9487 %{
9488   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9489   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9490   // Predicate: the two shift counts must sum to 0 modulo 64; the expansion then rotates left by lshift only.
9491   expand %{
9492     rolL_rReg_imm8(dst, lshift, cr);
9493   %}
9494 %}
9495 
9496 // Rotate Left by variable (long); right-shift count written as (zero - shift)
9497 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9498 %{
9499   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9500   // The whole shift/or tree collapses into a single rolL_rReg_CL on the rcx-class count.
9501   expand %{
9502     rolL_rReg_CL(dst, shift, cr);
9503   %}
9504 %}
9505 
9506 // Rotate Left by variable (long); right-shift count written as (c64 - shift)
9507 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9508 %{
9509   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9510   // The whole shift/or tree collapses into a single rolL_rReg_CL on the rcx-class count.
9511   expand %{
9512     rolL_rReg_CL(dst, shift, cr);
9513   %}
9514 %}
9515 
9516 // ROR expand
9517 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9518 %{
9519   effect(USE_DEF dst, KILL cr);
9520   // No match rule: expand-only helper, instantiated by rorL_rReg_i1.
9521   format %{ "rorq    $dst" %}
9522   opcode(0xD1, 0x1); /* D1 /1 */
9523   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9524   ins_pipe(ialu_reg);
9525 %}
9526 
9527 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9528 %{
9529   effect(USE_DEF dst, USE shift, KILL cr);
9530   // No match rule: expand-only helper, instantiated by rorL_rReg_i8.
9531   format %{ "rorq    $dst, $shift" %}
9532   opcode(0xC1, 0x1); /* C1 /1 ib */
9533   ins_encode(reg_opc_imm_wide(dst, shift));
9534   ins_pipe(ialu_reg);
9535 %}
9536 
9537 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9538 %{
9539   effect(USE_DEF dst, USE shift, KILL cr);
9540   // No match rule: expand-only helper, instantiated by rorL_rReg_Var_C0 and rorL_rReg_Var_C64.
9541   format %{ "rorq    $dst, $shift" %}
9542   opcode(0xD3, 0x1); /* D3 /1 */
9543   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9544   ins_pipe(ialu_reg_reg);
9545 %}
9546 // end of ROR expand
9547 
9548 // Rotate Right by one (long): matches (dst >>> rshift) | (dst << lshift)
9549 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9550 %{
9551   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9552   // lshift is an immI_M1 operand (presumably the constant -1 - confirm); both constants are dropped in the expansion.
9553   expand %{
9554     rorL_rReg_imm1(dst, cr);
9555   %}
9556 %}
9557 
9558 // Rotate Right by 8-bit immediate (long)
9559 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9560 %{
9561   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9562   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9563   // Predicate: the two shift counts must sum to 0 modulo 64; the expansion then rotates right by rshift only.
9564   expand %{
9565     rorL_rReg_imm8(dst, rshift, cr);
9566   %}
9567 %}
9568 
9569 // Rotate Right by variable (long); left-shift count written as (zero - shift)
9570 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9571 %{
9572   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9573   // The whole shift/or tree collapses into a single rorL_rReg_CL on the rcx-class count.
9574   expand %{
9575     rorL_rReg_CL(dst, shift, cr);
9576   %}
9577 %}
9578 
9579 // Rotate Right by variable (long); left-shift count written as (c64 - shift)
9580 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9581 %{
9582   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9583   // The whole shift/or tree collapses into a single rorL_rReg_CL on the rcx-class count.
9584   expand %{
9585     rorL_rReg_CL(dst, shift, cr);
9586   %}
9587 %}
9588 
9589 // Logical Instructions
9590 
9591 // Integer Logical Instructions
9592 
9593 // And Instructions
9594 // And Register with Register (int)
9595 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9596 %{
9597   match(Set dst (AndI dst src));
9598   effect(KILL cr);
9599   // dst = dst & src; the flags register is declared killed.
9600   format %{ "andl    $dst, $src\t# int" %}
9601   opcode(0x23);
9602   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9603   ins_pipe(ialu_reg_reg);
9604 %}
9605 
9606 // And Register with Immediate 255
9607 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9608 %{
9609   match(Set dst (AndI dst src));
9610   // Emitted as a movzbl of dst onto itself (see format) instead of an andl; no flags effect is declared.
9611   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9612   opcode(0x0F, 0xB6);
9613   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9614   ins_pipe(ialu_reg);
9615 %}
9616 
9617 // And Register with Immediate 255 and promote to long
9618 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9619 %{
9620   match(Set dst (ConvI2L (AndI src mask)));
9621   // The ConvI2L of the masked int is covered by the same single movzbl; dst and src are distinct operands here.
9622   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9623   opcode(0x0F, 0xB6);
9624   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9625   ins_pipe(ialu_reg);
9626 %}
9627 
9628 // And Register with Immediate 65535
9629 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9630 %{
9631   match(Set dst (AndI dst src));
9632   // Emitted as a movzwl of dst onto itself (see format) instead of an andl; no flags effect is declared.
9633   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9634   opcode(0x0F, 0xB7);
9635   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9636   ins_pipe(ialu_reg);
9637 %}
9638 
9639 // And Register with Immediate 65535 and promote to long
9640 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9641 %{
9642   match(Set dst (ConvI2L (AndI src mask)));
9643   // The ConvI2L of the masked int is covered by the same single movzwl; dst and src are distinct operands here.
9644   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9645   opcode(0x0F, 0xB7);
9646   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9647   ins_pipe(ialu_reg);
9648 %}
9649 
9650 // And Register with Immediate (general int constant)
9651 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9652 %{
9653   match(Set dst (AndI dst src));
9654   effect(KILL cr);
9655   // dst = dst & src. NOTE(review): OpcSErm/Con8or32 presumably pick an 8- or 32-bit immediate form - confirm in the encode block.
9656   format %{ "andl    $dst, $src\t# int" %}
9657   opcode(0x81, 0x04); /* Opcode 81 /4 */
9658   ins_encode(OpcSErm(dst, src), Con8or32(src));
9659   ins_pipe(ialu_reg);
9660 %}
9661 
9662 // And Register with Memory (int)
9663 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9664 %{
9665   match(Set dst (AndI dst (LoadI src)));
9666   effect(KILL cr);
9667   // dst = dst & (int loaded from $src); the load is folded into the instruction.
9668   ins_cost(125);
9669   format %{ "andl    $dst, $src\t# int" %}
9670   opcode(0x23);
9671   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9672   ins_pipe(ialu_reg_mem);
9673 %}
9674 
// [mem] &= int register: the load/AND/store on one address is matched as a
// single andl (opcode 21 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x21); /* Opcode 21 /r */
  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9687 
// [mem] &= immediate: the load/AND/store on one address is matched as a single
// andl (opcode 81 /4).  The secondary opcode (4) is handed to RM_opc_mem.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9701 
9702 // Or Instructions
// int register |= int register (orl, opcode 0B).  The condition codes are
// declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9714 
// int register |= immediate (orl, opcode 81 /1).  The condition codes are
// declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9726 
// int register |= [mem] (orl, opcode 0B): the LoadI is folded into the OR.
// The condition codes are declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9739 
// [mem] |= int register: the load/OR/store on one address is matched as a
// single orl (opcode 09 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x09); /* Opcode 09 /r */
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9752 
// [mem] |= immediate: the load/OR/store on one address is matched as a single
// orl (opcode 81 /1).  The secondary opcode (1) is handed to RM_opc_mem.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9766 
9767 // Xor Instructions
// int register ^= int register (xorl, opcode 33).  The condition codes are
// declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9779 
// Xor Register with Immediate -1
// x ^ -1 is a bitwise complement, so this form emits notl through the
// assembler.  No flags effect is declared.
// The format text names the instruction that is actually emitted (notl),
// matching the "notq" text used by the long variant.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "notl    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9790 
// int register ^= immediate (xorl, opcode 81 /6).  The condition codes are
// declared killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9802 
// int register ^= [mem] (xorl, opcode 33): the LoadI is folded into the XOR.
// The condition codes are declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9815 
// [mem] ^= int register: the load/XOR/store on one address is matched as a
// single xorl (opcode 31 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x31); /* Opcode 31 /r */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9828 
// [mem] ^= immediate: the load/XOR/store on one address is matched as a single
// xorl (opcode 81 /6).  The secondary opcode (6) is handed to RM_opc_mem.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9842 
9843 
9844 // Long Logical Instructions
9845 
9846 // And Instructions
// long register &= long register (andq, opcode 23 with the wide REX encoding
// class).  The condition codes are declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  opcode(0x23);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9858 
// long & 0xFF performed in place as a byte zero-extension (movzbq, 0F B6 with
// the wide REX encoding class).  No flags effect is declared for this form.
instruct andL_rReg_imm255(rRegL dst, immL_255 src) %{
  match(Set dst (AndL dst src));

  opcode(0x0F, 0xB6);
  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9869 
// long & 0xFFFF performed in place as a 16-bit zero-extension (movzwq, 0F B7
// with the wide REX encoding class).  No flags effect is declared.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src) %{
  match(Set dst (AndL dst src));

  opcode(0x0F, 0xB7);
  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9880 
// long register &= 32-bit immediate operand (andq, opcode 81 /4).  The
// condition codes are declared killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  opcode(0x81, 0x04); /* Opcode 81 /4 */
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9892 
// long register &= [mem] (andq, opcode 23): the LoadL is folded into the AND.
// The condition codes are declared killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x23);
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9905 
// [mem] &= long register: the load/AND/store on one address is matched as a
// single andq (opcode 21 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x21); /* Opcode 21 /r */
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9918 
// [mem] &= 32-bit immediate operand: the load/AND/store on one address is
// matched as a single andq (opcode 81 /4).  The secondary opcode (4) is handed
// to RM_opc_mem.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9932 
9933 // Or Instructions
// long register |= long register (orq, opcode 0B with the wide REX encoding
// class).  The condition codes are declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9945 
// long register |= pointer reinterpreted as an integer (CastP2X), emitted as
// a plain orq.  The source is typed any_RegP so that R15 (the TLS register)
// can be matched without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9956 
9957 
// long register |= 32-bit immediate operand (orq, opcode 81 /1).  The
// condition codes are declared killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9969 
// long register |= [mem] (orq, opcode 0B): the LoadL is folded into the OR.
// The condition codes are declared killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9982 
// [mem] |= long register: the load/OR/store on one address is matched as a
// single orq (opcode 09 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x09); /* Opcode 09 /r */
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9995 
// [mem] |= 32-bit immediate operand: the load/OR/store on one address is
// matched as a single orq (opcode 81 /1).  The secondary opcode (1) is handed
// to RM_opc_mem.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10009 
10010 // Xor Instructions
// long register ^= long register (xorq, opcode 33 with the wide REX encoding
// class).  The condition codes are declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10022 
// long ^ -1 is a bitwise complement, so this form emits notq through the
// assembler.  No flags effect is declared.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}

  ins_encode %{
    __ notq($dst$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10033 
// long register ^= 32-bit immediate operand (xorq, opcode 81 /6).  The
// condition codes are declared killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
10045 
// long register ^= [mem] (xorq, opcode 33): the LoadL is folded into the XOR.
// The condition codes are declared killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10058 
// [mem] ^= long register: the load/XOR/store on one address is matched as a
// single xorq (opcode 31 /r).  src is passed as the register operand and dst
// as the memory operand of the encoding classes.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x31); /* Opcode 31 /r */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10071 
// [mem] ^= 32-bit immediate operand: the load/XOR/store on one address is
// matched as a single xorq (opcode 81 /6).  The secondary opcode (6) is handed
// to RM_opc_mem.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10085 
// Conv2B on an int: dst becomes 1 when src is non-zero and 0 otherwise.
// Three hand-encoded instructions: testl src,src / setnz dst / movzbl dst,dst.
// The test clobbers the condition codes, hence KILL cr.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
             setNZ_reg(dst), // setnz
             REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), // movzbl
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10101 
// Conv2B on a pointer: dst becomes 1 when src is non-null and 0 otherwise.
// Same sequence as the int form, but the test uses the wide REX encoding
// class (testq).  The test clobbers the condition codes, hence KILL cr.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
             setNZ_reg(dst), // setnz
             REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), // movzbl
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10117 
// CmpLTMask: dst = (p < q) ? -1 : 0.
// cmpl p,q / setlt dst / movzbl dst,dst yields 0 or 1; the final negl turns
// that into 0 or -1.  The compare clobbers the condition codes.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
             setLT_reg(dst), // setlt
             REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), // movzbl
             reg_reg(dst, dst),
             neg_reg(dst)); // negl
  ins_pipe(pipe_slow);
%}
10135 
// CmpLTMask against constant zero: an arithmetic right shift by 31
// (sarl, opcode C1 /7 with immediate 0x1F) spreads the sign bit over the
// whole register, giving -1 for negative dst and 0 otherwise.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  opcode(0xC1, 0x7);  /* C1 /7 ib */
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  ins_encode(reg_opc_imm(dst, 0x1F));
  ins_pipe(ialu_reg);
%}
10147 
10148 
// Conditional add: p = (p - q) + ((p < q) ? y : 0), where "<" is the signed
// comparison that CmpLTMask denotes.
//
// The previous encoding built the mask with "sbbl tmp, tmp" after the subl.
// That replicates the carry flag, i.e. the UNSIGNED borrow of p - q, so the
// mask was wrong whenever signed and unsigned order disagree (for example
// p = -1, q = 1 produced mask 0 instead of -1).  The selection is now made
// with a cmov on the signed "less" condition taken from the same subl.
//
// tmp is a TEMP, zeroed before the subl because xorl itself writes the flags.
// The whole sequence clobbers the condition codes.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  ins_cost(400); // XXX
  format %{ "xorl    $tmp, $tmp\t# cadd_cmpLTMask\n\t"
            "subl    $p, $q\n\t"
            "cmovlt  $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Register Rt = $tmp$$Register;
    __ xorl(Rt, Rt);                    // tmp = 0 (must precede subl: writes flags)
    __ subl(Rp, Rq);                    // p -= q; flags reflect original p vs q
    __ cmovl(Assembler::less, Rt, Ry);  // tmp = y only when p < q (signed)
    __ addl(Rp, Rt);                    // p += tmp
  %}
  ins_pipe(pipe_cmplt);
%}
10171 
10172 //---------- FP Instructions------------------------------------------------
10173 
// Float compare, register/register, into the rFlagsRegU flags class.
// ucomiss (0F 2E) is followed by the cmpfp_fixup encoding; the format text
// below spells out that fixup as a flags rewrite taken only when parity is
// set ("saw NaN").
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10190 
// Float compare, register/register, into the rFlagsRegUCF flags class.
// Only the bare ucomiss is emitted; there is no NaN fixup sequence here.
// The cost is 100, the same value every other fixup-free *CF compare in this
// group uses (the fixup forms cost 145); it was previously 145, which made
// this form look as expensive as the one carrying the fixup.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10201 
// Float compare, register against a float loaded from memory, into the
// rFlagsRegU flags class.  ucomiss (0F 2E) with a memory operand is followed
// by the cmpfp_fixup encoding described in the format text.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10218 
// Float compare, register against a float loaded from memory, into the
// rFlagsRegUCF flags class: bare ucomiss (0F 2E), no fixup encoding.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2" %}
  ins_cost(100);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10228 
// Float compare, register against a float constant, into the rFlagsRegU
// flags class.  The constant is addressed through $constantaddress, and
// emit_cmpfp_fixup appends the fixup sequence shown in the format text.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);

  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}

  ins_pipe(pipe_slow);
%}
10245 
// Float compare, register against a float constant, into the rFlagsRegUCF
// flags class: bare ucomiss on the $constantaddress operand, no fixup.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(100);

  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10255 
// Double compare, register/register, into the rFlagsRegU flags class.
// ucomisd (66 0F 2E; the 66 prefix is emitted ahead of the REX byte) is
// followed by the cmpfp_fixup encoding described in the format text.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10272 
// Double compare, register/register, into the rFlagsRegUCF flags class.
// Only the bare ucomisd is emitted; there is no NaN fixup sequence here.
// The format text previously ended in a stray " test", which showed up in the
// printed assembly; it now matches the other ucomisd forms.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10283 
// Double compare, register against a double loaded from memory, into the
// rFlagsRegU flags class.  ucomisd (66 0F 2E) with a memory operand is
// followed by the cmpfp_fixup encoding described in the format text.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10300 
// Double compare, register against a double loaded from memory, into the
// rFlagsRegUCF flags class: bare ucomisd (66 0F 2E), no fixup encoding.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2" %}
  ins_cost(100);
  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10310 
// Double compare, register against a double constant, into the rFlagsRegU
// flags class.  The constant is addressed through $constantaddress, and
// emit_cmpfp_fixup appends the fixup sequence shown in the format text.
instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);

  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}

  ins_pipe(pipe_slow);
%}
10327 
// Double compare, register against a double constant, into the rFlagsRegUCF
// flags class: bare ucomisd on the $constantaddress operand, no fixup.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(100);

  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10337 
// Three-way float compare (CmpF3), register/register, producing -1, 0 or 1.
// Per the format text: dst is preset to -1 and kept when the compare is
// unordered (parity) or below; otherwise setne/movzbl leave 0 for equal and
// 1 for greater.  The cmpfp3 encoding class emits everything after ucomiss.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10358 
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory, producing -1, 0 or 1.  Same sequence as the register form: -1 is
// kept for unordered or below, otherwise setne/movzbl give 0 or 1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10379 
// Three-way float compare (CmpF3) of a register against a float constant,
// producing -1, 0 or 1.  The sequence is written out with the assembler:
// preset -1, leave early on parity (unordered) or below, otherwise
// setne/movzbl produce 0 for equal and 1 for greater.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  ins_encode %{
    Register result = $dst$$Register;
    Label finished;

    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    __ movl(result, -1);                       // default: unordered or less
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    __ setb(Assembler::notEqual, result);      // low byte: 0 if equal, 1 if greater
    __ movzbl(result, result);                 // widen that byte to the full int
    __ bind(finished);
  %}

  ins_pipe(pipe_slow);
%}
10406 
// Three-way double compare (CmpD3), register/register, producing -1, 0 or 1.
// Per the format text: dst is preset to -1 and kept when the compare is
// unordered (parity) or below; otherwise setne/movzbl leave 0 for equal and
// 1 for greater.  The cmpfp3 encoding class emits everything after ucomisd.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10427 
// Three-way double compare (CmpD3) of a register against a double loaded from
// memory, producing -1, 0 or 1.  Same sequence as the register form: -1 is
// kept for unordered or below, otherwise setne/movzbl give 0 or 1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10448 
// Three-way double compare (CmpD3) of a register against a double constant,
// producing -1, 0 or 1.  The sequence is written out with the assembler:
// preset -1, leave early on parity (unordered) or below, otherwise
// setne/movzbl produce 0 for equal and 1 for greater.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  ins_encode %{
    Label finished;
    Register result = $dst$$Register;

    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    __ movl(result, -1);                       // default: unordered or less
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    __ setb(Assembler::notEqual, result);      // low byte: 0 if equal, 1 if greater
    __ movzbl(result, result);                 // widen that byte to the full int
    __ bind(finished);
  %}

  ins_pipe(pipe_slow);
%}
10475 
// float dst += float src, register/register (addss, F3 0F 58).
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  format %{ "addss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10486 
// float dst += float loaded from memory (addss, F3 0F 58); the LoadF is
// folded into the add.
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  format %{ "addss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10497 
// float dst += float constant; the constant operand is addressed through
// $constantaddress (addss with a memory operand).
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}

  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10507 
// double dst += double src, register/register (addsd, F2 0F 58).
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x58);
  format %{ "addsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10518 
// double dst += double loaded from memory (addsd, F2 0F 58); the LoadD is
// folded into the add.
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x58);
  format %{ "addsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10529 
// double dst += double constant; the constant operand is addressed through
// $constantaddress (addsd with a memory operand).
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}

  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10539 
// float dst -= float src, register/register (subss, F3 0F 5C).
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5C);
  format %{ "subss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10550 
// float dst -= float loaded from memory (subss, F3 0F 5C); the LoadF is
// folded into the subtract.
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5C);
  format %{ "subss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10561 
// float dst -= float constant; the constant operand is addressed through
// $constantaddress (subss with a memory operand).
instruct subF_imm(regF dst, immF con)
%{
  match(Set dst (SubF dst con));

  ins_cost(150); // XXX
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}

  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10571 
// double dst -= double src, register/register (subsd, F2 0F 5C).
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x5C);
  format %{ "subsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10582 
// double dst -= double loaded from memory (subsd, F2 0F 5C); the LoadD is
// folded into the subtract.
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x5C);
  format %{ "subsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10593 
// double dst -= double constant; the constant operand is addressed through
// $constantaddress (subsd with a memory operand).
instruct subD_imm(regD dst, immD con)
%{
  match(Set dst (SubD dst con));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}

  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10603 
// float dst *= float src, register/register (mulss, F3 0F 59).
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x59);
  format %{ "mulss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10614 
// float dst *= float loaded from memory (mulss, F3 0F 59); the LoadF is
// folded into the multiply.
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x59);
  format %{ "mulss   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10625 
// float dst *= float constant; the constant operand is addressed through
// $constantaddress (mulss with a memory operand).
instruct mulF_imm(regF dst, immF con)
%{
  match(Set dst (MulF dst con));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}

  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}

  ins_pipe(pipe_slow);
%}
10635 
// double dst *= double src, register/register (mulsd, F2 0F 59).
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x59);
  format %{ "mulsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10646 
// double dst *= double loaded from memory (mulsd, F2 0F 59); the LoadD is
// folded into the multiply.
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x59);
  format %{ "mulsd   $dst, $src" %}
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10657 
10658 instruct mulD_imm(regD dst, immD con) %{
        // Double multiply with a constant operand: matches dst = MulD(dst, con).
        // The constant is addressed through $constantaddress($con).
10659   match(Set dst (MulD dst con));
10660   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10661   ins_cost(150); // XXX
10662   ins_encode %{
10663     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10664   %}
10665   ins_pipe(pipe_slow);
10666 %}
10667 
10668 instruct divF_reg(regF dst, regF src)
10669 %{
        // Float divide, register-register form: matches dst = DivF(dst, src).
        // Printed as "divss"; encoded from the opcode bytes F3 0F 5E.
10670   match(Set dst (DivF dst src));
10671
10672   format %{ "divss   $dst, $src" %}
10673   ins_cost(150); // XXX
10674   opcode(0xF3, 0x0F, 0x5E);
10675   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10676   ins_pipe(pipe_slow);
10677 %}
10678 
10679 instruct divF_mem(regF dst, memory src)
10680 %{
        // Float divide, memory form: the divisor is a LoadF that is matched
        // together with the DivF.  Same opcode bytes (F3 0F 5E) as divF_reg,
        // with the reg_mem encoding classes.
10681   match(Set dst (DivF dst (LoadF src)));
10682
10683   format %{ "divss   $dst, $src" %}
10684   ins_cost(150); // XXX
10685   opcode(0xF3, 0x0F, 0x5E);
10686   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10687   ins_pipe(pipe_slow);
10688 %}
10689 
10690 instruct divF_imm(regF dst, immF con) %{
        // Float divide by a constant: matches dst = DivF(dst, con).
        // The constant is addressed through $constantaddress($con).
10691   match(Set dst (DivF dst con));
10692   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10693   ins_cost(150); // XXX
10694   ins_encode %{
10695     __ divss($dst$$XMMRegister, $constantaddress($con));
10696   %}
10697   ins_pipe(pipe_slow);
10698 %}
10699 
10700 instruct divD_reg(regD dst, regD src)
10701 %{
        // Double divide, register-register form: matches dst = DivD(dst, src).
        // Printed as "divsd"; encoded from the opcode bytes F2 0F 5E.
10702   match(Set dst (DivD dst src));
10703
10704   format %{ "divsd   $dst, $src" %}
10705   ins_cost(150); // XXX
10706   opcode(0xF2, 0x0F, 0x5E);
10707   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10708   ins_pipe(pipe_slow);
10709 %}
10710 
10711 instruct divD_mem(regD dst, memory src)
10712 %{
        // Double divide, memory form: the divisor is a LoadD that is matched
        // together with the DivD.  Same opcode bytes (F2 0F 5E) as divD_reg,
        // with the reg_mem encoding classes.
10713   match(Set dst (DivD dst (LoadD src)));
10714
10715   format %{ "divsd   $dst, $src" %}
10716   ins_cost(150); // XXX
10717   opcode(0xF2, 0x0F, 0x5E);
10718   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10719   ins_pipe(pipe_slow);
10720 %}
10721 
10722 instruct divD_imm(regD dst, immD con) %{
        // Double divide by a constant: matches dst = DivD(dst, con).
        // The constant is addressed through $constantaddress($con).
10723   match(Set dst (DivD dst con));
10724   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10725   ins_cost(150); // XXX
10726   ins_encode %{
10727     __ divsd($dst$$XMMRegister, $constantaddress($con));
10728   %}
10729   ins_pipe(pipe_slow);
10730 %}
10731 
10732 instruct sqrtF_reg(regF dst, regF src)
10733 %{
        // Float square root, register form.  The rule matches the whole ideal
        // subtree ConvD2F(SqrtD(ConvF2D src)) -- a double sqrt wrapped in
        // float<->double conversions -- and prints it as a single "sqrtss"
        // (opcode bytes F3 0F 51).
10734   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10735
10736   format %{ "sqrtss  $dst, $src" %}
10737   ins_cost(150); // XXX
10738   opcode(0xF3, 0x0F, 0x51);
10739   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10740   ins_pipe(pipe_slow);
10741 %}
10742 
10743 instruct sqrtF_mem(regF dst, memory src)
10744 %{
        // Float square root, memory form: same ConvD2F(SqrtD(ConvF2D ...))
        // pattern as sqrtF_reg, with the innermost input being a LoadF.
        // Opcode bytes F3 0F 51 with the reg_mem encoding classes.
10745   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10746
10747   format %{ "sqrtss  $dst, $src" %}
10748   ins_cost(150); // XXX
10749   opcode(0xF3, 0x0F, 0x51);
10750   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10751   ins_pipe(pipe_slow);
10752 %}
10753 
10754 instruct sqrtF_imm(regF dst, immF con) %{
        // Float square root of a constant: same ConvD2F(SqrtD(ConvF2D ...))
        // pattern as sqrtF_reg, with a float immediate as the innermost input.
        // The constant is addressed through $constantaddress($con).
10755   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10756   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10757   ins_cost(150); // XXX
10758   ins_encode %{
10759     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10760   %}
10761   ins_pipe(pipe_slow);
10762 %}
10763 
10764 instruct sqrtD_reg(regD dst, regD src)
10765 %{
        // Double square root, register form: matches dst = SqrtD(src).
        // Printed as "sqrtsd"; encoded from the opcode bytes F2 0F 51.
10766   match(Set dst (SqrtD src));
10767
10768   format %{ "sqrtsd  $dst, $src" %}
10769   ins_cost(150); // XXX
10770   opcode(0xF2, 0x0F, 0x51);
10771   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10772   ins_pipe(pipe_slow);
10773 %}
10774 
10775 instruct sqrtD_mem(regD dst, memory src)
10776 %{
        // Double square root, memory form: the input is a LoadD that is
        // matched together with the SqrtD.  Opcode bytes F2 0F 51 with the
        // reg_mem encoding classes.
10777   match(Set dst (SqrtD (LoadD src)));
10778
10779   format %{ "sqrtsd  $dst, $src" %}
10780   ins_cost(150); // XXX
10781   opcode(0xF2, 0x0F, 0x51);
10782   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10783   ins_pipe(pipe_slow);
10784 %}
10785 
10786 instruct sqrtD_imm(regD dst, immD con) %{
        // Double square root of a constant: matches dst = SqrtD(con).
        // The constant is addressed through $constantaddress($con).
10787   match(Set dst (SqrtD con));
10788   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10789   ins_cost(150); // XXX
10790   ins_encode %{
10791     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10792   %}
10793   ins_pipe(pipe_slow);
10794 %}
10795 
10796 instruct absF_reg(regF dst)
10797 %{
        // Float absolute value, in place: matches dst = AbsF(dst).
        // The format shows an andps against a 0x7fffffff mask; the bytes come
        // from the absF_encoding class, which is defined outside this chunk.
10798   match(Set dst (AbsF dst));
10799
10800   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10801   ins_encode(absF_encoding(dst));
10802   ins_pipe(pipe_slow);
10803 %}
10804 
10805 instruct absD_reg(regD dst)
10806 %{
        // Double absolute value, in place: matches dst = AbsD(dst).
        // The format shows an andpd against a 0x7fffffffffffffff mask; the
        // bytes come from the absD_encoding class, defined outside this chunk.
10807   match(Set dst (AbsD dst));
10808
10809   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10810             "# abs double by sign masking" %}
10811   ins_encode(absD_encoding(dst));
10812   ins_pipe(pipe_slow);
10813 %}
10814 
10815 instruct negF_reg(regF dst)
10816 %{
        // Float negate, in place: matches dst = NegF(dst).
        // The format shows an xorps against a 0x80000000 mask; the bytes come
        // from the negF_encoding class, which is defined outside this chunk.
10817   match(Set dst (NegF dst));
10818
10819   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10820   ins_encode(negF_encoding(dst));
10821   ins_pipe(pipe_slow);
10822 %}
10823 
10824 instruct negD_reg(regD dst)
10825 %{
        // Double negate, in place: matches dst = NegD(dst).
        // The format shows an xorpd against a 0x8000000000000000 mask; the
        // bytes come from the negD_encoding class, defined outside this chunk.
10826   match(Set dst (NegD dst));
10827
10828   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10829             "# neg double by sign flipping" %}
10830   ins_encode(negD_encoding(dst));
10831   ins_pipe(pipe_slow);
10832 %}
10833 
10834 // -----------Trig and Transcendental Instructions-----------------------------
10835 instruct cosD_reg(regD dst) %{
        // Double cosine, in place: matches dst = CosD(dst).
        // The two opcode bytes D9 FF are emitted between Push_SrcXD(dst) and
        // Push_ResultXD(dst).  NOTE(review): those encoding classes are not
        // visible here; presumably they move the value to and from the x87
        // stack -- confirm against their definitions.
10836   match(Set dst (CosD dst));
10837
10838   format %{ "dcos   $dst\n\t" %}
10839   opcode(0xD9, 0xFF);
10840   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10841   ins_pipe( pipe_slow );
10842 %}
10843 
10844 instruct sinD_reg(regD dst) %{
        // Double sine, in place: matches dst = SinD(dst).
        // The two opcode bytes D9 FE are emitted between Push_SrcXD(dst) and
        // Push_ResultXD(dst).  NOTE(review): those encoding classes are not
        // visible here; presumably they move the value to and from the x87
        // stack -- confirm against their definitions.
10845   match(Set dst (SinD dst));
10846
10847   format %{ "dsin   $dst\n\t" %}
10848   opcode(0xD9, 0xFE);
10849   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10850   ins_pipe( pipe_slow );
10851 %}
10852 
10853 instruct tanD_reg(regD dst) %{
        // Double tangent, in place: matches dst = TanD(dst).
        // Between Push_SrcXD(dst) and Push_ResultXD(dst) it emits D9 F2 (fptan)
        // followed by DD D8 (fstp st); no opcode() clause is used, the bytes
        // are given directly as Opcode(...) encodings.
10854   match(Set dst (TanD dst));
10855
10856   format %{ "dtan   $dst\n\t" %}
10857   ins_encode( Push_SrcXD(dst),
10858               Opcode(0xD9), Opcode(0xF2),   //fptan
10859               Opcode(0xDD), Opcode(0xD8),   //fstp st
10860               Push_ResultXD(dst) );
10861   ins_pipe( pipe_slow );
10862 %}
10863 
10864 instruct log10D_reg(regD dst) %{
        // Base-10 logarithm, in place: matches dst = Log10D(dst).
10865   // The source and result Double operands in XMM registers
10866   match(Set dst (Log10D dst));
10867   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10868   // fyl2x        ; compute log_10(2) * log_2(x)
10869   format %{ "fldlg2\t\t\t#Log10\n\t"
10870             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10871          %}
        // Encoding order: the fldlg2 bytes come first, then Push_SrcXD(dst),
        // then the fyl2x bytes, then Push_ResultXD(dst).
10872    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10873               Push_SrcXD(dst),
10874               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10875               Push_ResultXD(dst));
10876
10877   ins_pipe( pipe_slow );
10878 %}
10879 
10880 instruct logD_reg(regD dst) %{
        // Natural logarithm, in place: matches dst = LogD(dst).
10881   // The source and result Double operands in XMM registers
10882   match(Set dst (LogD dst));
10883   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10884   // fyl2x        ; compute log_e(2) * log_2(x)
10885   format %{ "fldln2\t\t\t#Log_e\n\t"
10886             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10887          %}
        // Encoding order: the fldln2 bytes come first, then Push_SrcXD(dst),
        // then the fyl2x bytes, then Push_ResultXD(dst).
10888   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10889               Push_SrcXD(dst),
10890               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10891               Push_ResultXD(dst));
10892   ins_pipe( pipe_slow );
10893 %}
10894 
10895 
10896 
10897 //----------Arithmetic Conversion Instructions---------------------------------
10898 
10899 instruct roundFloat_nop(regF dst)
10900 %{
        // RoundFloat is matched but produces no code: the encoding is empty,
        // the cost is zero and the pipeline class is "empty".
10901   match(Set dst (RoundFloat dst));
10902
10903   ins_cost(0);
10904   ins_encode();
10905   ins_pipe(empty);
10906 %}
10907 
10908 instruct roundDouble_nop(regD dst)
10909 %{
        // RoundDouble is matched but produces no code: the encoding is empty,
        // the cost is zero and the pipeline class is "empty".
10910   match(Set dst (RoundDouble dst));
10911
10912   ins_cost(0);
10913   ins_encode();
10914   ins_pipe(empty);
10915 %}
10916 
10917 instruct convF2D_reg_reg(regD dst, regF src)
10918 %{
        // Float to double, register form: matches dst = ConvF2D(src).
        // Printed as "cvtss2sd"; encoded from the opcode bytes F3 0F 5A.
10919   match(Set dst (ConvF2D src));
10920
10921   format %{ "cvtss2sd $dst, $src" %}
10922   opcode(0xF3, 0x0F, 0x5A);
10923   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10924   ins_pipe(pipe_slow); // XXX
10925 %}
10926 
10927 instruct convF2D_reg_mem(regD dst, memory src)
10928 %{
        // Float to double, memory form: the input is a LoadF that is matched
        // together with the ConvF2D.  Opcode bytes F3 0F 5A with the reg_mem
        // encoding classes.
10929   match(Set dst (ConvF2D (LoadF src)));
10930
10931   format %{ "cvtss2sd $dst, $src" %}
10932   opcode(0xF3, 0x0F, 0x5A);
10933   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10934   ins_pipe(pipe_slow); // XXX
10935 %}
10936 
10937 instruct convD2F_reg_reg(regF dst, regD src)
10938 %{
        // Double to float, register form: matches dst = ConvD2F(src).
        // Printed as "cvtsd2ss"; encoded from the opcode bytes F2 0F 5A.
10939   match(Set dst (ConvD2F src));
10940
10941   format %{ "cvtsd2ss $dst, $src" %}
10942   opcode(0xF2, 0x0F, 0x5A);
10943   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10944   ins_pipe(pipe_slow); // XXX
10945 %}
10946 
10947 instruct convD2F_reg_mem(regF dst, memory src)
10948 %{
        // Double to float, memory form: the input is a LoadD that is matched
        // together with the ConvD2F.  Opcode bytes F2 0F 5A with the reg_mem
        // encoding classes.
10949   match(Set dst (ConvD2F (LoadD src)));
10950
10951   format %{ "cvtsd2ss $dst, $src" %}
10952   opcode(0xF2, 0x0F, 0x5A);
10953   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10954   ins_pipe(pipe_slow); // XXX
10955 %}
10956 
10957 // XXX do mem variants
10958 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10959 %{
        // Float to int: matches dst = ConvF2I(src); the flags register is
        // killed.  The conversion itself uses opcode bytes F3 0F 2C and is
        // followed by the f2i_fixup encoding.  As the format sketches, the
        // fixup path runs only when the result equals 0x80000000: src is
        // stored to the stack, f2i_fixup is called and dst is popped.
10960   match(Set dst (ConvF2I src));
10961   effect(KILL cr);
10962
10963   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10964             "cmpl    $dst, #0x80000000\n\t"
10965             "jne,s   done\n\t"
10966             "subq    rsp, #8\n\t"
10967             "movss   [rsp], $src\n\t"
10968             "call    f2i_fixup\n\t"
10969             "popq    $dst\n"
10970     "done:   "%}
10971   opcode(0xF3, 0x0F, 0x2C);
10972   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10973              f2i_fixup(dst, src));
10974   ins_pipe(pipe_slow);
10975 %}
10976 
10977 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10978 %{
        // Float to long: matches dst = ConvF2L(src); the flags register is
        // killed.  Same opcode bytes as the int variant (F3 0F 2C) but with
        // REX_reg_reg_wide, followed by the f2l_fixup encoding.  As the format
        // sketches, the fixup path runs only when the result equals
        // 0x8000000000000000.
10979   match(Set dst (ConvF2L src));
10980   effect(KILL cr);
10981
10982   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10983             "cmpq    $dst, [0x8000000000000000]\n\t"
10984             "jne,s   done\n\t"
10985             "subq    rsp, #8\n\t"
10986             "movss   [rsp], $src\n\t"
10987             "call    f2l_fixup\n\t"
10988             "popq    $dst\n"
10989     "done:   "%}
10990   opcode(0xF3, 0x0F, 0x2C);
10991   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10992              f2l_fixup(dst, src));
10993   ins_pipe(pipe_slow);
10994 %}
10995 
10996 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10997 %{
        // Double to int: matches dst = ConvD2I(src); the flags register is
        // killed.  The conversion uses opcode bytes F2 0F 2C and is followed
        // by the d2i_fixup encoding.  As the format sketches, the fixup path
        // runs only when the result equals 0x80000000.
10998   match(Set dst (ConvD2I src));
10999   effect(KILL cr);
11000
11001   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11002             "cmpl    $dst, #0x80000000\n\t"
11003             "jne,s   done\n\t"
11004             "subq    rsp, #8\n\t"
11005             "movsd   [rsp], $src\n\t"
11006             "call    d2i_fixup\n\t"
11007             "popq    $dst\n"
11008     "done:   "%}
11009   opcode(0xF2, 0x0F, 0x2C);
11010   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11011              d2i_fixup(dst, src));
11012   ins_pipe(pipe_slow);
11013 %}
11014 
11015 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11016 %{
        // Double to long: matches dst = ConvD2L(src); the flags register is
        // killed.  Same opcode bytes as the int variant (F2 0F 2C) but with
        // REX_reg_reg_wide, followed by the d2l_fixup encoding.  As the format
        // sketches, the fixup path runs only when the result equals
        // 0x8000000000000000.
11017   match(Set dst (ConvD2L src));
11018   effect(KILL cr);
11019
11020   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11021             "cmpq    $dst, [0x8000000000000000]\n\t"
11022             "jne,s   done\n\t"
11023             "subq    rsp, #8\n\t"
11024             "movsd   [rsp], $src\n\t"
11025             "call    d2l_fixup\n\t"
11026             "popq    $dst\n"
11027     "done:   "%}
11028   opcode(0xF2, 0x0F, 0x2C);
11029   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11030              d2l_fixup(dst, src));
11031   ins_pipe(pipe_slow);
11032 %}
11033 
11034 instruct convI2F_reg_reg(regF dst, rRegI src)
11035 %{
        // Int to float, register form: matches dst = ConvI2F(src).
        // Selected only when UseXmmI2F is off; convXI2F_reg matches the same
        // pattern when the flag is on.  Opcode bytes F3 0F 2A.
11036   predicate(!UseXmmI2F);
11037   match(Set dst (ConvI2F src));
11038
11039   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11040   opcode(0xF3, 0x0F, 0x2A);
11041   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11042   ins_pipe(pipe_slow); // XXX
11043 %}
11044 
11045 instruct convI2F_reg_mem(regF dst, memory src)
11046 %{
        // Int to float, memory form: the input is a LoadI that is matched
        // together with the ConvI2F.  Unlike the register form, this rule has
        // no predicate.  Opcode bytes F3 0F 2A with the reg_mem encoding
        // classes.
11047   match(Set dst (ConvI2F (LoadI src)));
11048
11049   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11050   opcode(0xF3, 0x0F, 0x2A);
11051   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11052   ins_pipe(pipe_slow); // XXX
11053 %}
11054 
11055 instruct convI2D_reg_reg(regD dst, rRegI src)
11056 %{
        // Int to double, register form: matches dst = ConvI2D(src).
        // Selected only when UseXmmI2D is off; convXI2D_reg matches the same
        // pattern when the flag is on.  Opcode bytes F2 0F 2A.
11057   predicate(!UseXmmI2D);
11058   match(Set dst (ConvI2D src));
11059
11060   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11061   opcode(0xF2, 0x0F, 0x2A);
11062   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11063   ins_pipe(pipe_slow); // XXX
11064 %}
11065 
11066 instruct convI2D_reg_mem(regD dst, memory src)
11067 %{
        // Int to double, memory form: the input is a LoadI that is matched
        // together with the ConvI2D.  Unlike the register form, this rule has
        // no predicate.  Opcode bytes F2 0F 2A with the reg_mem encoding
        // classes.
11068   match(Set dst (ConvI2D (LoadI src)));
11069
11070   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11071   opcode(0xF2, 0x0F, 0x2A);
11072   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11073   ins_pipe(pipe_slow); // XXX
11074 %}
11075 
11076 instruct convXI2F_reg(regF dst, rRegI src)
11077 %{
        // Int to float, alternative two-instruction form selected when
        // UseXmmI2F is on: first movdl the integer register into the XMM
        // destination, then cvtdq2ps the destination onto itself.
11078   predicate(UseXmmI2F);
11079   match(Set dst (ConvI2F src));
11080
11081   format %{ "movdl $dst, $src\n\t"
11082             "cvtdq2psl $dst, $dst\t# i2f" %}
11083   ins_encode %{
11084     __ movdl($dst$$XMMRegister, $src$$Register);
11085     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11086   %}
11087   ins_pipe(pipe_slow); // XXX
11088 %}
11089 
11090 instruct convXI2D_reg(regD dst, rRegI src)
11091 %{
        // Int to double, alternative two-instruction form selected when
        // UseXmmI2D is on: first movdl the integer register into the XMM
        // destination, then cvtdq2pd the destination onto itself.
11092   predicate(UseXmmI2D);
11093   match(Set dst (ConvI2D src));
11094
11095   format %{ "movdl $dst, $src\n\t"
11096             "cvtdq2pdl $dst, $dst\t# i2d" %}
11097   ins_encode %{
11098     __ movdl($dst$$XMMRegister, $src$$Register);
11099     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11100   %}
11101   ins_pipe(pipe_slow); // XXX
11102 %}
11103 
11104 instruct convL2F_reg_reg(regF dst, rRegL src)
11105 %{
        // Long to float, register form: matches dst = ConvL2F(src).
        // Same opcode bytes as the int-source rule (F3 0F 2A) but with
        // REX_reg_reg_wide for the long source register.
11106   match(Set dst (ConvL2F src));
11107
11108   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11109   opcode(0xF3, 0x0F, 0x2A);
11110   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11111   ins_pipe(pipe_slow); // XXX
11112 %}
11113 
11114 instruct convL2F_reg_mem(regF dst, memory src)
11115 %{
        // Long to float, memory form: the input is a LoadL that is matched
        // together with the ConvL2F.  Opcode bytes F3 0F 2A with
        // REX_reg_mem_wide.
11116   match(Set dst (ConvL2F (LoadL src)));
11117
11118   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11119   opcode(0xF3, 0x0F, 0x2A);
11120   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11121   ins_pipe(pipe_slow); // XXX
11122 %}
11123 
11124 instruct convL2D_reg_reg(regD dst, rRegL src)
11125 %{
        // Long to double, register form: matches dst = ConvL2D(src).
        // Same opcode bytes as the int-source rule (F2 0F 2A) but with
        // REX_reg_reg_wide for the long source register.
11126   match(Set dst (ConvL2D src));
11127
11128   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11129   opcode(0xF2, 0x0F, 0x2A);
11130   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11131   ins_pipe(pipe_slow); // XXX
11132 %}
11133 
11134 instruct convL2D_reg_mem(regD dst, memory src)
11135 %{
        // Long to double, memory form: the input is a LoadL that is matched
        // together with the ConvL2D.  Opcode bytes F2 0F 2A with
        // REX_reg_mem_wide.
11136   match(Set dst (ConvL2D (LoadL src)));
11137
11138   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11139   opcode(0xF2, 0x0F, 0x2A);
11140   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11141   ins_pipe(pipe_slow); // XXX
11142 %}
11143 
11144 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11145 %{
        // Int to long, register form: matches dst = ConvI2L(src) and emits a
        // single movslq from the int register into the long register.
11146   match(Set dst (ConvI2L src));
11147
11148   ins_cost(125);
11149   format %{ "movslq  $dst, $src\t# i2l" %}
11150   ins_encode %{
11151     __ movslq($dst$$Register, $src$$Register);
11152   %}
11153   ins_pipe(ialu_reg_reg);
11154 %}
11155 
11156 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11157 // %{
11158 //   match(Set dst (ConvI2L src));
11159 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11160 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11161 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11162 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11163 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11164 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11165 
11166 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11167 //   ins_encode(enc_copy(dst, src));
11168 // //   opcode(0x63); // needs REX.W
11169 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11170 //   ins_pipe(ialu_reg_reg);
11171 // %}
11172 
11173 // Zero-extend convert int to long
11174 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11175 %{
        // Matches the combined pattern AndL(ConvI2L(src), mask) where mask is
        // an immL_32bits operand, and replaces both ideal nodes with one movl
        // produced by the enc_copy encoding.
11176   match(Set dst (AndL (ConvI2L src) mask));
11177
11178   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11179   ins_encode(enc_copy(dst, src));
11180   ins_pipe(ialu_reg_reg);
11181 %}
11182 
11183 // Zero-extend convert int to long
11184 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11185 %{
        // Memory form of the zero-extending int-to-long rule: matches
        // AndL(ConvI2L(LoadI src), mask) with an immL_32bits mask and emits a
        // single movl load (opcode byte 8B).
11186   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11187
11188   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11189   opcode(0x8B);
11190   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11191   ins_pipe(ialu_reg_mem);
11192 %}
11193 
11194 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11195 %{
        // Matches AndL(src, mask) on a long register with an immL_32bits mask
        // and prints it as a movl; the encoding is enc_copy_always rather than
        // the enc_copy used by convI2L_reg_reg_zex.
11196   match(Set dst (AndL src mask));
11197
11198   format %{ "movl    $dst, $src\t# zero-extend long" %}
11199   ins_encode(enc_copy_always(dst, src));
11200   ins_pipe(ialu_reg_reg);
11201 %}
11202 
11203 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11204 %{
        // Long to int: matches dst = ConvL2I(src), printed as a movl and
        // encoded with enc_copy_always.
11205   match(Set dst (ConvL2I src));
11206
11207   format %{ "movl    $dst, $src\t# l2i" %}
11208   ins_encode(enc_copy_always(dst, src));
11209   ins_pipe(ialu_reg_reg);
11210 %}
11211 
11212 
11213 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot into an int register, printed as a
        // movl load (opcode byte 8B).
11214   match(Set dst (MoveF2I src));
11215   effect(DEF dst, USE src);
11216
11217   ins_cost(125);
11218   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11219   opcode(0x8B);
11220   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11221   ins_pipe(ialu_reg_mem);
11222 %}
11223 
11224 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot into a float (XMM) register, printed
        // as a movss load (opcode bytes F3 0F 10).
11225   match(Set dst (MoveI2F src));
11226   effect(DEF dst, USE src);
11227
11228   ins_cost(125);
11229   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11230   opcode(0xF3, 0x0F, 0x10);
11231   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11232   ins_pipe(pipe_slow);
11233 %}
11234 
11235 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot into a long register, printed as a
        // movq load (opcode byte 8B with REX_reg_mem_wide).
11236   match(Set dst (MoveD2L src));
11237   effect(DEF dst, USE src);
11238
11239   ins_cost(125);
11240   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11241   opcode(0x8B);
11242   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11243   ins_pipe(ialu_reg_mem);
11244 %}
11245 
11246 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot into a double (XMM) register, variant
        // selected when UseXmmLoadAndClearUpper is off: printed as movlpd
        // (opcode bytes 66 0F 12).  MoveL2D_stack_reg covers the flag-on case.
11247   predicate(!UseXmmLoadAndClearUpper);
11248   match(Set dst (MoveL2D src));
11249   effect(DEF dst, USE src);
11250
11251   ins_cost(125);
11252   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11253   opcode(0x66, 0x0F, 0x12);
11254   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11255   ins_pipe(pipe_slow);
11256 %}
11257 
11258 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot into a double (XMM) register, variant
        // selected when UseXmmLoadAndClearUpper is on: printed as movsd
        // (opcode bytes F2 0F 10).  MoveL2D_stack_reg_partial covers the
        // flag-off case.
11259   predicate(UseXmmLoadAndClearUpper);
11260   match(Set dst (MoveL2D src));
11261   effect(DEF dst, USE src);
11262
11263   ins_cost(125);
11264   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11265   opcode(0xF2, 0x0F, 0x10);
11266   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11267   ins_pipe(pipe_slow);
11268 %}
11269 
11270 
11271 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I from a float (XMM) register into an int stack slot, printed
        // as a movss store (opcode bytes F3 0F 11).  Store form: the register
        // operand is src and the memory operand is dst, hence the (src, dst)
        // argument order in the encoding classes.
11272   match(Set dst (MoveF2I src));
11273   effect(DEF dst, USE src);
11274
11275   ins_cost(95); // XXX
11276   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11277   opcode(0xF3, 0x0F, 0x11);
11278   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11279   ins_pipe(pipe_slow);
11280 %}
11281 
11282 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register into a float stack slot, printed as a
        // movl store (opcode byte 89).  Store form: the register operand is
        // src and the memory operand is dst, hence the (src, dst) argument
        // order in the encoding classes.
11283   match(Set dst (MoveI2F src));
11284   effect(DEF dst, USE src);
11285
11286   ins_cost(100);
11287   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11288   opcode(0x89);
11289   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11290   ins_pipe( ialu_mem_reg );
11291 %}
11292 
11293 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L from a double (XMM) register into a long stack slot, printed
        // as a movsd store (opcode bytes F2 0F 11).  Store form: the register
        // operand is src and the memory operand is dst, hence the (src, dst)
        // argument order in the encoding classes.
11294   match(Set dst (MoveD2L src));
11295   effect(DEF dst, USE src);
11296
11297   ins_cost(95); // XXX
        // The trailing tag in the format must name this rule so that printed
        // listings can be traced back to it.
11298   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11299   opcode(0xF2, 0x0F, 0x11);
11300   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11301   ins_pipe(pipe_slow);
11302 %}
11303 
11304 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D from a long register into a double stack slot, printed as a
        // movq store (opcode byte 89 with REX_reg_mem_wide).  Store form: the
        // register operand is src and the memory operand is dst, hence the
        // (src, dst) argument order in the encoding classes.
11305   match(Set dst (MoveL2D src));
11306   effect(DEF dst, USE src);
11307
11308   ins_cost(100);
11309   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11310   opcode(0x89);
11311   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11312   ins_pipe(ialu_mem_reg);
11313 %}
11314 
11315 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I directly between registers: movdl from the XMM source into
        // the int destination.  Its cost (85) is below that of the stack-slot
        // variants of the same ideal node (95 and 125).
11316   match(Set dst (MoveF2I src));
11317   effect(DEF dst, USE src);
11318   ins_cost(85);
11319   format %{ "movd    $dst,$src\t# MoveF2I" %}
11320   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11321   ins_pipe( pipe_slow );
11322 %}
11323 
11324 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L directly between registers: movdq from the XMM source into
        // the long destination (the format prints the mnemonic as "movd").
11325   match(Set dst (MoveD2L src));
11326   effect(DEF dst, USE src);
11327   ins_cost(85);
11328   format %{ "movd    $dst,$src\t# MoveD2L" %}
11329   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11330   ins_pipe( pipe_slow );
11331 %}
11332 
11333 // The next instructions have long latency and use Int unit. Set high cost.
11334 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F directly between registers: movdl from the int source into
        // the XMM destination.  The cost is deliberately high (300), above the
        // stack-slot variants of the same ideal node (100 and 125).
11335   match(Set dst (MoveI2F src));
11336   effect(DEF dst, USE src);
11337   ins_cost(300);
11338   format %{ "movd    $dst,$src\t# MoveI2F" %}
11339   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11340   ins_pipe( pipe_slow );
11341 %}
11342 
11343 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D directly between registers: movdq from the long source into
        // the XMM destination (the format prints the mnemonic as "movd").
        // The cost is deliberately high (300), above the stack-slot variants
        // of the same ideal node (100 and 125).
11344   match(Set dst (MoveL2D src));
11345   effect(DEF dst, USE src);
11346   ins_cost(300);
11347   format %{ "movd    $dst,$src\t# MoveL2D" %}
11348   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
11352 // Replicate scalar to packed byte (1 byte) values in xmm
11353 instruct Repl8B_reg(regD dst, regD src) %{
        // Replicate8B with the scalar already in an XMM register.  The format
        // lists three instructions; the encoding is the single pshufd_8x8
        // encoding class (defined outside this chunk).
11354   match(Set dst (Replicate8B src));
11355   format %{ "MOVDQA  $dst,$src\n\t"
11356             "PUNPCKLBW $dst,$dst\n\t"
11357             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11358   ins_encode( pshufd_8x8(dst, src));
11359   ins_pipe( pipe_slow );
11360 %}
11361 
11362 // Replicate scalar to packed byte (1 byte) values in xmm
11363 instruct Repl8B_rRegI(regD dst, rRegI src) %{
        // Replicate8B with the scalar in an int register: mov_i2x first moves
        // src into dst, then pshufd_8x8 is applied with dst as both operands.
11364   match(Set dst (Replicate8B src));
11365   format %{ "MOVD    $dst,$src\n\t"
11366             "PUNPCKLBW $dst,$dst\n\t"
11367             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11368   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11369   ins_pipe( pipe_slow );
11370 %}
11371 
11372 // Replicate scalar zero to packed byte (1 byte) values in xmm
11373 instruct Repl8B_immI0(regD dst, immI0 zero) %{
        // Replicate8B of the constant zero: a pxor of dst with itself; the
        // zero operand only takes part in matching.
11374   match(Set dst (Replicate8B zero));
11375   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11376   ins_encode( pxor(dst, dst));
11377   ins_pipe( fpu_reg_reg );
11378 %}
11379 
11380 // Replicate scalar to packed short (2 byte) values in xmm
11381 instruct Repl4S_reg(regD dst, regD src) %{
        // Replicate4S with the scalar already in an XMM register; encoded by
        // the pshufd_4x16 encoding class and printed as PSHUFLW with 0x00.
11382   match(Set dst (Replicate4S src));
11383   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11384   ins_encode( pshufd_4x16(dst, src));
11385   ins_pipe( fpu_reg_reg );
11386 %}
11387 
11388 // Replicate scalar to packed short (2 byte) values in xmm
11389 instruct Repl4S_rRegI(regD dst, rRegI src) %{
        // Replicate4S with the scalar in an int register: mov_i2x first moves
        // src into dst, then pshufd_4x16 is applied with dst as both operands.
11390   match(Set dst (Replicate4S src));
11391   format %{ "MOVD    $dst,$src\n\t"
11392             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11393   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11394   ins_pipe( fpu_reg_reg );
11395 %}
11396 
11397 // Replicate scalar zero to packed short (2 byte) values in xmm
11398 instruct Repl4S_immI0(regD dst, immI0 zero) %{
        // Replicate4S of the constant zero: a pxor of dst with itself; the
        // zero operand only takes part in matching.
11399   match(Set dst (Replicate4S zero));
11400   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11401   ins_encode( pxor(dst, dst));
11402   ins_pipe( fpu_reg_reg );
11403 %}
11404 
11405 // Replicate scalar to packed char (2 byte) values in xmm
11406 instruct Repl4C_reg(regD dst, regD src) %{
        // Replicate4C with the scalar already in an XMM register; uses the
        // same pshufd_4x16 encoding as Repl4S_reg.
11407   match(Set dst (Replicate4C src));
11408   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11409   ins_encode( pshufd_4x16(dst, src));
11410   ins_pipe( fpu_reg_reg );
11411 %}
11412 
11413 // Replicate scalar to packed char (2 byte) values in xmm
11414 instruct Repl4C_rRegI(regD dst, rRegI src) %{
        // Replicate4C with the scalar in an int register: mov_i2x first moves
        // src into dst, then pshufd_4x16 is applied with dst as both operands.
11415   match(Set dst (Replicate4C src));
11416   format %{ "MOVD    $dst,$src\n\t"
11417             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11418   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11419   ins_pipe( fpu_reg_reg );
11420 %}
11421 
11422 // Replicate scalar zero to packed char (2 byte) values in xmm
11423 instruct Repl4C_immI0(regD dst, immI0 zero) %{
        // Replicate4C of the constant zero: a pxor of dst with itself; the
        // zero operand only takes part in matching.
11424   match(Set dst (Replicate4C zero));
11425   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11426   ins_encode( pxor(dst, dst));
11427   ins_pipe( fpu_reg_reg );
11428 %}
11429 
11430 // Replicate scalar to packed integer (4 byte) values in xmm
11431 instruct Repl2I_reg(regD dst, regD src) %{
        // Replicate2I with the scalar already in an XMM register: pshufd with
        // the shuffle immediate 0x00.
11432   match(Set dst (Replicate2I src));
11433   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11434   ins_encode( pshufd(dst, src, 0x00));
11435   ins_pipe( fpu_reg_reg );
11436 %}
11437 
11438 // Replicate scalar to packed integer (4 byte) values in xmm
11439 instruct Repl2I_rRegI(regD dst, rRegI src) %{
        // Replicate2I with the scalar in an int register: mov_i2x first moves
        // src into dst, then pshufd (immediate 0x00) is applied with dst as
        // both operands.
11440   match(Set dst (Replicate2I src));
11441   format %{ "MOVD   $dst,$src\n\t"
11442             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11443   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11444   ins_pipe( fpu_reg_reg );
11445 %}
11446 
11447 // Replicate scalar zero to packed integer (4 byte) values in xmm
11448 instruct Repl2I_immI0(regD dst, immI0 zero) %{
        // Replicate2I of the constant zero: a pxor of dst with itself; the
        // zero operand only takes part in matching.
11449   match(Set dst (Replicate2I zero));
11450   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11451   ins_encode( pxor(dst, dst));
11452   ins_pipe( fpu_reg_reg );
11453 %}
11454 
11455 // Replicate scalar to packed single precision floating point values in xmm
11456 instruct Repl2F_reg(regD dst, regD src) %{
        // Replicate2F with the source declared as a regD operand: pshufd with
        // the shuffle immediate 0xe0.  Repl2F_regF is the same rule for a regF
        // source.
11457   match(Set dst (Replicate2F src));
11458   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11459   ins_encode( pshufd(dst, src, 0xe0));
11460   ins_pipe( fpu_reg_reg );
11461 %}
11462 
11463 // Replicate scalar to packed single precision floating point values in xmm
11464 instruct Repl2F_regF(regD dst, regF src) %{
        // Replicate2F with the source declared as a regF operand: pshufd with
        // the shuffle immediate 0xe0.  Repl2F_reg is the same rule for a regD
        // source.
11465   match(Set dst (Replicate2F src));
11466   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11467   ins_encode( pshufd(dst, src, 0xe0));
11468   ins_pipe( fpu_reg_reg );
11469 %}
11470 
11471 // Replicate scalar zero to packed single precision floating point values in xmm
11472 instruct Repl2F_immF0(regD dst, immF0 zero) %{
        // Replicate2F of the float constant zero (immF0): a pxor of dst with
        // itself; the zero operand only takes part in matching.
11473   match(Set dst (Replicate2F zero));
11474   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11475   ins_encode( pxor(dst, dst));
11476   ins_pipe( fpu_reg_reg );
11477 %}
11478 
11479 
11480 // =======================================================================
11481 // fast clearing of an array
11482 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11483                   rFlagsReg cr)
11484 %{
        // ClearArray: operands are pinned to fixed registers by their operand
        // classes (cnt in rcx, base in rdi, zero in rax).  cnt and base are
        // consumed and destroyed (USE_KILL); rax and the flags are killed.
        // The result operand is a dummy Universe value.
11485   match(Set dummy (ClearArray cnt base));
11486   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11487
11488   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11489             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11490   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11491              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11492   ins_pipe(pipe_slow);
11493 %}
11494 
11495 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11496                         rax_RegI result, regD tmp1, rFlagsReg cr)
11497 %{
        // StrComp intrinsic.  Inputs are pinned to rdi/rcx (first string and
        // count) and rsi/rdx (second string and count); the result is in rax.
        // All four inputs are consumed and destroyed, tmp1 is an XMM scratch
        // register and the flags are killed.  Code generation is delegated to
        // the assembler's string_compare routine.
11498   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11499   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11500
11501   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11502   ins_encode %{
          // Note the argument order: both string pointers first, then both counts.
11503     __ string_compare($str1$$Register, $str2$$Register,
11504                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11505                       $tmp1$$XMMRegister);
11506   %}
11507   ins_pipe( pipe_slow );
11508 %}
11509 
11510 // fast search of substring with known size.
11511 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11512                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11513 %{
        // StrIndexOf where the substring length is a compile-time constant
        // (immI int_cnt2).  Only used when UseSSE42Intrinsics is on.  cnt2
        // (rax) is not an input of the match rule; it and tmp (rcx) are
        // killed, vec is an XMM scratch register, and str1/str2/cnt1 are
        // consumed and destroyed.
11514   predicate(UseSSE42Intrinsics);
11515   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11516   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11517
11518   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11519   ins_encode %{
          // The constant length selects which assembler routine is emitted.
11520     int icnt2 = (int)$int_cnt2$$constant;
11521     if (icnt2 >= 8) {
11522       // IndexOf for constant substrings with size >= 8 elements
11523       // which don't need to be loaded through stack.
11524       __ string_indexofC8($str1$$Register, $str2$$Register,
11525                           $cnt1$$Register, $cnt2$$Register,
11526                           icnt2, $result$$Register,
11527                           $vec$$XMMRegister, $tmp$$Register);
11528     } else {
11529       // Small strings are loaded through stack if they cross page boundary.
11530       __ string_indexof($str1$$Register, $str2$$Register,
11531                         $cnt1$$Register, $cnt2$$Register,
11532                         icnt2, $result$$Register,
11533                         $vec$$XMMRegister, $tmp$$Register);
11534     }
11535   %}
11536   ins_pipe( pipe_slow );
11537 %}
11538 
11539 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11540                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11541 %{
        // StrIndexOf where the substring length is a register value (cnt2 in
        // rax).  Only used when UseSSE42Intrinsics is on.  All four inputs
        // are consumed and destroyed; tmp (rcx) and the flags are killed and
        // vec is an XMM scratch register.
11542   predicate(UseSSE42Intrinsics);
11543   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11544   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11545
11546   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11547   ins_encode %{
          // (-1) is passed in the argument position where string_indexof_con
          // passes the constant substring length.
11548     __ string_indexof($str1$$Register, $str2$$Register,
11549                       $cnt1$$Register, $cnt2$$Register,
11550                       (-1), $result$$Register,
11551                       $vec$$XMMRegister, $tmp$$Register);
11552   %}
11553   ins_pipe( pipe_slow );
11554 %}
11555 
11556 // fast string equals
11557 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11558                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11559 %{
        // Matches the StrEquals ideal node; operands are pinned to fixed
        // registers (rdi, rsi, rcx in; rax out).  Unlike the indexOf rules
        // above, this rule carries no predicate.
11560   match(Set result (StrEquals (Binary str1 str2) cnt));
        // Inputs are consumed and clobbered; tmp1/tmp2 are scratch XMM
        // registers, tmp3 (rbx) and the flags are killed.
11561   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11562
11563   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11564   ins_encode %{
          // First argument is false here and true in array_equals below.
          // NOTE(review): presumably it selects array-vs-string operand
          // handling inside char_arrays_equals -- confirm in the assembler.
11565     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11566                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11567                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11568   %}
11569   ins_pipe( pipe_slow );
11570 %}
11571 
11572 // fast array equals
11573 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11574                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11575 %{
        // Matches the AryEq ideal node.  No element count is supplied by the
        // match rule; rcx (tmp3) is handed to the helper in the argument slot
        // where string_equals passes its cnt operand.
11576   match(Set result (AryEq ary1 ary2));
        // Both array pointers are consumed and clobbered; tmp1/tmp2 are
        // scratch XMM registers; rcx, rbx and the flags are killed.
11577   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11578   //ins_cost(300);
11579
11580   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11581   ins_encode %{
          // Same helper as string_equals, but with the first argument true.
11582     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11583                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11584                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11585   %}
11586   ins_pipe( pipe_slow );
11587 %}
11588 
11589 //----------Control Flow Instructions------------------------------------------
11590 // Signed compare Instructions
11591 
11592 // XXX more variants!!
11593 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11594 %{
        // CmpI of two int registers; the result is the flags register cr.
11595   match(Set cr (CmpI op1 op2));
        // Spells out explicitly that cr is defined and both operands are
        // only read.
11596   effect(DEF cr, USE op1, USE op2);
11597
11598   format %{ "cmpl    $op1, $op2" %}
11599   opcode(0x3B);  /* Opcode 3B /r */
        // Emitted as: REX prefix bytes (REX_reg_reg), the primary opcode
        // (OpcP), then the register/register operand byte (reg_reg).
11600   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11601   ins_pipe(ialu_cr_reg_reg);
11602 %}
11603 
11604 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11605 %{
        // CmpI of an int register against an int constant.
11606   match(Set cr (CmpI op1 op2));
11607
11608   format %{ "cmpl    $op1, $op2" %}
11609   opcode(0x81, 0x07); /* Opcode 81 /7 */
        // NOTE(review): the Con8or32 name suggests the constant is emitted as
        // 8 or 32 bits, with OpcSErm choosing the matching opcode form --
        // confirm against the enc_class definitions.
11610   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11611   ins_pipe(ialu_cr_reg_imm);
11612 %}
11613 
11614 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11615 %{
        // CmpI of an int register against an int loaded from memory; the
        // LoadI is folded into the compare's memory operand.
11616   match(Set cr (CmpI op1 (LoadI op2)));
11617
        // Higher cost than the register forms (value flagged XXX by the
        // original author).
11618   ins_cost(500); // XXX
11619   format %{ "cmpl    $op1, $op2" %}
11620   opcode(0x3B); /* Opcode 3B /r */
11621   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11622   ins_pipe(ialu_cr_reg_mem);
11623 %}
11624 
11625 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11626 %{
        // CmpI of an int register against the constant zero, implemented by
        // testing the register against itself instead of comparing with an
        // immediate.
11627   match(Set cr (CmpI src zero));
11628
11629   format %{ "testl   $src, $src" %}
11630   opcode(0x85);
        // The zero operand is not encoded; src is used for both operands.
11631   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11632   ins_pipe(ialu_cr_reg_imm);
11633 %}
11634 
11635 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11636 %{
        // Folds (src & con) compared with zero into one test instruction, so
        // the AND result never needs a register.
11637   match(Set cr (CmpI (AndI src con) zero));
11638
11639   format %{ "testl   $src, $con" %}
11640   opcode(0xF7, 0x00);
        // The constant is always emitted through Con32; zero is not encoded.
11641   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11642   ins_pipe(ialu_cr_reg_imm);
11643 %}
11644 
11645 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11646 %{
        // Folds (src & int-loaded-from-mem) compared with zero into one test
        // instruction with a memory operand.
11647   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11648
11649   format %{ "testl   $src, $mem" %}
11650   opcode(0x85);
11651   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11652   ins_pipe(ialu_cr_reg_mem);
11653 %}
11654 
11655 // Unsigned compare Instructions; really, same as signed except they
11656 // produce an rFlagsRegU instead of rFlagsReg.
11657 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11658 %{
        // CmpU of two int registers.  Opcode and encoding are the same as
        // compI_rReg; only the result operand class (rFlagsRegU) differs.
11659   match(Set cr (CmpU op1 op2));
11660
11661   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11662   opcode(0x3B); /* Opcode 3B /r */
11663   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11664   ins_pipe(ialu_cr_reg_reg);
11665 %}
11666 
11667 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11668 %{
        // CmpU of an int register against an int constant; same opcode and
        // encoding as compI_rReg_imm, producing rFlagsRegU.
11669   match(Set cr (CmpU op1 op2));
11670
11671   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11672   opcode(0x81,0x07); /* Opcode 81 /7 */
11673   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11674   ins_pipe(ialu_cr_reg_imm);
11675 %}
11676 
11677 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11678 %{
        // CmpU of an int register against an int loaded from memory; same
        // opcode and encoding as compI_rReg_mem, producing rFlagsRegU.
11679   match(Set cr (CmpU op1 (LoadI op2)));
11680
11681   ins_cost(500); // XXX
11682   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11683   opcode(0x3B); /* Opcode 3B /r */
11684   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11685   ins_pipe(ialu_cr_reg_mem);
11686 %}
11687 
11688 // // // Cisc-spilled version of cmpU_rReg
11689 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11690 // //%{
11691 // //  match(Set cr (CmpU (LoadI op1) op2));
11692 // //
11693 // //  format %{ "CMPu   $op1,$op2" %}
11694 // //  ins_cost(500);
11695 // //  opcode(0x39);  /* Opcode 39 /r */
11696 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11697 // //%}
11698 
11699 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11700 %{
        // CmpU of an int register against constant zero, implemented by
        // testing the register against itself (as in testI_reg).
11701   match(Set cr (CmpU src zero));
11702
11703   format %{ "testl  $src, $src\t# unsigned" %}
11704   opcode(0x85);
11705   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11706   ins_pipe(ialu_cr_reg_imm);
11707 %}
11708 
11709 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11710 %{
        // CmpP of two pointer registers; produces unsigned flags.
11711   match(Set cr (CmpP op1 op2));
11712
11713   format %{ "cmpq    $op1, $op2\t# ptr" %}
11714   opcode(0x3B); /* Opcode 3B /r */
        // Uses the _wide REX variant, matching the 64-bit cmpq in the format.
11715   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11716   ins_pipe(ialu_cr_reg_reg);
11717 %}
11718 
11719 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11720 %{
        // CmpP of a pointer register against a pointer loaded from memory;
        // the LoadP is folded into the compare's memory operand.
11721   match(Set cr (CmpP op1 (LoadP op2)));
11722
11723   ins_cost(500); // XXX
11724   format %{ "cmpq    $op1, $op2\t# ptr" %}
11725   opcode(0x3B); /* Opcode 3B /r */
11726   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11727   ins_pipe(ialu_cr_reg_mem);
11728 %}
11729 
11730 // // // Cisc-spilled version of cmpP_rReg
11731 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11732 // //%{
11733 // //  match(Set cr (CmpP (LoadP op1) op2));
11734 // //
11735 // //  format %{ "CMPu   $op1,$op2" %}
11736 // //  ins_cost(500);
11737 // //  opcode(0x39);  /* Opcode 39 /r */
11738 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11739 // //%}
11740 
11741 // XXX this is generalized by compP_rReg_mem???
11742 // Compare raw pointer (used in out-of-heap check).
11743 // Only works because non-oop pointers must be raw pointers
11744 // and raw pointers have no anti-dependencies.
11745 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11746 %{
        // Same match tree and encoding as compP_rReg_mem, but restricted by
        // the predicate to loads whose type is not an oop pointer, and with
        // no explicit ins_cost.  n->in(2)->in(2) is the second input of the
        // node that is the CmpP's second input (the LoadP).
11747   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11748   match(Set cr (CmpP op1 (LoadP op2)));
11749
11750   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11751   opcode(0x3B); /* Opcode 3B /r */
11752   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11753   ins_pipe(ialu_cr_reg_mem);
11754 %}
11755 
11756 // This will generate a signed flags result. This should be OK since
11757 // any compare to a zero should be eq/neq.
11758 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11759 %{
        // CmpP of a pointer register against the null constant, implemented
        // as a 64-bit test of the register against itself.  Note that the
        // result class is rFlagsReg, not rFlagsRegU as in the other CmpP
        // rules.
11760   match(Set cr (CmpP src zero));
11761
11762   format %{ "testq   $src, $src\t# ptr" %}
11763   opcode(0x85);
11764   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11765   ins_pipe(ialu_cr_reg_imm);
11766 %}
11767 
11768 // This will generate a signed flags result. This should be OK since
11769 // any compare to a zero should be eq/neq.
11770 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11771 %{
        // Null check of a pointer held in memory without loading it into a
        // register.  Selected when compressed oops are off or the narrow-oop
        // base is non-NULL; testP_mem_reg0 below covers the complementary
        // case.
11772   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11773   match(Set cr (CmpP (LoadP op) zero));
11774
11775   ins_cost(500); // XXX
11776   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11777   opcode(0xF7); /* Opcode F7 /0 */
        // Only a 32-bit immediate (0xFFFFFFFF) is emitted; the format string
        // prints it as the 64-bit all-ones mask.
11778   ins_encode(REX_mem_wide(op),
11779              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11780   ins_pipe(ialu_cr_reg_imm);
11781 %}
11782 
11783 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11784 %{
        // Null check of a pointer held in memory for the configuration where
        // compressed oops are on and the narrow-oop base is NULL.  Per the
        // format string, R12 (heap base) is then zero, so it is compared
        // against the memory operand in place of an immediate.
11785   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11786   match(Set cr (CmpP (LoadP mem) zero));
11787
11788   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11789   ins_encode %{
11790     __ cmpq(r12, $mem$$Address);
11791   %}
11792   ins_pipe(ialu_cr_reg_mem);
11793 %}
11794 
11795 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11796 %{
        // CmpN of two narrow-oop registers, emitted as a 32-bit cmpl.
11797   match(Set cr (CmpN op1 op2));
11798
11799   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11800   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11801   ins_pipe(ialu_cr_reg_reg);
11802 %}
11803 
11804 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11805 %{
        // CmpN of a narrow-oop register against a narrow oop loaded from
        // memory; the LoadN is folded into the compare's memory operand.
11806   match(Set cr (CmpN src (LoadN mem)));
11807
11808   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11809   ins_encode %{
11810     __ cmpl($src$$Register, $mem$$Address);
11811   %}
11812   ins_pipe(ialu_cr_reg_mem);
11813 %}
11814 
11815 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // CmpN of a narrow-oop register against a narrow-oop constant.
11816   match(Set cr (CmpN op1 op2));
11817
11818   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11819   ins_encode %{
          // The constant is handed to the assembler as a jobject.
11820     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11821   %}
11822   ins_pipe(ialu_cr_reg_imm);
11823 %}
11824 
11825 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11826 %{
        // CmpN of a narrow-oop constant against a narrow oop held in memory.
        // NOTE(review): in the match tree the constant is the first CmpN
        // input, while the emitted compare takes the memory operand first.
        // That is indistinguishable for equal/not-equal tests but would
        // swap ordering conditions -- confirm CmpN is only used for eq/ne.
11827   match(Set cr (CmpN src (LoadN mem)));
11828
11829   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11830   ins_encode %{
11831     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11832   %}
11833   ins_pipe(ialu_cr_reg_mem);
11834 %}
11835 
11836 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // CmpN of a narrow-oop register against the narrow null constant,
        // implemented as a 32-bit test of the register against itself.
        // The result class is rFlagsReg, as in testP_reg.
11837   match(Set cr (CmpN src zero));
11838
11839   format %{ "testl   $src, $src\t# compressed ptr" %}
11840   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11841   ins_pipe(ialu_cr_reg_imm);
11842 %}
11843 
11844 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11845 %{
        // Null check of a narrow oop held in memory without loading it into
        // a register.  Selected when the narrow-oop base is non-NULL;
        // testN_mem_reg0 below covers the base == NULL case.
11846   predicate(Universe::narrow_oop_base() != NULL);
11847   match(Set cr (CmpN (LoadN mem) zero));
11848
11849   ins_cost(500); // XXX
11850   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11851   ins_encode %{
          // Compare the 32-bit slot with zero so that ZF is set exactly when
          // the narrow oop is null.  The previous encoding compared against
          // 0xFFFFFFFF, which sets ZF when the slot is all ones rather than
          // zero and did not implement the "testl $mem, 0xffffffff" its
          // format string advertised.  cmpl with 0 yields the same ZF/SF and
          // cleared CF/OF that such a testl would.
11852     __ cmpl($mem$$Address, (int)0);
11853   %}
11854   ins_pipe(ialu_cr_reg_mem);
11855 %}
11856 
11857 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11858 %{
        // Null check of a narrow oop held in memory for the configuration
        // where the narrow-oop base is NULL.  Per the format string, R12
        // (heap base) is then zero, so it is compared against the memory
        // operand in place of an immediate.
11859   predicate(Universe::narrow_oop_base() == NULL);
11860   match(Set cr (CmpN (LoadN mem) zero));
11861
11862   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11863   ins_encode %{
11864     __ cmpl(r12, $mem$$Address);
11865   %}
11866   ins_pipe(ialu_cr_reg_mem);
11867 %}
11868 
11869 // Yanked all unsigned pointer compare operations.
11870 // Pointer compares are done with CmpP which is already unsigned.
11871 
11872 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11873 %{
        // CmpL of two long registers; 64-bit counterpart of compI_rReg.
11874   match(Set cr (CmpL op1 op2));
11875
11876   format %{ "cmpq    $op1, $op2" %}
11877   opcode(0x3B);  /* Opcode 3B /r */
11878   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11879   ins_pipe(ialu_cr_reg_reg);
11880 %}
11881 
11882 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11883 %{
        // CmpL of a long register against a long constant.  The operand
        // class is immL32, so only constants accepted by that class match.
11884   match(Set cr (CmpL op1 op2));
11885
11886   format %{ "cmpq    $op1, $op2" %}
11887   opcode(0x81, 0x07); /* Opcode 81 /7 */
11888   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11889   ins_pipe(ialu_cr_reg_imm);
11890 %}
11891 
11892 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11893 %{
        // CmpL of a long register against a long loaded from memory.
        // Unlike the int and pointer memory forms, no ins_cost is given here.
11894   match(Set cr (CmpL op1 (LoadL op2)));
11895
11896   format %{ "cmpq    $op1, $op2" %}
11897   opcode(0x3B); /* Opcode 3B /r */
11898   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11899   ins_pipe(ialu_cr_reg_mem);
11900 %}
11901 
11902 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11903 %{
        // CmpL of a long register against constant zero, implemented as a
        // 64-bit test of the register against itself.
11904   match(Set cr (CmpL src zero));
11905
11906   format %{ "testq   $src, $src" %}
11907   opcode(0x85);
11908   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11909   ins_pipe(ialu_cr_reg_imm);
11910 %}
11911 
11912 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11913 %{
        // Folds (src & con) compared with zero into one 64-bit test
        // instruction; con must be accepted by the immL32 operand class.
11914   match(Set cr (CmpL (AndL src con) zero));
11915
11916   format %{ "testq   $src, $con\t# long" %}
11917   opcode(0xF7, 0x00);
11918   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11919   ins_pipe(ialu_cr_reg_imm);
11920 %}
11921 
11922 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11923 %{
        // Folds (src & long-loaded-from-mem) compared with zero into one
        // 64-bit test instruction with a memory operand.
11924   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11925
11926   format %{ "testq   $src, $mem" %}
11927   opcode(0x85);
11928   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11929   ins_pipe(ialu_cr_reg_mem);
11930 %}
11931 
11932 // Manifest a CmpL result in an integer register.  Very painful.
11933 // This is the test to avoid.
11934 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11935 %{
        // Materialises the CmpL3 result of two long registers as an int in
        // dst.  Per the format listing below: dst starts at -1 and is kept
        // when src1 < src2; otherwise setne/movzbl leave 1 when the values
        // differ and 0 when they are equal.
11936   match(Set dst (CmpL3 src1 src2));
        // The flags register is not an output; it is only clobbered.
11937   effect(KILL flags);
11938
11939   ins_cost(275); // XXX
11940   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11941             "movl    $dst, -1\n\t"
11942             "jl,s    done\n\t"
11943             "setne   $dst\n\t"
11944             "movzbl  $dst, $dst\n\t"
11945     "done:" %}
        // The whole sequence is produced by a single enc_class.
11946   ins_encode(cmpl3_flag(src1, src2, dst));
11947   ins_pipe(pipe_slow);
11948 %}
11949 
11950 //----------Max and Min--------------------------------------------------------
11951 // Min Instructions
11952 
11953 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11954 %{
        // Helper with no match rule: it is only instantiated from the expand
        // block of minI_rReg.  Conditionally moves src into dst on the
        // "greater" condition of previously computed flags.
11955   effect(USE_DEF dst, USE src, USE cr);
11956
11957   format %{ "cmovlgt $dst, $src\t# min" %}
11958   opcode(0x0F, 0x4F);
        // Two opcode bytes: primary (OpcP) followed by secondary (OpcS).
11959   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11960   ins_pipe(pipe_cmov_reg);
11961 %}
11962 
11963 
11964 instruct minI_rReg(rRegI dst, rRegI src)
11965 %{
        // Integer minimum, computed in place in dst.
11966   match(Set dst (MinI dst src));
11967
11968   ins_cost(200);
        // Expands into two instructions: compare dst with src into a fresh
        // flags operand, then move src into dst if dst was greater.
11969   expand %{
11970     rFlagsReg cr;
11971     compI_rReg(cr, dst, src);
11972     cmovI_reg_g(dst, src, cr);
11973   %}
11974 %}
11975 
11976 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11977 %{
        // Helper with no match rule: it is only instantiated from the expand
        // block of maxI_rReg.  Conditionally moves src into dst on the
        // "less" condition of previously computed flags.
11978   effect(USE_DEF dst, USE src, USE cr);
11979
11980   format %{ "cmovllt $dst, $src\t# max" %}
11981   opcode(0x0F, 0x4C);
11982   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11983   ins_pipe(pipe_cmov_reg);
11984 %}
11985 
11986 
11987 instruct maxI_rReg(rRegI dst, rRegI src)
11988 %{
        // Integer maximum, computed in place in dst.
11989   match(Set dst (MaxI dst src));
11990
11991   ins_cost(200);
        // Expands into two instructions: compare dst with src into a fresh
        // flags operand, then move src into dst if dst was less.
11992   expand %{
11993     rFlagsReg cr;
11994     compI_rReg(cr, dst, src);
11995     cmovI_reg_l(dst, src, cr);
11996   %}
11997 %}
11998 
11999 // ============================================================================
12000 // Branch Instructions
12001 
12002 // Jump Direct - Label defines a relative address from JMP+1
12003 instruct jmpDir(label labl)
12004 %{
        // Unconditional branch for the Goto node, long-offset form.
        // jmpDir_short further down is its short-offset replacement.
12005   match(Goto);
12006   effect(USE labl);
12007
12008   ins_cost(300);
12009   format %{ "jmp     $labl" %}
        // Declared instruction length in bytes; the encoding below must
        // always emit exactly this many.
12010   size(5);
12011   ins_encode %{
12012     Label* L = $labl$$label;
12013     __ jmp(*L, false); // Always long jump
12014   %}
12015   ins_pipe(pipe_jmp);
12016 %}
12017 
12018 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12019 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12020 %{
        // Conditional branch for the If node on signed flags, long-offset
        // form.  jmpCon_short is its short-offset replacement.
12021   match(If cop cr);
12022   effect(USE labl);
12023
12024   ins_cost(300);
12025   format %{ "j$cop     $labl" %}
        // Declared instruction length in bytes.
12026   size(6);
12027   ins_encode %{
12028     Label* L = $labl$$label;
          // The operand's cmpcode is used directly as the assembler
          // condition code.
12029     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12030   %}
12031   ins_pipe(pipe_jcc);
12032 %}
12033 
12034 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12035 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12036 %{
        // Conditional branch for the CountedLoopEnd node on signed flags,
        // long-offset form.  The encoding is the same as jmpCon.
12037   match(CountedLoopEnd cop cr);
12038   effect(USE labl);
12039
12040   ins_cost(300);
12041   format %{ "j$cop     $labl\t# loop end" %}
        // Declared instruction length in bytes.
12042   size(6);
12043   ins_encode %{
12044     Label* L = $labl$$label;
12045     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12046   %}
12047   ins_pipe(pipe_jcc);
12048 %}
12049 
12050 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12051 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // CountedLoopEnd branch on unsigned flags (cmpOpU / rFlagsRegU),
        // long-offset form.
12052   match(CountedLoopEnd cop cmp);
12053   effect(USE labl);
12054
12055   ins_cost(300);
12056   format %{ "j$cop,u   $labl\t# loop end" %}
        // Declared instruction length in bytes.
12057   size(6);
12058   ins_encode %{
12059     Label* L = $labl$$label;
12060     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12061   %}
12062   ins_pipe(pipe_jcc);
12063 %}
12064 
12065 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand
        // classes, long-offset form.  The encoding matches jmpLoopEndU.
12066   match(CountedLoopEnd cop cmp);
12067   effect(USE labl);
12068
        // Costed at 200, below the 300 of jmpLoopEndU.
12069   ins_cost(200);
12070   format %{ "j$cop,u   $labl\t# loop end" %}
        // Declared instruction length in bytes.
12071   size(6);
12072   ins_encode %{
12073     Label* L = $labl$$label;
12074     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12075   %}
12076   ins_pipe(pipe_jcc);
12077 %}
12078 
12079 // Jump Direct Conditional - using unsigned comparison
12080 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // If branch on unsigned flags (cmpOpU / rFlagsRegU), long-offset
        // form.  jmpConU_short is its short-offset replacement.
12081   match(If cop cmp);
12082   effect(USE labl);
12083
12084   ins_cost(300);
12085   format %{ "j$cop,u  $labl" %}
        // Declared instruction length in bytes.
12086   size(6);
12087   ins_encode %{
12088     Label* L = $labl$$label;
12089     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12090   %}
12091   ins_pipe(pipe_jcc);
12092 %}
12093 
12094 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF / rFlagsRegUCF operand classes,
        // long-offset form.  The encoding matches jmpConU.
12095   match(If cop cmp);
12096   effect(USE labl);
12097
        // Costed at 200, below the 300 of jmpConU.
12098   ins_cost(200);
12099   format %{ "j$cop,u  $labl" %}
        // Declared instruction length in bytes.
12100   size(6);
12101   ins_encode %{
12102     Label* L = $labl$$label;
12103     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12104   %}
12105   ins_pipe(pipe_jcc);
12106 %}
12107 
12108 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF2 operand class.  It emits two jumps so
        // that the parity flag is consulted before the equal/not-equal test.
        // No size() is declared: the two paths below emit different
        // instruction mixes (two long jcc vs. one short plus one long).
12109   match(If cop cmp);
12110   effect(USE labl);
12111
12112   ins_cost(200);
12113   format %{ $$template
12114     if ($cop$$cmpcode == Assembler::notEqual) {
12115       $$emit$$"jp,u   $labl\n\t"
12116       $$emit$$"j$cop,u   $labl"
12117     } else {
12118       $$emit$$"jp,u   done\n\t"
12119       $$emit$$"j$cop,u   $labl\n\t"
12120       $$emit$$"done:"
12121     }
12122   %}
12123   ins_encode %{
12124     Label* l = $labl$$label;
12125     if ($cop$$cmpcode == Assembler::notEqual) {
            // not-equal: branch to the target if parity is set OR the
            // values differ.
12126       __ jcc(Assembler::parity, *l, false);
12127       __ jcc(Assembler::notEqual, *l, false);
12128     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set skips the branch entirely; otherwise
            // branch to the target only on equal.
12129       Label done;
12130       __ jccb(Assembler::parity, done);
12131       __ jcc(Assembler::equal, *l, false);
12132       __ bind(done);
12133     } else {
            // Only equal and notEqual are handled by this rule.
12134        ShouldNotReachHere();
12135     }
12136   %}
12137   ins_pipe(pipe_jcc);
12138 %}
12139 
12140 // ============================================================================
12141 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12142 // superklass array for an instance of the superklass.  Set a hidden
12143 // internal cache on a hit (cache is checked with exposed code in
12144 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12145 // encoding ALSO sets flags.
12146 
12147 instruct partialSubtypeCheck(rdi_RegP result,
12148                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12149                              rFlagsReg cr)
12150 %{
        // Slow half of a subtype check that delivers its answer in a
        // register: result (rdi) is zero on a hit and non-zero on a miss.
        // sub, super and result are pinned to rsi, rax and rdi; rcx and the
        // flags are clobbered.
12151   match(Set result (PartialSubtypeCheck sub super));
12152   effect(KILL rcx, KILL cr);
12153
12154   ins_cost(1100);  // slightly larger than the next version
        // Debug listing of the emitted sequence.  Fixed here: the class name
        // on the addq line read "arrayOopDex" (the line above it says
        // arrayOopDesc), and the trailing "Hit: rdi zero" note lacked the
        // '#' comment marker used on every other line.
12155   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12156             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12157             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12158             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12159             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12160             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12161             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12162     "miss:\t" %}
12163
        // The shared enc_class reads this opcode value as a switch: 0x1 asks
        // it to zero rdi on a hit (the _vs_Zero variant passes 0x0).
12164   opcode(0x1); // Force a XOR of RDI
12165   ins_encode(enc_PartialSubtypeCheck());
12166   ins_pipe(pipe_slow);
12167 %}
12168 
12169 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12170                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12171                                      immP0 zero,
12172                                      rdi_RegP result)
12173 %{
        // Variant of partialSubtypeCheck for when the check result is only
        // compared against null: the answer is delivered in the flags (cr),
        // so rdi is merely scratch and is killed along with rcx.
12174   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12175   effect(KILL rcx, KILL result);
12176
        // Cheaper than the register-result version (1100).
12177   ins_cost(1000);
        // Debug listing of the emitted sequence.  Fixed here: the class name
        // on the addq line read "arrayOopDex" (the line above it says
        // arrayOopDesc).
12178   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12179             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12180             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12181             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12182             "jne,s   miss\t\t# Missed: flags nz\n\t"
12183             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12184     "miss:\t" %}
12185
        // Same shared enc_class as partialSubtypeCheck; 0x0 tells it to skip
        // the final zeroing of rdi.
12186   opcode(0x0); // No need to XOR RDI
12187   ins_encode(enc_PartialSubtypeCheck());
12188   ins_pipe(pipe_slow);
12189 %}
12190 
12191 // ============================================================================
12192 // Branch Instructions -- short offset versions
12193 //
12194 // These instructions are used to replace jumps of a long offset (the default
12195 // match) with jumps of a shorter offset.  These instructions are all tagged
12196 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12197 // match rules in general matching.  Instead, the ADLC generates a conversion
12198 // method in the MachNode which can be used to do in-place replacement of the
12199 // long variant with the shorter variant.  The compiler will determine if a
12200 // branch can be taken by the is_short_branch_offset() predicate in the machine
12201 // specific code section of the file.
12202 
12203 // Jump Direct - Label defines a relative address from JMP+1
12204 instruct jmpDir_short(label labl) %{
        // Short-offset replacement for jmpDir: same match rule, but a
        // 2-byte jmpb encoding.
12205   match(Goto);
12206   effect(USE labl);
12207
12208   ins_cost(300);
12209   format %{ "jmp,s   $labl" %}
        // Declared instruction length in bytes.
12210   size(2);
12211   ins_encode %{
12212     Label* L = $labl$$label;
12213     __ jmpb(*L);
12214   %}
12215   ins_pipe(pipe_jmp);
        // Tags this rule as a short-branch variant (see the section comment
        // above: excluded from general matching, used for in-place
        // replacement of the long form).
12216   ins_short_branch(1);
12217 %}
12218 
12219 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12220 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpCon: same match rule, but a
        // 2-byte jccb encoding.
12221   match(If cop cr);
12222   effect(USE labl);
12223
12224   ins_cost(300);
12225   format %{ "j$cop,s   $labl" %}
        // Declared instruction length in bytes.
12226   size(2);
12227   ins_encode %{
12228     Label* L = $labl$$label;
12229     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12230   %}
12231   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12232   ins_short_branch(1);
12233 %}
12234 
12235 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12236 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpLoopEnd: same match rule, but a
        // 2-byte jccb encoding.
12237   match(CountedLoopEnd cop cr);
12238   effect(USE labl);
12239
12240   ins_cost(300);
12241   format %{ "j$cop,s   $labl\t# loop end" %}
        // Declared instruction length in bytes.
12242   size(2);
12243   ins_encode %{
12244     Label* L = $labl$$label;
12245     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12246   %}
12247   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12248   ins_short_branch(1);
12249 %}
12250 
12251 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12252 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpLoopEndU: same match rule, but a
        // 2-byte jccb encoding.
12253   match(CountedLoopEnd cop cmp);
12254   effect(USE labl);
12255
12256   ins_cost(300);
12257   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared instruction length in bytes.
12258   size(2);
12259   ins_encode %{
12260     Label* L = $labl$$label;
12261     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12262   %}
12263   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12264   ins_short_branch(1);
12265 %}
12266 
12267 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpLoopEndUCF: same match rule, but a
        // 2-byte jccb encoding.
12268   match(CountedLoopEnd cop cmp);
12269   effect(USE labl);
12270
        // NOTE(review): the long form jmpLoopEndUCF is costed at 200 while
        // this short form uses 300 -- confirm whether the difference is
        // intentional (short-branch rules are excluded from general
        // matching, so it may be immaterial).
12271   ins_cost(300);
12272   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared instruction length in bytes.
12273   size(2);
12274   ins_encode %{
12275     Label* L = $labl$$label;
12276     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12277   %}
12278   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12279   ins_short_branch(1);
12280 %}
12281 
12282 // Jump Direct Conditional - using unsigned comparison
12283 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpConU: same match rule, but a
        // 2-byte jccb encoding.
12284   match(If cop cmp);
12285   effect(USE labl);
12286
12287   ins_cost(300);
12288   format %{ "j$cop,us  $labl" %}
        // Declared instruction length in bytes.
12289   size(2);
12290   ins_encode %{
12291     Label* L = $labl$$label;
12292     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12293   %}
12294   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12295   ins_short_branch(1);
12296 %}
12297 
12298 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF: same match rule, but a
        // 2-byte jccb encoding.  (Costed at 300; the long form uses 200.)
12299   match(If cop cmp);
12300   effect(USE labl);
12301
12302   ins_cost(300);
12303   format %{ "j$cop,us  $labl" %}
        // Declared instruction length in bytes.
12304   size(2);
12305   ins_encode %{
12306     Label* L = $labl$$label;
12307     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12308   %}
12309   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12310   ins_short_branch(1);
12311 %}
12312 
12313 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF2: the same parity-then-
        // condition pair of jumps, but every jump uses the short jccb form.
12314   match(If cop cmp);
12315   effect(USE labl);
12316
12317   ins_cost(300);
12318   format %{ $$template
12319     if ($cop$$cmpcode == Assembler::notEqual) {
12320       $$emit$$"jp,u,s   $labl\n\t"
12321       $$emit$$"j$cop,u,s   $labl"
12322     } else {
12323       $$emit$$"jp,u,s   done\n\t"
12324       $$emit$$"j$cop,u,s  $labl\n\t"
12325       $$emit$$"done:"
12326     }
12327   %}
        // Both paths below emit exactly two jccb instructions, so a fixed
        // size can be declared here (the long form declares none).
12328   size(4);
12329   ins_encode %{
12330     Label* l = $labl$$label;
12331     if ($cop$$cmpcode == Assembler::notEqual) {
            // not-equal: branch to the target if parity is set OR the
            // values differ.
12332       __ jccb(Assembler::parity, *l);
12333       __ jccb(Assembler::notEqual, *l);
12334     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set skips the branch entirely; otherwise
            // branch to the target only on equal.
12335       Label done;
12336       __ jccb(Assembler::parity, done);
12337       __ jccb(Assembler::equal, *l);
12338       __ bind(done);
12339     } else {
            // Only equal and notEqual are handled by this rule.
12340        ShouldNotReachHere();
12341     }
12342   %}
12343   ins_pipe(pipe_jcc);
        // Tags this rule as a short-branch variant.
12344   ins_short_branch(1);
12345 %}
12346 
12347 // ============================================================================
12348 // inlined locking and unlocking
12349 
12350 instruct cmpFastLock(rFlagsReg cr,
12351                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12352 %{
        // Inlined monitor-enter fast path for the FastLock node; the outcome
        // is delivered in the flags register cr.
12353   match(Set cr (FastLock object box));
        // tmp is pinned to rax by its operand class; scr may be any pointer
        // register.  Both are scratch.
12354   effect(TEMP tmp, TEMP scr);
12355
12356   ins_cost(300);
12357   format %{ "fastlock $object,$box,$tmp,$scr" %}
        // The whole sequence is produced by the Fast_Lock enc_class.
12358   ins_encode(Fast_Lock(object, box, tmp, scr));
12359   ins_pipe(pipe_slow);
12360 %}
12361 
12362 instruct cmpFastUnlock(rFlagsReg cr,
12363                        rRegP object, rax_RegP box, rRegP tmp)
12364 %{
        // Inlined monitor-exit fast path for the FastUnlock node; the outcome
        // is delivered in the flags register cr.  Here it is the box operand
        // that is pinned to rax.
12365   match(Set cr (FastUnlock object box));
12366   effect(TEMP tmp);
12367
12368   ins_cost(300);
12369   format %{ "fastunlock $object, $box, $tmp" %}
        // The whole sequence is produced by the Fast_Unlock enc_class.
12370   ins_encode(Fast_Unlock(object, box, tmp));
12371   ins_pipe(pipe_slow);
12372 %}
12373 
12374 
12375 // ============================================================================
12376 // Safepoint Instructions
12377 instruct safePoint_poll(rFlagsReg cr)
12378 %{
        // Safepoint poll used when the polling page is not "far": the page
        // is addressed directly, so no register input is needed.
12379   predicate(!Assembler::is_polling_page_far());
12380   match(SafePoint);
        // The test instruction clobbers the flags.
12381   effect(KILL cr);
12382
12383   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12384             "# Safepoint: poll for GC" %}
12385   ins_cost(125);
12386   ins_encode %{
          // The address literal carries a poll_type relocation for the
          // polling-page reference.
12387     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12388     __ testl(rax, addr);
12389   %}
12390   ins_pipe(ialu_reg_mem);
12391 %}
12392 
12393 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12394 %{
        // Safepoint poll used when the polling page is "far": the page
        // address arrives in a register as an input of the SafePoint node.
12395   predicate(Assembler::is_polling_page_far());
12396   match(SafePoint poll);
12397   effect(KILL cr, USE poll);
12398
12399   format %{ "testl  rax, [$poll]\t"
12400             "# Safepoint: poll for GC" %}
12401   ins_cost(125);
12402   ins_encode %{
          // The poll_type relocation is recorded explicitly before the test
          // instruction is emitted.
12403     __ relocate(relocInfo::poll_type);
12404     __ testl(rax, Address($poll$$Register, 0));
12405   %}
12406   ins_pipe(ialu_reg_mem);
12407 %}
12408 
12409 // ============================================================================
12410 // Procedure Call/Return Instructions
12411 // Call Java Static Instruction
12412 // Note: If this code changes, the corresponding ret_addr_offset() and
12413 //       compute_padding() functions will have to be adjusted.
12414 instruct CallStaticJavaDirect(method meth) %{
        // Static Java call for CallStaticJava nodes that are NOT method
        // handle invokes; CallStaticJavaHandle below takes the others.
12415   match(CallStaticJava);
12416   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12417   effect(USE meth);
12418
12419   ins_cost(300);
12420   format %{ "call,static " %}
12421   opcode(0xE8); /* E8 cd */
        // The call itself, followed by the shared call_epilog encoding.
12422   ins_encode(Java_Static_Call(meth), call_epilog);
12423   ins_pipe(pipe_slow);
        // Alignment requirement for this instruction (see the note above
        // about ret_addr_offset() and compute_padding()).
12424   ins_alignment(4);
12425 %}
12426 
12427 // Call Java Static Instruction (method handle version)
12428 // Note: If this code changes, the corresponding ret_addr_offset() and
12429 //       compute_padding() functions will have to be adjusted.
12430 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
        // Static Java call for CallStaticJava nodes that ARE method handle
        // invokes.  The rbp_mh_SP_save operand appears in neither the match
        // rule nor the effect list.
12431   match(CallStaticJava);
12432   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12433   effect(USE meth);
12434   // RBP is saved by all callees (for interpreter stack correction).
12435   // We use it here for a similar purpose, in {preserve,restore}_SP.
12436
12437   ins_cost(300);
12438   format %{ "call,static/MethodHandle " %}
12439   opcode(0xE8); /* E8 cd */
        // Same call as CallStaticJavaDirect, bracketed by preserve_SP and
        // restore_SP.
12440   ins_encode(preserve_SP,
12441              Java_Static_Call(meth),
12442              restore_SP,
12443              call_epilog);
12444   ins_pipe(pipe_slow);
12445   ins_alignment(4);
12446 %}
12447 
12448 // Call Java Dynamic Instruction
12449 // Note: If this code changes, the corresponding ret_addr_offset() and
12450 //       compute_padding() functions will have to be adjusted.
12451 instruct CallDynamicJavaDirect(method meth)
12452 %{
        // Dynamic Java call for the CallDynamicJava node.
12453   match(CallDynamicJava);
12454   effect(USE meth);
12455
12456   ins_cost(300);
        // Per the format listing, rax is loaded with
        // Universe::non_oop_word() ahead of the call.
12457   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12458             "call,dynamic " %}
12459   opcode(0xE8); /* E8 cd */
12460   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12461   ins_pipe(pipe_slow);
12462   ins_alignment(4);
12463 %}
12464 
12465 // Call Runtime Instruction
12466 instruct CallRuntimeDirect(method meth)
12467 %{
        // Call into the runtime for the CallRuntime node.  Unlike the Java
        // call rules above, there is no call_epilog and no ins_alignment.
12468   match(CallRuntime);
12469   effect(USE meth);
12470
12471   ins_cost(300);
12472   format %{ "call,runtime " %}
12473   opcode(0xE8); /* E8 cd */
12474   ins_encode(Java_To_Runtime(meth));
12475   ins_pipe(pipe_slow);
12476 %}
12477 
12478 // Call runtime without safepoint
      // Matches every CallLeaf node (no predicate).
12479 instruct CallLeafDirect(method meth)
12480 %{
12481   match(CallLeaf);
      // The call target is only read by this instruction.
12482   effect(USE meth);
12483
12484   ins_cost(300);
12485   format %{ "call_leaf,runtime " %}
12486   opcode(0xE8); /* E8 cd */
      // Same Java_To_Runtime encoding as CallRuntimeDirect; only the matched
      // ideal node and the format string differ.
12487   ins_encode(Java_To_Runtime(meth));
12488   ins_pipe(pipe_slow);
12489 %}
12490 
12491 // Call runtime without safepoint
      // Matches every CallLeafNoFP node (no predicate).
      // NOTE(review): "NoFP" presumably means the callee uses no
      // floating-point state -- confirm against the ideal node definition.
12492 instruct CallLeafNoFPDirect(method meth)
12493 %{
12494   match(CallLeafNoFP);
      // The call target is only read by this instruction.
12495   effect(USE meth);
12496
12497   ins_cost(300);
12498   format %{ "call_leaf_nofp,runtime " %}
12499   opcode(0xE8); /* E8 cd */
      // Same Java_To_Runtime encoding as CallLeafDirect and CallRuntimeDirect;
      // only the matched ideal node and the format string differ.
12500   ins_encode(Java_To_Runtime(meth));
12501   ins_pipe(pipe_slow);
12502 %}
12503 
12504 // Return Instruction
12505 // Remove the return address & jump to it.
12506 // Notice: We always emit a nop after a ret to make sure there is room
12507 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with opcode 0xC3 and
      // the format shows just "ret"; no trailing nop is visible in this
      // instruct.  Confirm whether the notice above is still accurate.
12508 instruct Ret()
12509 %{
      // Matches the ideal Return node; takes no operands.
12510   match(Return);
12511
12512   format %{ "ret" %}
12513   opcode(0xC3);
12514   ins_encode(OpcP);
12515   ins_pipe(pipe_jmp);
12516 %}
12517 
12518 // Tail Call; Jump from runtime stub to Java code.
12519 // Also known as an 'interprocedural jump'.
12520 // Target of jump will eventually return to caller.
12521 // TailJump below removes the return address.
      // Operands: jump_target is taken from the no_rbp_RegP register class and
      // method_oop is pinned to the rbx_RegP class (the format string notes
      // that rbx holds the method oop).
12522 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12523 %{
12524   match(TailCall jump_target method_oop);
12525
12526   ins_cost(300);
12527   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12528   opcode(0xFF, 0x4); /* Opcode FF /4 */
      // Encoding order: REX prefix step for jump_target, the primary opcode,
      // then the register/opcode-extension step for jump_target.  method_oop
      // does not appear in the encoding.
12529   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12530   ins_pipe(pipe_jmp);
12531 %}
12532 
12533 // Tail Jump; remove the return address; jump to target.
12534 // TailCall above leaves the return address around.
      // Operands: jump_target is taken from the no_rbp_RegP register class and
      // ex_oop is pinned to the rax_RegP class.  ex_oop does not appear in the
      // encoding.
12535 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12536 %{
12537   match(TailJump jump_target ex_oop);
12538
12539   ins_cost(300);
12540   format %{ "popq    rdx\t# pop return address\n\t"
12541             "jmp     $jump_target" %}
12542   opcode(0xFF, 0x4); /* Opcode FF /4 */
      // A literal 0x5a byte (popq rdx) is emitted first to discard the return
      // address; the remaining steps are the same indirect-jump sequence used
      // by TailCalljmpInd.
12543   ins_encode(Opcode(0x5a), // popq rdx
12544              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12545   ins_pipe(pipe_jmp);
12546 %}
12547 
12548 // Create exception oop: created by stack-crawling runtime code.
12549 // Created exception is now available to this handler, and is setup
12550 // just prior to jumping to this handler.  No code emitted.
      // The result operand ex_oop is pinned to the rax_RegP class, matching
      // the format string's statement that the oop is in rax.
12551 instruct CreateException(rax_RegP ex_oop)
12552 %{
12553   match(Set ex_oop (CreateEx));
12554
      // Zero-size instruction: the encoding list below is empty and the
      // pipeline class is "empty".
12555   size(0);
12556   // use the following format syntax
12557   format %{ "# exception oop is in rax; no code emitted" %}
12558   ins_encode();
12559   ins_pipe(empty);
12560 %}
12561 
12562 // Rethrow exception:
12563 // The exception oop will come in the first argument position.
12564 // Then JUMP (not call) to the rethrow stub code.
      // Matches the ideal Rethrow node; the instruct declares no operands and
      // the whole encoding is delegated to enc_rethrow.
12565 instruct RethrowException()
12566 %{
12567   match(Rethrow);
12568
12569   // use the following format syntax
12570   format %{ "jmp     rethrow_stub" %}
12571   ins_encode(enc_rethrow);
12572   ins_pipe(pipe_jmp);
12573 %}
12574 
12575 
12576 //----------PEEPHOLE RULES-----------------------------------------------------
12577 // These must follow all instruction definitions as they use the names
12578 // defined in the instruction definitions.
12579 //
12580 // peepmatch ( root_instr_name [preceding_instruction]* );
12581 //
12582 // peepconstraint %{
12583 // (instruction_number.operand_name relational_op instruction_number.operand_name
12584 //  [, ...] );
12585 // // instruction numbers are zero-based using left to right order in peepmatch
12586 //
12587 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12588 // // provide an instruction_number.operand_name for each operand that appears
12589 // // in the replacement instruction's match rule
12590 //
12591 // ---------VM FLAGS---------------------------------------------------------
12592 //
12593 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12594 //
12595 // Each peephole rule is given an identifying number starting with zero and
12596 // increasing by one in the order seen by the parser.  An individual peephole
12597 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12598 // on the command-line.
12599 //
12600 // ---------CURRENT LIMITATIONS----------------------------------------------
12601 //
12602 // Only match adjacent instructions in same basic block
12603 // Only equality constraints
12604 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12605 // Only one replacement instruction
12606 //
12607 // ---------EXAMPLE----------------------------------------------------------
12608 //
12609 // // pertinent parts of existing instructions in architecture description
12610 // instruct movI(rRegI dst, rRegI src)
12611 // %{
12612 //   match(Set dst (CopyI src));
12613 // %}
12614 //
12615 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12616 // %{
12617 //   match(Set dst (AddI dst src));
12618 //   effect(KILL cr);
12619 // %}
12620 //
12621 // // Change (inc mov) to lea
12622 // peephole %{
12623 //   // increment preceded by register-register move
12624 //   peepmatch ( incI_rReg movI );
12625 //   // require that the destination register of the increment
12626 //   // match the destination register of the move
12627 //   peepconstraint ( 0.dst == 1.dst );
12628 //   // construct a replacement instruction that sets
12629 //   // the destination to ( move's source register + one )
12630 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12631 // %}
12632 //
12633 
12634 // Implementation no longer uses movX instructions since
12635 // machine-independent system no longer uses CopyX nodes.
12636 //
12637 // peephole
12638 // %{
12639 //   peepmatch (incI_rReg movI);
12640 //   peepconstraint (0.dst == 1.dst);
12641 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12642 // %}
12643 
12644 // peephole
12645 // %{
12646 //   peepmatch (decI_rReg movI);
12647 //   peepconstraint (0.dst == 1.dst);
12648 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12649 // %}
12650 
12651 // peephole
12652 // %{
12653 //   peepmatch (addI_rReg_imm movI);
12654 //   peepconstraint (0.dst == 1.dst);
12655 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12656 // %}
12657 
12658 // peephole
12659 // %{
12660 //   peepmatch (incL_rReg movL);
12661 //   peepconstraint (0.dst == 1.dst);
12662 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12663 // %}
12664 
12665 // peephole
12666 // %{
12667 //   peepmatch (decL_rReg movL);
12668 //   peepconstraint (0.dst == 1.dst);
12669 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12670 // %}
12671 
12672 // peephole
12673 // %{
12674 //   peepmatch (addL_rReg_imm movL);
12675 //   peepconstraint (0.dst == 1.dst);
12676 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12677 // %}
12678 
12679 // peephole
12680 // %{
12681 //   peepmatch (addP_rReg_imm movP);
12682 //   peepconstraint (0.dst == 1.dst);
12683 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12684 // %}
12685 
12686 // // Change load of spilled value to only a spill
12687 // instruct storeI(memory mem, rRegI src)
12688 // %{
12689 //   match(Set mem (StoreI mem src));
12690 // %}
12691 //
12692 // instruct loadI(rRegI dst, memory mem)
12693 // %{
12694 //   match(Set dst (LoadI mem));
12695 // %}
12696 //
12697 
12698 peephole
12699 %{
      // Int store immediately followed by a reload of the same value.
      // Instruction 0 is the root (loadI); instruction 1 is the storeI that
      // precedes it.
12700   peepmatch (loadI storeI);
      // Fire only when the load's destination is the register that was just
      // stored and both instructions use the same memory operand.
12701   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
      // Replace the pair with the store alone, rebuilt from the store's own
      // operands (mem is listed twice because it appears twice in storeI's
      // match rule).
12702   peepreplace (storeI(1.mem 1.mem 1.src));
12703 %}
12704 
12705 peephole
12706 %{
      // Long counterpart of the loadI/storeI rule: instruction 0 is the root
      // (loadL); instruction 1 is the storeL that precedes it.
12707   peepmatch (loadL storeL);
      // Fire only when the load's destination is the register that was just
      // stored and both instructions use the same memory operand.
12708   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
      // Replace the pair with the store alone, rebuilt from the store's own
      // operands.
      // NOTE(review): mem is listed twice, presumably because storeL's match
      // rule names it twice as storeI's does -- confirm.
12709   peepreplace (storeL(1.mem 1.mem 1.src));
12710 %}
12711 
12712 //----------SMARTSPILL RULES---------------------------------------------------
12713 // These must follow all instruction definitions as they use the names
12714 // defined in the instruction definitions.