1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VMReg );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
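// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.
// NOTE(review): the reg_defs below give RSI and RDI a register save type of
// SOC; only their C convention save type is SOE, and only under _WIN64.
// The sentence above may be stale -- confirm.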
  69 
// Each 64-bit general register is described by a pair of reg_defs, NAME and
// NAME_H.  Both halves carry the same save types and the same hardware
// encoding; NAME maps to the register's VMReg and NAME_H to the VMReg slot
// that follows it (->next()).
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call for compiled code, save-on-entry in the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP: the stack pointer is never saved by the allocator (NS in both columns).
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code, so the allocator treats it as no-save (NS).
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in their C convention save type:
// SOE when _WIN64 is defined, SOC otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for compiled code, save-on-entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
// Each XMM register is described by a pair of reg_defs, XMMn and XMMn_H,
// sharing one encoding; XMMn_H maps to the VMReg slot after XMMn (->next()).
// XMM0-XMM5 are save-on-call in both conventions on every platform.
reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());

reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());

reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());

reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());

reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());

reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());

// XMM6-XMM15 differ only in their C convention save type:
// SOE when _WIN64 is defined (preserved across calls by the Windows ABI),
// SOC otherwise.
#ifdef _WIN64

reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());

#else

reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());

#endif // _WIN64
 227 
// Condition-code register.  It has no ideal register type (0) and no backing
// VMReg (VMRegImpl::Bad()); 16 is the encoding slot after the 16 general
// registers.
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
// General registers, highest allocation priority first.  Every register is
// listed together with its _H half so that pairs stay on an even boundary;
// RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
// XMM registers in ascending numeric order, each paired with its _H half.
alloc_class chunk1(XMM0,  XMM0_H,
                   XMM1,  XMM1_H,
                   XMM2,  XMM2_H,
                   XMM3,  XMM3_H,
                   XMM4,  XMM4_H,
                   XMM5,  XMM5_H,
                   XMM6,  XMM6_H,
                   XMM7,  XMM7_H,
                   XMM8,  XMM8_H,
                   XMM9,  XMM9_H,
                   XMM10, XMM10_H,
                   XMM11, XMM11_H,
                   XMM12, XMM12_H,
                   XMM13, XMM13_H,
                   XMM14, XMM14_H,
                   XMM15, XMM15_H);
 272 
// The condition-code register gets an allocation chunk of its own.
alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
// Class for all pointer registers (including RSP, R12 and R15):
// every general register defined above, each with its _H half.
reg_class any_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);

// Class for all pointer registers except RSP.
// R12 and R15 are omitted as well: R15 is the TLS pointer (see ptr_r15_reg);
// R12 is presumably reserved for another fixed use -- TODO confirm.
// The same two registers are missing from every ptr_no_* class below.
reg_class ptr_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R13, R13_H,
                  R14, R14_H);

// Class for all pointer registers except RAX and RSP
// (ptr_reg without RAX)
reg_class ptr_no_rax_reg(RDX, RDX_H,
                         RBP, RBP_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);

// Class for all pointer registers except RBP and RSP
// (ptr_reg without RBP)
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);

// Class for all pointer registers except RAX, RBX and RSP
// (ptr_reg without RAX and RBX)
reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
                             RBP, RBP_H,
                             RDI, RDI_H,
                             RSI, RSI_H,
                             RCX, RCX_H,
                             R8,  R8_H,
                             R9,  R9_H,
                             R10, R10_H,
                             R11, R11_H,
                             R13, R13_H,
                             R14, R14_H);

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for stack pointer (RSP)
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15)
reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP).
// R12 and R15 are omitted too, here and in every long_no_* class below.
// Long values occupy a register pair, so each entry lists NAME and NAME_H.
reg_class long_reg(RAX, RAX_H,
                   RDX, RDX_H,
                   RBP, RBP_H,
                   RDI, RDI_H,
                   RSI, RSI_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
                   R13, R13_H,
                   R14, R14_H);

// Class for all long registers except RAX, RDX (and RSP)
reg_class long_no_rax_rdx_reg(RBP, RBP_H,
                              RDI, RDI_H,
                              RSI, RSI_H,
                              RCX, RCX_H,
                              RBX, RBX_H,
                              R8,  R8_H,
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
                              R13, R13_H,
                              R14, R14_H);

// Class for all long registers except RCX (and RSP)
reg_class long_no_rcx_reg(RBP, RBP_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RAX, RAX_H,
                          RDX, RDX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);

// Class for all long registers except RAX (and RSP)
reg_class long_no_rax_reg(RBP, RBP_H,
                          RDX, RDX_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP).
// R12 and R15 are omitted too, here and in every int_no_* class below.
// Int values need only the low half, so the _H halves are not listed.
reg_class int_reg(RAX,
                  RDX,
                  RBP,
                  RDI,
                  RSI,
                  RCX,
                  RBX,
                  R8,
                  R9,
                  R10,
                  R11,
                  R13,
                  R14);

// Class for all int registers except RCX (and RSP)
reg_class int_no_rcx_reg(RAX,
                         RDX,
                         RBP,
                         RDI,
                         RSI,
                         RBX,
                         R8,
                         R9,
                         R10,
                         R11,
                         R13,
                         R14);

// Class for all int registers except RAX, RDX (and RSP)
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R13,
                             R14);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 506 
// Class for all float registers.
// A float uses only the first slot of an XMM register, so no _H halves.
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7,
                    XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15);

// Class for all double registers.
// A double uses both slots, so each entry lists XMMn and XMMn_H.
reg_class double_reg(XMM0,  XMM0_H,
                     XMM1,  XMM1_H,
                     XMM2,  XMM2_H,
                     XMM3,  XMM3_H,
                     XMM4,  XMM4_H,
                     XMM5,  XMM5_H,
                     XMM6,  XMM6_H,
                     XMM7,  XMM7_H,
                     XMM8,  XMM8_H,
                     XMM9,  XMM9_H,
                     XMM10, XMM10_H,
                     XMM11, XMM11_H,
                     XMM12, XMM12_H,
                     XMM13, XMM13_H,
                     XMM14, XMM14_H,
                     XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
#ifndef PRODUCT
// Debug-only (non-PRODUCT) printer: writes the mnemonic "INT3" for a
// breakpoint node to st.  The register allocator argument is unused.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("INT3");
}
#endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
// EMIT_OPCODE() w/ relocation information
// Records a relocation of type reloc (with the given format) at
// insts_mark() + offset, then emits the opcode byte.
void emit_opcode(CodeBuffer &cbuf,
                 int code, relocInfo::relocType reloc, int offset, int format)
{
  // The relocation is registered before the byte is written.
  cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  emit_opcode(cbuf, code);
}
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
// EMIT_D16()
// Appends d16 to the instruction section as a 16-bit value.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
 654 
// EMIT_D32()
// Appends d32 to the instruction section as a 32-bit value.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}
 659 
// EMIT_D64()
// Appends d64 to the instruction section as a 64-bit value.
void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  cbuf.insts()->emit_int64(d64);
}
 664 
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at insts_mark() with the given format before
// the value is written.  external_word_type is rejected (debug builds):
// such values go through the two-argument overload that takes an address.
void emit_d32_reloc(CodeBuffer& cbuf,
                    int d32,
                    relocInfo::relocType reloc,
                    int format)
{
  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}
 675 
// emit 32 bit value and construct relocation entry from RelocationHolder
// The relocation described by rspec is recorded at insts_mark() with the
// given format before the value is written.
void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
#ifdef ASSERT
  // Debug-only sanity check for oop relocations: unless the value is 0 or
  // the non-oop marker word, it must be a valid oop, and it may only be
  // scavengable when ScavengeRootsInCode is set.
  if (rspec.reloc()->type() == relocInfo::oop_type &&
      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
    assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
// emit 64 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at insts_mark() with the given format before
// the value is written.
void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int64(d64);
}
 701 
// emit 64 bit value and construct relocation entry from RelocationHolder
// The relocation described by rspec is recorded at insts_mark() with the
// given format before the value is written.
void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
#ifdef ASSERT
  // Debug-only sanity check for oop relocations: unless the value is 0 or
  // the non-oop marker word, it must be a valid oop, and it may only be
  // scavengable when ScavengeRootsInCode is set.
  if (rspec.reloc()->type() == relocInfo::oop_type &&
      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
    assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
           "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int64(d64);
}
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739 
 740   // There is no index & no scale, use form without SIB byte
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   if (dstenc != srcenc) {
 803     if (dstenc < 8) {
 804       if (srcenc >= 8) {
 805         emit_opcode(cbuf, Assembler::REX_B);
 806         srcenc -= 8;
 807       }
 808     } else {
 809       if (srcenc < 8) {
 810         emit_opcode(cbuf, Assembler::REX_R);
 811       } else {
 812         emit_opcode(cbuf, Assembler::REX_RB);
 813         srcenc -= 8;
 814       }
 815       dstenc -= 8;
 816     }
 817 
 818     emit_opcode(cbuf, 0x8B);
 819     emit_rm(cbuf, 0x3, dstenc, srcenc);
 820   }
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   if( dst_encoding == src_encoding ) {
 825     // reg-reg copy, use an empty encoding
 826   } else {
 827     MacroAssembler _masm(&cbuf);
 828 
 829     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 830   }
 831 }
 832 
 833 
 834 //=============================================================================
 835 const bool Matcher::constant_table_absolute_addressing = true;
 836 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 837 
 838 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 839   // Empty encoding
 840 }
 841 
 842 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 843   return 0;
 844 }
 845 
 846 #ifndef PRODUCT
 847 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 848   st->print("# MachConstantBaseNode (empty encoding)");
 849 }
 850 #endif
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.insts_size());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq   rbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print("\t");
1001     if (Assembler::is_polling_page_far()) {
1002       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
1003                    "testl  rax, [rscratch1]\t"
1004                    "# Safepoint: poll for GC");
1005     } else {
1006       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
1007                    "# Safepoint: poll for GC");
1008     }
1009   }
1010 }
1011 #endif
1012 
1013 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1014 {
1015   Compile* C = ra_->C;
1016   int framesize = C->frame_slots() << LogBytesPerInt;
1017   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1018   // Remove word for return adr already pushed
1019   // and RBP
1020   framesize -= 2*wordSize;
1021 
1022   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1023 
1024   if (framesize) {
1025     emit_opcode(cbuf, Assembler::REX_W);
1026     if (framesize < 0x80) {
1027       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1028       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1029       emit_d8(cbuf, framesize);
1030     } else {
1031       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1032       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1033       emit_d32(cbuf, framesize);
1034     }
1035   }
1036 
1037   // popq rbp
1038   emit_opcode(cbuf, 0x58 | RBP_enc);
1039 
1040   if (do_polling() && C->is_method_compilation()) {
1041     MacroAssembler _masm(&cbuf);
1042     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1043     if (Assembler::is_polling_page_far()) {
1044       __ lea(rscratch1, polling_page);
1045       __ relocate(relocInfo::poll_return_type);
1046       __ testl(rax, Address(rscratch1, 0));
1047     } else {
1048       __ testl(rax, polling_page);
1049     }
1050   }
1051 }
1052 
1053 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1054 {
1055   return MachNode::size(ra_); // too many variables; just compute it
1056                               // the hard way
1057 }
1058 
1059 int MachEpilogNode::reloc() const
1060 {
1061   return 2; // a large enough number
1062 }
1063 
1064 const Pipeline* MachEpilogNode::pipeline() const
1065 {
1066   return MachNode::pipeline_class();
1067 }
1068 
1069 int MachEpilogNode::safepoint_offset() const
1070 {
1071   return 0;
1072 }
1073 
1074 //=============================================================================
1075 
// Register classes used to pick a spill-copy encoding (see rc_class).
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1082 
1083 static enum RC rc_class(OptoReg::Name reg)
1084 {
1085   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1086 
1087   if (OptoReg::is_stack(reg)) return rc_stack;
1088 
1089   VMReg r = OptoReg::as_VMReg(reg);
1090 
1091   if (r->is_Register()) return rc_int;
1092 
1093   assert(r->is_XMMRegister(), "must be");
1094   return rc_float;
1095 }
1096 
1097 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1098                                        PhaseRegAlloc* ra_,
1099                                        bool do_size,
1100                                        outputStream* st) const
1101 {
1102 
1103   // Get registers to move
1104   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1105   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1106   OptoReg::Name dst_second = ra_->get_reg_second(this);
1107   OptoReg::Name dst_first = ra_->get_reg_first(this);
1108 
1109   enum RC src_second_rc = rc_class(src_second);
1110   enum RC src_first_rc = rc_class(src_first);
1111   enum RC dst_second_rc = rc_class(dst_second);
1112   enum RC dst_first_rc = rc_class(dst_first);
1113 
1114   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1115          "must move at least 1 register" );
1116 
1117   if (src_first == dst_first && src_second == dst_second) {
1118     // Self copy, no move
1119     return 0;
1120   } else if (src_first_rc == rc_stack) {
1121     // mem ->
1122     if (dst_first_rc == rc_stack) {
1123       // mem -> mem
1124       assert(src_second != dst_first, "overlap");
1125       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1126           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1127         // 64-bit
1128         int src_offset = ra_->reg2offset(src_first);
1129         int dst_offset = ra_->reg2offset(dst_first);
1130         if (cbuf) {
1131           emit_opcode(*cbuf, 0xFF);
1132           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1133 
1134           emit_opcode(*cbuf, 0x8F);
1135           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1136 
1137 #ifndef PRODUCT
1138         } else if (!do_size) {
1139           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1140                      "popq    [rsp + #%d]",
1141                      src_offset,
1142                      dst_offset);
1143 #endif
1144         }
1145         return
1146           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1147           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1148       } else {
1149         // 32-bit
1150         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1151         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1152         // No pushl/popl, so:
1153         int src_offset = ra_->reg2offset(src_first);
1154         int dst_offset = ra_->reg2offset(dst_first);
1155         if (cbuf) {
1156           emit_opcode(*cbuf, Assembler::REX_W);
1157           emit_opcode(*cbuf, 0x89);
1158           emit_opcode(*cbuf, 0x44);
1159           emit_opcode(*cbuf, 0x24);
1160           emit_opcode(*cbuf, 0xF8);
1161 
1162           emit_opcode(*cbuf, 0x8B);
1163           encode_RegMem(*cbuf,
1164                         RAX_enc,
1165                         RSP_enc, 0x4, 0, src_offset,
1166                         false);
1167 
1168           emit_opcode(*cbuf, 0x89);
1169           encode_RegMem(*cbuf,
1170                         RAX_enc,
1171                         RSP_enc, 0x4, 0, dst_offset,
1172                         false);
1173 
1174           emit_opcode(*cbuf, Assembler::REX_W);
1175           emit_opcode(*cbuf, 0x8B);
1176           emit_opcode(*cbuf, 0x44);
1177           emit_opcode(*cbuf, 0x24);
1178           emit_opcode(*cbuf, 0xF8);
1179 
1180 #ifndef PRODUCT
1181         } else if (!do_size) {
1182           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1183                      "movl    rax, [rsp + #%d]\n\t"
1184                      "movl    [rsp + #%d], rax\n\t"
1185                      "movq    rax, [rsp - #8]",
1186                      src_offset,
1187                      dst_offset);
1188 #endif
1189         }
1190         return
1191           5 + // movq
1192           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1193           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1194           5; // movq
1195       }
1196     } else if (dst_first_rc == rc_int) {
1197       // mem -> gpr
1198       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1199           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1200         // 64-bit
1201         int offset = ra_->reg2offset(src_first);
1202         if (cbuf) {
1203           if (Matcher::_regEncode[dst_first] < 8) {
1204             emit_opcode(*cbuf, Assembler::REX_W);
1205           } else {
1206             emit_opcode(*cbuf, Assembler::REX_WR);
1207           }
1208           emit_opcode(*cbuf, 0x8B);
1209           encode_RegMem(*cbuf,
1210                         Matcher::_regEncode[dst_first],
1211                         RSP_enc, 0x4, 0, offset,
1212                         false);
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    %s, [rsp + #%d]\t# spill",
1216                      Matcher::regName[dst_first],
1217                      offset);
1218 #endif
1219         }
1220         return
1221           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1222       } else {
1223         // 32-bit
1224         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1225         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1226         int offset = ra_->reg2offset(src_first);
1227         if (cbuf) {
1228           if (Matcher::_regEncode[dst_first] >= 8) {
1229             emit_opcode(*cbuf, Assembler::REX_R);
1230           }
1231           emit_opcode(*cbuf, 0x8B);
1232           encode_RegMem(*cbuf,
1233                         Matcher::_regEncode[dst_first],
1234                         RSP_enc, 0x4, 0, offset,
1235                         false);
1236 #ifndef PRODUCT
1237         } else if (!do_size) {
1238           st->print("movl    %s, [rsp + #%d]\t# spill",
1239                      Matcher::regName[dst_first],
1240                      offset);
1241 #endif
1242         }
1243         return
1244           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1245           ((Matcher::_regEncode[dst_first] < 8)
1246            ? 3
1247            : 4); // REX
1248       }
1249     } else if (dst_first_rc == rc_float) {
1250       // mem-> xmm
1251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1253         // 64-bit
1254         int offset = ra_->reg2offset(src_first);
1255         if (cbuf) {
1256           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1257           if (Matcher::_regEncode[dst_first] >= 8) {
1258             emit_opcode(*cbuf, Assembler::REX_R);
1259           }
1260           emit_opcode(*cbuf, 0x0F);
1261           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1262           encode_RegMem(*cbuf,
1263                         Matcher::_regEncode[dst_first],
1264                         RSP_enc, 0x4, 0, offset,
1265                         false);
1266 #ifndef PRODUCT
1267         } else if (!do_size) {
1268           st->print("%s  %s, [rsp + #%d]\t# spill",
1269                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1270                      Matcher::regName[dst_first],
1271                      offset);
1272 #endif
1273         }
1274         return
1275           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1276           ((Matcher::_regEncode[dst_first] < 8)
1277            ? 5
1278            : 6); // REX
1279       } else {
1280         // 32-bit
1281         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1282         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1283         int offset = ra_->reg2offset(src_first);
1284         if (cbuf) {
1285           emit_opcode(*cbuf, 0xF3);
1286           if (Matcher::_regEncode[dst_first] >= 8) {
1287             emit_opcode(*cbuf, Assembler::REX_R);
1288           }
1289           emit_opcode(*cbuf, 0x0F);
1290           emit_opcode(*cbuf, 0x10);
1291           encode_RegMem(*cbuf,
1292                         Matcher::_regEncode[dst_first],
1293                         RSP_enc, 0x4, 0, offset,
1294                         false);
1295 #ifndef PRODUCT
1296         } else if (!do_size) {
1297           st->print("movss   %s, [rsp + #%d]\t# spill",
1298                      Matcher::regName[dst_first],
1299                      offset);
1300 #endif
1301         }
1302         return
1303           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1304           ((Matcher::_regEncode[dst_first] < 8)
1305            ? 5
1306            : 6); // REX
1307       }
1308     }
1309   } else if (src_first_rc == rc_int) {
1310     // gpr ->
1311     if (dst_first_rc == rc_stack) {
1312       // gpr -> mem
1313       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1314           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1315         // 64-bit
1316         int offset = ra_->reg2offset(dst_first);
1317         if (cbuf) {
1318           if (Matcher::_regEncode[src_first] < 8) {
1319             emit_opcode(*cbuf, Assembler::REX_W);
1320           } else {
1321             emit_opcode(*cbuf, Assembler::REX_WR);
1322           }
1323           emit_opcode(*cbuf, 0x89);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[src_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movq    [rsp + #%d], %s\t# spill",
1331                      offset,
1332                      Matcher::regName[src_first]);
1333 #endif
1334         }
1335         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1336       } else {
1337         // 32-bit
1338         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1339         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1340         int offset = ra_->reg2offset(dst_first);
1341         if (cbuf) {
1342           if (Matcher::_regEncode[src_first] >= 8) {
1343             emit_opcode(*cbuf, Assembler::REX_R);
1344           }
1345           emit_opcode(*cbuf, 0x89);
1346           encode_RegMem(*cbuf,
1347                         Matcher::_regEncode[src_first],
1348                         RSP_enc, 0x4, 0, offset,
1349                         false);
1350 #ifndef PRODUCT
1351         } else if (!do_size) {
1352           st->print("movl    [rsp + #%d], %s\t# spill",
1353                      offset,
1354                      Matcher::regName[src_first]);
1355 #endif
1356         }
1357         return
1358           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1359           ((Matcher::_regEncode[src_first] < 8)
1360            ? 3
1361            : 4); // REX
1362       }
1363     } else if (dst_first_rc == rc_int) {
1364       // gpr -> gpr
1365       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1366           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1367         // 64-bit
1368         if (cbuf) {
1369           if (Matcher::_regEncode[dst_first] < 8) {
1370             if (Matcher::_regEncode[src_first] < 8) {
1371               emit_opcode(*cbuf, Assembler::REX_W);
1372             } else {
1373               emit_opcode(*cbuf, Assembler::REX_WB);
1374             }
1375           } else {
1376             if (Matcher::_regEncode[src_first] < 8) {
1377               emit_opcode(*cbuf, Assembler::REX_WR);
1378             } else {
1379               emit_opcode(*cbuf, Assembler::REX_WRB);
1380             }
1381           }
1382           emit_opcode(*cbuf, 0x8B);
1383           emit_rm(*cbuf, 0x3,
1384                   Matcher::_regEncode[dst_first] & 7,
1385                   Matcher::_regEncode[src_first] & 7);
1386 #ifndef PRODUCT
1387         } else if (!do_size) {
1388           st->print("movq    %s, %s\t# spill",
1389                      Matcher::regName[dst_first],
1390                      Matcher::regName[src_first]);
1391 #endif
1392         }
1393         return 3; // REX
1394       } else {
1395         // 32-bit
1396         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1397         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1398         if (cbuf) {
1399           if (Matcher::_regEncode[dst_first] < 8) {
1400             if (Matcher::_regEncode[src_first] >= 8) {
1401               emit_opcode(*cbuf, Assembler::REX_B);
1402             }
1403           } else {
1404             if (Matcher::_regEncode[src_first] < 8) {
1405               emit_opcode(*cbuf, Assembler::REX_R);
1406             } else {
1407               emit_opcode(*cbuf, Assembler::REX_RB);
1408             }
1409           }
1410           emit_opcode(*cbuf, 0x8B);
1411           emit_rm(*cbuf, 0x3,
1412                   Matcher::_regEncode[dst_first] & 7,
1413                   Matcher::_regEncode[src_first] & 7);
1414 #ifndef PRODUCT
1415         } else if (!do_size) {
1416           st->print("movl    %s, %s\t# spill",
1417                      Matcher::regName[dst_first],
1418                      Matcher::regName[src_first]);
1419 #endif
1420         }
1421         return
1422           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1423           ? 2
1424           : 3; // REX
1425       }
1426     } else if (dst_first_rc == rc_float) {
1427       // gpr -> xmm
1428       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1429           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1430         // 64-bit
1431         if (cbuf) {
1432           emit_opcode(*cbuf, 0x66);
1433           if (Matcher::_regEncode[dst_first] < 8) {
1434             if (Matcher::_regEncode[src_first] < 8) {
1435               emit_opcode(*cbuf, Assembler::REX_W);
1436             } else {
1437               emit_opcode(*cbuf, Assembler::REX_WB);
1438             }
1439           } else {
1440             if (Matcher::_regEncode[src_first] < 8) {
1441               emit_opcode(*cbuf, Assembler::REX_WR);
1442             } else {
1443               emit_opcode(*cbuf, Assembler::REX_WRB);
1444             }
1445           }
1446           emit_opcode(*cbuf, 0x0F);
1447           emit_opcode(*cbuf, 0x6E);
1448           emit_rm(*cbuf, 0x3,
1449                   Matcher::_regEncode[dst_first] & 7,
1450                   Matcher::_regEncode[src_first] & 7);
1451 #ifndef PRODUCT
1452         } else if (!do_size) {
1453           st->print("movdq   %s, %s\t# spill",
1454                      Matcher::regName[dst_first],
1455                      Matcher::regName[src_first]);
1456 #endif
1457         }
1458         return 5; // REX
1459       } else {
1460         // 32-bit
1461         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1462         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1463         if (cbuf) {
1464           emit_opcode(*cbuf, 0x66);
1465           if (Matcher::_regEncode[dst_first] < 8) {
1466             if (Matcher::_regEncode[src_first] >= 8) {
1467               emit_opcode(*cbuf, Assembler::REX_B);
1468             }
1469           } else {
1470             if (Matcher::_regEncode[src_first] < 8) {
1471               emit_opcode(*cbuf, Assembler::REX_R);
1472             } else {
1473               emit_opcode(*cbuf, Assembler::REX_RB);
1474             }
1475           }
1476           emit_opcode(*cbuf, 0x0F);
1477           emit_opcode(*cbuf, 0x6E);
1478           emit_rm(*cbuf, 0x3,
1479                   Matcher::_regEncode[dst_first] & 7,
1480                   Matcher::_regEncode[src_first] & 7);
1481 #ifndef PRODUCT
1482         } else if (!do_size) {
1483           st->print("movdl   %s, %s\t# spill",
1484                      Matcher::regName[dst_first],
1485                      Matcher::regName[src_first]);
1486 #endif
1487         }
1488         return
1489           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1490           ? 4
1491           : 5; // REX
1492       }
1493     }
1494   } else if (src_first_rc == rc_float) {
1495     // xmm ->
1496     if (dst_first_rc == rc_stack) {
1497       // xmm -> mem
1498       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1499           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1500         // 64-bit
1501         int offset = ra_->reg2offset(dst_first);
1502         if (cbuf) {
1503           emit_opcode(*cbuf, 0xF2);
1504           if (Matcher::_regEncode[src_first] >= 8) {
1505               emit_opcode(*cbuf, Assembler::REX_R);
1506           }
1507           emit_opcode(*cbuf, 0x0F);
1508           emit_opcode(*cbuf, 0x11);
1509           encode_RegMem(*cbuf,
1510                         Matcher::_regEncode[src_first],
1511                         RSP_enc, 0x4, 0, offset,
1512                         false);
1513 #ifndef PRODUCT
1514         } else if (!do_size) {
1515           st->print("movsd   [rsp + #%d], %s\t# spill",
1516                      offset,
1517                      Matcher::regName[src_first]);
1518 #endif
1519         }
1520         return
1521           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1522           ((Matcher::_regEncode[src_first] < 8)
1523            ? 5
1524            : 6); // REX
1525       } else {
1526         // 32-bit
1527         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1528         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1529         int offset = ra_->reg2offset(dst_first);
1530         if (cbuf) {
1531           emit_opcode(*cbuf, 0xF3);
1532           if (Matcher::_regEncode[src_first] >= 8) {
1533               emit_opcode(*cbuf, Assembler::REX_R);
1534           }
1535           emit_opcode(*cbuf, 0x0F);
1536           emit_opcode(*cbuf, 0x11);
1537           encode_RegMem(*cbuf,
1538                         Matcher::_regEncode[src_first],
1539                         RSP_enc, 0x4, 0, offset,
1540                         false);
1541 #ifndef PRODUCT
1542         } else if (!do_size) {
1543           st->print("movss   [rsp + #%d], %s\t# spill",
1544                      offset,
1545                      Matcher::regName[src_first]);
1546 #endif
1547         }
1548         return
1549           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1550           ((Matcher::_regEncode[src_first] < 8)
1551            ? 5
1552            : 6); // REX
1553       }
1554     } else if (dst_first_rc == rc_int) {
1555       // xmm -> gpr
1556       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1557           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1558         // 64-bit
1559         if (cbuf) {
1560           emit_opcode(*cbuf, 0x66);
1561           if (Matcher::_regEncode[dst_first] < 8) {
1562             if (Matcher::_regEncode[src_first] < 8) {
1563               emit_opcode(*cbuf, Assembler::REX_W);
1564             } else {
1565               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1566             }
1567           } else {
1568             if (Matcher::_regEncode[src_first] < 8) {
1569               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1570             } else {
1571               emit_opcode(*cbuf, Assembler::REX_WRB);
1572             }
1573           }
1574           emit_opcode(*cbuf, 0x0F);
1575           emit_opcode(*cbuf, 0x7E);
1576           emit_rm(*cbuf, 0x3,
1577                   Matcher::_regEncode[src_first] & 7,
1578                   Matcher::_regEncode[dst_first] & 7);
1579 #ifndef PRODUCT
1580         } else if (!do_size) {
1581           st->print("movdq   %s, %s\t# spill",
1582                      Matcher::regName[dst_first],
1583                      Matcher::regName[src_first]);
1584 #endif
1585         }
1586         return 5; // REX
1587       } else {
1588         // 32-bit
1589         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1590         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1591         if (cbuf) {
1592           emit_opcode(*cbuf, 0x66);
1593           if (Matcher::_regEncode[dst_first] < 8) {
1594             if (Matcher::_regEncode[src_first] >= 8) {
1595               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1596             }
1597           } else {
1598             if (Matcher::_regEncode[src_first] < 8) {
1599               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1600             } else {
1601               emit_opcode(*cbuf, Assembler::REX_RB);
1602             }
1603           }
1604           emit_opcode(*cbuf, 0x0F);
1605           emit_opcode(*cbuf, 0x7E);
1606           emit_rm(*cbuf, 0x3,
1607                   Matcher::_regEncode[src_first] & 7,
1608                   Matcher::_regEncode[dst_first] & 7);
1609 #ifndef PRODUCT
1610         } else if (!do_size) {
1611           st->print("movdl   %s, %s\t# spill",
1612                      Matcher::regName[dst_first],
1613                      Matcher::regName[src_first]);
1614 #endif
1615         }
1616         return
1617           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1618           ? 4
1619           : 5; // REX
1620       }
1621     } else if (dst_first_rc == rc_float) {
1622       // xmm -> xmm
1623       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1624           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1625         // 64-bit
1626         if (cbuf) {
1627           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1628           if (Matcher::_regEncode[dst_first] < 8) {
1629             if (Matcher::_regEncode[src_first] >= 8) {
1630               emit_opcode(*cbuf, Assembler::REX_B);
1631             }
1632           } else {
1633             if (Matcher::_regEncode[src_first] < 8) {
1634               emit_opcode(*cbuf, Assembler::REX_R);
1635             } else {
1636               emit_opcode(*cbuf, Assembler::REX_RB);
1637             }
1638           }
1639           emit_opcode(*cbuf, 0x0F);
1640           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1641           emit_rm(*cbuf, 0x3,
1642                   Matcher::_regEncode[dst_first] & 7,
1643                   Matcher::_regEncode[src_first] & 7);
1644 #ifndef PRODUCT
1645         } else if (!do_size) {
1646           st->print("%s  %s, %s\t# spill",
1647                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1648                      Matcher::regName[dst_first],
1649                      Matcher::regName[src_first]);
1650 #endif
1651         }
1652         return
1653           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1654           ? 4
1655           : 5; // REX
1656       } else {
1657         // 32-bit
1658         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1659         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1660         if (cbuf) {
1661           if (!UseXmmRegToRegMoveAll)
1662             emit_opcode(*cbuf, 0xF3);
1663           if (Matcher::_regEncode[dst_first] < 8) {
1664             if (Matcher::_regEncode[src_first] >= 8) {
1665               emit_opcode(*cbuf, Assembler::REX_B);
1666             }
1667           } else {
1668             if (Matcher::_regEncode[src_first] < 8) {
1669               emit_opcode(*cbuf, Assembler::REX_R);
1670             } else {
1671               emit_opcode(*cbuf, Assembler::REX_RB);
1672             }
1673           }
1674           emit_opcode(*cbuf, 0x0F);
1675           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1676           emit_rm(*cbuf, 0x3,
1677                   Matcher::_regEncode[dst_first] & 7,
1678                   Matcher::_regEncode[src_first] & 7);
1679 #ifndef PRODUCT
1680         } else if (!do_size) {
1681           st->print("%s  %s, %s\t# spill",
1682                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1683                      Matcher::regName[dst_first],
1684                      Matcher::regName[src_first]);
1685 #endif
1686         }
1687         return
1688           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1689           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1690           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1691       }
1692     }
1693   }
1694 
1695   assert(0," foo ");
1696   Unimplemented();
1697 
1698   return 0;
1699 }
1700 
1701 #ifndef PRODUCT
// Debug-build printer for a spill copy.  Calls implementation() with a NULL
// code buffer and do_size == false, so only the textual form goes to st.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
{
  implementation(NULL, ra_, false, st);
}
1706 #endif
1707 
// Emits the machine code for a spill copy into cbuf.  The stream argument is
// NULL because implementation() only prints when no code buffer is given.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
{
  implementation(&cbuf, ra_, false, NULL);
}
1712 
// Returns the size in bytes reported by implementation() for this spill copy.
// With a NULL code buffer and do_size == true nothing is emitted or printed.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
{
  return implementation(NULL, ra_, true, NULL);
}
1717 
1718 //=============================================================================
1719 #ifndef PRODUCT
// Debug-build printer: reports the padding nop together with its byte count
// (_count).
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
1724 #endif
1725 
// Emits the padding by asking the macro assembler for _count bytes of nop.
// The register allocator argument is unused.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
{
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1731 
// The size of the node is the number of padding bytes it was created with
// (the same _count that emit() passes to nop()).
uint MachNopNode::size(PhaseRegAlloc*) const
{
  return _count;
}
1736 
1737 
1738 //=============================================================================
1739 #ifndef PRODUCT
1740 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1741 {
1742   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1743   int reg = ra_->get_reg_first(this);
1744   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1745             Matcher::regName[reg], offset);
1746 }
1747 #endif
1748 
1749 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1750 {
1751   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1752   int reg = ra_->get_encode(this);
1753   if (offset >= 0x80) {
1754     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1755     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1756     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1757     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1758     emit_d32(cbuf, offset);
1759   } else {
1760     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1761     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1762     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1763     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1764     emit_d8(cbuf, offset);
1765   }
1766 }
1767 
1768 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1769 {
1770   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1771   return (offset < 0x80) ? 5 : 8; // REX
1772 }
1773 
1774 //=============================================================================
1775 
// Emit call stub, compiled java to interpreter.
//
// The stub is placed in the stub section of cbuf (via start_a_stub /
// end_a_stub).  If the stub cannot be started the function returns without
// emitting anything.
void emit_java_to_interp(CodeBuffer& cbuf)
{
  // Stub is fixed up when the corresponding call is converted from
  // calling compiled code to calling interpreted code.
  // movq rbx, 0
  // jmp -5 # to self

  // Captured before the stub is started, so it still refers to the call
  // site in the main instruction section.
  address mark = cbuf.insts_mark();  // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeinst/ic code
  // (the jump targets its own address, i.e. it jumps to itself).
  __ jump(RuntimeAddress(__ pc()));

  // Update current stubs pointer and restore insts_end.
  __ end_a_stub();
}
1803 
// Size in bytes of the call stub, compiled java to interpreter.
// Must cover the two instructions listed in the comment on the return line.
uint size_java_to_interp()
{
  return 15;  // movq (1+1+8); jmp (1+4)
}
1809 
// Number of relocation entries for the call stub, compiled java to
// interpreter.
uint reloc_java_to_interp()
{
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1815 
1816 //=============================================================================
1817 #ifndef PRODUCT
1818 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1819 {
1820   if (UseCompressedOops) {
1821     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1822     if (Universe::narrow_oop_shift() != 0) {
1823       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1824     }
1825     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1826   } else {
1827     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1828                  "# Inline cache check");
1829   }
1830   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1831   st->print_cr("\tnop\t# nops to align entry point");
1832 }
1833 #endif
1834 
1835 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1836 {
1837   MacroAssembler masm(&cbuf);
1838   uint insts_size = cbuf.insts_size();
1839   if (UseCompressedOops) {
1840     masm.load_klass(rscratch1, j_rarg0);
1841     masm.cmpptr(rax, rscratch1);
1842   } else {
1843     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1844   }
1845 
1846   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1847 
1848   /* WARNING these NOPs are critical so that verified entry point is properly
1849      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1850   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1851   if (OptoBreakpoint) {
1852     // Leave space for int3
1853     nops_cnt -= 1;
1854   }
1855   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1856   if (nops_cnt > 0)
1857     masm.nop(nops_cnt);
1858 }
1859 
// Size of the unverified entry point.  Delegates to MachNode::size() instead
// of computing a closed form, because the emitted sequence depends on
// several settings (see MachUEPNode::emit).
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1865 
1866 
1867 //=============================================================================
// Size in bytes reserved for the exception handler stub.  The handler is a
// single jump (see emit_exception_handler), so one NativeJump is enough.
uint size_exception_handler()
{
  // NativeCall instruction size is the same as NativeJump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1875 
1876 // Emit exception handler code.
1877 int emit_exception_handler(CodeBuffer& cbuf)
1878 {
1879 
1880   // Note that the code buffer's insts_mark is always relative to insts.
1881   // That's why we must use the macroassembler to generate a handler.
1882   MacroAssembler _masm(&cbuf);
1883   address base =
1884   __ start_a_stub(size_exception_handler());
1885   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1886   int offset = __ offset();
1887   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1888   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1889   __ end_a_stub();
1890   return offset;
1891 }
1892 
// Size in bytes reserved for the deopt handler stub; emit_deopt_handler()
// asserts that the emitted code does not exceed it.
uint size_deopt_handler()
{
  // three 5 byte instructions
  return 15;
}
1898 
1899 // Emit deopt handler code.
1900 int emit_deopt_handler(CodeBuffer& cbuf)
1901 {
1902 
1903   // Note that the code buffer's insts_mark is always relative to insts.
1904   // That's why we must use the macroassembler to generate a handler.
1905   MacroAssembler _masm(&cbuf);
1906   address base =
1907   __ start_a_stub(size_deopt_handler());
1908   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1909   int offset = __ offset();
1910   address the_pc = (address) __ pc();
1911   Label next;
1912   // push a "the_pc" on the stack without destroying any registers
1913   // as they all may be live.
1914 
1915   // push address of "next"
1916   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1917   __ bind(next);
1918   // adjust it so it matches "the_pc"
1919   __ subptr(Address(rsp, 0), __ offset() - offset);
1920   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1921   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1922   __ end_a_stub();
1923   return offset;
1924 }
1925 
1926 
1927 const bool Matcher::match_rule_supported(int opcode) {
1928   if (!has_match_rule(opcode))
1929     return false;
1930 
1931   return true;  // Per default match rules are supported.
1932 }
1933 
// Converts a register number into an offset within the FP registers by
// subtracting 32, the size of the first register chunk.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  return regnum - 32; // The FP registers are in the second chunk
}
1938 
// This query exists for UltraSparc; returning true just means we have a fast
// l2f (long to float) conversion.  Always true on amd64.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1943 
// Vector width in bytes.  Fixed at 8 here, which matches the size of the
// Op_RegD ideal register returned by vector_ideal_reg().
const uint Matcher::vector_width_in_bytes(void) {
  return 8;
}
1948 
// Vector ideal reg: the ideal register type used to hold vectors.
// Always the double register type (Op_RegD).
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1953 
1954 // Is this branch offset short enough that a short branch can be used?
1955 //
1956 // NOTE: If the platform does not provide any short branch variants, then
1957 //       this method should return false for offset 0.
1958 bool Matcher::is_short_branch_offset(int rule, int offset) {
1959   // the short version of jmpConUCF2 contains multiple branches,
1960   // making the reach slightly less
1961   if (rule == jmpConUCF2_rule)
1962     return (-126 <= offset && offset <= 125);
1963   return (-128 <= offset && offset <= 127);
1964 }
1965 
// Decides whether a 64-bit constant counts as "simple".  The value argument
// is ignored: every constant is accepted.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // The stricter test below (constant fits in 32 bits) is kept for reference
  // only and is not in effect.
  //return value == (int) value;  // Cf. storeImmL and immL32.

  // Probably always true, even if a temp register is required.
  return true;
}
1973 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// not bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, in bytes (eight longs).
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?  No masking is
// requested here.
const bool Matcher::need_masked_shift_count = false;
1988 
// Returns true when LogMinObjAlignmentInBytes is at most 3.  Only valid when
// compressed oops are enabled (asserted).
// NOTE(review): the limit of 3 presumably corresponds to the largest scale
// factor (8) of an x86 addressing mode -- confirm.
bool Matcher::narrow_oop_use_complex_address() {
  assert(UseCompressedOops, "only for compressed oops code");
  return (LogMinObjAlignmentInBytes <= 3);
}
1993 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// No-op on amd64
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x64 they are stored without conversion, so normal access can be used.
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = true;
2020 
2021 // Return whether or not this register is ever used as an argument.
2022 // This function is used on startup to build the trampoline stubs in
2023 // generateOptoStub.  Registers not mentioned will be killed by the VM
2024 // call in the trampoline, and arguments in those registers not be
2025 // available to the callee.
2026 bool Matcher::can_be_java_arg(int reg)
2027 {
2028   return
2029     reg ==  RDI_num || reg ==  RDI_H_num ||
2030     reg ==  RSI_num || reg ==  RSI_H_num ||
2031     reg ==  RDX_num || reg ==  RDX_H_num ||
2032     reg ==  RCX_num || reg ==  RCX_H_num ||
2033     reg ==   R8_num || reg ==   R8_H_num ||
2034     reg ==   R9_num || reg ==   R9_H_num ||
2035     reg ==  R12_num || reg ==  R12_H_num ||
2036     reg == XMM0_num || reg == XMM0_H_num ||
2037     reg == XMM1_num || reg == XMM1_H_num ||
2038     reg == XMM2_num || reg == XMM2_H_num ||
2039     reg == XMM3_num || reg == XMM3_H_num ||
2040     reg == XMM4_num || reg == XMM4_H_num ||
2041     reg == XMM5_num || reg == XMM5_H_num ||
2042     reg == XMM6_num || reg == XMM6_H_num ||
2043     reg == XMM7_num || reg == XMM7_H_num;
2044 }
2045 
// A register is a spillable argument register exactly when it can hold a
// Java argument (see can_be_java_arg).
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2050 
// Whether long division by a constant should use assembly code.
// Always false here; the divisor argument is ignored.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64 bit mode, code which uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
  return false;
}
2057 
// Register for DIVI projection of divmodI: the int quotient is in RAX.
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask;
}
2062 
// Register for MODI projection of divmodI: the int remainder is in RDX.
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask;
}
2067 
// Register for DIVL projection of divmodL: the long quotient is in RAX.
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask;
}
2072 
// Register for MODL projection of divmodL: the long remainder is in RDX.
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask;
}
2077 
// Register mask used to save SP across a method handle invoke: RBP.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return PTR_RBP_REG_mask;
}
2081 
2082 static Address build_address(int b, int i, int s, int d) {
2083   Register index = as_Register(i);
2084   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2085   if (index == rsp) {
2086     index = noreg;
2087     scale = Address::no_scale;
2088   }
2089   Address addr(as_Register(b), index, scale, d);
2090   return addr;
2091 }
2092 
2093 %}
2094 
2095 //----------ENCODING BLOCK-----------------------------------------------------
2096 // This block specifies the encoding classes used by the compiler to
2097 // output byte streams.  Encoding classes are parameterized macros
2098 // used by Machine Instruction Nodes in order to generate the bit
2099 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2102 // COND_INTER.  REG_INTER causes an operand to generate a function
2103 // which returns its register number when queried.  CONST_INTER causes
2104 // an operand to generate a function which returns the value of the
2105 // constant when queried.  MEMORY_INTER causes an operand to generate
2106 // four functions which return the Base Register, the Index Register,
2107 // the Scale Value, and the Offset Value of the operand when queried.
2108 // COND_INTER causes an operand to generate six functions which return
2109 // the encoding code (ie - encoding bits for the instruction)
2110 // associated with each basic boolean condition for a conditional
2111 // instruction.
2112 //
2113 // Instructions specify two basic values for encoding.  Again, a
2114 // function is available to check if the constant displacement is an
2115 // oop. They use the ins_encode keyword to specify their encoding
2116 // classes (which must be a sequence of enc_class names, and their
2117 // parameters, specified in the encoding block), and they use the
2118 // opcode keyword to specify, in order, their primary, secondary, and
2119 // tertiary opcode.  Only the opcode sections which a particular
2120 // instruction needs for encoding need to be specified.
2121 encode %{
2122   // Build emit functions for each basic byte or larger field in the
2123   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2124   // from C++ code in the enc_class source block.  Emit functions will
2125   // live in the main source block for now.  In future, we can
2126   // generalize this by adding a syntax that specifies the sizes of
2127   // fields in an order, so that the adlc can build the emit functions
2128   // automagically
2129 
  // Emit primary opcode
  // (the first value of the using instruction's opcode declaration).
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}
2135 
  // Emit secondary opcode
  // (the second value of the using instruction's opcode declaration).
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}
2141 
  // Emit tertiary opcode
  // (the third value of the using instruction's opcode declaration).
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}
2147 
  // Emit opcode directly: the opcode byte is taken from the constant
  // operand d8 instead of the instruction's opcode declaration.
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}
2153 
  // Emit size prefix (the 0x66 operand-size override byte).
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}
2159 
  // Emit a register-direct ModRM byte (mod = 0x3) with a zero reg field and
  // the low three bits of the register encoding in the r/m field.  The
  // fourth encoding bit is not handled here.
  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}
2164 
  // Emit a register-direct ModRM byte (mod = 0x3): dst goes in the reg
  // field and src in the r/m field, low three bits of each encoding only.
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2169 
  // Emit the given opcode byte followed by a register-direct ModRM byte
  // (dst in the reg field, src in the r/m field, low three bits of each).
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2175 
  // Flag fixup emitted after a floating point compare.  When the parity flag
  // is set, the saved flags are rewritten through the stack with the mask
  // 0xffffff2b, which clears bits 2, 4, 6 and 7 (PF, AF, ZF and SF) and
  // leaves the carry flag as it was.  When parity is clear the whole
  // sequence is skipped.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // (0x0A skips pushfq (1) + andq (8) + popfq (1) and lands on the nop)
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2198 
  // Materializes a three-way compare result in $dst from the current flags:
  // -1 when the parity or carry flag is set, otherwise 1 when the zero flag
  // is clear and 0 when it is set.
  //
  // The two short branches jump over the setne/movzbl pair, whose combined
  // length is 6 bytes without prefixes and 8 bytes with them, hence the
  // displacement choice on dstenc < 4.
  enc_class cmpfp3(rRegI dst)
  %{
    int dstenc = $dst$$reg;

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jp,s done
    // (skips the following jb (2 bytes) plus setne and movzbl)
    emit_opcode(cbuf, 0x7A);
    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);

    // jb,s done
    emit_opcode(cbuf, 0x72);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Encodings 4 and above get a prefix: plain REX for 4-7, REX_B for 8-15.
    // NOTE(review): the plain REX presumably selects the low byte of the
    // register instead of a legacy high-byte register -- confirm.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // $dst is both the reg and the r/m operand, so encodings 8-15 need
    // REX_RB here.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2234 
  // Prologue for 32-bit signed division.  Emits everything up to, but not
  // including, the idivl itself; the instruction using this enc_class must
  // emit the idivl right after it, because the branch displacements below
  // already account for its length.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    // (the immediate is emitted byte by byte, least significant byte first)
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // (one byte further when the cmp below carries a REX_B prefix)
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // (skips cltd plus the idivl, which is one byte longer with REX_B)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
2298 
  // Prologue for 64-bit signed division.  Emits everything up to, but not
  // including, the idivq itself; the instruction using this enc_class must
  // emit the idivq right after it, because the branch displacements below
  // already account for its length.  rdx is used as scratch for the min_long
  // constant before it receives the remainder.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    // (the immediate is emitted byte by byte, least significant byte first)
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    // A REX prefix is always present here (REX_W or REX_WB), so the
    // instruction length does not depend on the divisor register.
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2369 
2370   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2371   enc_class OpcSE(immI imm)
2372   %{
2373     // Emit primary opcode and set sign-extend bit
2374     // Check for 8-bit immediate, and set sign extend bit in opcode
2375     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2376       emit_opcode(cbuf, $primary | 0x02);
2377     } else {
2378       // 32-bit immediate
2379       emit_opcode(cbuf, $primary);
2380     }
2381   %}
2382 
2383   enc_class OpcSErm(rRegI dst, immI imm)
2384   %{
2385     // OpcSEr/m
2386     int dstenc = $dst$$reg;
2387     if (dstenc >= 8) {
2388       emit_opcode(cbuf, Assembler::REX_B);
2389       dstenc -= 8;
2390     }
2391     // Emit primary opcode and set sign-extend bit
2392     // Check for 8-bit immediate, and set sign extend bit in opcode
2393     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2394       emit_opcode(cbuf, $primary | 0x02);
2395     } else {
2396       // 32-bit immediate
2397       emit_opcode(cbuf, $primary);
2398     }
2399     // Emit r/m byte with secondary opcode, after primary opcode.
2400     emit_rm(cbuf, 0x3, $secondary, dstenc);
2401   %}
2402 
2403   enc_class OpcSErm_wide(rRegL dst, immI imm)
2404   %{
2405     // OpcSEr/m
2406     int dstenc = $dst$$reg;
2407     if (dstenc < 8) {
2408       emit_opcode(cbuf, Assembler::REX_W);
2409     } else {
2410       emit_opcode(cbuf, Assembler::REX_WB);
2411       dstenc -= 8;
2412     }
2413     // Emit primary opcode and set sign-extend bit
2414     // Check for 8-bit immediate, and set sign extend bit in opcode
2415     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2416       emit_opcode(cbuf, $primary | 0x02);
2417     } else {
2418       // 32-bit immediate
2419       emit_opcode(cbuf, $primary);
2420     }
2421     // Emit r/m byte with secondary opcode, after primary opcode.
2422     emit_rm(cbuf, 0x3, $secondary, dstenc);
2423   %}
2424 
  // Emit the immediate itself, using the same size rule as OpcSE: one byte
  // when the value fits in a signed byte, four bytes otherwise.
  enc_class Con8or32(immI imm)
  %{
    // Check for 8-bit immediate; this must agree with the sign-extend bit
    // chosen for the opcode.
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      $$$emit8$imm$$constant;
    } else {
      // 32-bit immediate
      $$$emit32$imm$$constant;
    }
  %}
2435 
2436   enc_class Lbl(label labl)
2437   %{
2438     // JMP, CALL
2439     Label* l = $labl$$label;
2440     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2441   %}
2442 
2443   enc_class LblShort(label labl)
2444   %{
2445     // JMP, CALL
2446     Label* l = $labl$$label;
2447     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2448     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2449     emit_d8(cbuf, disp);
2450   %}
2451 
  // Emit the secondary opcode combined with the dst register encoding.
  // NOTE(review): emit_cc presumably ORs its third argument into the opcode
  // byte -- confirm against its definition.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
2457 
  // Emit the tertiary opcode combined with the dst register encoding.
  // NOTE(review): emit_cc presumably ORs its third argument into the opcode
  // byte -- confirm against its definition.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
2463 
  // Emit a register-direct ModRM byte (mod = 0x3) whose reg field carries
  // the secondary opcode and whose r/m field carries the low three bits of
  // the operand register.
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
2469 
  // Emit a conditional jump with a 32-bit displacement: the primary opcode
  // byte, the secondary opcode combined with the condition code, then the
  // displacement measured from the end of the 4-byte field.  A missing label
  // yields a displacement of 0.
  enc_class Jcc(cmpOp cop, label labl)
  %{
    // JCC
    Label* l = $labl$$label;
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    // insts_size() is read after both opcode bytes have been emitted.
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
  %}
2478 
2479   enc_class JccShort (cmpOp cop, label labl)
2480   %{
2481   // JCC
2482     Label *l = $labl$$label;
2483     emit_cc(cbuf, $primary, $cop$$cmpcode);
2484     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2485     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2486     emit_d8(cbuf, disp);
2487   %}
2488 
  // Emit the two opcode bytes of a conditional move: the primary opcode,
  // then the secondary opcode combined with the condition code.  The ModRM
  // byte is not emitted here.
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
2495 
2496   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2497   %{
2498     // Invert sense of branch from sense of cmov
2499     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2500     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2501                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2502                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2503     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2504     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2505     if ($dst$$reg < 8) {
2506       if ($src$$reg >= 8) {
2507         emit_opcode(cbuf, Assembler::REX_B);
2508       }
2509     } else {
2510       if ($src$$reg < 8) {
2511         emit_opcode(cbuf, Assembler::REX_R);
2512       } else {
2513         emit_opcode(cbuf, Assembler::REX_RB);
2514       }
2515     }
2516     emit_opcode(cbuf, 0x0F);
2517     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2518     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2519   %}
2520 
2521   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2522   %{
2523     // Invert sense of branch from sense of cmov
2524     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2525     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2526 
2527     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2528     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2529     if ($dst$$reg < 8) {
2530       if ($src$$reg >= 8) {
2531         emit_opcode(cbuf, Assembler::REX_B);
2532       }
2533     } else {
2534       if ($src$$reg < 8) {
2535         emit_opcode(cbuf, Assembler::REX_R);
2536       } else {
2537         emit_opcode(cbuf, Assembler::REX_RB);
2538       }
2539     }
2540     emit_opcode(cbuf, 0x0F);
2541     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2542     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2543   %}
2544 
2545   enc_class enc_PartialSubtypeCheck()
2546   %{
2547     Register Rrdi = as_Register(RDI_enc); // result register
2548     Register Rrax = as_Register(RAX_enc); // super class
2549     Register Rrcx = as_Register(RCX_enc); // killed
2550     Register Rrsi = as_Register(RSI_enc); // sub class
2551     Label miss;
2552     const bool set_cond_codes = true;
2553 
2554     MacroAssembler _masm(&cbuf);
2555     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2556                                      NULL, &miss,
2557                                      /*set_cond_codes:*/ true);
2558     if ($primary) {
2559       __ xorptr(Rrdi, Rrdi);
2560     }
2561     __ bind(miss);
2562   %}
2563 
2564   enc_class Java_To_Interpreter(method meth)
2565   %{
2566     // CALL Java_To_Interpreter
2567     // This is the instruction starting address for relocation info.
2568     cbuf.set_insts_mark();
2569     $$$emit8$primary;
2570     // CALL directly to the runtime
2571     emit_d32_reloc(cbuf,
2572                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2573                    runtime_call_Relocation::spec(),
2574                    RELOC_DISP32);
2575   %}
2576 
2577   enc_class preserve_SP %{
2578     debug_only(int off0 = cbuf.insts_size());
2579     MacroAssembler _masm(&cbuf);
2580     // RBP is preserved across all calls, even compiled calls.
2581     // Use it to preserve RSP in places where the callee might change the SP.
2582     __ movptr(rbp_mh_SP_save, rsp);
2583     debug_only(int off1 = cbuf.insts_size());
2584     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2585   %}
2586 
2587   enc_class restore_SP %{
2588     MacroAssembler _masm(&cbuf);
2589     __ movptr(rsp, rbp_mh_SP_save);
2590   %}
2591 
2592   enc_class Java_Static_Call(method meth)
2593   %{
2594     // JAVA STATIC CALL
2595     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2596     // determine who we intended to call.
2597     cbuf.set_insts_mark();
2598     $$$emit8$primary;
2599 
2600     if (!_method) {
2601       emit_d32_reloc(cbuf,
2602                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2603                      runtime_call_Relocation::spec(),
2604                      RELOC_DISP32);
2605     } else if (_optimized_virtual) {
2606       emit_d32_reloc(cbuf,
2607                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2608                      opt_virtual_call_Relocation::spec(),
2609                      RELOC_DISP32);
2610     } else {
2611       emit_d32_reloc(cbuf,
2612                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2613                      static_call_Relocation::spec(),
2614                      RELOC_DISP32);
2615     }
2616     if (_method) {
2617       // Emit stub for static call
2618       emit_java_to_interp(cbuf);
2619     }
2620   %}
2621 
2622   enc_class Java_Dynamic_Call(method meth)
2623   %{
2624     // JAVA DYNAMIC CALL
2625     // !!!!!
2626     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2627     // emit_call_dynamic_prologue( cbuf );
2628     cbuf.set_insts_mark();
2629 
2630     // movq rax, -1
2631     emit_opcode(cbuf, Assembler::REX_W);
2632     emit_opcode(cbuf, 0xB8 | RAX_enc);
2633     emit_d64_reloc(cbuf,
2634                    (int64_t) Universe::non_oop_word(),
2635                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2636     address virtual_call_oop_addr = cbuf.insts_mark();
2637     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2638     // who we intended to call.
2639     cbuf.set_insts_mark();
2640     $$$emit8$primary;
2641     emit_d32_reloc(cbuf,
2642                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2643                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2644                    RELOC_DISP32);
2645   %}
2646 
2647   enc_class Java_Compiled_Call(method meth)
2648   %{
2649     // JAVA COMPILED CALL
2650     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2651 
2652     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2653     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2654 
2655     // callq *disp(%rax)
2656     cbuf.set_insts_mark();
2657     $$$emit8$primary;
2658     if (disp < 0x80) {
2659       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2660       emit_d8(cbuf, disp); // Displacement
2661     } else {
2662       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2663       emit_d32(cbuf, disp); // Displacement
2664     }
2665   %}
2666 
2667   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2668   %{
2669     // SAL, SAR, SHR
2670     int dstenc = $dst$$reg;
2671     if (dstenc >= 8) {
2672       emit_opcode(cbuf, Assembler::REX_B);
2673       dstenc -= 8;
2674     }
2675     $$$emit8$primary;
2676     emit_rm(cbuf, 0x3, $secondary, dstenc);
2677     $$$emit8$shift$$constant;
2678   %}
2679 
2680   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2681   %{
2682     // SAL, SAR, SHR
2683     int dstenc = $dst$$reg;
2684     if (dstenc < 8) {
2685       emit_opcode(cbuf, Assembler::REX_W);
2686     } else {
2687       emit_opcode(cbuf, Assembler::REX_WB);
2688       dstenc -= 8;
2689     }
2690     $$$emit8$primary;
2691     emit_rm(cbuf, 0x3, $secondary, dstenc);
2692     $$$emit8$shift$$constant;
2693   %}
2694 
2695   enc_class load_immI(rRegI dst, immI src)
2696   %{
2697     int dstenc = $dst$$reg;
2698     if (dstenc >= 8) {
2699       emit_opcode(cbuf, Assembler::REX_B);
2700       dstenc -= 8;
2701     }
2702     emit_opcode(cbuf, 0xB8 | dstenc);
2703     $$$emit32$src$$constant;
2704   %}
2705 
2706   enc_class load_immL(rRegL dst, immL src)
2707   %{
2708     int dstenc = $dst$$reg;
2709     if (dstenc < 8) {
2710       emit_opcode(cbuf, Assembler::REX_W);
2711     } else {
2712       emit_opcode(cbuf, Assembler::REX_WB);
2713       dstenc -= 8;
2714     }
2715     emit_opcode(cbuf, 0xB8 | dstenc);
2716     emit_d64(cbuf, $src$$constant);
2717   %}
2718 
2719   enc_class load_immUL32(rRegL dst, immUL32 src)
2720   %{
2721     // same as load_immI, but this time we care about zeroes in the high word
2722     int dstenc = $dst$$reg;
2723     if (dstenc >= 8) {
2724       emit_opcode(cbuf, Assembler::REX_B);
2725       dstenc -= 8;
2726     }
2727     emit_opcode(cbuf, 0xB8 | dstenc);
2728     $$$emit32$src$$constant;
2729   %}
2730 
2731   enc_class load_immL32(rRegL dst, immL32 src)
2732   %{
2733     int dstenc = $dst$$reg;
2734     if (dstenc < 8) {
2735       emit_opcode(cbuf, Assembler::REX_W);
2736     } else {
2737       emit_opcode(cbuf, Assembler::REX_WB);
2738       dstenc -= 8;
2739     }
2740     emit_opcode(cbuf, 0xC7);
2741     emit_rm(cbuf, 0x03, 0x00, dstenc);
2742     $$$emit32$src$$constant;
2743   %}
2744 
2745   enc_class load_immP31(rRegP dst, immP32 src)
2746   %{
2747     // same as load_immI, but this time we care about zeroes in the high word
2748     int dstenc = $dst$$reg;
2749     if (dstenc >= 8) {
2750       emit_opcode(cbuf, Assembler::REX_B);
2751       dstenc -= 8;
2752     }
2753     emit_opcode(cbuf, 0xB8 | dstenc);
2754     $$$emit32$src$$constant;
2755   %}
2756 
2757   enc_class load_immP(rRegP dst, immP src)
2758   %{
2759     int dstenc = $dst$$reg;
2760     if (dstenc < 8) {
2761       emit_opcode(cbuf, Assembler::REX_W);
2762     } else {
2763       emit_opcode(cbuf, Assembler::REX_WB);
2764       dstenc -= 8;
2765     }
2766     emit_opcode(cbuf, 0xB8 | dstenc);
2767     // This next line should be generated from ADLC
2768     if ($src->constant_is_oop()) {
2769       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2770     } else {
2771       emit_d64(cbuf, $src$$constant);
2772     }
2773   %}
2774 
2775   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2776   enc_class enc_copy(rRegI dst, rRegI src)
2777   %{
2778     encode_copy(cbuf, $dst$$reg, $src$$reg);
2779   %}
2780 
2781   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2782   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2783     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2784   %}
2785 
2786   enc_class enc_copy_always(rRegI dst, rRegI src)
2787   %{
2788     int srcenc = $src$$reg;
2789     int dstenc = $dst$$reg;
2790 
2791     if (dstenc < 8) {
2792       if (srcenc >= 8) {
2793         emit_opcode(cbuf, Assembler::REX_B);
2794         srcenc -= 8;
2795       }
2796     } else {
2797       if (srcenc < 8) {
2798         emit_opcode(cbuf, Assembler::REX_R);
2799       } else {
2800         emit_opcode(cbuf, Assembler::REX_RB);
2801         srcenc -= 8;
2802       }
2803       dstenc -= 8;
2804     }
2805 
2806     emit_opcode(cbuf, 0x8B);
2807     emit_rm(cbuf, 0x3, dstenc, srcenc);
2808   %}
2809 
2810   enc_class enc_copy_wide(rRegL dst, rRegL src)
2811   %{
2812     int srcenc = $src$$reg;
2813     int dstenc = $dst$$reg;
2814 
2815     if (dstenc != srcenc) {
2816       if (dstenc < 8) {
2817         if (srcenc < 8) {
2818           emit_opcode(cbuf, Assembler::REX_W);
2819         } else {
2820           emit_opcode(cbuf, Assembler::REX_WB);
2821           srcenc -= 8;
2822         }
2823       } else {
2824         if (srcenc < 8) {
2825           emit_opcode(cbuf, Assembler::REX_WR);
2826         } else {
2827           emit_opcode(cbuf, Assembler::REX_WRB);
2828           srcenc -= 8;
2829         }
2830         dstenc -= 8;
2831       }
2832       emit_opcode(cbuf, 0x8B);
2833       emit_rm(cbuf, 0x3, dstenc, srcenc);
2834     }
2835   %}
2836 
2837   enc_class Con32(immI src)
2838   %{
2839     // Output immediate
2840     $$$emit32$src$$constant;
2841   %}
2842 
2843   enc_class Con64(immL src)
2844   %{
2845     // Output immediate
2846     emit_d64($src$$constant);
2847   %}
2848 
2849   enc_class Con32F_as_bits(immF src)
2850   %{
2851     // Output Float immediate bits
2852     jfloat jf = $src$$constant;
2853     jint jf_as_bits = jint_cast(jf);
2854     emit_d32(cbuf, jf_as_bits);
2855   %}
2856 
2857   enc_class Con16(immI src)
2858   %{
2859     // Output immediate
2860     $$$emit16$src$$constant;
2861   %}
2862 
2863   // How is this different from Con32??? XXX
2864   enc_class Con_d32(immI src)
2865   %{
2866     emit_d32(cbuf,$src$$constant);
2867   %}
2868 
2869   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2870     // Output immediate memory reference
2871     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2872     emit_d32(cbuf, 0x00);
2873   %}
2874 
2875   enc_class lock_prefix()
2876   %{
2877     if (os::is_MP()) {
2878       emit_opcode(cbuf, 0xF0); // lock
2879     }
2880   %}
2881 
2882   enc_class REX_mem(memory mem)
2883   %{
2884     if ($mem$$base >= 8) {
2885       if ($mem$$index < 8) {
2886         emit_opcode(cbuf, Assembler::REX_B);
2887       } else {
2888         emit_opcode(cbuf, Assembler::REX_XB);
2889       }
2890     } else {
2891       if ($mem$$index >= 8) {
2892         emit_opcode(cbuf, Assembler::REX_X);
2893       }
2894     }
2895   %}
2896 
2897   enc_class REX_mem_wide(memory mem)
2898   %{
2899     if ($mem$$base >= 8) {
2900       if ($mem$$index < 8) {
2901         emit_opcode(cbuf, Assembler::REX_WB);
2902       } else {
2903         emit_opcode(cbuf, Assembler::REX_WXB);
2904       }
2905     } else {
2906       if ($mem$$index < 8) {
2907         emit_opcode(cbuf, Assembler::REX_W);
2908       } else {
2909         emit_opcode(cbuf, Assembler::REX_WX);
2910       }
2911     }
2912   %}
2913 
2914   // for byte regs
2915   enc_class REX_breg(rRegI reg)
2916   %{
2917     if ($reg$$reg >= 4) {
2918       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2919     }
2920   %}
2921 
2922   // for byte regs
2923   enc_class REX_reg_breg(rRegI dst, rRegI src)
2924   %{
2925     if ($dst$$reg < 8) {
2926       if ($src$$reg >= 4) {
2927         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2928       }
2929     } else {
2930       if ($src$$reg < 8) {
2931         emit_opcode(cbuf, Assembler::REX_R);
2932       } else {
2933         emit_opcode(cbuf, Assembler::REX_RB);
2934       }
2935     }
2936   %}
2937 
2938   // for byte regs
2939   enc_class REX_breg_mem(rRegI reg, memory mem)
2940   %{
2941     if ($reg$$reg < 8) {
2942       if ($mem$$base < 8) {
2943         if ($mem$$index >= 8) {
2944           emit_opcode(cbuf, Assembler::REX_X);
2945         } else if ($reg$$reg >= 4) {
2946           emit_opcode(cbuf, Assembler::REX);
2947         }
2948       } else {
2949         if ($mem$$index < 8) {
2950           emit_opcode(cbuf, Assembler::REX_B);
2951         } else {
2952           emit_opcode(cbuf, Assembler::REX_XB);
2953         }
2954       }
2955     } else {
2956       if ($mem$$base < 8) {
2957         if ($mem$$index < 8) {
2958           emit_opcode(cbuf, Assembler::REX_R);
2959         } else {
2960           emit_opcode(cbuf, Assembler::REX_RX);
2961         }
2962       } else {
2963         if ($mem$$index < 8) {
2964           emit_opcode(cbuf, Assembler::REX_RB);
2965         } else {
2966           emit_opcode(cbuf, Assembler::REX_RXB);
2967         }
2968       }
2969     }
2970   %}
2971 
2972   enc_class REX_reg(rRegI reg)
2973   %{
2974     if ($reg$$reg >= 8) {
2975       emit_opcode(cbuf, Assembler::REX_B);
2976     }
2977   %}
2978 
2979   enc_class REX_reg_wide(rRegI reg)
2980   %{
2981     if ($reg$$reg < 8) {
2982       emit_opcode(cbuf, Assembler::REX_W);
2983     } else {
2984       emit_opcode(cbuf, Assembler::REX_WB);
2985     }
2986   %}
2987 
2988   enc_class REX_reg_reg(rRegI dst, rRegI src)
2989   %{
2990     if ($dst$$reg < 8) {
2991       if ($src$$reg >= 8) {
2992         emit_opcode(cbuf, Assembler::REX_B);
2993       }
2994     } else {
2995       if ($src$$reg < 8) {
2996         emit_opcode(cbuf, Assembler::REX_R);
2997       } else {
2998         emit_opcode(cbuf, Assembler::REX_RB);
2999       }
3000     }
3001   %}
3002 
3003   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3004   %{
3005     if ($dst$$reg < 8) {
3006       if ($src$$reg < 8) {
3007         emit_opcode(cbuf, Assembler::REX_W);
3008       } else {
3009         emit_opcode(cbuf, Assembler::REX_WB);
3010       }
3011     } else {
3012       if ($src$$reg < 8) {
3013         emit_opcode(cbuf, Assembler::REX_WR);
3014       } else {
3015         emit_opcode(cbuf, Assembler::REX_WRB);
3016       }
3017     }
3018   %}
3019 
3020   enc_class REX_reg_mem(rRegI reg, memory mem)
3021   %{
3022     if ($reg$$reg < 8) {
3023       if ($mem$$base < 8) {
3024         if ($mem$$index >= 8) {
3025           emit_opcode(cbuf, Assembler::REX_X);
3026         }
3027       } else {
3028         if ($mem$$index < 8) {
3029           emit_opcode(cbuf, Assembler::REX_B);
3030         } else {
3031           emit_opcode(cbuf, Assembler::REX_XB);
3032         }
3033       }
3034     } else {
3035       if ($mem$$base < 8) {
3036         if ($mem$$index < 8) {
3037           emit_opcode(cbuf, Assembler::REX_R);
3038         } else {
3039           emit_opcode(cbuf, Assembler::REX_RX);
3040         }
3041       } else {
3042         if ($mem$$index < 8) {
3043           emit_opcode(cbuf, Assembler::REX_RB);
3044         } else {
3045           emit_opcode(cbuf, Assembler::REX_RXB);
3046         }
3047       }
3048     }
3049   %}
3050 
3051   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3052   %{
3053     if ($reg$$reg < 8) {
3054       if ($mem$$base < 8) {
3055         if ($mem$$index < 8) {
3056           emit_opcode(cbuf, Assembler::REX_W);
3057         } else {
3058           emit_opcode(cbuf, Assembler::REX_WX);
3059         }
3060       } else {
3061         if ($mem$$index < 8) {
3062           emit_opcode(cbuf, Assembler::REX_WB);
3063         } else {
3064           emit_opcode(cbuf, Assembler::REX_WXB);
3065         }
3066       }
3067     } else {
3068       if ($mem$$base < 8) {
3069         if ($mem$$index < 8) {
3070           emit_opcode(cbuf, Assembler::REX_WR);
3071         } else {
3072           emit_opcode(cbuf, Assembler::REX_WRX);
3073         }
3074       } else {
3075         if ($mem$$index < 8) {
3076           emit_opcode(cbuf, Assembler::REX_WRB);
3077         } else {
3078           emit_opcode(cbuf, Assembler::REX_WRXB);
3079         }
3080       }
3081     }
3082   %}
3083 
3084   enc_class reg_mem(rRegI ereg, memory mem)
3085   %{
3086     // High registers handle in encode_RegMem
3087     int reg = $ereg$$reg;
3088     int base = $mem$$base;
3089     int index = $mem$$index;
3090     int scale = $mem$$scale;
3091     int disp = $mem$$disp;
3092     bool disp_is_oop = $mem->disp_is_oop();
3093 
3094     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3095   %}
3096 
3097   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3098   %{
3099     int rm_byte_opcode = $rm_opcode$$constant;
3100 
3101     // High registers handle in encode_RegMem
3102     int base = $mem$$base;
3103     int index = $mem$$index;
3104     int scale = $mem$$scale;
3105     int displace = $mem$$disp;
3106 
3107     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3108                                             // working with static
3109                                             // globals
3110     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3111                   disp_is_oop);
3112   %}
3113 
3114   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3115   %{
3116     int reg_encoding = $dst$$reg;
3117     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3118     int index        = 0x04;            // 0x04 indicates no index
3119     int scale        = 0x00;            // 0x00 indicates no scale
3120     int displace     = $src1$$constant; // 0x00 indicates no displacement
3121     bool disp_is_oop = false;
3122     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3123                   disp_is_oop);
3124   %}
3125 
3126   enc_class neg_reg(rRegI dst)
3127   %{
3128     int dstenc = $dst$$reg;
3129     if (dstenc >= 8) {
3130       emit_opcode(cbuf, Assembler::REX_B);
3131       dstenc -= 8;
3132     }
3133     // NEG $dst
3134     emit_opcode(cbuf, 0xF7);
3135     emit_rm(cbuf, 0x3, 0x03, dstenc);
3136   %}
3137 
3138   enc_class neg_reg_wide(rRegI dst)
3139   %{
3140     int dstenc = $dst$$reg;
3141     if (dstenc < 8) {
3142       emit_opcode(cbuf, Assembler::REX_W);
3143     } else {
3144       emit_opcode(cbuf, Assembler::REX_WB);
3145       dstenc -= 8;
3146     }
3147     // NEG $dst
3148     emit_opcode(cbuf, 0xF7);
3149     emit_rm(cbuf, 0x3, 0x03, dstenc);
3150   %}
3151 
3152   enc_class setLT_reg(rRegI dst)
3153   %{
3154     int dstenc = $dst$$reg;
3155     if (dstenc >= 8) {
3156       emit_opcode(cbuf, Assembler::REX_B);
3157       dstenc -= 8;
3158     } else if (dstenc >= 4) {
3159       emit_opcode(cbuf, Assembler::REX);
3160     }
3161     // SETLT $dst
3162     emit_opcode(cbuf, 0x0F);
3163     emit_opcode(cbuf, 0x9C);
3164     emit_rm(cbuf, 0x3, 0x0, dstenc);
3165   %}
3166 
3167   enc_class setNZ_reg(rRegI dst)
3168   %{
3169     int dstenc = $dst$$reg;
3170     if (dstenc >= 8) {
3171       emit_opcode(cbuf, Assembler::REX_B);
3172       dstenc -= 8;
3173     } else if (dstenc >= 4) {
3174       emit_opcode(cbuf, Assembler::REX);
3175     }
3176     // SETNZ $dst
3177     emit_opcode(cbuf, 0x0F);
3178     emit_opcode(cbuf, 0x95);
3179     emit_rm(cbuf, 0x3, 0x0, dstenc);
3180   %}
3181 
3182   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3183                        rcx_RegI tmp)
3184   %{
3185     // cadd_cmpLT
3186 
3187     int tmpReg = $tmp$$reg;
3188 
3189     int penc = $p$$reg;
3190     int qenc = $q$$reg;
3191     int yenc = $y$$reg;
3192 
3193     // subl $p,$q
3194     if (penc < 8) {
3195       if (qenc >= 8) {
3196         emit_opcode(cbuf, Assembler::REX_B);
3197       }
3198     } else {
3199       if (qenc < 8) {
3200         emit_opcode(cbuf, Assembler::REX_R);
3201       } else {
3202         emit_opcode(cbuf, Assembler::REX_RB);
3203       }
3204     }
3205     emit_opcode(cbuf, 0x2B);
3206     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3207 
3208     // sbbl $tmp, $tmp
3209     emit_opcode(cbuf, 0x1B);
3210     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3211 
3212     // andl $tmp, $y
3213     if (yenc >= 8) {
3214       emit_opcode(cbuf, Assembler::REX_B);
3215     }
3216     emit_opcode(cbuf, 0x23);
3217     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3218 
3219     // addl $p,$tmp
3220     if (penc >= 8) {
3221         emit_opcode(cbuf, Assembler::REX_R);
3222     }
3223     emit_opcode(cbuf, 0x03);
3224     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3225   %}
3226 
3227   // Compare the lonogs and set -1, 0, or 1 into dst
3228   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3229   %{
3230     int src1enc = $src1$$reg;
3231     int src2enc = $src2$$reg;
3232     int dstenc = $dst$$reg;
3233 
3234     // cmpq $src1, $src2
3235     if (src1enc < 8) {
3236       if (src2enc < 8) {
3237         emit_opcode(cbuf, Assembler::REX_W);
3238       } else {
3239         emit_opcode(cbuf, Assembler::REX_WB);
3240       }
3241     } else {
3242       if (src2enc < 8) {
3243         emit_opcode(cbuf, Assembler::REX_WR);
3244       } else {
3245         emit_opcode(cbuf, Assembler::REX_WRB);
3246       }
3247     }
3248     emit_opcode(cbuf, 0x3B);
3249     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3250 
3251     // movl $dst, -1
3252     if (dstenc >= 8) {
3253       emit_opcode(cbuf, Assembler::REX_B);
3254     }
3255     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3256     emit_d32(cbuf, -1);
3257 
3258     // jl,s done
3259     emit_opcode(cbuf, 0x7C);
3260     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3261 
3262     // setne $dst
3263     if (dstenc >= 4) {
3264       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3265     }
3266     emit_opcode(cbuf, 0x0F);
3267     emit_opcode(cbuf, 0x95);
3268     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3269 
3270     // movzbl $dst, $dst
3271     if (dstenc >= 4) {
3272       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3273     }
3274     emit_opcode(cbuf, 0x0F);
3275     emit_opcode(cbuf, 0xB6);
3276     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3277   %}
3278 
3279   enc_class Push_ResultXD(regD dst) %{
3280     int dstenc = $dst$$reg;
3281 
3282     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3283 
3284     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3285     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3286     if (dstenc >= 8) {
3287       emit_opcode(cbuf, Assembler::REX_R);
3288     }
3289     emit_opcode  (cbuf, 0x0F );
3290     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3291     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3292 
3293     // add rsp,8
3294     emit_opcode(cbuf, Assembler::REX_W);
3295     emit_opcode(cbuf,0x83);
3296     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3297     emit_d8(cbuf,0x08);
3298   %}
3299 
3300   enc_class Push_SrcXD(regD src) %{
3301     int srcenc = $src$$reg;
3302 
3303     // subq rsp,#8
3304     emit_opcode(cbuf, Assembler::REX_W);
3305     emit_opcode(cbuf, 0x83);
3306     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3307     emit_d8(cbuf, 0x8);
3308 
3309     // movsd [rsp],src
3310     emit_opcode(cbuf, 0xF2);
3311     if (srcenc >= 8) {
3312       emit_opcode(cbuf, Assembler::REX_R);
3313     }
3314     emit_opcode(cbuf, 0x0F);
3315     emit_opcode(cbuf, 0x11);
3316     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3317 
3318     // fldd [rsp]
3319     emit_opcode(cbuf, 0x66);
3320     emit_opcode(cbuf, 0xDD);
3321     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3322   %}
3323 
3324 
3325   enc_class movq_ld(regD dst, memory mem) %{
3326     MacroAssembler _masm(&cbuf);
3327     __ movq($dst$$XMMRegister, $mem$$Address);
3328   %}
3329 
3330   enc_class movq_st(memory mem, regD src) %{
3331     MacroAssembler _masm(&cbuf);
3332     __ movq($mem$$Address, $src$$XMMRegister);
3333   %}
3334 
3335   enc_class pshufd_8x8(regF dst, regF src) %{
3336     MacroAssembler _masm(&cbuf);
3337 
3338     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3339     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3340     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3341   %}
3342 
3343   enc_class pshufd_4x16(regF dst, regF src) %{
3344     MacroAssembler _masm(&cbuf);
3345 
3346     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3347   %}
3348 
3349   enc_class pshufd(regD dst, regD src, int mode) %{
3350     MacroAssembler _masm(&cbuf);
3351 
3352     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3353   %}
3354 
3355   enc_class pxor(regD dst, regD src) %{
3356     MacroAssembler _masm(&cbuf);
3357 
3358     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3359   %}
3360 
3361   enc_class mov_i2x(regD dst, rRegI src) %{
3362     MacroAssembler _masm(&cbuf);
3363 
3364     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3365   %}
3366 
3367   // obj: object to lock
3368   // box: box address (header location) -- killed
3369   // tmp: rax -- killed
3370   // scr: rbx -- killed
3371   //
3372   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3373   // from i486.ad.  See that file for comments.
3374   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3375   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3376 
3377 
3378   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3379   %{
3380     Register objReg = as_Register((int)$obj$$reg);
3381     Register boxReg = as_Register((int)$box$$reg);
3382     Register tmpReg = as_Register($tmp$$reg);
3383     Register scrReg = as_Register($scr$$reg);
3384     MacroAssembler masm(&cbuf);
3385 
3386     // Verify uniqueness of register assignments -- necessary but not sufficient
3387     assert (objReg != boxReg && objReg != tmpReg &&
3388             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3389 
3390     if (_counters != NULL) {
3391       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3392     }
3393     if (EmitSync & 1) {
3394         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3395         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3396         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
3397     } else
3398     if (EmitSync & 2) {
3399         Label DONE_LABEL;
3400         if (UseBiasedLocking) {
3401            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3402           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3403         }
3404         // QQQ was movl...
3405         masm.movptr(tmpReg, 0x1);
3406         masm.orptr(tmpReg, Address(objReg, 0));
3407         masm.movptr(Address(boxReg, 0), tmpReg);
3408         if (os::is_MP()) {
3409           masm.lock();
3410         }
3411         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3412         masm.jcc(Assembler::equal, DONE_LABEL);
3413 
3414         // Recursive locking
3415         masm.subptr(tmpReg, rsp);
3416         masm.andptr(tmpReg, 7 - os::vm_page_size());
3417         masm.movptr(Address(boxReg, 0), tmpReg);
3418 
3419         masm.bind(DONE_LABEL);
3420         masm.nop(); // avoid branch to branch
3421     } else {
3422         Label DONE_LABEL, IsInflated, Egress;
3423 
3424         masm.movptr(tmpReg, Address(objReg, 0)) ;
3425         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3426         masm.jcc   (Assembler::notZero, IsInflated) ;
3427 
3428         // it's stack-locked, biased or neutral
3429         // TODO: optimize markword triage order to reduce the number of
3430         // conditional branches in the most common cases.
3431         // Beware -- there's a subtle invariant that fetch of the markword
3432         // at [FETCH], below, will never observe a biased encoding (*101b).
3433         // If this invariant is not held we'll suffer exclusion (safety) failure.
3434 
3435         if (UseBiasedLocking && !UseOptoBiasInlining) {
3436           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3437           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3438         }
3439 
3440         // was q will it destroy high?
3441         masm.orl   (tmpReg, 1) ;
3442         masm.movptr(Address(boxReg, 0), tmpReg) ;
3443         if (os::is_MP()) { masm.lock(); }
3444         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3445         if (_counters != NULL) {
3446            masm.cond_inc32(Assembler::equal,
3447                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3448         }
3449         masm.jcc   (Assembler::equal, DONE_LABEL);
3450 
3451         // Recursive locking
3452         masm.subptr(tmpReg, rsp);
3453         masm.andptr(tmpReg, 7 - os::vm_page_size());
3454         masm.movptr(Address(boxReg, 0), tmpReg);
3455         if (_counters != NULL) {
3456            masm.cond_inc32(Assembler::equal,
3457                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3458         }
3459         masm.jmp   (DONE_LABEL) ;
3460 
3461         masm.bind  (IsInflated) ;
3462         // It's inflated
3463 
3464         // TODO: someday avoid the ST-before-CAS penalty by
3465         // relocating (deferring) the following ST.
3466         // We should also think about trying a CAS without having
3467         // fetched _owner.  If the CAS is successful we may
3468         // avoid an RTO->RTS upgrade on the $line.
3469         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3470         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3471 
3472         masm.mov    (boxReg, tmpReg) ;
3473         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3474         masm.testptr(tmpReg, tmpReg) ;
3475         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
3476 
3477         // It's inflated and appears unlocked
3478         if (os::is_MP()) { masm.lock(); }
3479         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3480         // Intentional fall-through into DONE_LABEL ...
3481 
3482         masm.bind  (DONE_LABEL) ;
3483         masm.nop   () ;                 // avoid jmp to jmp
3484     }
3485   %}
3486 
  // Emits the inline fast path for monitor exit.  The outcome is reported
  // through the condition codes: ZF=1 means the fast path succeeded, ZF=0
  // means the caller has to take the slow path (see the LGoSlowPath and
  // LSuccess tails below).
  //
  // obj: object to unlock
  // box: box address (displaced header location), killed
  // RBX: killed tmp; cannot be obj nor box
  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
  %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    MacroAssembler masm(&cbuf);

    if (EmitSync & 4) {
       // Diagnostic mode: emit only a compare of rsp against 0.
       // NOTE(review): presumably this leaves ZF=0 so that every unlock goes
       // to the slow path - confirm.
       masm.cmpptr(rsp, 0) ;
    } else
    if (EmitSync & 8) {
       // Diagnostic mode: stack-lock-only exit, no inflated-monitor handling.
       Label DONE_LABEL;
       if (UseBiasedLocking) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }

       // Check whether the displaced header is 0
       //(=> recursive unlock)
       masm.movptr(tmpReg, Address(boxReg, 0));
       masm.testptr(tmpReg, tmpReg);
       masm.jcc(Assembler::zero, DONE_LABEL);

       // If not recursive lock, reset the header to displaced header
       if (os::is_MP()) {
         masm.lock();
       }
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
       masm.bind(DONE_LABEL);
       masm.nop(); // avoid branch to branch
    } else {
       // Default code shape: handles recursive stack locks, plain stack locks
       // and inflated monitors.
       Label DONE_LABEL, Stacked, CheckSucc ;

       if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }

       // tmpReg = word at offset 0 of the object.  A zero displaced header in
       // the box ends the sequence immediately with ZF=1.
       masm.movptr(tmpReg, Address(objReg, 0)) ;
       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
       masm.jcc   (Assembler::zero, DONE_LABEL) ;
       // Bit 0x02 clear => take the Stacked path; set => inflated path.
       masm.testl (tmpReg, 0x02) ;
       masm.jcc   (Assembler::zero, Stacked) ;

       // It's inflated
       // tmpReg still carries the 0x02 bit tested above, which is presumably
       // why every ObjectMonitor field displacement below is biased by -2.
       //
       // boxReg = (owner ^ current thread) | recursions.  A non-zero result
       // exits with ZF=0.
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
       masm.xorptr(boxReg, r15_thread) ;
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
       // boxReg = cxq | EntryList.  Non-zero goes to CheckSucc; otherwise the
       // owner field is cleared and the sequence ends with the ZF=1 produced
       // by the OR (the store and the jump do not modify the flags).
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
       masm.jcc   (Assembler::notZero, CheckSucc) ;
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
       masm.jmp   (DONE_LABEL) ;

       if ((EmitSync & 65536) == 0) {
         Label LSuccess, LGoSlowPath ;
         masm.bind  (CheckSucc) ;
         // No successor recorded => slow path.
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::zero, LGoSlowPath) ;

         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
         // the explicit ST;MEMBAR combination, but masm doesn't currently support
         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
         // are all faster when the write buffer is populated.
         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         if (os::is_MP()) {
            // Locked add of 0 to the top of stack, used as the barrier after
            // the owner store.
            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
         }
         // Re-check succ after clearing owner: still non-zero => success.
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
         masm.jcc   (Assembler::notZero, LSuccess) ;

         // Try to CAS the owner field from NULL (in RAX) back to this thread.
         // A failed CAS goes to LSuccess; a successful one falls through to
         // the slow path.
         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
         masm.jcc   (Assembler::notEqual, LSuccess) ;
         // Intentional fall-through into slow-path

         masm.bind  (LGoSlowPath) ;
         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
         masm.jmp   (DONE_LABEL) ;

         masm.bind  (LSuccess) ;
         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
         masm.jmp   (DONE_LABEL) ;
       }

       // Stacked path: reload the displaced header from the box and CAS it
       // into offset 0 of the object; the CAS sets ZF for the caller.
       masm.bind  (Stacked) ;
       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
       if (os::is_MP()) { masm.lock(); }
       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box

       // When the succ-checking tail above is disabled, CheckSucc is bound
       // here so that the branch to it simply ends the sequence.
       if (EmitSync & 65536) {
          masm.bind (CheckSucc) ;
       }
       masm.bind(DONE_LABEL);
       if (EmitSync & 32768) {
          masm.nop();                      // avoid branch to branch
       }
    }
  %}
3590 
3591 
3592   enc_class enc_rethrow()
3593   %{
3594     cbuf.set_insts_mark();
3595     emit_opcode(cbuf, 0xE9); // jmp entry
3596     emit_d32_reloc(cbuf,
3597                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3598                    runtime_call_Relocation::spec(),
3599                    RELOC_DISP32);
3600   %}
3601 
3602   enc_class absF_encoding(regF dst)
3603   %{
3604     int dstenc = $dst$$reg;
3605     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3606 
3607     cbuf.set_insts_mark();
3608     if (dstenc >= 8) {
3609       emit_opcode(cbuf, Assembler::REX_R);
3610       dstenc -= 8;
3611     }
3612     // XXX reg_mem doesn't support RIP-relative addressing yet
3613     emit_opcode(cbuf, 0x0F);
3614     emit_opcode(cbuf, 0x54);
3615     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3616     emit_d32_reloc(cbuf, signmask_address);
3617   %}
3618 
3619   enc_class absD_encoding(regD dst)
3620   %{
3621     int dstenc = $dst$$reg;
3622     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3623 
3624     cbuf.set_insts_mark();
3625     emit_opcode(cbuf, 0x66);
3626     if (dstenc >= 8) {
3627       emit_opcode(cbuf, Assembler::REX_R);
3628       dstenc -= 8;
3629     }
3630     // XXX reg_mem doesn't support RIP-relative addressing yet
3631     emit_opcode(cbuf, 0x0F);
3632     emit_opcode(cbuf, 0x54);
3633     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3634     emit_d32_reloc(cbuf, signmask_address);
3635   %}
3636 
3637   enc_class negF_encoding(regF dst)
3638   %{
3639     int dstenc = $dst$$reg;
3640     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3641 
3642     cbuf.set_insts_mark();
3643     if (dstenc >= 8) {
3644       emit_opcode(cbuf, Assembler::REX_R);
3645       dstenc -= 8;
3646     }
3647     // XXX reg_mem doesn't support RIP-relative addressing yet
3648     emit_opcode(cbuf, 0x0F);
3649     emit_opcode(cbuf, 0x57);
3650     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3651     emit_d32_reloc(cbuf, signflip_address);
3652   %}
3653 
3654   enc_class negD_encoding(regD dst)
3655   %{
3656     int dstenc = $dst$$reg;
3657     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3658 
3659     cbuf.set_insts_mark();
3660     emit_opcode(cbuf, 0x66);
3661     if (dstenc >= 8) {
3662       emit_opcode(cbuf, Assembler::REX_R);
3663       dstenc -= 8;
3664     }
3665     // XXX reg_mem doesn't support RIP-relative addressing yet
3666     emit_opcode(cbuf, 0x0F);
3667     emit_opcode(cbuf, 0x57);
3668     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3669     emit_d32_reloc(cbuf, signflip_address);
3670   %}
3671 
3672   enc_class f2i_fixup(rRegI dst, regF src)
3673   %{
3674     int dstenc = $dst$$reg;
3675     int srcenc = $src$$reg;
3676 
3677     // cmpl $dst, #0x80000000
3678     if (dstenc >= 8) {
3679       emit_opcode(cbuf, Assembler::REX_B);
3680     }
3681     emit_opcode(cbuf, 0x81);
3682     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3683     emit_d32(cbuf, 0x80000000);
3684 
3685     // jne,s done
3686     emit_opcode(cbuf, 0x75);
3687     if (srcenc < 8 && dstenc < 8) {
3688       emit_d8(cbuf, 0xF);
3689     } else if (srcenc >= 8 && dstenc >= 8) {
3690       emit_d8(cbuf, 0x11);
3691     } else {
3692       emit_d8(cbuf, 0x10);
3693     }
3694 
3695     // subq rsp, #8
3696     emit_opcode(cbuf, Assembler::REX_W);
3697     emit_opcode(cbuf, 0x83);
3698     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3699     emit_d8(cbuf, 8);
3700 
3701     // movss [rsp], $src
3702     emit_opcode(cbuf, 0xF3);
3703     if (srcenc >= 8) {
3704       emit_opcode(cbuf, Assembler::REX_R);
3705     }
3706     emit_opcode(cbuf, 0x0F);
3707     emit_opcode(cbuf, 0x11);
3708     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3709 
3710     // call f2i_fixup
3711     cbuf.set_insts_mark();
3712     emit_opcode(cbuf, 0xE8);
3713     emit_d32_reloc(cbuf,
3714                    (int)
3715                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3716                    runtime_call_Relocation::spec(),
3717                    RELOC_DISP32);
3718 
3719     // popq $dst
3720     if (dstenc >= 8) {
3721       emit_opcode(cbuf, Assembler::REX_B);
3722     }
3723     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3724 
3725     // done:
3726   %}
3727 
3728   enc_class f2l_fixup(rRegL dst, regF src)
3729   %{
3730     int dstenc = $dst$$reg;
3731     int srcenc = $src$$reg;
3732     address const_address = (address) StubRoutines::x86::double_sign_flip();
3733 
3734     // cmpq $dst, [0x8000000000000000]
3735     cbuf.set_insts_mark();
3736     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3737     emit_opcode(cbuf, 0x39);
3738     // XXX reg_mem doesn't support RIP-relative addressing yet
3739     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3740     emit_d32_reloc(cbuf, const_address);
3741 
3742 
3743     // jne,s done
3744     emit_opcode(cbuf, 0x75);
3745     if (srcenc < 8 && dstenc < 8) {
3746       emit_d8(cbuf, 0xF);
3747     } else if (srcenc >= 8 && dstenc >= 8) {
3748       emit_d8(cbuf, 0x11);
3749     } else {
3750       emit_d8(cbuf, 0x10);
3751     }
3752 
3753     // subq rsp, #8
3754     emit_opcode(cbuf, Assembler::REX_W);
3755     emit_opcode(cbuf, 0x83);
3756     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3757     emit_d8(cbuf, 8);
3758 
3759     // movss [rsp], $src
3760     emit_opcode(cbuf, 0xF3);
3761     if (srcenc >= 8) {
3762       emit_opcode(cbuf, Assembler::REX_R);
3763     }
3764     emit_opcode(cbuf, 0x0F);
3765     emit_opcode(cbuf, 0x11);
3766     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3767 
3768     // call f2l_fixup
3769     cbuf.set_insts_mark();
3770     emit_opcode(cbuf, 0xE8);
3771     emit_d32_reloc(cbuf,
3772                    (int)
3773                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3774                    runtime_call_Relocation::spec(),
3775                    RELOC_DISP32);
3776 
3777     // popq $dst
3778     if (dstenc >= 8) {
3779       emit_opcode(cbuf, Assembler::REX_B);
3780     }
3781     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3782 
3783     // done:
3784   %}
3785 
3786   enc_class d2i_fixup(rRegI dst, regD src)
3787   %{
3788     int dstenc = $dst$$reg;
3789     int srcenc = $src$$reg;
3790 
3791     // cmpl $dst, #0x80000000
3792     if (dstenc >= 8) {
3793       emit_opcode(cbuf, Assembler::REX_B);
3794     }
3795     emit_opcode(cbuf, 0x81);
3796     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3797     emit_d32(cbuf, 0x80000000);
3798 
3799     // jne,s done
3800     emit_opcode(cbuf, 0x75);
3801     if (srcenc < 8 && dstenc < 8) {
3802       emit_d8(cbuf, 0xF);
3803     } else if (srcenc >= 8 && dstenc >= 8) {
3804       emit_d8(cbuf, 0x11);
3805     } else {
3806       emit_d8(cbuf, 0x10);
3807     }
3808 
3809     // subq rsp, #8
3810     emit_opcode(cbuf, Assembler::REX_W);
3811     emit_opcode(cbuf, 0x83);
3812     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3813     emit_d8(cbuf, 8);
3814 
3815     // movsd [rsp], $src
3816     emit_opcode(cbuf, 0xF2);
3817     if (srcenc >= 8) {
3818       emit_opcode(cbuf, Assembler::REX_R);
3819     }
3820     emit_opcode(cbuf, 0x0F);
3821     emit_opcode(cbuf, 0x11);
3822     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3823 
3824     // call d2i_fixup
3825     cbuf.set_insts_mark();
3826     emit_opcode(cbuf, 0xE8);
3827     emit_d32_reloc(cbuf,
3828                    (int)
3829                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3830                    runtime_call_Relocation::spec(),
3831                    RELOC_DISP32);
3832 
3833     // popq $dst
3834     if (dstenc >= 8) {
3835       emit_opcode(cbuf, Assembler::REX_B);
3836     }
3837     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3838 
3839     // done:
3840   %}
3841 
3842   enc_class d2l_fixup(rRegL dst, regD src)
3843   %{
3844     int dstenc = $dst$$reg;
3845     int srcenc = $src$$reg;
3846     address const_address = (address) StubRoutines::x86::double_sign_flip();
3847 
3848     // cmpq $dst, [0x8000000000000000]
3849     cbuf.set_insts_mark();
3850     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3851     emit_opcode(cbuf, 0x39);
3852     // XXX reg_mem doesn't support RIP-relative addressing yet
3853     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3854     emit_d32_reloc(cbuf, const_address);
3855 
3856 
3857     // jne,s done
3858     emit_opcode(cbuf, 0x75);
3859     if (srcenc < 8 && dstenc < 8) {
3860       emit_d8(cbuf, 0xF);
3861     } else if (srcenc >= 8 && dstenc >= 8) {
3862       emit_d8(cbuf, 0x11);
3863     } else {
3864       emit_d8(cbuf, 0x10);
3865     }
3866 
3867     // subq rsp, #8
3868     emit_opcode(cbuf, Assembler::REX_W);
3869     emit_opcode(cbuf, 0x83);
3870     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3871     emit_d8(cbuf, 8);
3872 
3873     // movsd [rsp], $src
3874     emit_opcode(cbuf, 0xF2);
3875     if (srcenc >= 8) {
3876       emit_opcode(cbuf, Assembler::REX_R);
3877     }
3878     emit_opcode(cbuf, 0x0F);
3879     emit_opcode(cbuf, 0x11);
3880     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3881 
3882     // call d2l_fixup
3883     cbuf.set_insts_mark();
3884     emit_opcode(cbuf, 0xE8);
3885     emit_d32_reloc(cbuf,
3886                    (int)
3887                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3888                    runtime_call_Relocation::spec(),
3889                    RELOC_DISP32);
3890 
3891     // popq $dst
3892     if (dstenc >= 8) {
3893       emit_opcode(cbuf, Assembler::REX_B);
3894     }
3895     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3896 
3897     // done:
3898   %}
3899 %}
3900 
3901 
3902 
3903 //----------FRAME--------------------------------------------------------------
3904 // Definition of frame structure and management information.
3905 //
3906 //  S T A C K   L A Y O U T    Allocators stack-slot number
3907 //                             |   (to get allocators register number
3908 //  G  Owned by    |        |  v    add OptoReg::stack0())
3909 //  r   CALLER     |        |
3910 //  o     |        +--------+      pad to even-align allocators stack-slot
3911 //  w     V        |  pad0  |        numbers; owned by CALLER
3912 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3913 //  h     ^        |   in   |  5
3914 //        |        |  args  |  4   Holes in incoming args owned by SELF
3915 //  |     |        |        |  3
3916 //  |     |        +--------+
3917 //  V     |        | old out|      Empty on Intel, window on Sparc
3918 //        |    old |preserve|      Must be even aligned.
3919 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3920 //        |        |   in   |  3   area for Intel ret address
3921 //     Owned by    |preserve|      Empty on Sparc.
3922 //       SELF      +--------+
3923 //        |        |  pad2  |  2   pad to align old SP
3924 //        |        +--------+  1
3925 //        |        | locks  |  0
3926 //        |        +--------+----> OptoReg::stack0(), even aligned
3927 //        |        |  pad1  | 11   pad to align new SP
3928 //        |        +--------+
3929 //        |        |        | 10
3930 //        |        | spills |  9   spills
3931 //        V        |        |  8   (pad0 slot for callee)
3932 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3933 //        ^        |  out   |  7
3934 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3935 //     Owned by    +--------+
3936 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3937 //        |    new |preserve|      Must be even-aligned.
3938 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3939 //        |        |        |
3940 //
3941 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3942 //         known from SELF's arguments and the Java calling convention.
3943 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3951 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3952 //         even aligned with pad0 as needed.
3953 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3954 //         region 6-11 is even aligned; it may be padded out more so that
3955 //         the region from SP to FP meets the minimum stack alignment.
3956 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3957 //         alignment.  Region 11, pad1, may be dynamically extended so that
3958 //         SP meets the minimum alignment.
3959 
// Frame description consumed by ADLC: stack direction, the registers and
// stack-slot counts that make up the compiled-code frame, and the
// calling-convention / return-value hooks.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // Two further slots are added when VerifyStackAtCalls is set.
  // NOTE(review): the base value is 4 although only two slots are attributed
  // to the return address above; presumably the other two cover the saved
  // frame pointer - confirm.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot count (2 + verification slots + fixed slots) is rounded up to a
  // multiple of WordsPerLong * 2 before the 2-slot bias is subtracted.
  return_addr(STACK - 2 +
              round_to(2 + 2 * VerifyStackAtCalls +
                       Compile::current()->fixed_slots(),
                       WordsPerLong * 2));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    // The value returned by c_calling_convention is deliberately discarded.
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  // Both tables below are indexed by ideal_reg: "lo" names the register
  // holding the first half of the value and "hi" the register for the second
  // half (OptoReg::Bad where none is listed).  The first two entries of each
  // table are placeholders.
  // NOTE(review): going by the per-entry comments Op_RegN precedes Op_RegI, so
  // the first assert rejects Op_RegN even though both tables carry an entry
  // for it - confirm this is intended.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0_H_num,   // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Guards against a new ideal register type being added without a
    // matching table entry.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4059 
4060 //----------ATTRIBUTES---------------------------------------------------------
4061 //----------Operand Attributes-------------------------------------------------
// Each attribute is declared with the value given in parentheses; individual
// operands and instructions below override it where they need to
// (e.g. op_cost(10) in immI).
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4078 
4079 //----------OPERANDS-----------------------------------------------------------
4080 // Operand definitions must precede instruction definitions for correct parsing
4081 // in the ADLC because operands constitute user defined types which are used in
4082 // instruction definitions.
4083 
4084 //----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: matches any ConI constant (no predicate).
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4095 
// Constant for test vs zero: ConI whose value is exactly 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4106 
// Constant for increment: ConI whose value is exactly 1.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4117 
// Constant for decrement: ConI whose value is exactly -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4128 
// Valid scale values for addressing modes: ConI in the range 0..3 inclusive.
// No op_cost is given here, so the declared default applies.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4138 
// Integer immediate that fits in a signed byte: -0x80 <= value < 0x80.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4148 
// Integer immediate that fits in a signed 16-bit value:
// -32768 <= value <= 32767.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4158 
// Constant for long shifts: ConI whose value is exactly 32.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4169 
// Constant for long shifts: ConI whose value is exactly 64.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4180 
// Pointer Immediate: matches any ConP constant (no predicate).
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4190 
// NULL Pointer Immediate: ConP whose get_ptr() value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4201 
// Pointer immediate that is non-zero and equal to the address returned by
// os::get_polling_page().
operand immP_poll() %{
  predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
  match(ConP);

  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}
4210 
// Narrow Pointer Immediate: matches any ConN constant (no predicate).
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4219 
// NULL Narrow Pointer Immediate: ConN whose get_narrowcon() value is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4229 
// Pointer immediate whose type is not an oop pointer and whose value
// satisfies (get_ptr() >> 31) == 0, i.e. it has no bits set at or above
// bit 31.
operand immP31()
%{
  predicate(!n->as_Type()->type()->isa_oopptr()
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4240 
4241 
// Long Immediate: matches any ConL constant (no predicate).
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
4251 
// Long Immediate 8-bit: ConL that fits in a signed byte,
// -0x80 <= value < 0x80.
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4262 
// Long Immediate 32-bit unsigned: ConL whose value survives a round trip
// through unsigned int unchanged.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4273 
// Long Immediate 32-bit signed: ConL whose value survives a round trip
// through int unchanged.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4284 
// Long Immediate zero: ConL whose value is exactly 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4295 
// Constant for increment: ConL whose value is exactly 1.
// No op_cost is given here, so the declared default applies.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4305 
// Constant for decrement: ConL whose value is exactly -1.
// No op_cost is given here, so the declared default applies.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4315 
// Long Immediate: the value 10 (ConL whose value is exactly 10).
// No op_cost is given here, so the declared default applies.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4325 
// Long immediate from 0 to 127 (0 <= value < 0x80).
// Used for a shorter form of long mul by 10.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4337 
// Long Immediate: low 32-bit mask (ConL whose value is exactly 0xFFFFFFFF).
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4348 
// Float Immediate zero: ConF for which jint_cast(getf()) is 0.
// NOTE(review): presumably jint_cast is a raw bit reinterpretation, in which
// case only +0.0f matches and -0.0f does not - confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4359 
// Float Immediate: matches any ConF constant (no predicate).
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4369 
// Double Immediate zero: ConD for which jlong_cast(getd()) is 0.
// NOTE(review): presumably jlong_cast is a raw bit reinterpretation, in which
// case only +0.0 matches and -0.0 does not - confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4380 
// Double Immediate: matches any ConD constant (no predicate).
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4390 
// Immediates for special shifts (sign extend)

// Integer immediate: ConI whose value is exactly 16.
// No op_cost is given here, so the declared default applies.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4402 
// Integer immediate: ConI whose value is exactly 24.
// No op_cost is given here, so the declared default applies.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4411 
// Constant for byte-wide masking: ConI whose value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4421 
// Constant for short-wide masking: ConI whose value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4431 
// Constant for byte-wide masking: ConL whose value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4441 
// Constant for short-wide masking: ConL whose value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4451 
// Register Operands
// Integer Register: allocated in register class int_reg.  Besides RegI it
// also accepts the single-register operands listed below.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
4468 
// Special Registers
// Integer register operand allocated in register class int_rax_reg;
// printed as "RAX".
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
4479 
// Integer register operand allocated in register class int_rbx_reg;
// printed as "RBX".
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
4490 
// Integer register operand allocated in register class int_rcx_reg;
// printed as "RCX".
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
4500 
4501 operand rdx_RegI()  // Integer operand constrained to class int_rdx_reg
4502 %{
4503   constraint(ALLOC_IN_RC(int_rdx_reg));
4504   match(RegI);
4505   match(rRegI);
4506 
4507   format %{ "RDX" %}
4508   interface(REG_INTER);
4509 %}
4510 
4511 operand rdi_RegI()  // Integer operand constrained to class int_rdi_reg
4512 %{
4513   constraint(ALLOC_IN_RC(int_rdi_reg));
4514   match(RegI);
4515   match(rRegI);
4516 
4517   format %{ "RDI" %}
4518   interface(REG_INTER);
4519 %}
4520 
4521 operand no_rcx_RegI()  // Integer operand allocated from class int_no_rcx_reg
4522 %{
4523   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4524   match(RegI);
4525   match(rax_RegI);  // fixed-register operands accepted here; rcx_RegI is not among them
4526   match(rbx_RegI);
4527   match(rdx_RegI);
4528   match(rdi_RegI);
4529 
4530   format %{ %}
4531   interface(REG_INTER);
4532 %}
4533 
4534 operand no_rax_rdx_RegI()  // Integer operand allocated from class int_no_rax_rdx_reg
4535 %{
4536   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4537   match(RegI);
4538   match(rbx_RegI);  // fixed-register operands accepted here; rax_RegI and rdx_RegI are not among them
4539   match(rcx_RegI);
4540   match(rdi_RegI);
4541 
4542   format %{ %}
4543   interface(REG_INTER);
4544 %}
4545 
4546 // Pointer Register: allocated from class any_reg; accepts every pointer operand listed below
4547 operand any_RegP()
4548 %{
4549   constraint(ALLOC_IN_RC(any_reg));
4550   match(RegP);
4551   match(rax_RegP);
4552   match(rbx_RegP);
4553   match(rdi_RegP);
4554   match(rsi_RegP);
4555   match(rbp_RegP);
4556   match(r15_RegP);
4557   match(rRegP);     // the general pointer operand is accepted as well
4558 
4559   format %{ %}
4560   interface(REG_INTER);
4561 %}
4562 
4563 operand rRegP()  // General pointer operand allocated from class ptr_reg
4564 %{
4565   constraint(ALLOC_IN_RC(ptr_reg));
4566   match(RegP);
4567   match(rax_RegP);
4568   match(rbx_RegP);
4569   match(rdi_RegP);
4570   match(rsi_RegP);
4571   match(rbp_RegP);
4572   match(r15_RegP);  // See Q&A below about r15_RegP.
4573 
4574   format %{ %}
4575   interface(REG_INTER);
4576 %}
4577 
4578 operand rRegN() %{
4579   constraint(ALLOC_IN_RC(int_reg));
4580   match(RegN);
4581   // Narrow (RegN) value operand; note it is allocated from the integer class int_reg.
4582   format %{ %}
4583   interface(REG_INTER);
4584 %}
4585 
4586 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4587 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4588 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4589 // The output of an instruction is controlled by the allocator, which respects
4590 // register class masks, not match rules.  Unless an instruction mentions
4591 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4592 // by the allocator as an output.
4593 
4594 operand no_rax_RegP()  // Pointer operand allocated from class ptr_no_rax_reg
4595 %{
4596   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4597   match(RegP);
4598   match(rbx_RegP);  // fixed-register pointer operands accepted here; rax_RegP is not among them
4599   match(rsi_RegP);
4600   match(rdi_RegP);
4601 
4602   format %{ %}
4603   interface(REG_INTER);
4604 %}
4605 
4606 operand no_rbp_RegP()  // Pointer operand allocated from class ptr_no_rbp_reg
4607 %{
4608   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4609   match(RegP);
4610   match(rbx_RegP);  // NOTE(review): rax_RegP is not listed although only rbp is excluded by name — confirm intended
4611   match(rsi_RegP);
4612   match(rdi_RegP);
4613 
4614   format %{ %}
4615   interface(REG_INTER);
4616 %}
4617 
4618 operand no_rax_rbx_RegP()  // Pointer operand allocated from class ptr_no_rax_rbx_reg
4619 %{
4620   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4621   match(RegP);
4622   match(rsi_RegP);  // fixed-register pointer operands accepted here; rax_RegP and rbx_RegP are not among them
4623   match(rdi_RegP);
4624 
4625   format %{ %}
4626   interface(REG_INTER);
4627 %}
4628 
4629 // Special Registers: pointer operand constrained to class ptr_rax_reg
4630 // Return a pointer value
4631 operand rax_RegP()
4632 %{
4633   constraint(ALLOC_IN_RC(ptr_rax_reg));
4634   match(RegP);
4635   match(rRegP);
4636 
4637   format %{ %}
4638   interface(REG_INTER);
4639 %}
4640 
4641 // Special Registers: narrow (RegN) operand constrained to class int_rax_reg
4642 // Return a compressed pointer value
4643 operand rax_RegN()
4644 %{
4645   constraint(ALLOC_IN_RC(int_rax_reg));
4646   match(RegN);
4647   match(rRegN);
4648 
4649   format %{ %}
4650   interface(REG_INTER);
4651 %}
4652 
4653 // Pointer operand constrained to class ptr_rbx_reg.  Used in AtomicAdd
4654 operand rbx_RegP()
4655 %{
4656   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4657   match(RegP);
4658   match(rRegP);
4659 
4660   format %{ %}
4661   interface(REG_INTER);
4662 %}
4663 
4664 operand rsi_RegP()  // Pointer operand constrained to class ptr_rsi_reg
4665 %{
4666   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4667   match(RegP);
4668   match(rRegP);
4669 
4670   format %{ %}
4671   interface(REG_INTER);
4672 %}
4673 
4674 // Pointer operand constrained to class ptr_rdi_reg.  Used in rep stosq
4675 operand rdi_RegP()
4676 %{
4677   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4678   match(RegP);
4679   match(rRegP);
4680 
4681   format %{ %}
4682   interface(REG_INTER);
4683 %}
4684 
4685 operand rbp_RegP()  // Pointer operand constrained to class ptr_rbp_reg
4686 %{
4687   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4688   match(RegP);
4689   match(rRegP);
4690 
4691   format %{ %}
4692   interface(REG_INTER);
4693 %}
4694 
4695 operand r15_RegP()  // Pointer operand constrained to class ptr_r15_reg (the read-only TLS register, per the Q&A comment in this file)
4696 %{
4697   constraint(ALLOC_IN_RC(ptr_r15_reg));
4698   match(RegP);
4699   match(rRegP);
4700 
4701   format %{ %}
4702   interface(REG_INTER);
4703 %}
4704 
4705 operand rRegL()  // General long operand allocated from class long_reg
4706 %{
4707   constraint(ALLOC_IN_RC(long_reg));
4708   match(RegL);
4709   match(rax_RegL);  // fixed-register long operands that also match as rRegL
4710   match(rdx_RegL);
4711 
4712   format %{ %}
4713   interface(REG_INTER);
4714 %}
4715 
4716 // Special Registers: long operand allocated from class long_no_rax_rdx_reg
4717 operand no_rax_rdx_RegL()
4718 %{
4719   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4720   match(RegL);
4721   match(rRegL);
4722 
4723   format %{ %}
4724   interface(REG_INTER);
4725 %}
4726 
4727 operand no_rax_RegL()  // Long operand intended (by name) to exclude RAX
4728 %{
4729   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL, yet rdx_RegL is matched below — confirm intended
4730   match(RegL);
4731   match(rRegL);
4732   match(rdx_RegL);
4733 
4734   format %{ %}
4735   interface(REG_INTER);
4736 %}
4737 
4738 operand no_rcx_RegL()  // Long operand allocated from class long_no_rcx_reg
4739 %{
4740   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4741   match(RegL);
4742   match(rRegL);
4743 
4744   format %{ %}
4745   interface(REG_INTER);
4746 %}
4747 
4748 operand rax_RegL()  // Long operand constrained to class long_rax_reg
4749 %{
4750   constraint(ALLOC_IN_RC(long_rax_reg));
4751   match(RegL);
4752   match(rRegL);
4753 
4754   format %{ "RAX" %}
4755   interface(REG_INTER);
4756 %}
4757 
4758 operand rcx_RegL()  // Long operand constrained to class long_rcx_reg
4759 %{
4760   constraint(ALLOC_IN_RC(long_rcx_reg));
4761   match(RegL);
4762   match(rRegL);
4763 
4764   format %{ %}
4765   interface(REG_INTER);
4766 %}
4767 
4768 operand rdx_RegL()  // Long operand constrained to class long_rdx_reg
4769 %{
4770   constraint(ALLOC_IN_RC(long_rdx_reg));
4771   match(RegL);
4772   match(rRegL);
4773 
4774   format %{ %}
4775   interface(REG_INTER);
4776 %}
4777 
4778 // Flags register (class int_flags), used as output of compare instructions
4779 operand rFlagsReg()
4780 %{
4781   constraint(ALLOC_IN_RC(int_flags));
4782   match(RegFlags);
4783 
4784   format %{ "RFLAGS" %}
4785   interface(REG_INTER);
4786 %}
4787 
4788 // Flags register, used as output of FLOATING POINT compare instructions
4789 operand rFlagsRegU()
4790 %{
4791   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg; only the operand type differs
4792   match(RegFlags);
4793 
4794   format %{ "RFLAGS_U" %}
4795   interface(REG_INTER);
4796 %}
4797 
4798 operand rFlagsRegUCF() %{
4799   constraint(ALLOC_IN_RC(int_flags));
4800   match(RegFlags);
4801   predicate(false);  // never satisfied by the matcher itself; presumably used only where an instruction names this operand — TODO confirm
4802   // Flags operand variant in class int_flags, printed as RFLAGS_U_CF.
4803   format %{ "RFLAGS_U_CF" %}
4804   interface(REG_INTER);
4805 %}
4806 
4807 // Float register operand, allocated from class float_reg
4808 operand regF()
4809 %{
4810   constraint(ALLOC_IN_RC(float_reg));
4811   match(RegF);
4812 
4813   format %{ %}
4814   interface(REG_INTER);
4815 %}
4816 
4817 // Double register operand, allocated from class double_reg
4818 operand regD()
4819 %{
4820   constraint(ALLOC_IN_RC(double_reg));
4821   match(RegD);
4822 
4823   format %{ %}
4824   interface(REG_INTER);
4825 %}
4826 
4827 
4828 //----------Memory Operands----------------------------------------------------
4829 // Direct Memory Operand
4830 // operand direct(immP addr)
4831 // %{
4832 //   match(addr);
4833 
4834 //   format %{ "[$addr]" %}
4835 //   interface(MEMORY_INTER) %{
4836 //     base(0xFFFFFFFF);
4837 //     index(0x4);
4838 //     scale(0x0);
4839 //     disp($addr);
4840 //   %}
4841 // %}
4842 
4843 // Indirect Memory Operand: the address is the pointer register itself, [reg]
4844 operand indirect(any_RegP reg)
4845 %{
4846   constraint(ALLOC_IN_RC(ptr_reg));
4847   match(reg);
4848 
4849   format %{ "[$reg]" %}
4850   interface(MEMORY_INTER) %{
4851     base($reg);
4852     index(0x4);  // No Index
4853     scale(0x0);  // No Scale
4854     disp(0x0);   // No Displacement
4855   %}
4856 %}
4857 
4858 // Indirect Memory Plus Short Offset Operand: [reg + off], off typed immL8
4859 operand indOffset8(any_RegP reg, immL8 off)
4860 %{
4861   constraint(ALLOC_IN_RC(ptr_reg));
4862   match(AddP reg off);
4863 
4864   format %{ "[$reg + $off (8-bit)]" %}
4865   interface(MEMORY_INTER) %{
4866     base($reg);
4867     index(0x4);  // No Index
4868     scale(0x0);  // No Scale
4869     disp($off);
4870   %}
4871 %}
4872 
4873 // Indirect Memory Plus Long Offset Operand: [reg + off], off typed immL32
4874 operand indOffset32(any_RegP reg, immL32 off)
4875 %{
4876   constraint(ALLOC_IN_RC(ptr_reg));
4877   match(AddP reg off);
4878 
4879   format %{ "[$reg + $off (32-bit)]" %}
4880   interface(MEMORY_INTER) %{
4881     base($reg);
4882     index(0x4);  // No Index
4883     scale(0x0);  // No Scale
4884     disp($off);
4885   %}
4886 %}
4887 
4888 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
4889 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4890 %{
4891   constraint(ALLOC_IN_RC(ptr_reg));
4892   match(AddP (AddP reg lreg) off);
4893 
4894   op_cost(10);
4895   format %{"[$reg + $off + $lreg]" %}
4896   interface(MEMORY_INTER) %{
4897     base($reg);
4898     index($lreg);
4899     scale(0x0);  // No Scale
4900     disp($off);
4901   %}
4902 %}
4903 
4904 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset
4905 operand indIndex(any_RegP reg, rRegL lreg)
4906 %{
4907   constraint(ALLOC_IN_RC(ptr_reg));
4908   match(AddP reg lreg);
4909 
4910   op_cost(10);
4911   format %{"[$reg + $lreg]" %}
4912   interface(MEMORY_INTER) %{
4913     base($reg);
4914     index($lreg);
4915     scale(0x0);  // No Scale
4916     disp(0x0);   // No Displacement
4917   %}
4918 %}
4919 
4920 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)]
4921 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4922 %{
4923   constraint(ALLOC_IN_RC(ptr_reg));
4924   match(AddP reg (LShiftL lreg scale));
4925 
4926   op_cost(10);
4927   format %{"[$reg + $lreg << $scale]" %}
4928   interface(MEMORY_INTER) %{
4929     base($reg);
4930     index($lreg);
4931     scale($scale);
4932     disp(0x0);   // No Displacement
4933   %}
4934 %}
4935 
4936 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
4937 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4938 %{
4939   constraint(ALLOC_IN_RC(ptr_reg));
4940   match(AddP (AddP reg (LShiftL lreg scale)) off);
4941   // All four address components come from the matched subtree.
4942   op_cost(10);
4943   format %{"[$reg + $off + $lreg << $scale]" %}
4944   interface(MEMORY_INTER) %{
4945     base($reg);
4946     index($lreg);
4947     scale($scale);
4948     disp($off);
4949   %}
4950 %}
4951 
4952 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
4953 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4954 %{
4955   constraint(ALLOC_IN_RC(ptr_reg));
4956   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4957   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4958   // The predicate requires a non-negative lower bound on the long type of the node reached via in(2)->in(3)->in(1) (presumably the ConvI2L — TODO confirm), so the int idx register is used directly as the index.
4959   op_cost(10);
4960   format %{"[$reg + $off + $idx << $scale]" %}
4961   interface(MEMORY_INTER) %{
4962     base($reg);
4963     index($idx);
4964     scale($scale);
4965     disp($off);
4966   %}
4967 %}
4968 
4969 // Indirect Narrow Oop Plus Offset Operand
4970 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
4971 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4972 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4973   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
4974   constraint(ALLOC_IN_RC(ptr_reg));
4975   match(AddP (DecodeN reg) off);
4976   // The DecodeN is folded into the address: the narrow oop becomes the index, scaled by 8, with R12 as base.
4977   op_cost(10);
4978   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4979   interface(MEMORY_INTER) %{
4980     base(0xc); // R12
4981     index($reg);
4982     scale(0x3);  // shift by 3, matching the times_8 predicate above
4983     disp($off);
4984   %}
4985 %}
4986 
4987 // Indirect Memory Operand through a narrow oop: [reg], only when the narrow oop shift is 0
4988 operand indirectNarrow(rRegN reg)
4989 %{
4990   predicate(Universe::narrow_oop_shift() == 0);
4991   constraint(ALLOC_IN_RC(ptr_reg));
4992   match(DecodeN reg);
4993   // The narrow register is used directly as the base; presumably valid because decoding is a no-op when the shift is 0 — TODO confirm.
4994   format %{ "[$reg]" %}
4995   interface(MEMORY_INTER) %{
4996     base($reg);
4997     index(0x4);  // No Index
4998     scale(0x0);  // No Scale
4999     disp(0x0);   // No Displacement
5000   %}
5001 %}
5002 
5003 // Indirect Memory Plus Short Offset Operand through a narrow oop: [reg + off], only when the narrow oop shift is 0
5004 operand indOffset8Narrow(rRegN reg, immL8 off)
5005 %{
5006   predicate(Universe::narrow_oop_shift() == 0);
5007   constraint(ALLOC_IN_RC(ptr_reg));
5008   match(AddP (DecodeN reg) off);
5009 
5010   format %{ "[$reg + $off (8-bit)]" %}
5011   interface(MEMORY_INTER) %{
5012     base($reg);
5013     index(0x4);  // No Index
5014     scale(0x0);  // No Scale
5015     disp($off);
5016   %}
5017 %}
5018 
5019 // Indirect Memory Plus Long Offset Operand through a narrow oop: [reg + off], only when the narrow oop shift is 0
5020 operand indOffset32Narrow(rRegN reg, immL32 off)
5021 %{
5022   predicate(Universe::narrow_oop_shift() == 0);
5023   constraint(ALLOC_IN_RC(ptr_reg));
5024   match(AddP (DecodeN reg) off);
5025 
5026   format %{ "[$reg + $off (32-bit)]" %}
5027   interface(MEMORY_INTER) %{
5028     base($reg);
5029     index(0x4);  // No Index
5030     scale(0x0);  // No Scale
5031     disp($off);
5032   %}
5033 %}
5034 
5035 // Indirect Memory Plus Index Register Plus Offset Operand through a narrow oop; only when the narrow oop shift is 0
5036 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5037 %{
5038   predicate(Universe::narrow_oop_shift() == 0);
5039   constraint(ALLOC_IN_RC(ptr_reg));
5040   match(AddP (AddP (DecodeN reg) lreg) off);
5041 
5042   op_cost(10);
5043   format %{"[$reg + $off + $lreg]" %}
5044   interface(MEMORY_INTER) %{
5045     base($reg);
5046     index($lreg);
5047     scale(0x0);  // No Scale
5048     disp($off);
5049   %}
5050 %}
5051 
5052 // Indirect Memory Plus Index Register Operand through a narrow oop (no offset); only when the narrow oop shift is 0
5053 operand indIndexNarrow(rRegN reg, rRegL lreg)
5054 %{
5055   predicate(Universe::narrow_oop_shift() == 0);
5056   constraint(ALLOC_IN_RC(ptr_reg));
5057   match(AddP (DecodeN reg) lreg);
5058 
5059   op_cost(10);
5060   format %{"[$reg + $lreg]" %}
5061   interface(MEMORY_INTER) %{
5062     base($reg);
5063     index($lreg);
5064     scale(0x0);  // No Scale
5065     disp(0x0);   // No Displacement
5066   %}
5067 %}
5068 
5069 // Indirect Memory Times Scale Plus Index Register through a narrow oop; only when the narrow oop shift is 0
5070 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5071 %{
5072   predicate(Universe::narrow_oop_shift() == 0);
5073   constraint(ALLOC_IN_RC(ptr_reg));
5074   match(AddP (DecodeN reg) (LShiftL lreg scale));
5075 
5076   op_cost(10);
5077   format %{"[$reg + $lreg << $scale]" %}
5078   interface(MEMORY_INTER) %{
5079     base($reg);
5080     index($lreg);
5081     scale($scale);
5082     disp(0x0);   // No Displacement
5083   %}
5084 %}
5085 
5086 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand through a narrow oop; only when the narrow oop shift is 0
5087 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5088 %{
5089   predicate(Universe::narrow_oop_shift() == 0);
5090   constraint(ALLOC_IN_RC(ptr_reg));
5091   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5092   // All four address components come from the matched subtree.
5093   op_cost(10);
5094   format %{"[$reg + $off + $lreg << $scale]" %}
5095   interface(MEMORY_INTER) %{
5096     base($reg);
5097     index($lreg);
5098     scale($scale);
5099     disp($off);
5100   %}
5101 %}
5102 
5103 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand through a narrow oop
5104 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5105 %{
5106   constraint(ALLOC_IN_RC(ptr_reg));
5107   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5108   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5109   // Requires a zero narrow oop shift and a non-negative lower bound on the long type reached via in(2)->in(3)->in(1) (presumably the ConvI2L — TODO confirm).
5110   op_cost(10);
5111   format %{"[$reg + $off + $idx << $scale]" %}
5112   interface(MEMORY_INTER) %{
5113     base($reg);
5114     index($idx);
5115     scale($scale);
5116     disp($off);
5117   %}
5118 %}
5119 
5120 
5121 //----------Special Memory Operands--------------------------------------------
5122 // Stack Slot Operand - This operand is used for loading and storing temporary
5123 //                      values on the stack where a match requires a value to
5124 //                      flow through memory.
5125 operand stackSlotP(sRegP reg)  // Pointer-typed stack slot, addressed as [RSP + $reg]
5126 %{
5127   constraint(ALLOC_IN_RC(stack_slots));
5128   // No match rule because this operand is only generated in matching
5129 
5130   format %{ "[$reg]" %}
5131   interface(MEMORY_INTER) %{
5132     base(0x4);   // RSP
5133     index(0x4);  // No Index
5134     scale(0x0);  // No Scale
5135     disp($reg);  // Stack Offset
5136   %}
5137 %}
5138 
5139 operand stackSlotI(sRegI reg)  // Integer-typed stack slot, addressed as [RSP + $reg]
5140 %{
5141   constraint(ALLOC_IN_RC(stack_slots));
5142   // No match rule because this operand is only generated in matching
5143 
5144   format %{ "[$reg]" %}
5145   interface(MEMORY_INTER) %{
5146     base(0x4);   // RSP
5147     index(0x4);  // No Index
5148     scale(0x0);  // No Scale
5149     disp($reg);  // Stack Offset
5150   %}
5151 %}
5152 
5153 operand stackSlotF(sRegF reg)  // Float-typed stack slot, addressed as [RSP + $reg]
5154 %{
5155   constraint(ALLOC_IN_RC(stack_slots));
5156   // No match rule because this operand is only generated in matching
5157 
5158   format %{ "[$reg]" %}
5159   interface(MEMORY_INTER) %{
5160     base(0x4);   // RSP
5161     index(0x4);  // No Index
5162     scale(0x0);  // No Scale
5163     disp($reg);  // Stack Offset
5164   %}
5165 %}
5166 
5167 operand stackSlotD(sRegD reg)  // Double-typed stack slot, addressed as [RSP + $reg]
5168 %{
5169   constraint(ALLOC_IN_RC(stack_slots));
5170   // No match rule because this operand is only generated in matching
5171 
5172   format %{ "[$reg]" %}
5173   interface(MEMORY_INTER) %{
5174     base(0x4);   // RSP
5175     index(0x4);  // No Index
5176     scale(0x0);  // No Scale
5177     disp($reg);  // Stack Offset
5178   %}
5179 %}
5180 operand stackSlotL(sRegL reg)  // Long-typed stack slot, addressed as [RSP + $reg]
5181 %{
5182   constraint(ALLOC_IN_RC(stack_slots));
5183   // No match rule because this operand is only generated in matching
5184 
5185   format %{ "[$reg]" %}
5186   interface(MEMORY_INTER) %{
5187     base(0x4);   // RSP
5188     index(0x4);  // No Index
5189     scale(0x0);  // No Scale
5190     disp($reg);  // Stack Offset
5191   %}
5192 %}
5193 
5194 //----------Conditional Branch Operands----------------------------------------
5195 // Comparison Op  - This is the operation of the comparison, and is limited to
5196 //                  the following set of codes:
5197 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5198 //
5199 // Other attributes of the comparison, such as unsignedness, are specified
5200 // by the comparison instruction that sets a condition code flags register.
5201 // That result is represented by a flags operand whose subtype is appropriate
5202 // to the unsignedness (etc.) of the comparison.
5203 //
5204 // Later, the instruction which matches both the Comparison Op (a Bool) and
5205 // the flags (produced by the Cmp) specifies the coding of the comparison op
5206 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5207 
5208 // Comparison Code, signed compare (l/ge/le/g condition mnemonics)
5209 operand cmpOp()
5210 %{
5211   match(Bool);
5212   // Each entry pairs a condition encoding with the mnemonic suffix used when printing.
5213   format %{ "" %}
5214   interface(COND_INTER) %{
5215     equal(0x4, "e");
5216     not_equal(0x5, "ne");
5217     less(0xC, "l");
5218     greater_equal(0xD, "ge");
5219     less_equal(0xE, "le");
5220     greater(0xF, "g");
5221   %}
5222 %}
5223 
5224 // Comparison Code, unsigned compare.  Used by FP also, with
5225 // C2 (unordered) turned into GT or LT already.  The other bits
5226 // C0 and C3 are turned into Carry & Zero flags.
5227 operand cmpOpU()
5228 %{
5229   match(Bool);
5230   // Ordering tests use the below/above mnemonics (b/nb/be/nbe) instead of the signed l/ge/le/g.
5231   format %{ "" %}
5232   interface(COND_INTER) %{
5233     equal(0x4, "e");
5234     not_equal(0x5, "ne");
5235     less(0x2, "b");
5236     greater_equal(0x3, "nb");
5237     less_equal(0x6, "be");
5238     greater(0x7, "nbe");
5239   %}
5240 %}
5241 
5242 
5243 // Floating comparisons that don't require any fixup for the unordered case.
5244 operand cmpOpUCF() %{
5245   match(Bool);
5246   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5247             n->as_Bool()->_test._test == BoolTest::ge ||
5248             n->as_Bool()->_test._test == BoolTest::le ||
5249             n->as_Bool()->_test._test == BoolTest::gt);  // only lt/ge/le/gt tests are accepted
5250   format %{ "" %}
5251   interface(COND_INTER) %{
5252     equal(0x4, "e");
5253     not_equal(0x5, "ne");
5254     less(0x2, "b");
5255     greater_equal(0x3, "nb");
5256     less_equal(0x6, "be");
5257     greater(0x7, "nbe");
5258   %}
5259 %}
5260 
5261 
5262 // Floating comparisons that can be fixed up with extra conditional jumps.
5263 operand cmpOpUCF2() %{
5264   match(Bool);
5265   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5266             n->as_Bool()->_test._test == BoolTest::eq);  // only ne/eq tests are accepted
5267   format %{ "" %}
5268   interface(COND_INTER) %{
5269     equal(0x4, "e");
5270     not_equal(0x5, "ne");
5271     less(0x2, "b");
5272     greater_equal(0x3, "nb");
5273     less_equal(0x6, "be");
5274     greater(0x7, "nbe");
5275   %}
5276 %}
5277 
5278 
5279 //----------OPERAND CLASSES----------------------------------------------------
5280 // Operand Classes are groups of operands that are used to simplify
5281 // instruction definitions by not requiring the AD writer to specify separate
5282 // instructions for every form of operand when the instruction accepts
5283 // multiple operand types with the same basic encoding and format.  The classic
5284 // case of this is memory operands.
5285 
5286 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5287                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5288                indCompressedOopOffset,
5289                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5290                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5291                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // every addressing-mode operand with a match rule; the stackSlot* operands are not members
5292 
5293 //----------PIPELINE-----------------------------------------------------------
5294 // Rules which define the behavior of the target architectures pipeline.
5295 pipeline %{
5296 
5297 //----------ATTRIBUTES---------------------------------------------------------
5298 attributes %{
5299   variable_size_instructions;        // Instructions are variable-sized
5300   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5301   instruction_unit_size = 1;         // Instruction unit size is 1 byte
5302   instruction_fetch_unit_size = 16;  // The processor fetches one line
5303   instruction_fetch_units = 1;       // of 16 bytes
5304 
5305   // List of nop instructions
5306   nops( MachNop );
5307 %}
5308 
5309 //----------RESOURCES----------------------------------------------------------
5310 // Resources are the functional units available to the machine
5311 // NOTE(review): the list below declares three memory units (MS0-MS2) while the description says 2 load/store ops per cycle — confirm which is intended.
5312 // Generic P2/P3 pipeline
5313 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5314 // 3 instructions decoded per cycle.
5315 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5316 // 3 ALU op, only ALU0 handles mul instructions.
5317 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5318            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5319            BR, FPU,
5320            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5321 
5322 //----------PIPELINE DESCRIPTION-----------------------------------------------
5323 // Pipeline Description specifies the stages in the machine's pipeline
5324 
5325 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes
5326 pipe_desc(S0, S1, S2, S3, S4, S5);
5327 
5328 //----------PIPELINE CLASSES---------------------------------------------------
5329 // Pipeline Classes describe the stages in which input and output are
5330 // referenced by the hardware pipeline.
5331 
5332 // Naming convention: ialu or fpu
5333 // Then: _reg
5334 // Then: _reg if there is a 2nd register
5335 // Then: _long if it's a pair of instructions implementing a long
5336 // Then: _fat if it requires the big decoder
5337 //   Or: _mem if it requires the big decoder and a memory unit.
5338 
5339 // Integer ALU reg operation: dst is read in S3 and written in S4
5340 pipe_class ialu_reg(rRegI dst)
5341 %{
5342     single_instruction;
5343     dst    : S4(write);
5344     dst    : S3(read);
5345     DECODE : S0;        // any decoder
5346     ALU    : S3;        // any alu
5347 %}
5348 
5349 // Long ALU reg operation: two instructions; dst is read in S3 and written in S4
5350 pipe_class ialu_reg_long(rRegL dst)
5351 %{
5352     instruction_count(2);
5353     dst    : S4(write);
5354     dst    : S3(read);
5355     DECODE : S0(2);     // any 2 decoders
5356     ALU    : S3(2);     // any 2 alus
5357 %}
5358 
5359 // Integer ALU reg operation using big decoder: like ialu_reg but decoding is restricted to D0
5360 pipe_class ialu_reg_fat(rRegI dst)
5361 %{
5362     single_instruction;
5363     dst    : S4(write);
5364     dst    : S3(read);
5365     D0     : S0;        // big decoder only
5366     ALU    : S3;        // any alu
5367 %}
5368 
5369 // Long ALU reg operation using big decoder: two instructions, both decoded on D0
5370 pipe_class ialu_reg_long_fat(rRegL dst)
5371 %{
5372     instruction_count(2);
5373     dst    : S4(write);
5374     dst    : S3(read);
5375     D0     : S0(2);     // big decoder only; twice
5376     ALU    : S3(2);     // any 2 alus
5377 %}
5378 
5379 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
5380 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5381 %{
5382     single_instruction;
5383     dst    : S4(write);
5384     src    : S3(read);
5385     DECODE : S0;        // any decoder
5386     ALU    : S3;        // any alu
5387 %}
5388 
5389 // Long ALU reg-reg operation: two instructions; src is read in S3, dst is written in S4
5390 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5391 %{
5392     instruction_count(2);
5393     dst    : S4(write);
5394     src    : S3(read);
5395     DECODE : S0(2);     // any 2 decoders
5396     ALU    : S3(2);     // any 2 alus
5397 %}
5398 
5399 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here)
5400 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5401 %{
5402     single_instruction;
5403     dst    : S4(write);
5404     src    : S3(read);
5405     D0     : S0;        // big decoder only
5406     ALU    : S3;        // any alu
5407 %}
5408 
5409 // Long ALU reg-reg operation using big decoder: two instructions, both decoded on D0
5410 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5411 %{
5412     instruction_count(2);
5413     dst    : S4(write);
5414     src    : S3(read);
5415     D0     : S0(2);     // big decoder only; twice
5416     ALU    : S3(2);     // any 2 alus
5417 %}
5418 
5419 // Integer ALU reg-mem operation: mem is read in S3, the ALU runs in S4, dst is written in S5
5420 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5421 %{
5422     single_instruction;
5423     dst    : S5(write);
5424     mem    : S3(read);
5425     D0     : S0;        // big decoder only
5426     ALU    : S4;        // any alu
5427     MEM    : S3;        // any mem
5428 %}
5429 
5430 // Integer mem operation (prefetch): reads mem in S3; no ALU resource and no register result
5431 pipe_class ialu_mem(memory mem)
5432 %{
5433     single_instruction;
5434     mem    : S3(read);
5435     D0     : S0;        // big decoder only
5436     MEM    : S3;        // any mem
5437 %}
5438 
5439 // Integer Store to Memory: mem operand is read in S3, src register is read in S5
5440 pipe_class ialu_mem_reg(memory mem, rRegI src)
5441 %{
5442     single_instruction;
5443     mem    : S3(read);
5444     src    : S5(read);
5445     D0     : S0;        // big decoder only
5446     ALU    : S4;        // any alu
5447     MEM    : S3;        // any mem
5448 %}
5449 
5450 // // Long Store to Memory
5451 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5452 // %{
5453 //     instruction_count(2);
5454 //     mem    : S3(read);
5455 //     src    : S5(read);
5456 //     D0     : S0(2);          // big decoder only; twice
5457 //     ALU    : S4(2);     // any 2 alus
5458 //     MEM    : S3(2);  // Both mems
5459 // %}
5460 
5461 // Integer Store to Memory, mem-imm form: only the memory operand is tracked
5462 pipe_class ialu_mem_imm(memory mem)
5463 %{
5464     single_instruction;
5465     mem    : S3(read);
5466     D0     : S0;        // big decoder only
5467     ALU    : S4;        // any alu
5468     MEM    : S3;        // any mem
5469 %}
5470 
5471 // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to decoder D0 and unit ALU0
5472 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5473 %{
5474     single_instruction;
5475     dst    : S4(write);
5476     src    : S3(read);
5477     D0     : S0;        // Big decoder only
5478     ALU0   : S3;        // only alu0
5479 %}
5480 
5481 // Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to unit ALU0
5482 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5483 %{
5484     single_instruction;
5485     dst    : S5(write);
5486     mem    : S3(read);
5487     D0     : S0;        // big decoder only
5488     ALU0   : S4;        // ALU0 only
5489     MEM    : S3;        // any mem
5490 %}
5491 
5492 // Integer ALU reg-reg operation that writes the flags register: src1/src2 read in S3, cr written in S4
5493 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5494 %{
5495     single_instruction;
5496     cr     : S4(write);
5497     src1   : S3(read);
5498     src2   : S3(read);
5499     DECODE : S0;        // any decoder
5500     ALU    : S3;        // any alu
5501 %}
5502 
5503 // Integer ALU reg-imm operation that writes the flags register: src1 read in S3, cr written in S4
5504 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5505 %{
5506     single_instruction;
5507     cr     : S4(write);
5508     src1   : S3(read);
5509     DECODE : S0;        // any decoder
5510     ALU    : S3;        // any alu
5511 %}
5512 
5513 // Integer ALU reg-mem operation that writes the flags register: src1 and memory src2 read in S3, cr written in S4
5514 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5515 %{
5516     single_instruction;
5517     cr     : S4(write);
5518     src1   : S3(read);
5519     src2   : S3(read);
5520     D0     : S0;        // big decoder only
5521     ALU    : S4;        // any alu
5522     MEM    : S3;        // any mem
5523 %}
5524 
5525 // Four-instruction sequence over three integer registers; p, q and y are all declared as reads
5526 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5527 %{
5528     instruction_count(4);
5529     y      : S4(read);
5530     q      : S3(read);
5531     p      : S3(read);
5532     DECODE : S0(4);     // any decoder
5533 %}
5534 
5535 // Conditional move reg-reg: src and flags cr read in S3, dst written in S4; no ALU resource is claimed
5536 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5537 %{
5538     single_instruction;
5539     dst    : S4(write);
5540     src    : S3(read);
5541     cr     : S3(read);
5542     DECODE : S0;        // any decoder
5543 %}
5544 
5545 // Conditional move reg-mem: memory src and flags cr read in S3, dst written in S4
5546 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5547 %{
5548     single_instruction;
5549     dst    : S4(write);
5550     src    : S3(read);
5551     cr     : S3(read);
5552     DECODE : S0;        // any decoder
5553     MEM    : S3;        // any mem
5554 %}
5555 
5556 // Conditional move reg-reg long: src and flags cr read in S3, dst written in S4
5557 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5558 %{
5559     single_instruction;  // NOTE(review): declared single_instruction although DECODE below uses a count of 2 — confirm
5560     dst    : S4(write);
5561     src    : S3(read);
5562     cr     : S3(read);
5563     DECODE : S0(2);     // any 2 decoders
5564 %}
5565 
5566 // XXX
5567 // // Conditional move double reg-reg
5568 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5569 // %{
5570 //     single_instruction;
5571 //     dst    : S4(write);
5572 //     src    : S3(read);
5573 //     cr     : S3(read);
5574 //     DECODE : S0;     // any decoder
5575 // %}
5576 
5577 // Float reg operation: two instructions on a single register; dst is declared only as a read (S3)
5578 pipe_class fpu_reg(regD dst)
5579 %{
5580     instruction_count(2);
5581     dst    : S3(read);
5582     DECODE : S0(2);     // any 2 decoders
5583     FPU    : S3;
5584 %}
5585 
5586 // Float reg-reg operation: two instructions; src read in S3, dst written in S4
5587 pipe_class fpu_reg_reg(regD dst, regD src)
5588 %{
5589     instruction_count(2);
5590     dst    : S4(write);
5591     src    : S3(read);
5592     DECODE : S0(2);     // any 2 decoders
5593     FPU    : S3;
5594 %}
5595 
5596 // Float reg-reg-reg operation: three instructions; src1/src2 read in S3, dst written in S4
5597 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5598 %{
5599     instruction_count(3);
5600     dst    : S4(write);
5601     src1   : S3(read);
5602     src2   : S3(read);
5603     DECODE : S0(3);     // any 3 decoders
5604     FPU    : S3(2);
5605 %}
5606 
5607 // Float reg-reg-reg-reg operation: four instructions; src1/src2/src3 read in S3, dst written in S4
5608 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5609 %{
5610     instruction_count(4);
5611     dst    : S4(write);
5612     src1   : S3(read);
5613     src2   : S3(read);
5614     src3   : S3(read);
5615     DECODE : S0(4);     // any decoder; the count (4) matches instruction_count(4)
5616     FPU    : S3(2);
5617 %}
5618 
5619 // Float reg-mem-reg-reg operation: four instructions; memory src1 and src2/src3 read in S3, dst written in S4
5620 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5621 %{
5622     instruction_count(4);
5623     dst    : S4(write);
5624     src1   : S3(read);
5625     src2   : S3(read);
5626     src3   : S3(read);
5627     DECODE : S1(3);     // any 3 decoders
5628     D0     : S0;        // Big decoder only
5629     FPU    : S3(2);
5630     MEM    : S3;        // any mem
5631 %}
5632 
5633 // Float reg-mem operation: two instructions; mem read in S3, FPU in S4, dst written in S5
5634 pipe_class fpu_reg_mem(regD dst, memory mem)
5635 %{
5636     instruction_count(2);
5637     dst    : S5(write);
5638     mem    : S3(read);
5639     D0     : S0;        // big decoder only
5640     DECODE : S1;        // any decoder for FPU POP
5641     FPU    : S4;
5642     MEM    : S3;        // any mem
5643 %}
5644 
5645 // Float reg-reg-mem operation: src1 and mem are read in S3, dst is written in S5
5646 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5647 %{
5648     instruction_count(3);
5649     dst    : S5(write);
5650     src1   : S3(read);
5651     mem    : S3(read);
5652     D0     : S0;        // big decoder only
5653     DECODE : S1(2);     // any 2 decoders for FPU POP
5654     FPU    : S4;
5655     MEM    : S3;        // any mem
5656 %}
5657 
5658 // Float mem-reg operation: both operands are declared as reads (src in S5, mem in S3)
5659 pipe_class fpu_mem_reg(memory mem, regD src)
5660 %{
5661     instruction_count(2);
5662     src    : S5(read);
5663     mem    : S3(read);
5664     DECODE : S0;        // any decoder for FPU PUSH
5665     D0     : S1;        // big decoder only
5666     FPU    : S4;
5667     MEM    : S3;        // any mem
5668 %}
5669 
5670 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5671 %{
5672     instruction_count(3);  // Float mem-reg-reg operation
5673     src1   : S3(read);
5674     src2   : S3(read);
5675     mem    : S3(read);     // all three operands are declared as reads in S3
5676     DECODE : S0(2);     // any 2 decoders for FPU PUSH
5677     D0     : S1;        // big decoder only
5678     FPU    : S4;
5679     MEM    : S3;        // any mem
5680 %}
5681 
5682 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5683 %{
5684     instruction_count(3);  // Float mem-reg-mem operation
5685     src1   : S3(read);
5686     src2   : S3(read);
5687     mem    : S4(read);     // the first memory operand is read one stage after the sources
5688     DECODE : S0;        // any decoder for FPU PUSH
5689     D0     : S0(2);     // big decoder only
5690     FPU    : S4;
5691     MEM    : S3(2);     // any mem
5692 %}
5693 
5694 pipe_class fpu_mem_mem(memory dst, memory src1)
5695 %{
5696     instruction_count(2);  // Float mem-mem operation; no FPU resource is claimed in this class
5697     src1   : S3(read);
5698     dst    : S4(read);     // dst is declared as a read, not a write
5699     D0     : S0(2);     // big decoder only
5700     MEM    : S3(2);     // any mem
5701 %}
5702 
5703 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5704 %{
5705     instruction_count(3);  // Float mem-mem-mem operation
5706     src1   : S3(read);
5707     src2   : S3(read);
5708     dst    : S4(read);     // dst is declared as a read, not a write
5709     D0     : S0(3);     // big decoder only
5710     FPU    : S4;
5711     MEM    : S3(3);     // any mem
5712 %}
5713 
5714 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5715 %{
5716     instruction_count(3);  // Float mem-reg-con class; only mem and src1 are pipeline operands
5717     src1   : S4(read);
5718     mem    : S4(read);
5719     DECODE : S0;        // any decoder for FPU PUSH
5720     D0     : S0(2);     // big decoder only
5721     FPU    : S4;
5722     MEM    : S3(2);     // any mem
5723 %}
5724 
5725 // Float load constant: dst is the only operand and is written in S5
5726 pipe_class fpu_reg_con(regD dst)
5727 %{
5728     instruction_count(2);
5729     dst    : S5(write);
5730     D0     : S0;        // big decoder only for the load
5731     DECODE : S1;        // any decoder for FPU POP
5732     FPU    : S4;
5733     MEM    : S3;        // any mem
5734 %}
5735 
5736 // Float load constant with a register source: src is read in S3, dst is written in S5
5737 pipe_class fpu_reg_reg_con(regD dst, regD src)
5738 %{
5739     instruction_count(3);
5740     dst    : S5(write);
5741     src    : S3(read);
5742     D0     : S0;        // big decoder only for the load
5743     DECODE : S1(2);     // any 2 decoders for FPU POP
5744     FPU    : S4;
5745     MEM    : S3;        // any mem
5746 %}
5747 
5748 // Unconditional branch: a single instruction that claims the BR resource in S3
5749 pipe_class pipe_jmp(label labl)
5750 %{
5751     single_instruction;
5752     BR   : S3;
5753 %}
5754 
5755 // Conditional branch: reads the flags operand cr in S1 and claims BR in S3
5756 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5757 %{
5758     single_instruction;
5759     cr    : S1(read);
5760     BR    : S3;
5761 %}
5762 
5763 // Allocation idiom (cmpxchg): heap_ptr is read in S3 and dst is written in S5
5764 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5765 %{
5766     instruction_count(1); force_serialization;
5767     fixed_latency(6);      // latency is stated explicitly for this class
5768     heap_ptr : S3(read);
5769     DECODE   : S0(3);
5770     D0       : S2;
5771     MEM      : S3;
5772     ALU      : S3(2);
5773     dst      : S5(write);
5774     BR       : S5;
5775 %}
5776 
5777 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, fixed latency 100
5778 pipe_class pipe_slow()
5779 %{
5780     instruction_count(10); multiple_bundles; force_serialization;
5781     fixed_latency(100);
5782     D0  : S0(2);
5783     MEM : S3(2);
5784 %}
5785 
5786 // The real do-nothing guy: zero instructions, no operands, no resources
5787 pipe_class empty()
5788 %{
5789     instruction_count(0);
5790 %}
5791 
5792 // Define the class for the Nop node: MachNop is scheduled with the 'empty' pipe class
5793 define
5794 %{
5795    MachNop = empty;
5796 %}
5797 
5798 %}
5799 
5800 //----------INSTRUCTIONS-------------------------------------------------------
5801 //
5802 // match      -- States which machine-independent subtree may be replaced
5803 //               by this instruction.
5804 // ins_cost   -- The estimated cost of this instruction is used by instruction
5805 //               selection to identify a minimum cost tree of machine
5806 //               instructions that matches a tree of machine-independent
5807 //               instructions.
5808 // format     -- A string providing the disassembly for this instruction.
5809 //               The value of an instruction's operand may be inserted
5810 //               by referring to it with a '$' prefix.
5811 // opcode     -- Three instruction opcodes may be provided.  These are referred
5812 //               to within an encode class as $primary, $secondary, and $tertiary
5813 //               respectively.  The primary opcode is commonly used to
5814 //               indicate the type of machine instruction, while secondary
5815 //               and tertiary are often used for prefix options or addressing
5816 //               modes.
5817 // ins_encode -- A list of encode classes with parameters. The encode class
5818 //               name must have been defined in an 'enc_class' specification
5819 //               in the encode section of the architecture description.
5820 
5821 
5822 //----------Load/Store/Move Instructions---------------------------------------
5823 //----------Load Instructions--------------------------------------------------
5824 
5825 // Load Byte (8 bit signed): LoadB becomes one movsbl into a 32-bit int register
5826 instruct loadB(rRegI dst, memory mem)
5827 %{
5828   match(Set dst (LoadB mem));
5829 
5830   ins_cost(125);
5831   format %{ "movsbl  $dst, $mem\t# byte" %}
5832 
5833   ins_encode %{
5834     __ movsbl($dst$$Register, $mem$$Address);
5835   %}
5836 
5837   ins_pipe(ialu_reg_mem);
5838 %}
5839 
5840 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
5841 instruct loadB2L(rRegL dst, memory mem)
5842 %{
5843   match(Set dst (ConvI2L (LoadB mem)));
5844 
5845   ins_cost(125);
5846   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5847 
5848   ins_encode %{
5849     __ movsbq($dst$$Register, $mem$$Address);
5850   %}
5851 
5852   ins_pipe(ialu_reg_mem);
5853 %}
5854 
5855 // Load Unsigned Byte (8 bit UNsigned): LoadUB becomes one movzbl
5856 instruct loadUB(rRegI dst, memory mem)
5857 %{
5858   match(Set dst (LoadUB mem));
5859 
5860   ins_cost(125);
5861   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5862 
5863   ins_encode %{
5864     __ movzbl($dst$$Register, $mem$$Address);
5865   %}
5866 
5867   ins_pipe(ialu_reg_mem);
5868 %}
5869 
5870 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq
5871 instruct loadUB2L(rRegL dst, memory mem)
5872 %{
5873   match(Set dst (ConvI2L (LoadUB mem)));
5874 
5875   ins_cost(125);
5876   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5877 
5878   ins_encode %{
5879     __ movzbq($dst$$Register, $mem$$Address);
5880   %}
5881 
5882   ins_pipe(ialu_reg_mem);
5883 %}
5884 
5885 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register
5886 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5887   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5888   effect(KILL cr);  // the andl emitted below modifies the flags
5889 
5890   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5891             "andl    $dst, $mask" %}
5892   ins_encode %{
5893     Register Rdst = $dst$$Register;
5894     __ movzbq(Rdst, $mem$$Address);  // zero-extending load first, then mask in place
5895     __ andl(Rdst, $mask$$constant);
5896   %}
5897   ins_pipe(ialu_reg_mem);
5898 %}
5899 
5900 // Load Short (16 bit signed): LoadS becomes one movswl
5901 instruct loadS(rRegI dst, memory mem)
5902 %{
5903   match(Set dst (LoadS mem));
5904 
5905   ins_cost(125);
5906   format %{ "movswl $dst, $mem\t# short" %}
5907 
5908   ins_encode %{
5909     __ movswl($dst$$Register, $mem$$Address);
5910   %}
5911 
5912   ins_pipe(ialu_reg_mem);
5913 %}
5914 
5915 // Load Short (16 bit signed) to Byte (8 bit signed): (x << 24) >> 24 is replaced by a byte load
5916 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5917   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5918 
5919   ins_cost(125);
5920   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5921   ins_encode %{
5922     __ movsbl($dst$$Register, $mem$$Address);  // reads only the byte at $mem
5923   %}
5924   ins_pipe(ialu_reg_mem);
5925 %}
5926 
5927 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
5928 instruct loadS2L(rRegL dst, memory mem)
5929 %{
5930   match(Set dst (ConvI2L (LoadS mem)));
5931 
5932   ins_cost(125);
5933   format %{ "movswq $dst, $mem\t# short -> long" %}
5934 
5935   ins_encode %{
5936     __ movswq($dst$$Register, $mem$$Address);
5937   %}
5938 
5939   ins_pipe(ialu_reg_mem);
5940 %}
5941 
5942 // Load Unsigned Short/Char (16 bit UNsigned): LoadUS becomes one movzwl
5943 instruct loadUS(rRegI dst, memory mem)
5944 %{
5945   match(Set dst (LoadUS mem));
5946 
5947   ins_cost(125);
5948   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5949 
5950   ins_encode %{
5951     __ movzwl($dst$$Register, $mem$$Address);
5952   %}
5953 
5954   ins_pipe(ialu_reg_mem);
5955 %}
5956 
5957 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): shift pair replaced by a byte load
5958 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5959   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5960 
5961   ins_cost(125);
5962   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5963   ins_encode %{
5964     __ movsbl($dst$$Register, $mem$$Address);  // reads only the byte at $mem
5965   %}
5966   ins_pipe(ialu_reg_mem);
5967 %}
5968 
5969 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
5970 instruct loadUS2L(rRegL dst, memory mem)
5971 %{
5972   match(Set dst (ConvI2L (LoadUS mem)));
5973 
5974   ins_cost(125);
5975   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5976 
5977   ins_encode %{
5978     __ movzwq($dst$$Register, $mem$$Address);
5979   %}
5980 
5981   ins_pipe(ialu_reg_mem);
5982 %}
5983 
5984 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: a byte load replaces load+and
5985 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5986   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5987 
5988   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5989   ins_encode %{
5990     __ movzbq($dst$$Register, $mem$$Address);  // $mask is matched but never emitted
5991   %}
5992   ins_pipe(ialu_reg_mem);
5993 %}
5994 
5995 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
5996 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5997   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5998   effect(KILL cr);  // the andl emitted below modifies the flags
5999 
6000   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6001             "andl    $dst, $mask" %}
6002   ins_encode %{
6003     Register Rdst = $dst$$Register;
6004     __ movzwq(Rdst, $mem$$Address);  // zero-extending load first, then mask in place
6005     __ andl(Rdst, $mask$$constant);
6006   %}
6007   ins_pipe(ialu_reg_mem);
6008 %}
6009 
6010 // Load Integer: LoadI becomes one 32-bit movl from memory
6011 instruct loadI(rRegI dst, memory mem)
6012 %{
6013   match(Set dst (LoadI mem));
6014 
6015   ins_cost(125);
6016   format %{ "movl    $dst, $mem\t# int" %}
6017 
6018   ins_encode %{
6019     __ movl($dst$$Register, $mem$$Address);
6020   %}
6021 
6022   ins_pipe(ialu_reg_mem);
6023 %}
6024 
6025 // Load Integer (32 bit signed) to Byte (8 bit signed): (x << 24) >> 24 is replaced by a byte load
6026 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6027   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6028 
6029   ins_cost(125);
6030   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6031   ins_encode %{
6032     __ movsbl($dst$$Register, $mem$$Address);  // reads only the byte at $mem
6033   %}
6034   ins_pipe(ialu_reg_mem);
6035 %}
6036 
6037 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): x & 0xFF is replaced by a byte load
6038 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6039   match(Set dst (AndI (LoadI mem) mask));
6040 
6041   ins_cost(125);
6042   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6043   ins_encode %{
6044     __ movzbl($dst$$Register, $mem$$Address);  // $mask is matched but never emitted
6045   %}
6046   ins_pipe(ialu_reg_mem);
6047 %}
6048 
6049 // Load Integer (32 bit signed) to Short (16 bit signed): (x << 16) >> 16 is replaced by a 16-bit load
6050 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6051   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6052 
6053   ins_cost(125);
6054   format %{ "movswl  $dst, $mem\t# int -> short" %}
6055   ins_encode %{
6056     __ movswl($dst$$Register, $mem$$Address);  // reads only the 16 bits at $mem
6057   %}
6058   ins_pipe(ialu_reg_mem);
6059 %}
6060 
6061 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): x & 0xFFFF becomes a 16-bit load
6062 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6063   match(Set dst (AndI (LoadI mem) mask));
6064 
6065   ins_cost(125);
6066   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6067   ins_encode %{
6068     __ movzwl($dst$$Register, $mem$$Address);  // $mask is matched but never emitted
6069   %}
6070   ins_pipe(ialu_reg_mem);
6071 %}
6072 
6073 // Load Integer into Long Register: folds ConvI2L(LoadI) into one movslq
6074 instruct loadI2L(rRegL dst, memory mem)
6075 %{
6076   match(Set dst (ConvI2L (LoadI mem)));
6077 
6078   ins_cost(125);
6079   format %{ "movslq  $dst, $mem\t# int -> long" %}
6080 
6081   ins_encode %{
6082     __ movslq($dst$$Register, $mem$$Address);
6083   %}
6084 
6085   ins_pipe(ialu_reg_mem);
6086 %}
6087 
6088 // Load Integer with mask 0xFF into Long Register: a single movzbq replaces load+and+convert
6089 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6090   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6091 
6092   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6093   ins_encode %{
6094     __ movzbq($dst$$Register, $mem$$Address);  // $mask is matched but never emitted
6095   %}
6096   ins_pipe(ialu_reg_mem);
6097 %}
6098 
6099 // Load Integer with mask 0xFFFF into Long Register: a single movzwq replaces load+and+convert
6100 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6101   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6102 
6103   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6104   ins_encode %{
6105     __ movzwq($dst$$Register, $mem$$Address);  // $mask is matched but never emitted
6106   %}
6107   ins_pipe(ialu_reg_mem);
6108 %}
6109 
6110 // Load Integer with a 32-bit mask into Long Register: load, mask, and sign-extend when needed
6111 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6112   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6113   effect(KILL cr);  // the andl emitted below modifies the flags
6114   // NOTE: format omits the movslq, which is emitted only for a negative mask.
6115   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6116             "andl    $dst, $mask" %}
6117   ins_encode %{
6118     __ movl($dst$$Register, $mem$$Address);
6119     __ andl($dst$$Register, $mask$$constant);  // 32-bit op: upper half of $dst is zeroed
6120     if ($mask$$constant < 0) { __ movslq($dst$$Register, $dst$$Register); }  // bit 31 may survive: ConvI2L must sign-extend
6121   %}
6122   ins_pipe(ialu_reg_mem);
6123 %}
6124 
6125 // Load Unsigned Integer into Long Register: LoadUI2L becomes one 32-bit movl
6126 instruct loadUI2L(rRegL dst, memory mem)
6127 %{
6128   match(Set dst (LoadUI2L mem));
6129 
6130   ins_cost(125);
6131   format %{ "movl    $dst, $mem\t# uint -> long" %}
6132 
6133   ins_encode %{
6134     __ movl($dst$$Register, $mem$$Address);
6135   %}
6136 
6137   ins_pipe(ialu_reg_mem);
6138 %}
6139 
6140 // Load Long: LoadL becomes one 64-bit movq from memory
6141 instruct loadL(rRegL dst, memory mem)
6142 %{
6143   match(Set dst (LoadL mem));
6144 
6145   ins_cost(125);
6146   format %{ "movq    $dst, $mem\t# long" %}
6147 
6148   ins_encode %{
6149     __ movq($dst$$Register, $mem$$Address);
6150   %}
6151 
6152   ins_pipe(ialu_reg_mem); // XXX
6153 %}
6154 
6155 // Load Range: LoadRange is encoded as a 32-bit movl using primary opcode 0x8B
6156 instruct loadRange(rRegI dst, memory mem)
6157 %{
6158   match(Set dst (LoadRange mem));
6159 
6160   ins_cost(125); // XXX
6161   format %{ "movl    $dst, $mem\t# range" %}
6162   opcode(0x8B);
6163   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6164   ins_pipe(ialu_reg_mem);
6165 %}
6166 
6167 // Load Pointer: LoadP is encoded as movq, opcode 0x8B with the wide REX encoding class
6168 instruct loadP(rRegP dst, memory mem)
6169 %{
6170   match(Set dst (LoadP mem));
6171 
6172   ins_cost(125); // XXX
6173   format %{ "movq    $dst, $mem\t# ptr" %}
6174   opcode(0x8B);
6175   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6176   ins_pipe(ialu_reg_mem); // XXX
6177 %}
6178 
6179 // Load Compressed Pointer: LoadN is a plain 32-bit movl; no decode is emitted here
6180 instruct loadN(rRegN dst, memory mem)
6181 %{
6182    match(Set dst (LoadN mem));
6183 
6184    ins_cost(125); // XXX
6185    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6186    ins_encode %{
6187      __ movl($dst$$Register, $mem$$Address);
6188    %}
6189    ins_pipe(ialu_reg_mem); // XXX
6190 %}
6191 
6192 
6193 // Load Klass Pointer: LoadKlass is encoded exactly like a pointer load (movq, opcode 0x8B)
6194 instruct loadKlass(rRegP dst, memory mem)
6195 %{
6196   match(Set dst (LoadKlass mem));
6197 
6198   ins_cost(125); // XXX
6199   format %{ "movq    $dst, $mem\t# class" %}
6200   opcode(0x8B);
6201   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6202   ins_pipe(ialu_reg_mem); // XXX
6203 %}
6204 
6205 // Load narrow Klass Pointer: LoadNKlass is a plain 32-bit movl; no decode is emitted here
6206 instruct loadNKlass(rRegN dst, memory mem)
6207 %{
6208   match(Set dst (LoadNKlass mem));
6209 
6210   ins_cost(125); // XXX
6211   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6212   ins_encode %{
6213     __ movl($dst$$Register, $mem$$Address);
6214   %}
6215   ins_pipe(ialu_reg_mem); // XXX
6216 %}
6217 
6218 // Load Float: LoadF is encoded as movss (opcode bytes F3 0F 10) into a regF
6219 instruct loadF(regF dst, memory mem)
6220 %{
6221   match(Set dst (LoadF mem));
6222 
6223   ins_cost(145); // XXX
6224   format %{ "movss   $dst, $mem\t# float" %}
6225   opcode(0xF3, 0x0F, 0x10);
6226   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));  // prefix byte, then REX, then 0F 10
6227   ins_pipe(pipe_slow); // XXX
6228 %}
6229 
6230 // Load Double (movlpd form): selected only when UseXmmLoadAndClearUpper is off
6231 instruct loadD_partial(regD dst, memory mem)
6232 %{
6233   predicate(!UseXmmLoadAndClearUpper);
6234   match(Set dst (LoadD mem));
6235 
6236   ins_cost(145); // XXX
6237   format %{ "movlpd  $dst, $mem\t# double" %}
6238   opcode(0x66, 0x0F, 0x12);
6239   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6240   ins_pipe(pipe_slow); // XXX
6241 %}
6242 
6243 instruct loadD(regD dst, memory mem)
6244 %{
6245   predicate(UseXmmLoadAndClearUpper);  // movsd form; loadD_partial covers the opposite setting
6246   match(Set dst (LoadD mem));
6247 
6248   ins_cost(145); // XXX
6249   format %{ "movsd   $dst, $mem\t# double" %}
6250   opcode(0xF2, 0x0F, 0x10);
6251   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6252   ins_pipe(pipe_slow); // XXX
6253 %}
6254 
6255 // Load Aligned Packed Byte to XMM register: Load8B via the shared movq_ld encoding
6256 instruct loadA8B(regD dst, memory mem) %{
6257   match(Set dst (Load8B mem));
6258   ins_cost(125);
6259   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6260   ins_encode( movq_ld(dst, mem));
6261   ins_pipe( pipe_slow );
6262 %}
6263 
6264 // Load Aligned Packed Short to XMM register: Load4S via the shared movq_ld encoding
6265 instruct loadA4S(regD dst, memory mem) %{
6266   match(Set dst (Load4S mem));
6267   ins_cost(125);
6268   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6269   ins_encode( movq_ld(dst, mem));
6270   ins_pipe( pipe_slow );
6271 %}
6272 
6273 // Load Aligned Packed Char to XMM register: Load4C via the shared movq_ld encoding
6274 instruct loadA4C(regD dst, memory mem) %{
6275   match(Set dst (Load4C mem));
6276   ins_cost(125);
6277   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6278   ins_encode( movq_ld(dst, mem));
6279   ins_pipe( pipe_slow );
6280 %}
6281 
6282 // Load Aligned Packed Integer to XMM register: Load2I via the shared movq_ld encoding
6283 instruct load2IU(regD dst, memory mem) %{
6284   match(Set dst (Load2I mem));
6285   ins_cost(125);
6286   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6287   ins_encode( movq_ld(dst, mem));
6288   ins_pipe( pipe_slow );
6289 %}
6290 
6291 // Load Aligned Packed Single to XMM: Load2F via the shared movq_ld encoding (cost 145, not 125)
6292 instruct loadA2F(regD dst, memory mem) %{
6293   match(Set dst (Load2F mem));
6294   ins_cost(145);
6295   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6296   ins_encode( movq_ld(dst, mem));
6297   ins_pipe( pipe_slow );
6298 %}
6299 
6300 // Load Effective Address: matches an indOffset8 address operand itself (no Load node)
6301 instruct leaP8(rRegP dst, indOffset8 mem)
6302 %{
6303   match(Set dst mem);
6304 
6305   ins_cost(110); // XXX
6306   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6307   opcode(0x8D);
6308   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6309   ins_pipe(ialu_reg_reg_fat);
6310 %}
6311 
6312 instruct leaP32(rRegP dst, indOffset32 mem)
6313 %{
6314   match(Set dst mem);  // the indOffset32 address expression is the value; nothing is loaded
6315 
6316   ins_cost(110);
6317   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6318   opcode(0x8D);        // primary opcode for the leaq shown in format
6319   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6320   ins_pipe(ialu_reg_reg_fat);
6321 %}
6322 
6323 // instruct leaPIdx(rRegP dst, indIndex mem)
6324 // %{
6325 //   match(Set dst mem);
6326 
6327 //   ins_cost(110);
6328 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6329 //   opcode(0x8D);
6330 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6331 //   ins_pipe(ialu_reg_reg_fat);
6332 // %}
6333 
6334 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6335 %{
6336   match(Set dst mem);  // the indIndexOffset address expression is the value; nothing is loaded
6337 
6338   ins_cost(110);
6339   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6340   opcode(0x8D);        // primary opcode for the leaq shown in format
6341   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6342   ins_pipe(ialu_reg_reg_fat);
6343 %}
6344 
6345 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6346 %{
6347   match(Set dst mem);  // the indIndexScale address expression is the value; nothing is loaded
6348 
6349   ins_cost(110);
6350   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6351   opcode(0x8D);        // primary opcode for the leaq shown in format
6352   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6353   ins_pipe(ialu_reg_reg_fat);
6354 %}
6355 
6356 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6357 %{
6358   match(Set dst mem);  // the indIndexScaleOffset address expression is the value; nothing is loaded
6359 
6360   ins_cost(110);
6361   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6362   opcode(0x8D);        // primary opcode for the leaq shown in format
6363   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6364   ins_pipe(ialu_reg_reg_fat);
6365 %}
6366 
6367 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6368 %{
6369   match(Set dst mem);  // the indPosIndexScaleOffset address expression is the value; nothing is loaded
6370 
6371   ins_cost(110);
6372   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6373   opcode(0x8D);        // primary opcode for the leaq shown in format
6374   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6375   ins_pipe(ialu_reg_reg_fat);
6376 %}
6377 
6378 // Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero shift
6379 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6380 %{
6381   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6382   match(Set dst mem);
6383 
6384   ins_cost(110);
6385   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6386   opcode(0x8D);
6387   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6388   ins_pipe(ialu_reg_reg_fat);
6389 %}
6390 
6391 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6392 %{
6393   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6394   match(Set dst mem);  // the indOffset8Narrow address expression is the value; nothing is loaded
6395 
6396   ins_cost(110); // XXX
6397   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6398   opcode(0x8D);
6399   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6400   ins_pipe(ialu_reg_reg_fat);
6401 %}
6402 
6403 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6404 %{
6405   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6406   match(Set dst mem);  // the indOffset32Narrow address expression is the value; nothing is loaded
6407 
6408   ins_cost(110);
6409   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6410   opcode(0x8D);
6411   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6412   ins_pipe(ialu_reg_reg_fat);
6413 %}
6414 
6415 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6416 %{
6417   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6418   match(Set dst mem);  // the indIndexOffsetNarrow address expression is the value; nothing is loaded
6419 
6420   ins_cost(110);
6421   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6422   opcode(0x8D);
6423   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6424   ins_pipe(ialu_reg_reg_fat);
6425 %}
6426 
6427 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6428 %{
6429   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6430   match(Set dst mem);  // the indIndexScaleNarrow address expression is the value; nothing is loaded
6431 
6432   ins_cost(110);
6433   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6434   opcode(0x8D);
6435   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6436   ins_pipe(ialu_reg_reg_fat);
6437 %}
6438 
6439 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6440 %{
6441   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6442   match(Set dst mem);  // the indIndexScaleOffsetNarrow address expression is the value; nothing is loaded
6443 
6444   ins_cost(110);
6445   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6446   opcode(0x8D);
6447   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6448   ins_pipe(ialu_reg_reg_fat);
6449 %}
6450 
6451 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6452 %{
6453   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6454   match(Set dst mem);  // the indPosIndexScaleOffsetNarrow address expression is the value; nothing is loaded
6455 
6456   ins_cost(110);
6457   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6458   opcode(0x8D);
6459   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6460   ins_pipe(ialu_reg_reg_fat);
6461 %}
6462 
6463 instruct loadConI(rRegI dst, immI src)
6464 %{
6465   match(Set dst src);  // any int constant; zero is also matched by loadConI0 (xorl)
6466 
6467   format %{ "movl    $dst, $src\t# int" %}
6468   ins_encode(load_immI(dst, src));
6469   ins_pipe(ialu_reg_fat); // XXX
6470 %}
6471 
6472 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6473 %{
6474   match(Set dst src);
6475   effect(KILL cr);  // xorl modifies the flags, unlike a plain constant move
6476 
6477   ins_cost(50);
6478   format %{ "xorl    $dst, $dst\t# int" %}
6479   opcode(0x33); /* + rd */
6480   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // dst is both operands: dst ^ dst == 0
6481   ins_pipe(ialu_reg);
6482 %}
6483 
6484 instruct loadConL(rRegL dst, immL src)
6485 %{
6486   match(Set dst src);  // general long constant
6487 
6488   ins_cost(150);       // most expensive of the long-constant rules (loadConL0 50, loadConUL32 60, loadConL32 70)
6489   format %{ "movq    $dst, $src\t# long" %}
6490   ins_encode(load_immL(dst, src));
6491   ins_pipe(ialu_reg);
6492 %}
6493 
6494 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6495 %{
6496   match(Set dst src);
6497   effect(KILL cr);  // xorl modifies the flags
6498 
6499   ins_cost(50);
6500   format %{ "xorl    $dst, $dst\t# long" %}
6501   opcode(0x33); /* + rd */
6502   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // non-wide REX class, consistent with the xorl in format
6503   ins_pipe(ialu_reg); // XXX
6504 %}
6505 
6506 instruct loadConUL32(rRegL dst, immUL32 src)
6507 %{
6508   match(Set dst src);  // long constant that fits the immUL32 operand
6509 
6510   ins_cost(60);        // cheaper than loadConL32 (70) and loadConL (150)
6511   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6512   ins_encode(load_immUL32(dst, src));
6513   ins_pipe(ialu_reg);
6514 %}
6515 
6516 instruct loadConL32(rRegL dst, immL32 src)
6517 %{
6518   match(Set dst src);  // long constant that fits the immL32 operand
6519 
6520   ins_cost(70);        // between loadConUL32 (60) and loadConL (150)
6521   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6522   ins_encode(load_immL32(dst, src));
6523   ins_pipe(ialu_reg);
6524 %}
6525 
6526 instruct loadConP(rRegP dst, immP con) %{
6527   match(Set dst con);  // general pointer constant; no ins_cost is given here
6528 
6529   format %{ "movq    $dst, $con\t# ptr" %}
6530   ins_encode(load_immP(dst, con));
6531   ins_pipe(ialu_reg_fat); // XXX
6532 %}
6533 
6534 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6535 %{
6536   match(Set dst src);
6537   effect(KILL cr);  // xorl modifies the flags
6538 
6539   ins_cost(50);
6540   format %{ "xorl    $dst, $dst\t# ptr" %}
6541   opcode(0x33); /* + rd */
6542   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // dst is both operands: dst ^ dst == 0
6543   ins_pipe(ialu_reg);
6544 %}
6545 
6546 instruct loadConP_poll(rRegP dst, immP_poll src) %{
6547   match(Set dst src);
6548   format %{ "movq    $dst, $src\t!ptr" %}
6549   ins_encode %{
6550     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);  // address is tagged with a poll_type relocation
6551     __ lea($dst$$Register, polling_page);  // $src itself is not used; the address comes from os::get_polling_page()
6552   %}
6553   ins_pipe(ialu_reg_fat);
6554 %}
6555 
6556 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6557 %{
6558   match(Set dst src);
6559   effect(KILL cr);  // NOTE(review): format shows a movl; confirm load_immP31 really needs the flags killed
6560 
6561   ins_cost(60);     // cheaper than the xorl-free general case, dearer than loadConP0 (50)
6562   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6563   ins_encode(load_immP31(dst, src));
6564   ins_pipe(ialu_reg);
6565 %}
6566 
6567 instruct loadConF(regF dst, immF con) %{
6568   match(Set dst con);
6569   ins_cost(125);
6570   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6571   ins_encode %{
6572     __ movflt($dst$$XMMRegister, $constantaddress($con));  // the value is read from the constant table entry for $con
6573   %}
6574   ins_pipe(pipe_slow);
6575 %}
6576 
6577 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6578   match(Set dst src);
6579   effect(KILL cr);  // xorq modifies the flags
6580   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6581   ins_encode %{
6582     __ xorq($dst$$Register, $dst$$Register);  // zeroes $dst; $src is matched but is not an instruction operand
6583   %}
6584   ins_pipe(ialu_reg);
6585 %}
6586 
6587 instruct loadConN(rRegN dst, immN src) %{
6588   match(Set dst src);
6589 
6590   ins_cost(125);
6591   format %{ "movl    $dst, $src\t# compressed ptr" %}
6592   ins_encode %{
6593     address con = (address)$src$$constant;
6594     if (con == NULL) {
6595       ShouldNotReachHere();  // a NULL constant is not expected here (presumably loadConN0 takes it)
6596     } else {
6597       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6598     }
6599   %}
6600   ins_pipe(ialu_reg_fat); // XXX
6601 %}
6602 
6603 instruct loadConF0(regF dst, immF0 src)
6604 %{
6605   match(Set dst src);
6606   ins_cost(100);  // cheaper than the constant-table load in loadConF (125)
6607 
6608   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6609   opcode(0x0F, 0x57);
6610   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is both operands
6611   ins_pipe(pipe_slow);
6612 %}
6613 
6614 // Load double constant from the constant table. Use the same format since predicate() can not be used here.
6615 instruct loadConD(regD dst, immD con) %{
6616   match(Set dst con);
6617   ins_cost(125);
6618   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6619   ins_encode %{
6620     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6621   %}
6622   ins_pipe(pipe_slow);
6623 %}
6624 
6625 instruct loadConD0(regD dst, immD0 src)
6626 %{
6627   match(Set dst src);
6628   ins_cost(100);  // cheaper than the constant-table load in loadConD (125)
6629 
6630   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6631   opcode(0x66, 0x0F, 0x57);
6632   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));  // dst is both operands
6633   ins_pipe(pipe_slow);
6634 %}
6635 
6636 instruct loadSSI(rRegI dst, stackSlotI src)
6637 %{
6638   match(Set dst src);  // reload of an int stack slot into a register
6639 
6640   ins_cost(125);
6641   format %{ "movl    $dst, $src\t# int stk" %}
6642   opcode(0x8B);
6643   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6644   ins_pipe(ialu_reg_mem);
6645 %}
6646 
6647 instruct loadSSL(rRegL dst, stackSlotL src)
6648 %{
6649   match(Set dst src);  // reload of a long stack slot into a register
6650 
6651   ins_cost(125);
6652   format %{ "movq    $dst, $src\t# long stk" %}
6653   opcode(0x8B);
6654   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6655   ins_pipe(ialu_reg_mem);
6656 %}
6657 
6658 instruct loadSSP(rRegP dst, stackSlotP src)
6659 %{
6660   match(Set dst src);  // reload of a pointer stack slot into a register
6661 
6662   ins_cost(125);
6663   format %{ "movq    $dst, $src\t# ptr stk" %}
6664   opcode(0x8B);
6665   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6666   ins_pipe(ialu_reg_mem);
6667 %}
6668 
6669 instruct loadSSF(regF dst, stackSlotF src)
6670 %{
6671   match(Set dst src);  // reload of a float stack slot; same opcode bytes as loadF
6672 
6673   ins_cost(125);
6674   format %{ "movss   $dst, $src\t# float stk" %}
6675   opcode(0xF3, 0x0F, 0x10);
6676   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6677   ins_pipe(pipe_slow); // XXX
6678 %}
6679 
6680 // Reload a double stack slot. Use the same format since predicate() can not be used here.
6681 instruct loadSSD(regD dst, stackSlotD src)
6682 %{
6683   match(Set dst src);
6684 
6685   ins_cost(125);
6686   format %{ "movsd   $dst, $src\t# double stk" %}
6687   ins_encode  %{
6688     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot is addressed as rsp + displacement
6689   %}
6690   ins_pipe(pipe_slow); // XXX
6691 %}
6692 
6693 // Prefetch instructions.
6694 // Must be safe to execute with invalid address (cannot fault).
6695 
6696 instruct prefetchr( memory mem ) %{
6697   predicate(ReadPrefetchInstr==3);  // one of four PrefetchRead rules, chosen by ReadPrefetchInstr
6698   match(PrefetchRead mem);
6699   ins_cost(125);
6700 
6701   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6702   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6703   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));  // 0x00 is the /0 from the opcode comment
6704   ins_pipe(ialu_mem);
6705 %}
6706 
// prefetchrNTA: read prefetch, selected when ReadPrefetchInstr == 0.
// Opcode 0F 18 with opcode-extension field /0 in the ModRM byte.
6707 instruct prefetchrNTA( memory mem ) %{
6708   predicate(ReadPrefetchInstr==0);
6709   match(PrefetchRead mem);
6710   ins_cost(125);
6711 
6712   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6713   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6714   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6715   ins_pipe(ialu_mem);
6716 %}
6717 
// prefetchrT0: read prefetch, selected when ReadPrefetchInstr == 1.
// Opcode 0F 18 with opcode-extension field /1 in the ModRM byte.
6718 instruct prefetchrT0( memory mem ) %{
6719   predicate(ReadPrefetchInstr==1);
6720   match(PrefetchRead mem);
6721   ins_cost(125);
6722 
6723   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6724   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6725   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6726   ins_pipe(ialu_mem);
6727 %}
6728 
// prefetchrT2: read prefetch, selected when ReadPrefetchInstr == 2.
// Opcode 0F 18 with opcode-extension field /3 in the ModRM byte.
6729 instruct prefetchrT2( memory mem ) %{
6730   predicate(ReadPrefetchInstr==2);
6731   match(PrefetchRead mem);
6732   ins_cost(125);
6733 
6734   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6735   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6736   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6737   ins_pipe(ialu_mem);
6738 %}
6739 
// prefetchw: write (allocation) prefetch, selected when AllocatePrefetchInstr == 3.
// Opcode 0F 0D with opcode-extension field /1 in the ModRM byte.
6740 instruct prefetchw( memory mem ) %{
6741   predicate(AllocatePrefetchInstr==3);
6742   match(PrefetchWrite mem);
6743   ins_cost(125);
6744 
6745   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6746   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6747   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6748   ins_pipe(ialu_mem);
6749 %}
6750 
// prefetchwNTA: write (allocation) prefetch, selected when AllocatePrefetchInstr == 0.
// Same bytes as prefetchrNTA: opcode 0F 18 with opcode-extension field /0.
6751 instruct prefetchwNTA( memory mem ) %{
6752   predicate(AllocatePrefetchInstr==0);
6753   match(PrefetchWrite mem);
6754   ins_cost(125);
6755 
6756   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6757   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6758   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6759   ins_pipe(ialu_mem);
6760 %}
6761 
// prefetchwT0: write (allocation) prefetch, selected when AllocatePrefetchInstr == 1.
// Same bytes as prefetchrT0: opcode 0F 18 with opcode-extension field /1.
6762 instruct prefetchwT0( memory mem ) %{
6763   predicate(AllocatePrefetchInstr==1);
6764   match(PrefetchWrite mem);
6765   ins_cost(125);
6766 
6767   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6768   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6769   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6770   ins_pipe(ialu_mem);
6771 %}
6772 
// prefetchwT2: write (allocation) prefetch, selected when AllocatePrefetchInstr == 2.
// Same bytes as prefetchrT2: opcode 0F 18 with opcode-extension field /3.
6773 instruct prefetchwT2( memory mem ) %{
6774   predicate(AllocatePrefetchInstr==2);
6775   match(PrefetchWrite mem);
6776   ins_cost(125);
6777 
6778   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6779   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6780   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6781   ins_pipe(ialu_mem);
6782 %}
6783 
6784 //----------Store Instructions-------------------------------------------------
6785 
6786 // Store Byte
// storeB: matches StoreB of an integer register to memory; opcode byte 0x88 (movb).
6787 instruct storeB(memory mem, rRegI src)
6788 %{
6789   match(Set mem (StoreB mem src));
6790 
6791   ins_cost(125); // XXX
6792   format %{ "movb    $mem, $src\t# byte" %}
6793   opcode(0x88);
6794   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); // byte-register REX encoding class (defined elsewhere in this file)
6795   ins_pipe(ialu_mem_reg);
6796 %}
6797 
6798 // Store Char/Short
// storeC: matches StoreC (char/short) of an integer register to memory.
// Uses the same opcode byte as storeI (0x89) preceded by the SizePrefix encoding class.
6799 instruct storeC(memory mem, rRegI src)
6800 %{
6801   match(Set mem (StoreC mem src));
6802 
6803   ins_cost(125); // XXX
6804   format %{ "movw    $mem, $src\t# char/short" %}
6805   opcode(0x89);
6806   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); // SizePrefix comes before the REX prefix
6807   ins_pipe(ialu_mem_reg);
6808 %}
6809 
6810 // Store Integer
// storeI: matches StoreI of an integer register to memory; opcode byte 0x89 (movl).
6811 instruct storeI(memory mem, rRegI src)
6812 %{
6813   match(Set mem (StoreI mem src));
6814 
6815   ins_cost(125); // XXX
6816   format %{ "movl    $mem, $src\t# int" %}
6817   opcode(0x89);
6818   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6819   ins_pipe(ialu_mem_reg);
6820 %}
6821 
6822 // Store Long
// storeL: matches StoreL of a long register to memory.
// Opcode byte 0x89 with the _wide REX encoding class (printed as movq).
6823 instruct storeL(memory mem, rRegL src)
6824 %{
6825   match(Set mem (StoreL mem src));
6826 
6827   ins_cost(125); // XXX
6828   format %{ "movq    $mem, $src\t# long" %}
6829   opcode(0x89);
6830   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6831   ins_pipe(ialu_mem_reg); // XXX
6832 %}
6833 
6834 // Store Pointer
// storeP: matches StoreP of a pointer register to memory.
// The source operand class is any_RegP (not rRegP); encoding matches storeL.
6835 instruct storeP(memory mem, any_RegP src)
6836 %{
6837   match(Set mem (StoreP mem src));
6838 
6839   ins_cost(125); // XXX
6840   format %{ "movq    $mem, $src\t# ptr" %}
6841   opcode(0x89);
6842   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6843   ins_pipe(ialu_mem_reg);
6844 %}
6845 
// storeImmP0: store a null pointer constant by storing r12 instead of an immediate.
// Only enabled with compressed oops and a NULL narrow-oop base; the format string
// records the assumption that r12 (the heap-base register) then holds zero.
6846 instruct storeImmP0(memory mem, immP0 zero)
6847 %{
6848   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6849   match(Set mem (StoreP mem zero));
6850 
6851   ins_cost(125); // XXX
6852   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6853   ins_encode %{
6854     __ movq($mem$$Address, r12);
6855   %}
6856   ins_pipe(ialu_mem_reg);
6857 %}
6858 
6859 // Store NULL Pointer, mark word, or other simple pointer constant.
// storeImmP: store a pointer constant of operand class immP31 as an immediate.
// Opcode C7 /0 with the _wide REX encoding class, followed by a 32-bit constant.
// Costed higher (150) than the register/r12 store rules (125).
6860 instruct storeImmP(memory mem, immP31 src)
6861 %{
6862   match(Set mem (StoreP mem src));
6863 
6864   ins_cost(150); // XXX
6865   format %{ "movq    $mem, $src\t# ptr" %}
6866   opcode(0xC7); /* C7 /0 */
6867   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6868   ins_pipe(ialu_mem_imm);
6869 %}
6870 
6871 // Store Compressed Pointer
// storeN: matches StoreN (compressed pointer) of a narrow-oop register to memory.
// Emitted as a 32-bit movl through the macro assembler.
6872 instruct storeN(memory mem, rRegN src)
6873 %{
6874   match(Set mem (StoreN mem src));
6875 
6876   ins_cost(125); // XXX
6877   format %{ "movl    $mem, $src\t# compressed ptr" %}
6878   ins_encode %{
6879     __ movl($mem$$Address, $src$$Register);
6880   %}
6881   ins_pipe(ialu_mem_reg);
6882 %}
6883 
// storeImmN0: store a null compressed pointer by storing the low 32 bits of r12.
// Enabled when the narrow-oop base is NULL. Unlike the sibling *0 rules, the
// predicate does not also test UseCompressedOops.
// NOTE(review): presumably StoreN nodes only exist with compressed oops -- confirm.
6884 instruct storeImmN0(memory mem, immN0 zero)
6885 %{
6886   predicate(Universe::narrow_oop_base() == NULL);
6887   match(Set mem (StoreN mem zero));
6888 
6889   ins_cost(125); // XXX
6890   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6891   ins_encode %{
6892     __ movl($mem$$Address, r12);
6893   %}
6894   ins_pipe(ialu_mem_reg);
6895 %}
6896 
// storeImmN: store a compressed-pointer constant to memory.
// A NULL constant is written as a plain 32-bit zero; any other constant goes
// through set_narrow_oop() with the constant passed as a jobject.
6897 instruct storeImmN(memory mem, immN src)
6898 %{
6899   match(Set mem (StoreN mem src));
6900 
6901   ins_cost(150); // XXX
6902   format %{ "movl    $mem, $src\t# compressed ptr" %}
6903   ins_encode %{
6904     address con = (address)$src$$constant;
6905     if (con == NULL) {
6906       __ movl($mem$$Address, (int32_t)0);
6907     } else {
6908       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6909     }
6910   %}
6911   ins_pipe(ialu_mem_imm);
6912 %}
6913 
6914 // Store Integer Immediate
// storeImmI0: store int zero by storing the low 32 bits of r12.
// Only enabled with compressed oops and a NULL narrow-oop base (see format string:
// r12 is expected to hold zero in that configuration).
6915 instruct storeImmI0(memory mem, immI0 zero)
6916 %{
6917   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6918   match(Set mem (StoreI mem zero));
6919 
6920   ins_cost(125); // XXX
6921   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6922   ins_encode %{
6923     __ movl($mem$$Address, r12);
6924   %}
6925   ins_pipe(ialu_mem_reg);
6926 %}
6927 
// storeImmI: store an int constant to memory as an immediate.
// Opcode C7 /0 followed by the 32-bit constant.
6928 instruct storeImmI(memory mem, immI src)
6929 %{
6930   match(Set mem (StoreI mem src));
6931 
6932   ins_cost(150);
6933   format %{ "movl    $mem, $src\t# int" %}
6934   opcode(0xC7); /* C7 /0 */
6935   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6936   ins_pipe(ialu_mem_imm);
6937 %}
6938 
6939 // Store Long Immediate
// storeImmL0: store long zero by storing all 64 bits of r12.
// Only enabled with compressed oops and a NULL narrow-oop base (see format string:
// r12 is expected to hold zero in that configuration).
6940 instruct storeImmL0(memory mem, immL0 zero)
6941 %{
6942   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6943   match(Set mem (StoreL mem zero));
6944 
6945   ins_cost(125); // XXX
6946   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6947   ins_encode %{
6948     __ movq($mem$$Address, r12);
6949   %}
6950   ins_pipe(ialu_mem_reg);
6951 %}
6952 
// storeImmL: store a long constant of operand class immL32 to memory as an immediate.
// Opcode C7 /0 with the _wide REX encoding class; only a 32-bit constant is emitted.
6953 instruct storeImmL(memory mem, immL32 src)
6954 %{
6955   match(Set mem (StoreL mem src));
6956 
6957   ins_cost(150);
6958   format %{ "movq    $mem, $src\t# long" %}
6959   opcode(0xC7); /* C7 /0 */
6960   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6961   ins_pipe(ialu_mem_imm);
6962 %}
6963 
6964 // Store Short/Char Immediate
// storeImmC0: store a zero char/short by storing the low 16 bits of r12.
// Only enabled with compressed oops and a NULL narrow-oop base (see format string:
// r12 is expected to hold zero in that configuration).
6965 instruct storeImmC0(memory mem, immI0 zero)
6966 %{
6967   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6968   match(Set mem (StoreC mem zero));
6969 
6970   ins_cost(125); // XXX
6971   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6972   ins_encode %{
6973     __ movw($mem$$Address, r12);
6974   %}
6975   ins_pipe(ialu_mem_reg);
6976 %}
6977 
// storeImmI16: store a 16-bit constant (char/short) to memory as an immediate.
// Gated by the UseStoreImmI16 flag. Same C7 /0 opcode as the 32-bit immediate
// store, preceded by the SizePrefix encoding class and followed by a 16-bit constant.
6978 instruct storeImmI16(memory mem, immI16 src)
6979 %{
6980   predicate(UseStoreImmI16);
6981   match(Set mem (StoreC mem src));
6982 
6983   ins_cost(150);
6984   format %{ "movw    $mem, $src\t# short/char" %}
6985   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6986   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6987   ins_pipe(ialu_mem_imm);
6988 %}
6989 
6990 // Store Byte Immediate
// storeImmB0: store a zero byte by storing the low 8 bits of r12.
// Only enabled with compressed oops and a NULL narrow-oop base (see format string:
// r12 is expected to hold zero in that configuration).
// The format annotation reads "byte": this rule matches StoreB and emits movb, so
// the previous "short/char" label (copied from storeImmC0) mislabelled the listing.
6991 instruct storeImmB0(memory mem, immI0 zero)
6992 %{
6993   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6994   match(Set mem (StoreB mem zero));
6995 
6996   ins_cost(125); // XXX
6997   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6998   ins_encode %{
6999     __ movb($mem$$Address, r12);
7000   %}
7001   ins_pipe(ialu_mem_reg);
7002 %}
7003 
// storeImmB: store an 8-bit constant (operand class immI8) to memory as an immediate.
// Opcode C6 /0; the constant is emitted by the Con8or32 encoding class.
7004 instruct storeImmB(memory mem, immI8 src)
7005 %{
7006   match(Set mem (StoreB mem src));
7007 
7008   ins_cost(150); // XXX
7009   format %{ "movb    $mem, $src\t# byte" %}
7010   opcode(0xC6); /* C6 /0 */
7011   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7012   ins_pipe(ialu_mem_imm);
7013 %}
7014 
7015 // Store Aligned Packed Byte XMM register to memory
// storeA8B: matches Store8B (eight packed bytes) from a regD operand to memory.
// Encoded by the shared movq_st encoding class (defined elsewhere in this file).
7016 instruct storeA8B(memory mem, regD src) %{
7017   match(Set mem (Store8B mem src));
7018   ins_cost(145);
7019   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7020   ins_encode( movq_st(mem, src));
7021   ins_pipe( pipe_slow );
7022 %}
7023 
7024 // Store Aligned Packed Char/Short XMM register to memory
// storeA4C: matches Store4C (four packed chars/shorts) from a regD operand to memory.
// Encoded by the shared movq_st encoding class (defined elsewhere in this file).
7025 instruct storeA4C(memory mem, regD src) %{
7026   match(Set mem (Store4C mem src));
7027   ins_cost(145);
7028   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7029   ins_encode( movq_st(mem, src));
7030   ins_pipe( pipe_slow );
7031 %}
7032 
7033 // Store Aligned Packed Integer XMM register to memory
// storeA2I: matches Store2I (two packed ints) from a regD operand to memory.
// Encoded by the shared movq_st encoding class (defined elsewhere in this file).
7034 instruct storeA2I(memory mem, regD src) %{
7035   match(Set mem (Store2I mem src));
7036   ins_cost(145);
7037   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7038   ins_encode( movq_st(mem, src));
7039   ins_pipe( pipe_slow );
7040 %}
7041 
7042 // Store CMS card-mark Immediate
// storeImmCM0_reg: card-mark store of zero (StoreCM) done by storing the low byte of r12.
// Only enabled with compressed oops and a NULL narrow-oop base; cheaper (125) than
// the immediate form storeImmCM0 (150), so it is preferred when its predicate holds.
7043 instruct storeImmCM0_reg(memory mem, immI0 zero)
7044 %{
7045   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7046   match(Set mem (StoreCM mem zero));
7047 
7048   ins_cost(125); // XXX
7049   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7050   ins_encode %{
7051     __ movb($mem$$Address, r12);
7052   %}
7053   ins_pipe(ialu_mem_reg);
7054 %}
7055 
// storeImmCM0: card-mark store of zero (StoreCM) using an immediate byte store.
// Opcode C6 /0; has no predicate, so it is always available as the fallback form.
7056 instruct storeImmCM0(memory mem, immI0 src)
7057 %{
7058   match(Set mem (StoreCM mem src));
7059 
7060   ins_cost(150); // XXX
7061   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7062   opcode(0xC6); /* C6 /0 */
7063   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7064   ins_pipe(ialu_mem_imm);
7065 %}
7066 
7067 // Store Aligned Packed Single Float XMM register to memory
// storeA2F: matches Store2F (two packed floats) from a regD operand to memory.
// Encoded by the shared movq_st encoding class (defined elsewhere in this file).
7068 instruct storeA2F(memory mem, regD src) %{
7069   match(Set mem (Store2F mem src));
7070   ins_cost(145);
7071   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7072   ins_encode( movq_st(mem, src));
7073   ins_pipe( pipe_slow );
7074 %}
7075 
7076 // Store Float
// storeF: matches StoreF of a float register to memory.
// Three-byte opcode F3 0F 11 (printed as movss); the REX encoding class sits
// between the first opcode byte and the remaining two.
7077 instruct storeF(memory mem, regF src)
7078 %{
7079   match(Set mem (StoreF mem src));
7080 
7081   ins_cost(95); // XXX
7082   format %{ "movss   $mem, $src\t# float" %}
7083   opcode(0xF3, 0x0F, 0x11);
7084   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7085   ins_pipe(pipe_slow); // XXX
7086 %}
7087 
7088 // Store immediate Float value (it is faster than store from XMM register)
// storeF0: store float 0.0 by storing the low 32 bits of r12 with an integer movl.
// Only enabled with compressed oops and a NULL narrow-oop base. Its cost (25) is
// lower than both storeF (95) and storeF_imm (50).
7089 instruct storeF0(memory mem, immF0 zero)
7090 %{
7091   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7092   match(Set mem (StoreF mem zero));
7093 
7094   ins_cost(25); // XXX
7095   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7096   ins_encode %{
7097     __ movl($mem$$Address, r12);
7098   %}
7099   ins_pipe(ialu_mem_reg);
7100 %}
7101 
// storeF_imm: store a float constant to memory with an integer immediate store.
// Opcode C7 /0; the constant is emitted by Con32F_as_bits (the float's bit pattern,
// going by the encoding-class name -- its definition is elsewhere in this file).
7102 instruct storeF_imm(memory mem, immF src)
7103 %{
7104   match(Set mem (StoreF mem src));
7105 
7106   ins_cost(50);
7107   format %{ "movl    $mem, $src\t# float" %}
7108   opcode(0xC7); /* C7 /0 */
7109   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7110   ins_pipe(ialu_mem_imm);
7111 %}
7112 
7113 // Store Double
// storeD: matches StoreD of a double register to memory.
// Three-byte opcode F2 0F 11 (printed as movsd); the REX encoding class sits
// between the first opcode byte and the remaining two.
7114 instruct storeD(memory mem, regD src)
7115 %{
7116   match(Set mem (StoreD mem src));
7117 
7118   ins_cost(95); // XXX
7119   format %{ "movsd   $mem, $src\t# double" %}
7120   opcode(0xF2, 0x0F, 0x11);
7121   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7122   ins_pipe(pipe_slow); // XXX
7123 %}
7124 
7125 // Store immediate double 0.0 (it is faster than store from XMM register)
// storeD0_imm: store double 0.0 with a 64-bit integer immediate store (C7 /0, _wide REX).
// Its predicate is the exact negation of storeD0's, so the two rules for the same
// StoreD-of-immD0 pattern are mutually exclusive.
7126 instruct storeD0_imm(memory mem, immD0 src)
7127 %{
7128   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7129   match(Set mem (StoreD mem src));
7130 
7131   ins_cost(50);
7132   format %{ "movq    $mem, $src\t# double 0." %}
7133   opcode(0xC7); /* C7 /0 */
7134   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7135   ins_pipe(ialu_mem_imm);
7136 %}
7137 
// storeD0: store double 0.0 by storing all 64 bits of r12 with an integer movq.
// Only enabled with compressed oops and a NULL narrow-oop base (the complement of
// storeD0_imm's predicate).
7138 instruct storeD0(memory mem, immD0 zero)
7139 %{
7140   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7141   match(Set mem (StoreD mem zero));
7142 
7143   ins_cost(25); // XXX
7144   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7145   ins_encode %{
7146     __ movq($mem$$Address, r12);
7147   %}
7148   ins_pipe(ialu_mem_reg);
7149 %}
7150 
// storeSSI: move an integer register ($src) into an int stack slot ($dst).
// Bare Set match (no ideal Store node); opcode byte 0x89.
7151 instruct storeSSI(stackSlotI dst, rRegI src)
7152 %{
7153   match(Set dst src);
7154 
7155   ins_cost(100);
7156   format %{ "movl    $dst, $src\t# int stk" %}
7157   opcode(0x89);
7158   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, stack slot as the memory operand
7159   ins_pipe( ialu_mem_reg );
7160 %}
7161 
// storeSSL: move a long register ($src) into a long stack slot ($dst).
// Opcode byte 0x89 with the _wide REX encoding class.
7162 instruct storeSSL(stackSlotL dst, rRegL src)
7163 %{
7164   match(Set dst src);
7165 
7166   ins_cost(100);
7167   format %{ "movq    $dst, $src\t# long stk" %}
7168   opcode(0x89);
7169   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7170   ins_pipe(ialu_mem_reg);
7171 %}
7172 
// storeSSP: move a pointer register ($src) into a pointer stack slot ($dst).
// Encoding is identical to storeSSL (0x89 with the _wide REX encoding class).
7173 instruct storeSSP(stackSlotP dst, rRegP src)
7174 %{
7175   match(Set dst src);
7176 
7177   ins_cost(100);
7178   format %{ "movq    $dst, $src\t# ptr stk" %}
7179   opcode(0x89);
7180   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7181   ins_pipe(ialu_mem_reg);
7182 %}
7183 
// storeSSF: move a float register ($src) into a float stack slot ($dst).
// Same opcode bytes as storeF (F3 0F 11), with the stack slot as the memory operand.
7184 instruct storeSSF(stackSlotF dst, regF src)
7185 %{
7186   match(Set dst src);
7187 
7188   ins_cost(95); // XXX
7189   format %{ "movss   $dst, $src\t# float stk" %}
7190   opcode(0xF3, 0x0F, 0x11);
7191   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7192   ins_pipe(pipe_slow); // XXX
7193 %}
7194 
// storeSSD: move a double register ($src) into a double stack slot ($dst).
// Same opcode bytes as storeD (F2 0F 11), with the stack slot as the memory operand.
7195 instruct storeSSD(stackSlotD dst, regD src)
7196 %{
7197   match(Set dst src);
7198 
7199   ins_cost(95); // XXX
7200   format %{ "movsd   $dst, $src\t# double stk" %}
7201   opcode(0xF2, 0x0F, 0x11);
7202   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7203   ins_pipe(pipe_slow); // XXX
7204 %}
7205 
7206 //----------BSWAP Instructions-------------------------------------------------
// bytes_reverse_int: matches ReverseBytesI in place ($dst is both input and output).
// Opcode 0F C8 (printed as bswapl); opc2_reg takes the register as its argument.
7207 instruct bytes_reverse_int(rRegI dst) %{
7208   match(Set dst (ReverseBytesI dst));
7209 
7210   format %{ "bswapl  $dst" %}
7211   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7212   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7213   ins_pipe( ialu_reg );
7214 %}
7215 
// bytes_reverse_long: matches ReverseBytesL in place ($dst is both input and output).
// Same opcode bytes as bytes_reverse_int, but with the _wide REX encoding class (bswapq).
7216 instruct bytes_reverse_long(rRegL dst) %{
7217   match(Set dst (ReverseBytesL dst));
7218 
7219   format %{ "bswapq  $dst" %}
7220 
7221   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7222   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7223   ins_pipe( ialu_reg);
7224 %}
7225 
// bytes_reverse_unsigned_short: matches ReverseBytesUS in place.
// Byte-swaps the full 32-bit register, then shifts right logically by 16 so the
// swapped 16-bit value lands in the low half with zeros above it.
7226 instruct bytes_reverse_unsigned_short(rRegI dst) %{
7227   match(Set dst (ReverseBytesUS dst));
7228 
7229   format %{ "bswapl  $dst\n\t"
7230             "shrl    $dst,16\n\t" %}
7231   ins_encode %{
7232     __ bswapl($dst$$Register);
7233     __ shrl($dst$$Register, 16); // logical shift: zero-fills the upper 16 bits
7234   %}
7235   ins_pipe( ialu_reg );
7236 %}
7237 
// bytes_reverse_short: matches ReverseBytesS in place.
// Byte-swaps the full 32-bit register, then shifts right arithmetically by 16 so the
// swapped 16-bit value lands in the low half, sign-extended.
// The format prints "sarl" to match the instruction actually emitted below and the
// "shrl" printed by the unsigned sibling rule (it previously printed "sar").
7238 instruct bytes_reverse_short(rRegI dst) %{
7239   match(Set dst (ReverseBytesS dst));
7240 
7241   format %{ "bswapl  $dst\n\t"
7242             "sarl    $dst,16\n\t" %}
7243   ins_encode %{
7244     __ bswapl($dst$$Register);
7245     __ sarl($dst$$Register, 16); // arithmetic shift: replicates the sign bit
7246   %}
7247   ins_pipe( ialu_reg );
7248 %}
7249 
7250 //---------- Zeros Count Instructions ------------------------------------------
7251 
// countLeadingZerosI: matches CountLeadingZerosI with a single lzcntl.
// Selected when UseCountLeadingZerosInstruction is set; declares the flags register killed.
7252 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7253   predicate(UseCountLeadingZerosInstruction);
7254   match(Set dst (CountLeadingZerosI src));
7255   effect(KILL cr);
7256 
7257   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7258   ins_encode %{
7259     __ lzcntl($dst$$Register, $src$$Register);
7260   %}
7261   ins_pipe(ialu_reg);
7262 %}
7263 
// countLeadingZerosI_bsr: CountLeadingZerosI fallback used when
// UseCountLeadingZerosInstruction is off. Computes 31 - bsr(src); when the
// not-zero branch falls through, dst is forced to -1 so the result becomes 32.
7264 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7265   predicate(!UseCountLeadingZerosInstruction);
7266   match(Set dst (CountLeadingZerosI src));
7267   effect(KILL cr);
7268 
7269   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7270             "jnz     skip\n\t"
7271             "movl    $dst, -1\n"
7272       "skip:\n\t"
7273             "negl    $dst\n\t"
7274             "addl    $dst, 31" %}
7275   ins_encode %{
7276     Register Rdst = $dst$$Register;
7277     Register Rsrc = $src$$Register;
7278     Label skip;
7279     __ bsrl(Rdst, Rsrc);
7280     __ jccb(Assembler::notZero, skip); // per the x86 BSR definition, ZF set means the source was zero
7281     __ movl(Rdst, -1);                 // zero input: pretend the highest set bit is at index -1
7282     __ bind(skip);
7283     __ negl(Rdst);
7284     __ addl(Rdst, BitsPerInt - 1);     // dst = (BitsPerInt - 1) - bit index
7285   %}
7286   ins_pipe(ialu_reg);
7287 %}
7288 
// countLeadingZerosL: matches CountLeadingZerosL with a single lzcntq.
// The source is a long register but the result goes to an int register (rRegI).
7289 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7290   predicate(UseCountLeadingZerosInstruction);
7291   match(Set dst (CountLeadingZerosL src));
7292   effect(KILL cr);
7293 
7294   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7295   ins_encode %{
7296     __ lzcntq($dst$$Register, $src$$Register);
7297   %}
7298   ins_pipe(ialu_reg);
7299 %}
7300 
// countLeadingZerosL_bsr: CountLeadingZerosL fallback used when
// UseCountLeadingZerosInstruction is off. Computes 63 - bsr(src) using a 64-bit
// scan followed by 32-bit arithmetic on the int result; when the not-zero branch
// falls through, dst is forced to -1 so the result becomes 64.
7301 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7302   predicate(!UseCountLeadingZerosInstruction);
7303   match(Set dst (CountLeadingZerosL src));
7304   effect(KILL cr);
7305 
7306   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7307             "jnz     skip\n\t"
7308             "movl    $dst, -1\n"
7309       "skip:\n\t"
7310             "negl    $dst\n\t"
7311             "addl    $dst, 63" %}
7312   ins_encode %{
7313     Register Rdst = $dst$$Register;
7314     Register Rsrc = $src$$Register;
7315     Label skip;
7316     __ bsrq(Rdst, Rsrc);
7317     __ jccb(Assembler::notZero, skip); // per the x86 BSR definition, ZF set means the source was zero
7318     __ movl(Rdst, -1);                 // zero input: pretend the highest set bit is at index -1
7319     __ bind(skip);
7320     __ negl(Rdst);
7321     __ addl(Rdst, BitsPerLong - 1);    // dst = (BitsPerLong - 1) - bit index
7322   %}
7323   ins_pipe(ialu_reg);
7324 %}
7325 
// countTrailingZerosI: matches CountTrailingZerosI using bsfl.
// There is no predicate, so this is the only rule shown here for the node.
// If the not-zero branch falls through, dst is set to BitsPerInt (32).
7326 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7327   match(Set dst (CountTrailingZerosI src));
7328   effect(KILL cr);
7329 
7330   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7331             "jnz     done\n\t"
7332             "movl    $dst, 32\n"
7333       "done:" %}
7334   ins_encode %{
7335     Register Rdst = $dst$$Register;
7336     Label done;
7337     __ bsfl(Rdst, $src$$Register);
7338     __ jccb(Assembler::notZero, done); // per the x86 BSF definition, ZF set means the source was zero
7339     __ movl(Rdst, BitsPerInt);         // zero input: every bit is a trailing zero
7340     __ bind(done);
7341   %}
7342   ins_pipe(ialu_reg);
7343 %}
7344 
// countTrailingZerosL: matches CountTrailingZerosL using bsfq; result is an int register.
// If the not-zero branch falls through, dst is set to BitsPerLong (64).
7345 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7346   match(Set dst (CountTrailingZerosL src));
7347   effect(KILL cr);
7348 
7349   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7350             "jnz     done\n\t"
7351             "movl    $dst, 64\n"
7352       "done:" %}
7353   ins_encode %{
7354     Register Rdst = $dst$$Register;
7355     Label done;
7356     __ bsfq(Rdst, $src$$Register);
7357     __ jccb(Assembler::notZero, done); // per the x86 BSF definition, ZF set means the source was zero
7358     __ movl(Rdst, BitsPerLong);        // zero input: every bit is a trailing zero
7359     __ bind(done);
7360   %}
7361   ins_pipe(ialu_reg);
7362 %}
7363 
7364 
7365 //---------- Population Count Instructions -------------------------------------
7366 
// popCountI: matches PopCountI on a register operand with popcntl.
// Only available when UsePopCountInstruction is set; no alternative rule appears in this section.
7367 instruct popCountI(rRegI dst, rRegI src) %{
7368   predicate(UsePopCountInstruction);
7369   match(Set dst (PopCountI src));
7370 
7371   format %{ "popcnt  $dst, $src" %}
7372   ins_encode %{
7373     __ popcntl($dst$$Register, $src$$Register);
7374   %}
7375   ins_pipe(ialu_reg);
7376 %}
7377 
// popCountI_mem: matches PopCountI of a LoadI, folding the load into popcntl's memory operand.
7378 instruct popCountI_mem(rRegI dst, memory mem) %{
7379   predicate(UsePopCountInstruction);
7380   match(Set dst (PopCountI (LoadI mem)));
7381 
7382   format %{ "popcnt  $dst, $mem" %}
7383   ins_encode %{
7384     __ popcntl($dst$$Register, $mem$$Address);
7385   %}
7386   ins_pipe(ialu_reg);
7387 %}
7388 
7389 // Note: Long.bitCount(long) returns an int.
// popCountL: matches PopCountL on a long register with popcntq.
// The destination is an int register (rRegI) although the source is long.
7390 instruct popCountL(rRegI dst, rRegL src) %{
7391   predicate(UsePopCountInstruction);
7392   match(Set dst (PopCountL src));
7393 
7394   format %{ "popcnt  $dst, $src" %}
7395   ins_encode %{
7396     __ popcntq($dst$$Register, $src$$Register);
7397   %}
7398   ins_pipe(ialu_reg);
7399 %}
7400 
7401 // Note: Long.bitCount(long) returns an int.
// popCountL_mem: matches PopCountL of a LoadL, folding the load into popcntq's memory operand.
// The destination is an int register (rRegI).
7402 instruct popCountL_mem(rRegI dst, memory mem) %{
7403   predicate(UsePopCountInstruction);
7404   match(Set dst (PopCountL (LoadL mem)));
7405 
7406   format %{ "popcnt  $dst, $mem" %}
7407   ins_encode %{
7408     __ popcntq($dst$$Register, $mem$$Address);
7409   %}
7410   ins_pipe(ialu_reg);
7411 %}
7412 
7413 
7414 //----------MemBar Instructions-----------------------------------------------
7415 // Memory barrier flavors
7416 
// membar_acquire: matches MemBarAcquire and emits no code (size 0, empty encoding, cost 0).
7417 instruct membar_acquire()
7418 %{
7419   match(MemBarAcquire);
7420   ins_cost(0);
7421 
7422   size(0);
7423   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7424   ins_encode();
7425   ins_pipe(empty);
7426 %}
7427 
// membar_acquire_lock: MemBarAcquire variant chosen when Matcher::prior_fast_lock(n) holds.
// Emits no code, like membar_acquire; only the printed format text differs.
7428 instruct membar_acquire_lock()
7429 %{
7430   match(MemBarAcquire);
7431   predicate(Matcher::prior_fast_lock(n));
7432   ins_cost(0);
7433 
7434   size(0);
7435   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7436   ins_encode();
7437   ins_pipe(empty);
7438 %}
7439 
// membar_release: matches MemBarRelease and emits no code (size 0, empty encoding, cost 0).
7440 instruct membar_release()
7441 %{
7442   match(MemBarRelease);
7443   ins_cost(0);
7444 
7445   size(0);
7446   format %{ "MEMBAR-release ! (empty encoding)" %}
7447   ins_encode();
7448   ins_pipe(empty);
7449 %}
7450 
// membar_release_lock: MemBarRelease variant chosen when Matcher::post_fast_unlock(n) holds.
// Emits no code, like membar_release; only the printed format text differs.
7451 instruct membar_release_lock()
7452 %{
7453   match(MemBarRelease);
7454   predicate(Matcher::post_fast_unlock(n));
7455   ins_cost(0);
7456 
7457   size(0);
7458   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7459   ins_encode();
7460   ins_pipe(empty);
7461 %}
7462 
// membar_volatile: matches MemBarVolatile and emits a StoreLoad barrier through
// the macro assembler. Declares the flags register killed. The format template
// prints a locked addl on MP systems and an empty-encoding note otherwise.
// High cost (400) compared with the zero-cost unnecessary_membar_volatile rule.
7463 instruct membar_volatile(rFlagsReg cr) %{
7464   match(MemBarVolatile);
7465   effect(KILL cr);
7466   ins_cost(400);
7467 
7468   format %{
7469     $$template
7470     if (os::is_MP()) {
7471       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7472     } else {
7473       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7474     }
7475   %}
7476   ins_encode %{
7477     __ membar(Assembler::StoreLoad);
7478   %}
7479   ins_pipe(pipe_slow);
7480 %}
7481 
// unnecessary_membar_volatile: MemBarVolatile variant chosen when
// Matcher::post_store_load_barrier(n) holds. Emits no code (size 0, cost 0).
7482 instruct unnecessary_membar_volatile()
7483 %{
7484   match(MemBarVolatile);
7485   predicate(Matcher::post_store_load_barrier(n));
7486   ins_cost(0);
7487 
7488   size(0);
7489   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7490   ins_encode();
7491   ins_pipe(empty);
7492 %}
7493 
7494 //----------Move Instructions--------------------------------------------------
7495 
// castX2P: matches CastX2P (long -> pointer) as a register-to-register copy
// via the enc_copy_wide encoding class (defined elsewhere in this file).
7496 instruct castX2P(rRegP dst, rRegL src)
7497 %{
7498   match(Set dst (CastX2P src));
7499 
7500   format %{ "movq    $dst, $src\t# long->ptr" %}
7501   ins_encode(enc_copy_wide(dst, src));
7502   ins_pipe(ialu_reg_reg); // XXX
7503 %}
7504 
// castP2X: matches CastP2X (pointer -> long) as a register-to-register copy
// via the enc_copy_wide encoding class (defined elsewhere in this file).
7505 instruct castP2X(rRegL dst, rRegP src)
7506 %{
7507   match(Set dst (CastP2X src));
7508 
7509   format %{ "movq    $dst, $src\t# ptr -> long" %}
7510   ins_encode(enc_copy_wide(dst, src));
7511   ins_pipe(ialu_reg_reg); // XXX
7512 %}
7513 
7514 
7515 // Convert oop pointer into compressed form
// encodeHeapOop: matches EncodeP when the node's pointer type is not NotNull.
// Copies src into dst if they are different registers, then encodes dst in place.
// Declares the flags register killed.
7516 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7517   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7518   match(Set dst (EncodeP src));
7519   effect(KILL cr);
7520   format %{ "encode_heap_oop $dst,$src" %}
7521   ins_encode %{
7522     Register s = $src$$Register;
7523     Register d = $dst$$Register;
7524     if (s != d) {
7525       __ movq(d, s); // the single-register encode below works in place on d
7526     }
7527     __ encode_heap_oop(d);
7528   %}
7529   ins_pipe(ialu_reg_long);
7530 %}
7531 
// encodeHeapOop_not_null: matches EncodeP when the node's pointer type is NotNull
// (the complement of encodeHeapOop's predicate). Uses the two-register form of
// encode_heap_oop_not_null, so no explicit copy is needed here.
7532 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7533   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7534   match(Set dst (EncodeP src));
7535   effect(KILL cr);
7536   format %{ "encode_heap_oop_not_null $dst,$src" %}
7537   ins_encode %{
7538     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7539   %}
7540   ins_pipe(ialu_reg_long);
7541 %}
7542 
// decodeHeapOop: matches DecodeN when the node's oop type is neither NotNull nor Constant.
// Copies src into dst if they are different registers, then decodes dst in place.
// Declares the flags register killed.
7543 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7544   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7545             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7546   match(Set dst (DecodeN src));
7547   effect(KILL cr);
7548   format %{ "decode_heap_oop $dst,$src" %}
7549   ins_encode %{
7550     Register s = $src$$Register;
7551     Register d = $dst$$Register;
7552     if (s != d) {
7553       __ movq(d, s); // the single-register decode below works in place on d
7554     }
7555     __ decode_heap_oop(d);
7556   %}
7557   ins_pipe(ialu_reg_long);
7558 %}
7559 
// decodeHeapOop_not_null: matches DecodeN when the node's oop type is NotNull or
// Constant (the complement of decodeHeapOop's predicate). Picks the two-register
// or the in-place form of decode_heap_oop_not_null depending on whether src and
// dst were allocated to the same register.
7560 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7561   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7562             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7563   match(Set dst (DecodeN src));
7564   effect(KILL cr);
7565   format %{ "decode_heap_oop_not_null $dst,$src" %}
7566   ins_encode %{
7567     Register s = $src$$Register;
7568     Register d = $dst$$Register;
7569     if (s != d) {
7570       __ decode_heap_oop_not_null(d, s);
7571     } else {
7572       __ decode_heap_oop_not_null(d);
7573     }
7574   %}
7575   ins_pipe(ialu_reg_long);
7576 %}
7577 
7578 
7579 //----------Conditional Move---------------------------------------------------
7580 // Jump
7581 // dummy instruction for generating temp registers
// jumpXtnd_offset: table jump for a Jump whose index is (switch_val << shift).
// The predicate is the constant false, so the matcher never selects this rule as written.
// Loads the table address ($constantaddress) into the TEMP register, then jumps
// indirectly through [dest + switch_val * scale].
7582 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7583   match(Jump (LShiftL switch_val shift));
7584   ins_cost(350);
7585   predicate(false);
7586   effect(TEMP dest);
7587 
7588   format %{ "leaq    $dest, [$constantaddress]\n\t"
7589             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7590   ins_encode %{
7591     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7592     // to do that and the compiler is using that register as one it can allocate.
7593     // So we build it all by hand.
7594     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7595     // ArrayAddress dispatch(table, index);
7596     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7597     __ lea($dest$$Register, $constantaddress);
7598     __ jmp(dispatch);
7599   %}
7600   ins_pipe(pipe_jmp);
7601   ins_pc_relative(1);
7602 %}
7603 
// jumpXtnd_addr: table jump for a Jump whose index is (switch_val << shift) + offset.
// Loads the table address ($constantaddress) into the TEMP register, then jumps
// indirectly through [dest + switch_val * scale + offset]; the offset constant is
// narrowed to int for the displacement.
7604 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7605   match(Jump (AddL (LShiftL switch_val shift) offset));
7606   ins_cost(350);
7607   effect(TEMP dest);
7608 
7609   format %{ "leaq    $dest, [$constantaddress]\n\t"
7610             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7611   ins_encode %{
7612     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7613     // to do that and the compiler is using that register as one it can allocate.
7614     // So we build it all by hand.
7615     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7616     // ArrayAddress dispatch(table, index);
7617     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7618     __ lea($dest$$Register, $constantaddress);
7619     __ jmp(dispatch);
7620   %}
7621   ins_pipe(pipe_jmp);
7622   ins_pc_relative(1);
7623 %}
7624 
// jumpXtnd: table jump for a plain Jump on switch_val (no shift, no offset).
// Loads the table address ($constantaddress) into the TEMP register, then jumps
// indirectly through [dest + switch_val] with scale factor times_1.
7625 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7626   match(Jump switch_val);
7627   ins_cost(350);
7628   effect(TEMP dest);
7629 
7630   format %{ "leaq    $dest, [$constantaddress]\n\t"
7631             "jmp     [$dest + $switch_val]\n\t" %}
7632   ins_encode %{
7633     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7634     // to do that and the compiler is using that register as one it can allocate.
7635     // So we build it all by hand.
7636     // Address index(noreg, switch_reg, Address::times_1);
7637     // ArrayAddress dispatch(table, index);
7638     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7639     __ lea($dest$$Register, $constantaddress);
7640     __ jmp(dispatch);
7641   %}
7642   ins_pipe(pipe_jmp);
7643   ins_pc_relative(1);
7644 %}
7645 
7646 // Conditional move
// cmovI_reg: matches CMoveI on signed flags (cmpOp / rFlagsReg), register source.
// $dst is both an input and the result. Base opcode is 0F 40; the enc_cmov
// encoding class receives the condition operand.
7647 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7648 %{
7649   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7650 
7651   ins_cost(200); // XXX
7652   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7653   opcode(0x0F, 0x40);
7654   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7655   ins_pipe(pipe_cmov_reg);
7656 %}
7657 
// cmovI_regU: matches CMoveI on unsigned flags (cmpOpU / rFlagsRegU), register source.
// Encoding is the same as cmovI_reg; only the operand classes and format text differ.
7658 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7659   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7660 
7661   ins_cost(200); // XXX
7662   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7663   opcode(0x0F, 0x40);
7664   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7665   ins_pipe(pipe_cmov_reg);
7666 %}
7667 
// cmovI_regUCF: matches CMoveI for the cmpOpUCF / rFlagsRegUCF operand classes and
// expands to cmovI_regU with the same operands, so it has no encoding of its own.
7668 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7669   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7670   ins_cost(200);
7671   expand %{
7672     cmovI_regU(cop, cr, dst, src);
7673   %}
7674 %}
7675 
7676 // Conditional move
// cmovI_mem: matches CMoveI on signed flags where the moved value is a LoadI,
// folding the load into the cmov's memory operand. Costed higher (250) than the
// register form (200).
7677 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7678   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7679 
7680   ins_cost(250); // XXX
7681   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7682   opcode(0x0F, 0x40);
7683   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7684   ins_pipe(pipe_cmov_mem);
7685 %}
7686 
7687 // Conditional move
// cmovI_memU: matches CMoveI on unsigned flags where the moved value is a LoadI.
// Encoding is the same as cmovI_mem; only the operand classes and format text differ.
7688 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7689 %{
7690   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7691 
7692   ins_cost(250); // XXX
7693   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7694   opcode(0x0F, 0x40);
7695   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7696   ins_pipe(pipe_cmov_mem);
7697 %}
7698 
// cmovI_memUCF: matches CMoveI-of-LoadI for the cmpOpUCF / rFlagsRegUCF operand
// classes and expands to cmovI_memU with the same operands.
7699 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7700   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7701   ins_cost(250);
7702   expand %{
7703     cmovI_memU(cop, cr, dst, src);
7704   %}
7705 %}
7706 
7707 // Conditional move
7708 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7709 %{
7710   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7711   // Compressed-pointer conditional move; encoded like the int form (cmovl, non-wide REX), signed condition.
7712   ins_cost(200); // XXX
7713   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7714   opcode(0x0F, 0x40); // CMOVcc opcode base; condition supplied through enc_cmov(cop)
7715   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7716   ins_pipe(pipe_cmov_reg);
7717 %}
7718 
7719 // Conditional move
7720 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7721 %{
7722   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7723   // Compressed-pointer conditional move for the unsigned operand classes (cmpOpU / rFlagsRegU).
7724   ins_cost(200); // XXX
7725   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7726   opcode(0x0F, 0x40); // same opcode bytes and encoding sequence as cmovN_reg
7727   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7728   ins_pipe(pipe_cmov_reg);
7729 %}
7730 
7731 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7732   match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); // same tree shape as cmovN_regU, UCF operand classes
7733   ins_cost(200); // same cost as the rule it expands into
7734   expand %{
7735     cmovN_regU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7736   %}
7737 %}
7738 
7739 // Conditional move
7740 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7741 %{
7742   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7743   // Pointer conditional move, register source, signed condition; uses the wide REX encoder (cmovq).
7744   ins_cost(200); // XXX
7745   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7746   opcode(0x0F, 0x40); // CMOVcc opcode base; condition supplied through enc_cmov(cop)
7747   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7748   ins_pipe(pipe_cmov_reg);  // XXX
7749 %}
7750 
7751 // Conditional move
7752 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7753 %{
7754   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7755   // Pointer conditional move for the unsigned operand classes (cmpOpU / rFlagsRegU); wide REX (cmovq).
7756   ins_cost(200); // XXX
7757   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7758   opcode(0x0F, 0x40); // same opcode bytes and encoding sequence as cmovP_reg
7759   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7760   ins_pipe(pipe_cmov_reg); // XXX
7761 %}
7762 
7763 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7764   match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // same tree shape as cmovP_regU, UCF operand classes
7765   ins_cost(200); // same cost as the rule it expands into
7766   expand %{
7767     cmovP_regU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7768   %}
7769 %}
7770 
7771 // DISABLED: Requires the ADLC to emit a bottom_type call that
7772 // correctly meets the two pointer arguments; one is an incoming
7773 // register but the other is a memory operand.  ALSO appears to
7774 // be buggy with implicit null checks.
7775 //
7776 //// Conditional move
7777 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7778 //%{
7779 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7780 //  ins_cost(250);
7781 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7782 //  opcode(0x0F,0x40);
7783 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7784 //  ins_pipe( pipe_cmov_mem );
7785 //%}
7786 //
7787 //// Conditional move
7788 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7789 //%{
7790 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7791 //  ins_cost(250);
7792 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7793 //  opcode(0x0F,0x40);
7794 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7795 //  ins_pipe( pipe_cmov_mem );
7796 //%}
7797 
7798 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7799 %{
7800   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7801   // Long conditional move, register source, signed condition; uses the wide REX encoder (cmovq).
7802   ins_cost(200); // XXX
7803   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7804   opcode(0x0F, 0x40); // CMOVcc opcode base; condition supplied through enc_cmov(cop)
7805   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7806   ins_pipe(pipe_cmov_reg);  // XXX
7807 %}
7808 
7809 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7810 %{
7811   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7812   // Long conditional move with the LoadL folded in as the memory operand; signed condition.
7813   ins_cost(200); // XXX NOTE(review): cmovI_mem uses 250 for its memory form; 200 here may be unintended -- confirm
7814   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7815   opcode(0x0F, 0x40); // CMOVcc opcode base; condition supplied through enc_cmov(cop)
7816   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7817   ins_pipe(pipe_cmov_mem);  // XXX
7818 %}
7819 
7820 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7821 %{
7822   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7823   // Long conditional move for the unsigned operand classes (cmpOpU / rFlagsRegU); wide REX (cmovq).
7824   ins_cost(200); // XXX
7825   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7826   opcode(0x0F, 0x40); // same opcode bytes and encoding sequence as cmovL_reg
7827   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7828   ins_pipe(pipe_cmov_reg); // XXX
7829 %}
7830 
7831 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7832   match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // same tree shape as cmovL_regU, UCF operand classes
7833   ins_cost(200); // same cost as the rule it expands into
7834   expand %{
7835     cmovL_regU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7836   %}
7837 %}
7838 
7839 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7840 %{
7841   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7842   // Long conditional move from memory for the unsigned operand classes (cmpOpU / rFlagsRegU).
7843   ins_cost(200); // XXX
7844   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7845   opcode(0x0F, 0x40); // same opcode bytes and encoding sequence as cmovL_mem
7846   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7847   ins_pipe(pipe_cmov_mem); // XXX
7848 %}
7849 
7850 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7851   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src)))); // same tree shape as cmovL_memU, UCF operand classes
7852   ins_cost(200); // same cost as the rule it expands into
7853   expand %{
7854     cmovL_memU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7855   %}
7856 %}
7857 
7858 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7859 %{
7860   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7861   // Float conditional move, signed condition. Per the format, a branch on the negated condition skips a movss.
7862   ins_cost(200); // XXX
7863   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7864             "movss     $dst, $src\n"
7865     "skip:" %}
7866   ins_encode(enc_cmovf_branch(cop, dst, src)); // whole branch-plus-move sequence comes from this one encoding class
7867   ins_pipe(pipe_slow);
7868 %}
7869 
7870 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7871 // %{
7872 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7873 
7874 //   ins_cost(200); // XXX
7875 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7876 //             "movss     $dst, $src\n"
7877 //     "skip:" %}
7878 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7879 //   ins_pipe(pipe_slow);
7880 // %}
7881 
7882 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7883 %{
7884   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7885   // Float conditional move for the unsigned operand classes; same branch-over-movss shape as the signed rule.
7886   ins_cost(200); // XXX
7887   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7888             "movss     $dst, $src\n"
7889     "skip:" %}
7890   ins_encode(enc_cmovf_branch(cop, dst, src)); // same encoding class as cmovF_reg
7891   ins_pipe(pipe_slow);
7892 %}
7893 
7894 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7895   match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // same tree shape as cmovF_regU, UCF operand classes
7896   ins_cost(200); // same cost as the rule it expands into
7897   expand %{
7898     cmovF_regU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7899   %}
7900 %}
7901 
7902 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7903 %{
7904   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7905   // Double conditional move, signed condition. Per the format, a branch on the negated condition skips a movsd.
7906   ins_cost(200); // XXX
7907   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7908             "movsd     $dst, $src\n"
7909     "skip:" %}
7910   ins_encode(enc_cmovd_branch(cop, dst, src)); // whole branch-plus-move sequence comes from this one encoding class
7911   ins_pipe(pipe_slow);
7912 %}
7913 
7914 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7915 %{
7916   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7917   // Double conditional move for the unsigned operand classes; same branch-over-movsd shape as the signed rule.
7918   ins_cost(200); // XXX
7919   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7920             "movsd     $dst, $src\n"
7921     "skip:" %}
7922   ins_encode(enc_cmovd_branch(cop, dst, src)); // same encoding class as cmovD_reg
7923   ins_pipe(pipe_slow);
7924 %}
7925 
7926 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7927   match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // same tree shape as cmovD_regU, UCF operand classes
7928   ins_cost(200); // same cost as the rule it expands into
7929   expand %{
7930     cmovD_regU(cop, cr, dst, src); // no format/encoding of its own: delegates to the unsigned form
7931   %}
7932 %}
7933 
7934 //----------Arithmetic Instructions--------------------------------------------
7935 //----------Addition Instructions----------------------------------------------
7936 
7937 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7938 %{
7939   match(Set dst (AddI dst src));
7940   effect(KILL cr); // the add clobbers the flags register
7941   // Int add, register + register, two-address form: dst is both left operand and result.
7942   format %{ "addl    $dst, $src\t# int" %}
7943   opcode(0x03); // ADD r32, r/m32
7944   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7945   ins_pipe(ialu_reg_reg);
7946 %}
7947 
7948 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7949 %{
7950   match(Set dst (AddI dst src));
7951   effect(KILL cr); // the add clobbers the flags register
7952   // Int add of an immediate to a register; dst is both left operand and result.
7953   format %{ "addl    $dst, $src\t# int" %}
7954   opcode(0x81, 0x00); /* /0 id */
7955   ins_encode(OpcSErm(dst, src), Con8or32(src)); // immediate emitted as 8 or 32 bits, per the encoder name -- TODO confirm selection rule
7956   ins_pipe( ialu_reg );
7957 %}
7958 
7959 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7960 %{
7961   match(Set dst (AddI dst (LoadI src)));
7962   effect(KILL cr); // the add clobbers the flags register
7963   // Int add with the LoadI folded in as the instruction's memory operand.
7964   ins_cost(125); // XXX
7965   format %{ "addl    $dst, $src\t# int" %}
7966   opcode(0x03); // ADD r32, r/m32
7967   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7968   ins_pipe(ialu_reg_mem);
7969 %}
7970 
7971 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7972 %{
7973   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7974   effect(KILL cr); // the add clobbers the flags register
7975   // Int read-modify-write: load, add and store on the same address collapse into one add-to-memory.
7976   ins_cost(150); // XXX
7977   format %{ "addl    $dst, $src\t# int" %}
7978   opcode(0x01); /* Opcode 01 /r */
7979   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory second: argument order is (src, dst)
7980   ins_pipe(ialu_mem_reg);
7981 %}
7982 
7983 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7984 %{
7985   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7986   effect(KILL cr); // the add clobbers the flags register
7987   // Int read-modify-write: adds an immediate directly to the memory location.
7988   ins_cost(125); // XXX
7989   format %{ "addl    $dst, $src\t# int" %}
7990   opcode(0x81); /* Opcode 81 /0 id */
7991   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension
7992   ins_pipe(ialu_mem_imm);
7993 %}
7994 
7995 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7996 %{
7997   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
7998   match(Set dst (AddI dst src));
7999   effect(KILL cr); // flags are clobbered
8000   // Int add of the immI1 operand, emitted as incl; src is matched but never encoded.
8001   format %{ "incl    $dst\t# int" %}
8002   opcode(0xFF, 0x00); // FF /0
8003   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8004   ins_pipe(ialu_reg);
8005 %}
8006 
8007 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8008 %{
8009   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8010   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8011   effect(KILL cr); // flags are clobbered
8012   // Int read-modify-write add of the immI1 operand, emitted as incl on memory; src is never encoded.
8013   ins_cost(125); // XXX
8014   format %{ "incl    $dst\t# int" %}
8015   opcode(0xFF); /* Opcode FF /0 */
8016   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst)); // 0x00 is the /0 opcode extension
8017   ins_pipe(ialu_mem_imm);
8018 %}
8019 
8020 // XXX why does that use AddI
8021 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8022 %{
8023   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8024   match(Set dst (AddI dst src));
8025   effect(KILL cr); // flags are clobbered
8026   // Int add of the immI_M1 operand, emitted as decl; src is matched but never encoded.
8027   format %{ "decl    $dst\t# int" %}
8028   opcode(0xFF, 0x01); // FF /1
8029   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8030   ins_pipe(ialu_reg);
8031 %}
8032 
8033 // XXX why does that use AddI
8034 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8035 %{
8036   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8037   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8038   effect(KILL cr); // flags are clobbered
8039   // Int read-modify-write add of the immI_M1 operand, emitted as decl on memory; src is never encoded.
8040   ins_cost(125); // XXX
8041   format %{ "decl    $dst\t# int" %}
8042   opcode(0xFF); /* Opcode FF /1 */
8043   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst)); // 0x01 is the /1 opcode extension
8044   ins_pipe(ialu_mem_imm);
8045 %}
8046 
8047 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8048 %{
8049   match(Set dst (AddI src0 src1));
8050   // Three-operand int add of a register and an immediate via lea: dst is separate from src0 and no flags effect is declared.
8051   ins_cost(110);
8052   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8053   opcode(0x8D); /* 0x8D /r */
8054   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8055   ins_pipe(ialu_reg_reg); // the leading Opcode(0x67) above is the address-size prefix shown as "addr32" in the format
8056 %}
8057 
8058 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8059 %{
8060   match(Set dst (AddL dst src));
8061   effect(KILL cr); // the add clobbers the flags register
8062   // Long add, register + register, two-address form; wide REX encoder (addq).
8063   format %{ "addq    $dst, $src\t# long" %}
8064   opcode(0x03); // ADD r, r/m
8065   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8066   ins_pipe(ialu_reg_reg);
8067 %}
8068 
8069 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8070 %{
8071   match(Set dst (AddL dst src));
8072   effect(KILL cr); // the add clobbers the flags register
8073   // Long add of an immL32 immediate to a register; dst is both left operand and result.
8074   format %{ "addq    $dst, $src\t# long" %}
8075   opcode(0x81, 0x00); /* /0 id */
8076   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // immediate emitted as 8 or 32 bits, per the encoder name -- TODO confirm selection rule
8077   ins_pipe( ialu_reg );
8078 %}
8079 
8080 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8081 %{
8082   match(Set dst (AddL dst (LoadL src)));
8083   effect(KILL cr); // the add clobbers the flags register
8084   // Long add with the LoadL folded in as the instruction's memory operand.
8085   ins_cost(125); // XXX
8086   format %{ "addq    $dst, $src\t# long" %}
8087   opcode(0x03); // ADD r, r/m
8088   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8089   ins_pipe(ialu_reg_mem);
8090 %}
8091 
8092 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8093 %{
8094   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8095   effect(KILL cr); // the add clobbers the flags register
8096   // Long read-modify-write: load, add and store on the same address collapse into one add-to-memory.
8097   ins_cost(150); // XXX
8098   format %{ "addq    $dst, $src\t# long" %}
8099   opcode(0x01); /* Opcode 01 /r */
8100   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory second: argument order is (src, dst)
8101   ins_pipe(ialu_mem_reg);
8102 %}
8103 
8104 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8105 %{
8106   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8107   effect(KILL cr); // the add clobbers the flags register
8108   // Long read-modify-write: adds an immL32 immediate directly to the memory location.
8109   ins_cost(125); // XXX
8110   format %{ "addq    $dst, $src\t# long" %}
8111   opcode(0x81); /* Opcode 81 /0 id */
8112   ins_encode(REX_mem_wide(dst),
8113              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8114   ins_pipe(ialu_mem_imm);
8115 %}
8116 
8117 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8118 %{
8119   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8120   match(Set dst (AddL dst src));
8121   effect(KILL cr); // flags are clobbered
8122   // Long add of the immL1 operand, emitted as incq. dst must be a long register (rRegL, not rRegI): the rule matches AddL and uses the wide REX encoder.
8123   format %{ "incq    $dst\t# long" %}
8124   opcode(0xFF, 0x00); // FF /0
8125   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8126   ins_pipe(ialu_reg);
8127 %}
8128 
8129 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8130 %{
8131   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8132   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8133   effect(KILL cr); // flags are clobbered
8134   // Long read-modify-write add of the immL1 operand, emitted as incq on memory; src is never encoded.
8135   ins_cost(125); // XXX
8136   format %{ "incq    $dst\t# long" %}
8137   opcode(0xFF); /* Opcode FF /0 */
8138   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst)); // 0x00 is the /0 opcode extension
8139   ins_pipe(ialu_mem_imm);
8140 %}
8141 
8142 // XXX why does that use AddL
8143 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8144 %{
8145   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8146   match(Set dst (AddL dst src));
8147   effect(KILL cr); // flags are clobbered
8148   // Long add of the immL_M1 operand, emitted as decq; src is matched but never encoded.
8149   format %{ "decq    $dst\t# long" %}
8150   opcode(0xFF, 0x01); // FF /1
8151   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8152   ins_pipe(ialu_reg);
8153 %}
8154 
8155 // XXX why does that use AddL
8156 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8157 %{
8158   predicate(UseIncDec); // rule is only available when the UseIncDec flag is set
8159   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8160   effect(KILL cr); // flags are clobbered
8161   // Long read-modify-write add of the immL_M1 operand, emitted as decq on memory; src is never encoded.
8162   ins_cost(125); // XXX
8163   format %{ "decq    $dst\t# long" %}
8164   opcode(0xFF); /* Opcode FF /1 */
8165   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst)); // 0x01 is the /1 opcode extension
8166   ins_pipe(ialu_mem_imm);
8167 %}
8168 
8169 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8170 %{
8171   match(Set dst (AddL src0 src1));
8172   // Three-operand long add of a register and an immL32 via leaq: dst is separate from src0 and no flags effect is declared.
8173   ins_cost(110);
8174   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8175   opcode(0x8D); /* 0x8D /r */
8176   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8177   ins_pipe(ialu_reg_reg);
8178 %}
8179 
8180 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8181 %{
8182   match(Set dst (AddP dst src));
8183   effect(KILL cr); // the add clobbers the flags register
8184   // Pointer plus long offset held in a register; dst is both the base pointer and the result.
8185   format %{ "addq    $dst, $src\t# ptr" %}
8186   opcode(0x03); // ADD r, r/m
8187   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8188   ins_pipe(ialu_reg_reg);
8189 %}
8190 
8191 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8192 %{
8193   match(Set dst (AddP dst src));
8194   effect(KILL cr); // the add clobbers the flags register
8195   // Pointer plus an immL32 offset; dst is both the base pointer and the result.
8196   format %{ "addq    $dst, $src\t# ptr" %}
8197   opcode(0x81, 0x00); /* /0 id */
8198   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // immediate emitted as 8 or 32 bits, per the encoder name -- TODO confirm selection rule
8199   ins_pipe( ialu_reg );
8200 %}
8201 
8202 // XXX addP mem ops ????
8203 
8204 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8205 %{
8206   match(Set dst (AddP src0 src1));
8207   // Three-operand pointer plus immL32 offset via leaq: dst is separate from src0 and no flags effect is declared.
8208   ins_cost(110);
8209   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8210   opcode(0x8D); /* 0x8D /r */
8211   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8212   ins_pipe(ialu_reg_reg);
8213 %}
8214 
8215 instruct checkCastPP(rRegP dst)
8216 %{
8217   match(Set dst (CheckCastPP dst));
8218   // Matches CheckCastPP in place on dst and emits no machine code (size 0, empty encoding).
8219   size(0);
8220   format %{ "# checkcastPP of $dst" %}
8221   ins_encode(/* empty encoding */);
8222   ins_pipe(empty);
8223 %}
8224 
8225 instruct castPP(rRegP dst)
8226 %{
8227   match(Set dst (CastPP dst));
8228   // Matches CastPP in place on dst and emits no machine code (size 0, empty encoding).
8229   size(0);
8230   format %{ "# castPP of $dst" %}
8231   ins_encode(/* empty encoding */);
8232   ins_pipe(empty);
8233 %}
8234 
8235 instruct castII(rRegI dst)
8236 %{
8237   match(Set dst (CastII dst));
8238   // Matches CastII in place on dst and emits no machine code (size 0, empty encoding).
8239   size(0);
8240   format %{ "# castII of $dst" %}
8241   ins_encode(/* empty encoding */);
8242   ins_cost(0); // explicitly free, unlike the two pointer casts, which set no cost
8243   ins_pipe(empty);
8244 %}
8245 
8246 // LoadP-locked same as a regular LoadP when used with compare-swap
8247 instruct loadPLocked(rRegP dst, memory mem)
8248 %{
8249   match(Set dst (LoadPLocked mem));
8250   // Plain 64-bit load of a pointer; no lock prefix or extra instruction is emitted here.
8251   ins_cost(125); // XXX
8252   format %{ "movq    $dst, $mem\t# ptr locked" %}
8253   opcode(0x8B); // MOV r, r/m
8254   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8255   ins_pipe(ialu_reg_mem); // XXX
8256 %}
8257 
8258 // LoadL-locked - same as a regular LoadL when used with compare-swap
8259 instruct loadLLocked(rRegL dst, memory mem)
8260 %{
8261   match(Set dst (LoadLLocked mem));
8262   // Plain 64-bit load of a long; no lock prefix or extra instruction is emitted here.
8263   ins_cost(125); // XXX
8264   format %{ "movq    $dst, $mem\t# long locked" %}
8265   opcode(0x8B); // MOV r, r/m
8266   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8267   ins_pipe(ialu_reg_mem); // XXX
8268 %}
8269 
8270 // Conditional-store of the updated heap-top.
8271 // Used during allocation of the shared heap.
8272 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8273 
8274 instruct storePConditional(memory heap_top_ptr,
8275                            rax_RegP oldval, rRegP newval,
8276                            rFlagsReg cr)
8277 %{
8278   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8279   // Result is the flags register; oldval is pinned to rax. NOTE(review): no effect(KILL oldval) here, unlike storeIConditional/storeLConditional -- confirm this is intended.
8280   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8281             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8282   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8283   ins_encode(lock_prefix,
8284              REX_reg_mem_wide(newval, heap_top_ptr),
8285              OpcP, OpcS,
8286              reg_mem(newval, heap_top_ptr));
8287   ins_pipe(pipe_cmpxchg);
8288 %}
8289 
8290 // Conditional-store of an int value.
8291 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8292 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8293 %{
8294   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8295   effect(KILL oldval); // rax does not survive the instruction
8296   // Result is the flags register; emitted as a lock-prefixed 32-bit cmpxchg with oldval pinned to rax.
8297   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8298   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8299   ins_encode(lock_prefix,
8300              REX_reg_mem(newval, mem),
8301              OpcP, OpcS,
8302              reg_mem(newval, mem));
8303   ins_pipe(pipe_cmpxchg);
8304 %}
8305 
8306 // Conditional-store of a long value.
8307 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8308 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8309 %{
8310   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8311   effect(KILL oldval); // rax does not survive the instruction
8312   // Result is the flags register; emitted as a lock-prefixed 64-bit cmpxchg with oldval pinned to rax.
8313   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8314   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8315   ins_encode(lock_prefix,
8316              REX_reg_mem_wide(newval, mem),
8317              OpcP, OpcS,
8318              reg_mem(newval, mem));
8319   ins_pipe(pipe_cmpxchg);
8320 %}
8321 
8322 
8323 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8324 instruct compareAndSwapP(rRegI res,
8325                          memory mem_ptr,
8326                          rax_RegP oldval, rRegP newval,
8327                          rFlagsReg cr)
8328 %{
8329   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8330   effect(KILL cr, KILL oldval);
8331   // Pointer CAS yielding an int in res: lock cmpxchgq, then sete + movzbl turn the equal flag into 0/1. Flags and rax are clobbered.
8332   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8333             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8334             "sete    $res\n\t"
8335             "movzbl  $res, $res" %}
8336   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8337   ins_encode(lock_prefix,
8338              REX_reg_mem_wide(newval, mem_ptr),
8339              OpcP, OpcS,
8340              reg_mem(newval, mem_ptr),
8341              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8342              REX_reg_breg(res, res), // movzbl
8343              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8344   ins_pipe( pipe_cmpxchg );
8345 %}
8346 
8347 instruct compareAndSwapL(rRegI res,
8348                          memory mem_ptr,
8349                          rax_RegL oldval, rRegL newval,
8350                          rFlagsReg cr)
8351 %{
8352   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8353   effect(KILL cr, KILL oldval);
8354   // Long CAS yielding an int in res: lock cmpxchgq, then sete + movzbl turn the equal flag into 0/1. Flags and rax are clobbered.
8355   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8356             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8357             "sete    $res\n\t"
8358             "movzbl  $res, $res" %}
8359   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8360   ins_encode(lock_prefix,
8361              REX_reg_mem_wide(newval, mem_ptr),
8362              OpcP, OpcS,
8363              reg_mem(newval, mem_ptr),
8364              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8365              REX_reg_breg(res, res), // movzbl
8366              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8367   ins_pipe( pipe_cmpxchg );
8368 %}
8369 
8370 instruct compareAndSwapI(rRegI res,
8371                          memory mem_ptr,
8372                          rax_RegI oldval, rRegI newval,
8373                          rFlagsReg cr)
8374 %{
8375   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8376   effect(KILL cr, KILL oldval);
8377   // Int CAS yielding an int in res: lock cmpxchgl (non-wide REX), then sete + movzbl turn the equal flag into 0/1. Flags and rax are clobbered.
8378   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8379             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8380             "sete    $res\n\t"
8381             "movzbl  $res, $res" %}
8382   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8383   ins_encode(lock_prefix,
8384              REX_reg_mem(newval, mem_ptr),
8385              OpcP, OpcS,
8386              reg_mem(newval, mem_ptr),
8387              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8388              REX_reg_breg(res, res), // movzbl
8389              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8390   ins_pipe( pipe_cmpxchg );
8391 %}
8392 
8393 
8394 instruct compareAndSwapN(rRegI res,
8395                           memory mem_ptr,
8396                           rax_RegN oldval, rRegN newval,
8397                           rFlagsReg cr) %{
8398   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8399   effect(KILL cr, KILL oldval);
8400   // Compressed-pointer CAS yielding an int in res: lock cmpxchgl (non-wide REX), then sete + movzbl turn the equal flag into 0/1.
8401   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8402             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8403             "sete    $res\n\t"
8404             "movzbl  $res, $res" %}
8405   opcode(0x0F, 0xB1); // CMPXCHG r/m, r
8406   ins_encode(lock_prefix,
8407              REX_reg_mem(newval, mem_ptr),
8408              OpcP, OpcS,
8409              reg_mem(newval, mem_ptr),
8410              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8411              REX_reg_breg(res, res), // movzbl
8412              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8413   ins_pipe( pipe_cmpxchg );
8414 %}
8415 
8416 //----------Subtraction Instructions-------------------------------------------
8417 
8418 // Integer Subtraction Instructions
8419 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8420 %{
8421   match(Set dst (SubI dst src));
8422   effect(KILL cr); // the subtract clobbers the flags register
8423   // Int subtract, register - register, two-address form: dst is both minuend and result.
8424   format %{ "subl    $dst, $src\t# int" %}
8425   opcode(0x2B); // SUB r32, r/m32
8426   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8427   ins_pipe(ialu_reg_reg);
8428 %}
8429 
8430 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8431 %{
8432   match(Set dst (SubI dst src));
8433   effect(KILL cr); // the subtract clobbers the flags register
8434   // Int subtract of an immediate from a register; dst is both minuend and result.
8435   format %{ "subl    $dst, $src\t# int" %}
8436   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8437   ins_encode(OpcSErm(dst, src), Con8or32(src)); // immediate emitted as 8 or 32 bits, per the encoder name -- TODO confirm selection rule
8438   ins_pipe(ialu_reg);
8439 %}
8440 
8441 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8442 %{
8443   match(Set dst (SubI dst (LoadI src)));
8444   effect(KILL cr); // the subtract clobbers the flags register
8445   // Int subtract with the LoadI folded in as the instruction's memory operand.
8446   ins_cost(125);
8447   format %{ "subl    $dst, $src\t# int" %}
8448   opcode(0x2B); // SUB r32, r/m32
8449   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8450   ins_pipe(ialu_reg_mem);
8451 %}
8452 
8453 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8454 %{
8455   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8456   effect(KILL cr); // the subtract clobbers the flags register
8457   // Int read-modify-write: load, subtract and store on the same address collapse into one subtract-from-memory.
8458   ins_cost(150);
8459   format %{ "subl    $dst, $src\t# int" %}
8460   opcode(0x29); /* Opcode 29 /r */
8461   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory second: argument order is (src, dst)
8462   ins_pipe(ialu_mem_reg);
8463 %}
8464 
8465 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8466 %{
8467   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8468   effect(KILL cr); // the subtract clobbers the flags register
8469   // Int read-modify-write: subtracts an immediate directly from the memory location.
8470   ins_cost(125); // XXX
8471   format %{ "subl    $dst, $src\t# int" %}
8472   opcode(0x81); /* Opcode 81 /5 id */
8473   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src)); // 0x05 is the /5 opcode extension
8474   ins_pipe(ialu_mem_imm);
8475 %}
8476 
8477 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8478 %{
8479   match(Set dst (SubL dst src));
8480   effect(KILL cr); // the subtract clobbers the flags register
8481   // Long subtract, register - register, two-address form; wide REX encoder (subq).
8482   format %{ "subq    $dst, $src\t# long" %}
8483   opcode(0x2B); // SUB r, r/m
8484   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8485   ins_pipe(ialu_reg_reg);
8486 %}
8487 
8488 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8489 %{
8490   match(Set dst (SubL dst src));
8491   effect(KILL cr); // the subtract clobbers the flags register
8492   // Long subtract of an immL32 immediate. dst must be a long register (rRegL, not rRegI): the rule matches SubL and uses the wide encoder (subq).
8493   format %{ "subq    $dst, $src\t# long" %}
8494   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8495   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // immediate emitted as 8 or 32 bits, per the encoder name -- TODO confirm selection rule
8496   ins_pipe(ialu_reg);
8497 %}
8498 
8499 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8500 %{
8501   match(Set dst (SubL dst (LoadL src)));
8502   effect(KILL cr); // the subtract clobbers the flags register
8503   // Long subtract with the LoadL folded in as the instruction's memory operand.
8504   ins_cost(125);
8505   format %{ "subq    $dst, $src\t# long" %}
8506   opcode(0x2B); // SUB r, r/m
8507   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8508   ins_pipe(ialu_reg_mem);
8509 %}
8510 
8511 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8512 %{
8513   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8514   effect(KILL cr); // the subtract clobbers the flags register
8515   // Long read-modify-write: load, subtract and store on the same address collapse into one subtract-from-memory.
8516   ins_cost(150);
8517   format %{ "subq    $dst, $src\t# long" %}
8518   opcode(0x29); /* Opcode 29 /r */
8519   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory second: argument order is (src, dst)
8520   ins_pipe(ialu_mem_reg);
8521 %}
8522 
8523 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8524 %{
8525   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8526   effect(KILL cr); // the subtract clobbers the flags register
8527   // Long read-modify-write: subtracts an immL32 immediate directly from the memory location.
8528   ins_cost(125); // XXX
8529   format %{ "subq    $dst, $src\t# long" %}
8530   opcode(0x81); /* Opcode 81 /5 id */
8531   ins_encode(REX_mem_wide(dst),
8532              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8533   ins_pipe(ialu_mem_imm);
8534 %}
8535 
8536 // Subtract from a pointer
8537 // XXX hmpf???
8538 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8539 %{
8540   match(Set dst (AddP dst (SubI zero src)));
8541   effect(KILL cr); // the subtract clobbers the flags register
8542   // Pointer plus a negated int (0 - src), emitted as one subq. NOTE(review): src is an int register used in a 64-bit subtract -- confirm upper-bit handling.
8543   format %{ "subq    $dst, $src\t# ptr - int" %}
8544   opcode(0x2B); // SUB r, r/m
8545   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8546   ins_pipe(ialu_reg_reg);
8547 %}
8548 
8549 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8550 %{
8551   match(Set dst (SubI zero dst));
8552   effect(KILL cr); // flags are clobbered
8553   // Int negate: matches 0 - dst and emits negl in place; the zero operand is never encoded.
8554   format %{ "negl    $dst\t# int" %}
8555   opcode(0xF7, 0x03);  // Opcode F7 /3
8556   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8557   ins_pipe(ialu_reg);
8558 %}
8559 
8560 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8561 %{
8562   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8563   effect(KILL cr); // flags are clobbered
8564   // Int negate in memory: load, 0 - x and store on the same address collapse into one negl on the memory operand.
8565   format %{ "negl    $dst\t# int" %}
8566   opcode(0xF7, 0x03);  // Opcode F7 /3
8567   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" is the second opcode() value (0x03), used as the /3 extension
8568   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory read-modify-write rule -- confirm
8569 %}
8570 
8571 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8572 %{
8573   match(Set dst (SubL zero dst));
8574   effect(KILL cr); // flags are clobbered
8575   // Long negate: matches 0 - dst and emits negq in place; the zero operand is never encoded.
8576   format %{ "negq    $dst\t# long" %}
8577   opcode(0xF7, 0x03);  // Opcode F7 /3
8578   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8579   ins_pipe(ialu_reg);
8580 %}
8581 
8582 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8583 %{
8584   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8585   effect(KILL cr); // flags are clobbered
8586   // Long negate in memory: load, 0 - x and store on the same address collapse into one negq on the memory operand.
8587   format %{ "negq    $dst\t# long" %}
8588   opcode(0xF7, 0x03);  // Opcode F7 /3
8589   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" is the second opcode() value (0x03), used as the /3 extension
8590   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory read-modify-write rule -- confirm
8591 %}
8592 
8593 
8594 //----------Multiplication/Division Instructions-------------------------------
8595 // Integer Multiplication Instructions
8596 // Multiply Register
8597 
8598 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8599 %{
8600   match(Set dst (MulI dst src));
8601   effect(KILL cr); // the multiply clobbers the flags register
8602   // Int multiply, register * register, two-address form: dst is both an operand and the result.
8603   ins_cost(300);
8604   format %{ "imull   $dst, $src\t# int" %}
8605   opcode(0x0F, 0xAF); // IMUL r, r/m (two-byte opcode)
8606   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8607   ins_pipe(ialu_reg_reg_alu0);
8608 %}
8609 
8610 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8611 %{
8612   match(Set dst (MulI src imm));
8613   effect(KILL cr); // the multiply clobbers the flags register
8614   // Int multiply by an immediate, three-operand form: dst is separate from src.
8615   ins_cost(300);
8616   format %{ "imull   $dst, $src, $imm\t# int" %}
8617   opcode(0x69); /* 69 /r id */
8618   ins_encode(REX_reg_reg(dst, src),
8619              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8620   ins_pipe(ialu_reg_reg_alu0);
8621 %}
8622 
8623 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8624 %{
8625   match(Set dst (MulI dst (LoadI src)));
8626   effect(KILL cr); // the multiply clobbers the flags register
8627   // Int multiply with the LoadI folded in as the instruction's memory operand.
8628   ins_cost(350);
8629   format %{ "imull   $dst, $src\t# int" %}
8630   opcode(0x0F, 0xAF); // IMUL r, r/m (two-byte opcode)
8631   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8632   ins_pipe(ialu_reg_mem_alu0);
8633 %}
8634 
8635 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8636 %{
8637   match(Set dst (MulI (LoadI src) imm));
8638   effect(KILL cr); // the multiply clobbers the flags register
8639   // Int multiply of a loaded value by an immediate, three-operand form: dst is written only.
8640   ins_cost(300);
8641   format %{ "imull   $dst, $src, $imm\t# int" %}
8642   opcode(0x69); /* 69 /r id */
8643   ins_encode(REX_reg_mem(dst, src),
8644              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8645   ins_pipe(ialu_reg_mem_alu0);
8646 %}
8647 
8648 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8649 %{
8650   match(Set dst (MulL dst src));
8651   effect(KILL cr); // the multiply clobbers the flags register
8652   // Long multiply, register * register, two-address form; wide REX encoder (imulq).
8653   ins_cost(300);
8654   format %{ "imulq   $dst, $src\t# long" %}
8655   opcode(0x0F, 0xAF); // IMUL r, r/m (two-byte opcode)
8656   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8657   ins_pipe(ialu_reg_reg_alu0);
8658 %}
8659 
8660 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8661 %{
8662   match(Set dst (MulL src imm));
8663   effect(KILL cr); // the multiply clobbers the flags register
8664   // Long multiply by an immL32 immediate, three-operand form: dst is separate from src.
8665   ins_cost(300);
8666   format %{ "imulq   $dst, $src, $imm\t# long" %}
8667   opcode(0x69); /* 69 /r id */
8668   ins_encode(REX_reg_reg_wide(dst, src),
8669              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8670   ins_pipe(ialu_reg_reg_alu0);
8671 %}
8672 
8673 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8674 %{  // Long multiply of a register by a long loaded from memory (load folded into imulq).
8675   match(Set dst (MulL dst (LoadL src)));  // dst = dst * [src]
8676   effect(KILL cr);                        // flags register is clobbered
8677 
8678   ins_cost(350);
8679   format %{ "imulq   $dst, $src\t# long" %}
8680   opcode(0x0F, 0xAF);
8681   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8682   ins_pipe(ialu_reg_mem_alu0);
8683 %}
8684 
8685 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8686 %{  // Long multiply of a memory operand by an immediate of operand class immL32, result in dst.
8687   match(Set dst (MulL (LoadL src) imm));  // dst = [src] * imm
8688   effect(KILL cr);                        // flags register is clobbered
8689 
8690   ins_cost(300);  // NOTE(review): lower than mulL_mem's 350 although this form also reads memory -- confirm intended
8691   format %{ "imulq   $dst, $src, $imm\t# long" %}
8692   opcode(0x69); /* 69 /r id */
8693   ins_encode(REX_reg_mem_wide(dst, src),
8694              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8695   ins_pipe(ialu_reg_mem_alu0);
8696 %}
8697 
8698 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8699 %{  // High half of a long multiply: one-operand imulq, result taken from rdx (dst is class rdx_RegL).
8700   match(Set dst (MulHiL src rax));
8701   effect(USE_KILL rax, KILL cr);  // rax is consumed as an input and then overwritten; flags clobbered
8702 
8703   ins_cost(300);
8704   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8705   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8706   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only src is encoded; rax/rdx are implicit operands
8707   ins_pipe(ialu_reg_reg_alu0);
8708 %}
8709 
8710 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8711                    rFlagsReg cr)
8712 %{  // Signed int division; dividend and quotient live in rax, divisor is kept out of rax/rdx.
8713   match(Set rax (DivI rax div));  // rax = rax / div
8714   effect(KILL rdx, KILL cr);      // rdx and flags are clobbered
8715 
8716   ins_cost(30*100+10*100); // XXX
8717   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8718             "jne,s   normal\n\t"
8719             "xorl    rdx, rdx\n\t"
8720             "cmpl    $div, -1\n\t"
8721             "je,s    done\n"
8722     "normal: cdql\n\t"
8723             "idivl   $div\n"
8724     "done:"        %}  // as printed: idivl is skipped for 0x80000000 / -1, where the hardware divide would trap
8725   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8726   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // NOTE(review): cdql_enc presumably emits the guard + cdql shown in format -- confirm
8727   ins_pipe(ialu_reg_reg_alu0);
8728 %}
8729 
8730 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8731                    rFlagsReg cr)
8732 %{  // Signed long division; dividend and quotient live in rax, divisor is kept out of rax/rdx.
8733   match(Set rax (DivL rax div));  // rax = rax / div
8734   effect(KILL rdx, KILL cr);      // rdx and flags are clobbered
8735 
8736   ins_cost(30*100+10*100); // XXX
8737   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8738             "cmpq    rax, rdx\n\t"
8739             "jne,s   normal\n\t"
8740             "xorl    rdx, rdx\n\t"
8741             "cmpq    $div, -1\n\t"
8742             "je,s    done\n"
8743     "normal: cdqq\n\t"
8744             "idivq   $div\n"
8745     "done:"        %}  // as printed: idivq is skipped for 0x8000000000000000 / -1, where the hardware divide would trap
8746   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8747   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // NOTE(review): cdqq_enc presumably emits the guard + cdqq shown in format -- confirm
8748   ins_pipe(ialu_reg_reg_alu0);
8749 %}
8750 
8751 // Integer DIVMOD with Register: one idivl yields both quotient and remainder (operands rax and rdx)
8752 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8753                              rFlagsReg cr)
8754 %{
8755   match(DivModI rax div);  // no Set: the node has two results rather than a single dst
8756   effect(KILL cr);         // only flags are killed; rax and rdx are not declared clobbered here
8757 
8758   ins_cost(30*100+10*100); // XXX
8759   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8760             "jne,s   normal\n\t"
8761             "xorl    rdx, rdx\n\t"
8762             "cmpl    $div, -1\n\t"
8763             "je,s    done\n"
8764     "normal: cdql\n\t"
8765             "idivl   $div\n"
8766     "done:"        %}  // same guarded sequence as divI_rReg / modI_rReg
8767   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8768   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8769   ins_pipe(pipe_slow);
8770 %}
8771 
8772 // Long DIVMOD with Register: one idivq yields both quotient and remainder (operands rax and rdx)
8773 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8774                              rFlagsReg cr)
8775 %{
8776   match(DivModL rax div);  // no Set: the node has two results rather than a single dst
8777   effect(KILL cr);         // only flags are killed; rax and rdx are not declared clobbered here
8778 
8779   ins_cost(30*100+10*100); // XXX
8780   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8781             "cmpq    rax, rdx\n\t"
8782             "jne,s   normal\n\t"
8783             "xorl    rdx, rdx\n\t"
8784             "cmpq    $div, -1\n\t"
8785             "je,s    done\n"
8786     "normal: cdqq\n\t"
8787             "idivq   $div\n"
8788     "done:"        %}  // same guarded sequence as divL_rReg / modL_rReg
8789   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8790   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8791   ins_pipe(pipe_slow);
8792 %}
8793 
8794 //----------- DivL-By-Constant-Expansions--------------------------------------
8795 // DivI cases are handled by the compiler
8796 
8797 // Magic constant, reciprocal of 10: loads 0x6666666666666667 for the divL_10 expansion
8798 instruct loadConL_0x6666666666666667(rRegL dst)
8799 %{
8800   effect(DEF dst);  // no match rule: only reachable through an expand (see divL_10)
8801 
8802   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8803   ins_encode(load_immL(dst, 0x6666666666666667));  // the format string above must print this same 16-digit constant
8804   ins_pipe(ialu_reg);
8805 %}
8806 
8807 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8808 %{  // Expand-only helper (no match rule): one-operand imulq, result taken from rdx (dst is class rdx_RegL).
8809   effect(DEF dst, USE src, USE_KILL rax, KILL cr);  // rax is read and then overwritten; flags clobbered
8810 
8811   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8812   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8813   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // same encoding as mulHiL_rReg
8814   ins_pipe(ialu_reg_reg_alu0);
8815 %}
8816 
8817 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8818 %{  // Expand-only helper (no match rule): arithmetic shift right of a long by the fixed count 63.
8819   effect(USE_DEF dst, KILL cr);  // dst is updated in place; flags clobbered
8820 
8821   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8822   opcode(0xC1, 0x7); /* C1 /7 ib */
8823   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // 0x3F == 63
8824   ins_pipe(ialu_reg);
8825 %}
8826 
8827 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8828 %{  // Expand-only helper (no match rule): arithmetic shift right of a long by the fixed count 2.
8829   effect(USE_DEF dst, KILL cr);  // dst is updated in place; flags clobbered
8830 
8831   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8832   opcode(0xC1, 0x7); /* C1 /7 ib */
8833   ins_encode(reg_opc_imm_wide(dst, 0x2));
8834   ins_pipe(ialu_reg);
8835 %}
8836 
8837 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8838 %{  // Long division by the constant 10, expanded into multiply-high / shift / subtract instead of idivq.
8839   match(Set dst (DivL src div));
8840 
8841   ins_cost((5+8)*100);
8842   expand %{
8843     rax_RegL rax;                     // Killed temp
8844     rFlagsReg cr;                     // Killed
8845     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8846     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src  (dst is rdx, the high half)
8847     sarL_rReg_63(src, cr);            // sarq  src, 63   (0 for non-negative src, -1 for negative)
8848     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8849     subL_rReg(dst, src, cr);          // long subtract: rdx -= src  (adds 1 when src was negative)
8850   %}
8851 %}
8852 
8853 //-----------------------------------------------------------------------------
8854 
8855 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8856                    rFlagsReg cr)
8857 %{  // Signed int remainder; dividend in rax, remainder produced in rdx.
8858   match(Set rdx (ModI rax div));  // rdx = rax % div
8859   effect(KILL rax, KILL cr);      // rax and flags are clobbered
8860 
8861   ins_cost(300); // XXX
8862   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8863             "jne,s   normal\n\t"
8864             "xorl    rdx, rdx\n\t"
8865             "cmpl    $div, -1\n\t"
8866             "je,s    done\n"
8867     "normal: cdql\n\t"
8868             "idivl   $div\n"
8869     "done:"        %}  // as printed: for 0x80000000 % -1 the idivl is skipped and rdx is left 0
8870   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8871   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // same encoding as divI_rReg; only the result register differs
8872   ins_pipe(ialu_reg_reg_alu0);
8873 %}
8874 
8875 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8876                    rFlagsReg cr)
8877 %{  // Signed long remainder; dividend in rax, remainder produced in rdx.
8878   match(Set rdx (ModL rax div));  // rdx = rax % div
8879   effect(KILL rax, KILL cr);      // rax and flags are clobbered
8880 
8881   ins_cost(300); // XXX
8882   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8883             "cmpq    rax, rdx\n\t"
8884             "jne,s   normal\n\t"
8885             "xorl    rdx, rdx\n\t"
8886             "cmpq    $div, -1\n\t"
8887             "je,s    done\n"
8888     "normal: cdqq\n\t"
8889             "idivq   $div\n"
8890     "done:"        %}  // as printed: for 0x8000000000000000 % -1 the idivq is skipped and rdx is left 0
8891   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8892   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // same encoding as divL_rReg; only the result register differs
8893   ins_pipe(ialu_reg_reg_alu0);
8894 %}
8895 
8896 // Integer Shift Instructions
8897 // Shift Left by one: int register, using the D1 /4 form that carries no immediate byte
8898 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8899 %{
8900   match(Set dst (LShiftI dst shift));  // dst = dst << 1 (shift is operand class immI1)
8901   effect(KILL cr);                     // flags register is clobbered
8902 
8903   format %{ "sall    $dst, $shift" %}
8904   opcode(0xD1, 0x4); /* D1 /4 */
8905   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded
8906   ins_pipe(ialu_reg);
8907 %}
8908 
8909 // Shift Left by one: int in memory, read-modify-write at a single address
8910 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8911 %{
8912   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // [dst] = [dst] << 1; load and store use the same address
8913   effect(KILL cr);                                          // flags register is clobbered
8914 
8915   format %{ "sall    $dst, $shift\t" %}
8916   opcode(0xD1, 0x4); /* D1 /4 */
8917   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 opcode extension
8918   ins_pipe(ialu_mem_imm);
8919 %}
8920 
8921 // Shift Left by 8-bit immediate: int register
8922 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8923 %{
8924   match(Set dst (LShiftI dst shift));  // dst = dst << shift
8925   effect(KILL cr);                     // flags register is clobbered
8926 
8927   format %{ "sall    $dst, $shift" %}
8928   opcode(0xC1, 0x4); /* C1 /4 ib */
8929   ins_encode(reg_opc_imm(dst, shift));
8930   ins_pipe(ialu_reg);
8931 %}
8932 
8933 // Shift Left by 8-bit immediate: int in memory, read-modify-write at a single address
8934 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8935 %{
8936   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // [dst] = [dst] << shift
8937   effect(KILL cr);                                          // flags register is clobbered
8938 
8939   format %{ "sall    $dst, $shift" %}
8940   opcode(0xC1, 0x4); /* C1 /4 ib */
8941   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
8942   ins_pipe(ialu_mem_imm);
8943 %}
8944 
8945 // Shift Left by variable: int register, count supplied in rcx (operand class rcx_RegI)
8946 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8947 %{
8948   match(Set dst (LShiftI dst shift));  // dst = dst << shift
8949   effect(KILL cr);                     // flags register is clobbered
8950 
8951   format %{ "sall    $dst, $shift" %}
8952   opcode(0xD3, 0x4); /* D3 /4 */
8953   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
8954   ins_pipe(ialu_reg_reg);
8955 %}
8956 
8957 // Shift Left by variable: int in memory (read-modify-write), count supplied in rcx
8958 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8959 %{
8960   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // [dst] = [dst] << shift
8961   effect(KILL cr);                                          // flags register is clobbered
8962 
8963   format %{ "sall    $dst, $shift" %}
8964   opcode(0xD3, 0x4); /* D3 /4 */
8965   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
8966   ins_pipe(ialu_mem_reg);
8967 %}
8968 
8969 // Arithmetic shift right by one: int register, using the D1 /7 form that carries no immediate byte
8970 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8971 %{
8972   match(Set dst (RShiftI dst shift));  // dst = dst >> 1 (sign-propagating)
8973   effect(KILL cr);                     // flags register is clobbered
8974 
8975   format %{ "sarl    $dst, $shift" %}
8976   opcode(0xD1, 0x7); /* D1 /7 */
8977   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded
8978   ins_pipe(ialu_reg);
8979 %}
8980 
8981 // Arithmetic shift right by one: int in memory, read-modify-write at a single address
8982 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8983 %{
8984   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // [dst] = [dst] >> 1
8985   effect(KILL cr);                                          // flags register is clobbered
8986 
8987   format %{ "sarl    $dst, $shift" %}
8988   opcode(0xD1, 0x7); /* D1 /7 */
8989   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 opcode extension
8990   ins_pipe(ialu_mem_imm);
8991 %}
8992 
8993 // Arithmetic Shift Right by 8-bit immediate: int register
8994 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8995 %{
8996   match(Set dst (RShiftI dst shift));  // dst = dst >> shift (sign-propagating)
8997   effect(KILL cr);                     // flags register is clobbered
8998 
8999   format %{ "sarl    $dst, $shift" %}
9000   opcode(0xC1, 0x7); /* C1 /7 ib */
9001   ins_encode(reg_opc_imm(dst, shift));
9002   ins_pipe(ialu_reg);  // register-only form: same pipeline class as salI_rReg_imm / shrI_rReg_imm
9003 %}
9004 
9005 // Arithmetic Shift Right by 8-bit immediate: int in memory, read-modify-write at a single address
9006 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9007 %{
9008   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // [dst] = [dst] >> shift
9009   effect(KILL cr);                                          // flags register is clobbered
9010 
9011   format %{ "sarl    $dst, $shift" %}
9012   opcode(0xC1, 0x7); /* C1 /7 ib */
9013   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
9014   ins_pipe(ialu_mem_imm);
9015 %}
9016 
9017 // Arithmetic Shift Right by variable: int register, count supplied in rcx (operand class rcx_RegI)
9018 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9019 %{
9020   match(Set dst (RShiftI dst shift));  // dst = dst >> shift (sign-propagating)
9021   effect(KILL cr);                     // flags register is clobbered
9022 
9023   format %{ "sarl    $dst, $shift" %}
9024   opcode(0xD3, 0x7); /* D3 /7 */
9025   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9026   ins_pipe(ialu_reg_reg);
9027 %}
9028 
9029 // Arithmetic Shift Right by variable: int in memory (read-modify-write), count supplied in rcx
9030 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9031 %{
9032   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // [dst] = [dst] >> shift
9033   effect(KILL cr);                                          // flags register is clobbered
9034 
9035   format %{ "sarl    $dst, $shift" %}
9036   opcode(0xD3, 0x7); /* D3 /7 */
9037   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
9038   ins_pipe(ialu_mem_reg);
9039 %}
9040 
9041 // Logical shift right by one: int register, using the D1 /5 form that carries no immediate byte
9042 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9043 %{
9044   match(Set dst (URShiftI dst shift));  // dst = dst >>> 1 (zero-filling)
9045   effect(KILL cr);                      // flags register is clobbered
9046 
9047   format %{ "shrl    $dst, $shift" %}
9048   opcode(0xD1, 0x5); /* D1 /5 */
9049   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded
9050   ins_pipe(ialu_reg);
9051 %}
9052 
9053 // Logical shift right by one: int in memory, read-modify-write at a single address
9054 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9055 %{
9056   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // [dst] = [dst] >>> 1
9057   effect(KILL cr);                                           // flags register is clobbered
9058 
9059   format %{ "shrl    $dst, $shift" %}
9060   opcode(0xD1, 0x5); /* D1 /5 */
9061   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 opcode extension
9062   ins_pipe(ialu_mem_imm);
9063 %}
9064 
9065 // Logical Shift Right by 8-bit immediate: int register
9066 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9067 %{
9068   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift (zero-filling)
9069   effect(KILL cr);                      // flags register is clobbered
9070 
9071   format %{ "shrl    $dst, $shift" %}
9072   opcode(0xC1, 0x5); /* C1 /5 ib */
9073   ins_encode(reg_opc_imm(dst, shift));
9074   ins_pipe(ialu_reg);
9075 %}
9076 
9077 // Logical Shift Right by 8-bit immediate: int in memory, read-modify-write at a single address
9078 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9079 %{
9080   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // [dst] = [dst] >>> shift
9081   effect(KILL cr);                                           // flags register is clobbered
9082 
9083   format %{ "shrl    $dst, $shift" %}
9084   opcode(0xC1, 0x5); /* C1 /5 ib */
9085   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
9086   ins_pipe(ialu_mem_imm);
9087 %}
9088 
9089 // Logical Shift Right by variable: int register, count supplied in rcx (operand class rcx_RegI)
9090 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9091 %{
9092   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift (zero-filling)
9093   effect(KILL cr);                      // flags register is clobbered
9094 
9095   format %{ "shrl    $dst, $shift" %}
9096   opcode(0xD3, 0x5); /* D3 /5 */
9097   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9098   ins_pipe(ialu_reg_reg);
9099 %}
9100 
9101 // Logical Shift Right by variable: int in memory (read-modify-write), count supplied in rcx
9102 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9103 %{
9104   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // [dst] = [dst] >>> shift
9105   effect(KILL cr);                                           // flags register is clobbered
9106 
9107   format %{ "shrl    $dst, $shift" %}
9108   opcode(0xD3, 0x5); /* D3 /5 */
9109   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
9110   ins_pipe(ialu_mem_reg);
9111 %}
9112 
9113 // Long Shift Instructions
9114 // Shift Left by one: long register, using the D1 /4 form that carries no immediate byte
9115 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9116 %{
9117   match(Set dst (LShiftL dst shift));  // dst = dst << 1; the shift count operand is an int immediate
9118   effect(KILL cr);                     // flags register is clobbered
9119 
9120   format %{ "salq    $dst, $shift" %}
9121   opcode(0xD1, 0x4); /* D1 /4 */
9122   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded
9123   ins_pipe(ialu_reg);
9124 %}
9125 
9126 // Shift Left by one: long in memory, read-modify-write at a single address
9127 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9128 %{
9129   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // [dst] = [dst] << 1
9130   effect(KILL cr);                                          // flags register is clobbered
9131 
9132   format %{ "salq    $dst, $shift" %}
9133   opcode(0xD1, 0x4); /* D1 /4 */
9134   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x4) is the /4 opcode extension
9135   ins_pipe(ialu_mem_imm);
9136 %}
9137 
9138 // Shift Left by 8-bit immediate: long register
9139 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9140 %{
9141   match(Set dst (LShiftL dst shift));  // dst = dst << shift
9142   effect(KILL cr);                     // flags register is clobbered
9143 
9144   format %{ "salq    $dst, $shift" %}
9145   opcode(0xC1, 0x4); /* C1 /4 ib */
9146   ins_encode(reg_opc_imm_wide(dst, shift));
9147   ins_pipe(ialu_reg);
9148 %}
9149 
9150 // Shift Left by 8-bit immediate: long in memory, read-modify-write at a single address
9151 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9152 %{
9153   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // [dst] = [dst] << shift
9154   effect(KILL cr);                                          // flags register is clobbered
9155 
9156   format %{ "salq    $dst, $shift" %}
9157   opcode(0xC1, 0x4); /* C1 /4 ib */
9158   ins_encode(REX_mem_wide(dst), OpcP,
9159              RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
9160   ins_pipe(ialu_mem_imm);
9161 %}
9162 
9163 // Shift Left by variable: long register, int count supplied in rcx (operand class rcx_RegI)
9164 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9165 %{
9166   match(Set dst (LShiftL dst shift));  // dst = dst << shift
9167   effect(KILL cr);                     // flags register is clobbered
9168 
9169   format %{ "salq    $dst, $shift" %}
9170   opcode(0xD3, 0x4); /* D3 /4 */
9171   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9172   ins_pipe(ialu_reg_reg);
9173 %}
9174 
9175 // Shift Left by variable: long in memory (read-modify-write), int count supplied in rcx
9176 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9177 %{
9178   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // [dst] = [dst] << shift
9179   effect(KILL cr);                                          // flags register is clobbered
9180 
9181   format %{ "salq    $dst, $shift" %}
9182   opcode(0xD3, 0x4); /* D3 /4 */
9183   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
9184   ins_pipe(ialu_mem_reg);
9185 %}
9186 
9187 // Arithmetic shift right by one: long register, using the D1 /7 form that carries no immediate byte
9188 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9189 %{
9190   match(Set dst (RShiftL dst shift));  // dst = dst >> 1 (sign-propagating)
9191   effect(KILL cr);                     // flags register is clobbered
9192 
9193   format %{ "sarq    $dst, $shift" %}
9194   opcode(0xD1, 0x7); /* D1 /7 */
9195   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded
9196   ins_pipe(ialu_reg);
9197 %}
9198 
9199 // Arithmetic shift right by one: long in memory, read-modify-write at a single address
9200 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9201 %{
9202   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // [dst] = [dst] >> 1
9203   effect(KILL cr);                                          // flags register is clobbered
9204 
9205   format %{ "sarq    $dst, $shift" %}
9206   opcode(0xD1, 0x7); /* D1 /7 */
9207   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x7) is the /7 opcode extension
9208   ins_pipe(ialu_mem_imm);
9209 %}
9210 
9211 // Arithmetic Shift Right by 8-bit immediate: long register
9212 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9213 %{
9214   match(Set dst (RShiftL dst shift));  // dst = dst >> shift (sign-propagating)
9215   effect(KILL cr);                     // flags register is clobbered
9216 
9217   format %{ "sarq    $dst, $shift" %}
9218   opcode(0xC1, 0x7); /* C1 /7 ib */
9219   ins_encode(reg_opc_imm_wide(dst, shift));
9220   ins_pipe(ialu_reg);  // register-only form: same pipeline class as salL_rReg_imm / shrL_rReg_imm
9221 %}
9222 
9223 // Arithmetic Shift Right by 8-bit immediate: long in memory, read-modify-write at a single address
9224 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9225 %{
9226   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // [dst] = [dst] >> shift
9227   effect(KILL cr);                                          // flags register is clobbered
9228 
9229   format %{ "sarq    $dst, $shift" %}
9230   opcode(0xC1, 0x7); /* C1 /7 ib */
9231   ins_encode(REX_mem_wide(dst), OpcP,
9232              RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
9233   ins_pipe(ialu_mem_imm);
9234 %}
9235 
9236 // Arithmetic Shift Right by variable: long register, int count supplied in rcx (operand class rcx_RegI)
9237 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9238 %{
9239   match(Set dst (RShiftL dst shift));  // dst = dst >> shift (sign-propagating)
9240   effect(KILL cr);                     // flags register is clobbered
9241 
9242   format %{ "sarq    $dst, $shift" %}
9243   opcode(0xD3, 0x7); /* D3 /7 */
9244   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9245   ins_pipe(ialu_reg_reg);
9246 %}
9247 
9248 // Arithmetic Shift Right by variable: long in memory (read-modify-write), int count supplied in rcx
9249 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9250 %{
9251   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // [dst] = [dst] >> shift
9252   effect(KILL cr);                                          // flags register is clobbered
9253 
9254   format %{ "sarq    $dst, $shift" %}
9255   opcode(0xD3, 0x7); /* D3 /7 */
9256   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
9257   ins_pipe(ialu_mem_reg);
9258 %}
9259 
9260 // Logical shift right by one: long register, using the D1 /5 form that carries no immediate byte
9261 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9262 %{
9263   match(Set dst (URShiftL dst shift));  // dst = dst >>> 1 (zero-filling)
9264   effect(KILL cr);                      // flags register is clobbered
9265 
9266   format %{ "shrq    $dst, $shift" %}
9267   opcode(0xD1, 0x5); /* D1 /5 */
9268   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // shift is not encoded
9269   ins_pipe(ialu_reg);
9270 %}
9271 
9272 // Logical shift right by one: long in memory, read-modify-write at a single address
9273 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9274 %{
9275   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // [dst] = [dst] >>> 1
9276   effect(KILL cr);                                           // flags register is clobbered
9277 
9278   format %{ "shrq    $dst, $shift" %}
9279   opcode(0xD1, 0x5); /* D1 /5 */
9280   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary (0x5) is the /5 opcode extension
9281   ins_pipe(ialu_mem_imm);
9282 %}
9283 
9284 // Logical Shift Right by 8-bit immediate: long register
9285 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9286 %{
9287   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (zero-filling)
9288   effect(KILL cr);                      // flags register is clobbered
9289 
9290   format %{ "shrq    $dst, $shift" %}
9291   opcode(0xC1, 0x5); /* C1 /5 ib */
9292   ins_encode(reg_opc_imm_wide(dst, shift));
9293   ins_pipe(ialu_reg);
9294 %}
9295 
9296 
9297 // Logical Shift Right by 8-bit immediate: long in memory, read-modify-write at a single address
9298 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9299 %{
9300   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // [dst] = [dst] >>> shift
9301   effect(KILL cr);                                           // flags register is clobbered
9302 
9303   format %{ "shrq    $dst, $shift" %}
9304   opcode(0xC1, 0x5); /* C1 /5 ib */
9305   ins_encode(REX_mem_wide(dst), OpcP,
9306              RM_opc_mem(secondary, dst), Con8or32(shift));  // immediate follows the memory operand bytes
9307   ins_pipe(ialu_mem_imm);
9308 %}
9309 
9310 // Logical Shift Right by variable: long register, int count supplied in rcx (operand class rcx_RegI)
9311 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9312 %{
9313   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (zero-filling)
9314   effect(KILL cr);                      // flags register is clobbered
9315 
9316   format %{ "shrq    $dst, $shift" %}
9317   opcode(0xD3, 0x5); /* D3 /5 */
9318   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9319   ins_pipe(ialu_reg_reg);
9320 %}
9321 
9322 // Logical Shift Right by variable: long in memory (read-modify-write), int count supplied in rcx
9323 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9324 %{
9325   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // [dst] = [dst] >>> shift
9326   effect(KILL cr);                                           // flags register is clobbered
9327 
9328   format %{ "shrq    $dst, $shift" %}
9329   opcode(0xD3, 0x5); /* D3 /5 */
9330   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded
9331   ins_pipe(ialu_mem_reg);
9332 %}
9333 
9334 // Shift Left by 24, followed by Arithmetic Shift Right by 24 (sign-extends the low byte).
9335 // This idiom is used by the compiler for the i2b bytecode.
9336 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9337 %{
9338   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // dst = (src << 24) >> 24; both counts bind to one operand
9339 
9340   format %{ "movsbl  $dst, $src\t# i2b" %}  // a single instruction replaces the two shifts; no flags effect is declared
9341   opcode(0x0F, 0xBE);
9342   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9343   ins_pipe(ialu_reg_reg);
9344 %}
9345 
9346 // Shift Left by 16, followed by Arithmetic Shift Right by 16 (sign-extends the low 16 bits).
9347 // This idiom is used by the compiler for the i2s bytecode.
9348 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9349 %{
9350   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // dst = (src << 16) >> 16; both counts bind to one operand
9351 
9352   format %{ "movswl  $dst, $src\t# i2s" %}  // a single instruction replaces the two shifts; no flags effect is declared
9353   opcode(0x0F, 0xBF);
9354   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9355   ins_pipe(ialu_reg_reg);
9356 %}
9357 
9358 // ROL/ROR instructions
9359 
9360 // ROL expand
9361 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{  // Expand-only helper (no match rule): rotate int left by one.
9362   effect(KILL cr, USE_DEF dst);  // dst is rotated in place; flags clobbered
9363 
9364   format %{ "roll    $dst" %}
9365   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9366   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9367   ins_pipe(ialu_reg);
9368 %}
9369 
9370 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{  // Expand-only helper (no match rule): rotate int left by an 8-bit immediate.
9371   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9372 
9373   format %{ "roll    $dst, $shift" %}
9374   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9375   ins_encode( reg_opc_imm(dst, shift) );
9376   ins_pipe(ialu_reg);
9377 %}
9378 
9379 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9380 %{  // Expand-only helper (no match rule): rotate int left by the count in rcx; dst is kept out of rcx.
9381   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9382 
9383   format %{ "roll    $dst, $shift" %}
9384   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9385   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9386   ins_pipe(ialu_reg_reg);
9387 %}
9388 // end of ROL expand
9389 
9390 // Rotate Left by one: recognizes (dst << 1) | (dst >>> -1) and expands to roll
9391 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9392 %{
9393   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // NOTE(review): rshift of -1 presumably acts as 31 under 5-bit count masking -- confirm
9394 
9395   expand %{
9396     rolI_rReg_imm1(dst, cr);  // neither constant is passed on; the rotate count is fixed at one
9397   %}
9398 %}
9399 
9400 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift) and expands to roll
9401 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9402 %{
9403   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when lshift + rshift is a multiple of 32
9404   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9405 
9406   expand %{
9407     rolI_rReg_imm8(dst, lshift, cr);  // rotate count is the left-shift amount
9408   %}
9409 %}
9410 
9411 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (0 - shift))
9412 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9413 %{
9414   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9415 
9416   expand %{
9417     rolI_rReg_CL(dst, shift, cr);  // count comes from rcx; the zero constant is not passed on
9418   %}
9419 %}
9420 
9421 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (32 - shift))
9422 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9423 %{
9424   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9425 
9426   expand %{
9427     rolI_rReg_CL(dst, shift, cr);  // count comes from rcx; the constant 32 is not passed on
9428   %}
9429 %}
9430 
9431 // ROR expand
9432 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9433 %{  // Expand-only helper (no match rule): rotate int right by one.
9434   effect(USE_DEF dst, KILL cr);  // dst is rotated in place; flags clobbered
9435 
9436   format %{ "rorl    $dst" %}
9437   opcode(0xD1, 0x1); /* D1 /1 */
9438   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9439   ins_pipe(ialu_reg);
9440 %}
9441 
9442 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9443 %{  // Expand-only helper (no match rule): rotate int right by an 8-bit immediate.
9444   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9445 
9446   format %{ "rorl    $dst, $shift" %}
9447   opcode(0xC1, 0x1); /* C1 /1 ib */
9448   ins_encode(reg_opc_imm(dst, shift));
9449   ins_pipe(ialu_reg);
9450 %}
9451 
9452 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9453 %{  // Expand-only helper (no match rule): rotate int right by the count in rcx; dst is kept out of rcx.
9454   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9455 
9456   format %{ "rorl    $dst, $shift" %}
9457   opcode(0xD3, 0x1); /* D3 /1 */
9458   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9459   ins_pipe(ialu_reg_reg);
9460 %}
9461 // end of ROR expand
9462 
9463 // Rotate Right by one: recognizes (dst >>> 1) | (dst << -1) and expands to rorl
9464 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9465 %{
9466   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // NOTE(review): lshift of -1 presumably acts as 31 under 5-bit count masking -- confirm
9467 
9468   expand %{
9469     rorI_rReg_imm1(dst, cr);  // neither constant is passed on; the rotate count is fixed at one
9470   %}
9471 %}
9472 
9473 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift) and expands to rorl
9474 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9475 %{
9476   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when rshift + lshift is a multiple of 32
9477   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9478 
9479   expand %{
9480     rorI_rReg_imm8(dst, rshift, cr);  // rotate count is the right-shift amount
9481   %}
9482 %}
9483 
9484 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (0 - shift))
9485 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9486 %{
9487   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9488 
9489   expand %{
9490     rorI_rReg_CL(dst, shift, cr);  // count comes from rcx; the zero constant is not passed on
9491   %}
9492 %}
9493 
9494 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (32 - shift))
9495 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9496 %{
9497   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9498 
9499   expand %{
9500     rorI_rReg_CL(dst, shift, cr);  // count comes from rcx; the constant 32 is not passed on
9501   %}
9502 %}
9503 
9504 // for long rotate
9505 // ROL expand
9506 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{  // Expand-only helper (no match rule): rotate long left by one.
9507   effect(USE_DEF dst, KILL cr);  // dst is rotated in place; flags clobbered
9508 
9509   format %{ "rolq    $dst" %}
9510   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9511   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9512   ins_pipe(ialu_reg);
9513 %}
9514 
9515 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{  // Expand-only helper (no match rule): rotate long left by an 8-bit immediate.
9516   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9517 
9518   format %{ "rolq    $dst, $shift" %}
9519   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9520   ins_encode( reg_opc_imm_wide(dst, shift) );
9521   ins_pipe(ialu_reg);
9522 %}
9523 
9524 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9525 %{  // Expand-only helper (no match rule): rotate long left by the int count in rcx; dst is kept out of rcx.
9526   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9527 
9528   format %{ "rolq    $dst, $shift" %}
9529   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9530   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9531   ins_pipe(ialu_reg_reg);
9532 %}
9533 // end of ROL expand
9534 
9535 // Rotate Left by one (long): recognizes (dst << 1) | (dst >>> -1) and expands to rolq
9536 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9537 %{
9538   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // NOTE(review): rshift of -1 presumably acts as 63 under 6-bit count masking -- confirm
9539 
9540   expand %{
9541     rolL_rReg_imm1(dst, cr);  // neither constant is passed on; the rotate count is fixed at one
9542   %}
9543 %}
9544 
9545 // Rotate Left by 8-bit immediate (long): recognizes (dst << lshift) | (dst >>> rshift) and expands to rolq
9546 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9547 %{
9548   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when lshift + rshift is a multiple of 64
9549   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9550 
9551   expand %{
9552     rolL_rReg_imm8(dst, lshift, cr);  // rotate count is the left-shift amount
9553   %}
9554 %}
9555 
9556 // Rotate Left by variable (long): recognizes (dst << shift) | (dst >>> (0 - shift))
9557 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9558 %{
9559   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));  // the count arithmetic is int (SubI) even though the data is long
9560 
9561   expand %{
9562     rolL_rReg_CL(dst, shift, cr);  // count comes from rcx; the zero constant is not passed on
9563   %}
9564 %}
9565 
9566 // Rotate Left by variable (long): recognizes (dst << shift) | (dst >>> (64 - shift))
9567 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9568 %{
9569   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));  // the count arithmetic is int (SubI) even though the data is long
9570 
9571   expand %{
9572     rolL_rReg_CL(dst, shift, cr);  // count comes from rcx; the constant 64 is not passed on
9573   %}
9574 %}
9575 
9576 // ROR expand
9577 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9578 %{  // Expand-only helper (no match rule): rotate long right by one.
9579   effect(USE_DEF dst, KILL cr);  // dst is rotated in place; flags clobbered
9580 
9581   format %{ "rorq    $dst" %}
9582   opcode(0xD1, 0x1); /* D1 /1 */
9583   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9584   ins_pipe(ialu_reg);
9585 %}
9586 
9587 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9588 %{  // Expand-only helper (no match rule): rotate long right by an 8-bit immediate.
9589   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9590 
9591   format %{ "rorq    $dst, $shift" %}
9592   opcode(0xC1, 0x1); /* C1 /1 ib */
9593   ins_encode(reg_opc_imm_wide(dst, shift));
9594   ins_pipe(ialu_reg);
9595 %}
9596 
9597 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9598 %{  // Expand-only helper (no match rule): rotate long right by the int count in rcx; dst is kept out of rcx.
9599   effect(USE_DEF dst, USE shift, KILL cr);  // dst is rotated in place; flags clobbered
9600 
9601   format %{ "rorq    $dst, $shift" %}
9602   opcode(0xD3, 0x1); /* D3 /1 */
9603   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded: the D3 form takes its count implicitly
9604   ins_pipe(ialu_reg_reg);
9605 %}
9606 // end of ROR expand
9607 
9608 // Rotate Right by one (long): recognizes (dst >>> 1) | (dst << -1) and expands to rorq
9609 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9610 %{
9611   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // NOTE(review): lshift of -1 presumably acts as 63 under 6-bit count masking -- confirm
9612 
9613   expand %{
9614     rorL_rReg_imm1(dst, cr);  // neither constant is passed on; the rotate count is fixed at one
9615   %}
9616 %}
9617 
9618 // Rotate Right by 8-bit immediate (long): recognizes (dst >>> rshift) | (dst << lshift) and expands to rorq
9619 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9620 %{
9621   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when rshift + lshift is a multiple of 64
9622   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9623 
9624   expand %{
9625     rorL_rReg_imm8(dst, rshift, cr);  // rotate count is the right-shift amount
9626   %}
9627 %}
9628 
9629 // Rotate Right by variable (long): recognizes (dst >>> shift) | (dst << (0 - shift))
9630 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9631 %{
9632   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));  // the count arithmetic is int (SubI) even though the data is long
9633 
9634   expand %{
9635     rorL_rReg_CL(dst, shift, cr);  // count comes from rcx; the zero constant is not passed on
9636   %}
9637 %}
9638 
9639 // Rotate Right by variable (long): recognizes (dst >>> shift) | (dst << (64 - shift))
9640 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9641 %{
9642   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));  // the count arithmetic is int (SubI) even though the data is long
9643 
9644   expand %{
9645     rorL_rReg_CL(dst, shift, cr);  // count comes from rcx; the constant 64 is not passed on
9646   %}
9647 %}
9648 
9649 // Logical Instructions
9650 
9651 // Integer Logical Instructions
9652 
9653 // And Instructions
9654 // And Register with Register: int bitwise AND (two-address form)
9655 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9656 %{
9657   match(Set dst (AndI dst src));  // dst = dst & src
9658   effect(KILL cr);                // flags register is clobbered
9659 
9660   format %{ "andl    $dst, $src\t# int" %}
9661   opcode(0x23);
9662   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9663   ins_pipe(ialu_reg_reg);
9664 %}
9665 
// Int AND with the constant 255, emitted as a byte zero-extend of dst onto
// itself (movzbl). No flags effect is declared for this form.
instruct andI_rReg_imm255(rRegI dst, immI_255 src) %{
  match(Set dst (AndI dst src));

  opcode(0x0F, 0xB6);
  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  ins_encode(REX_reg_breg(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9676 
// ConvI2L of (src & 255): a single byte zero-extend from src into the long
// destination covers both the mask and the promotion.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  opcode(0x0F, 0xB6);
  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode(REX_reg_breg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9687 
// Int AND with the constant 65535, emitted as a 16-bit zero-extend of dst
// onto itself (movzwl). No flags effect is declared for this form.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src) %{
  match(Set dst (AndI dst src));

  opcode(0x0F, 0xB7);
  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  ins_encode(REX_reg_reg(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9698 
// ConvI2L of (src & 65535): a single 16-bit zero-extend from src into the
// long destination covers both the mask and the promotion.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  opcode(0x0F, 0xB7);
  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode(REX_reg_reg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9709 
// Int AND, register with a general int immediate: dst = dst & src.
// Declares the flags register killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  opcode(0x81, 0x04); /* Opcode 81 /4 */
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9721 
// Int AND, register with a loaded memory operand: dst = dst & LoadI(src).
// Declares the flags register killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  opcode(0x23);
  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9734 
// Int AND, memory destination with register source: matches a StoreI back
// to dst of (LoadI(dst) & src). Declares the flags register killed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x21); /* Opcode 21 /r */
  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9747 
// Int AND, memory destination with immediate source: matches a StoreI back
// to dst of (LoadI(dst) & src). Declares the flags register killed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9761 
9762 // Or Instructions
// Int OR, register with register: dst = dst | src.
// Declares the flags register killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9774 
// Int OR, register with a general int immediate: dst = dst | src.
// Declares the flags register killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9786 
// Int OR, register with a loaded memory operand: dst = dst | LoadI(src).
// Declares the flags register killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9799 
// Int OR, memory destination with register source: matches a StoreI back
// to dst of (LoadI(dst) | src). Declares the flags register killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x09); /* Opcode 09 /r */
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9812 
// Int OR, memory destination with immediate source: matches a StoreI back
// to dst of (LoadI(dst) | src). Declares the flags register killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9826 
9827 // Xor Instructions
// Int XOR, register with register: dst = dst ^ src.
// Declares the flags register killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9839 
// Xor Register with Immediate -1
// XOR with all ones is a bitwise complement, so this is emitted through the
// assembler's notl. No flags effect is declared for this form. The format
// prints the sized mnemonic "notl" so the listing agrees with the
// instruction actually emitted (the long sibling prints "notq").
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "notl   $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9850 
// Int XOR, register with a general int immediate: dst = dst ^ src.
// Declares the flags register killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9862 
// Int XOR, register with a loaded memory operand: dst = dst ^ LoadI(src).
// Declares the flags register killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9875 
// Int XOR, memory destination with register source: matches a StoreI back
// to dst of (LoadI(dst) ^ src). Declares the flags register killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x31); /* Opcode 31 /r */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(150);
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9888 
// Int XOR, memory destination with immediate source: matches a StoreI back
// to dst of (LoadI(dst) ^ src). Declares the flags register killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9902 
9903 
9904 // Long Logical Instructions
9905 
9906 // And Instructions
// Long AND, register with register: dst = dst & src, using the wide REX
// prefix encoder. Declares the flags register killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  opcode(0x23);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9918 
// Long AND with the constant 255, emitted as a byte zero-extend of dst onto
// itself (movzbq). No flags effect is declared for this form.
instruct andL_rReg_imm255(rRegL dst, immL_255 src) %{
  match(Set dst (AndL dst src));

  opcode(0x0F, 0xB6);
  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9929 
// Long AND with the constant 65535, emitted as a 16-bit zero-extend of dst
// onto itself (movzwq). No flags effect is declared for this form.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src) %{
  match(Set dst (AndL dst src));

  opcode(0x0F, 0xB7);
  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9940 
// Long AND, register with an immL32 immediate: dst = dst & src.
// Declares the flags register killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  opcode(0x81, 0x04); /* Opcode 81 /4 */
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9952 
// Long AND, register with a loaded memory operand: dst = dst & LoadL(src).
// Declares the flags register killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x23);
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9965 
// Long AND, memory destination with register source: matches a StoreL back
// to dst of (LoadL(dst) & src). Declares the flags register killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x21); /* Opcode 21 /r */
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9978 
// Long AND, memory destination with immL32 source: matches a StoreL back
// to dst of (LoadL(dst) & src). Declares the flags register killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9992 
9993 // Or Instructions
// Long OR, register with register: dst = dst | src, using the wide REX
// prefix encoder. Declares the flags register killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10005 
// Long OR of dst with a pointer reinterpreted through CastP2X.
// The source operand class is any_RegP so that R15 (the TLS register) can be
// matched directly without spilling. Declares the flags register killed.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10016 
10017 
// Long OR, register with an immL32 immediate: dst = dst | src.
// Declares the flags register killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10029 
// Long OR, register with a loaded memory operand: dst = dst | LoadL(src).
// Declares the flags register killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x0B);
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10042 
// Long OR, memory destination with register source: matches a StoreL back
// to dst of (LoadL(dst) | src). Declares the flags register killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x09); /* Opcode 09 /r */
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10055 
// Long OR, memory destination with immL32 source: matches a StoreL back
// to dst of (LoadL(dst) | src). Declares the flags register killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10069 
10070 // Xor Instructions
// Long XOR, register with register: dst = dst ^ src, using the wide REX
// prefix encoder. Declares the flags register killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10082 
// Long XOR with the constant -1.
// XOR with all ones is a bitwise complement, so this is emitted through the
// assembler's notq. No flags effect is declared for this form.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_pipe(ialu_reg);
  ins_encode %{
    __ notq($dst$$Register);
  %}
%}
10093 
// Long XOR, register with an immL32 immediate: dst = dst ^ src.
// Declares the flags register killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10105 
// Long XOR, register with a loaded memory operand: dst = dst ^ LoadL(src).
// Declares the flags register killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  opcode(0x33);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10118 
// Long XOR, memory destination with register source: matches a StoreL back
// to dst of (LoadL(dst) ^ src). Declares the flags register killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x31); /* Opcode 31 /r */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(150);
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10131 
// Long XOR, memory destination with immL32 source: matches a StoreL back
// to dst of (LoadL(dst) ^ src). Declares the flags register killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10145 
// Conv2B on an int source.
// Per the format: test src against itself, set dst's byte on "not zero",
// then zero-extend that byte. Declares the flags register killed.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src),
             opc_reg_reg(0x85, src, src), // testl
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10161 
// Conv2B on a pointer source.
// Same sequence as the int form, except the test uses the wide REX prefix
// encoder (testq). Declares the flags register killed.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src),
             opc_reg_reg(0x85, src, src), // testq
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10177 
// CmpLTMask of two int registers.
// Per the format: compare p with q, set dst's byte on "less than",
// zero-extend it, then negate it. Declares the flags register killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(REX_reg_reg(p, q),
             opc_reg_reg(0x3B, p, q), // cmpl
             setLT_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst),
             neg_reg(dst));
  ins_pipe(pipe_slow);
%}
10195 
// CmpLTMask of dst against the constant zero, done in place with a single
// arithmetic right shift by 31 (0x1F). Declares the flags register killed.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  opcode(0xC1, 0x7);  /* C1 /7 ib */
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  ins_encode(reg_opc_imm(dst, 0x1F));
  ins_pipe(ialu_reg);
%}
10207 
10208 
// Fused form of p = ((p < q ? -1 : 0) & y) + (p - q), as written in the
// match tree. Per the format: subtract q from p, materialize the borrow in
// tmp with sbbl, mask it with y, and add it back into p. tmp is a TEMP
// operand and the flags register is killed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_cost(400); // XXX
  ins_encode(enc_cmpLTP(p, q, y, tmp));
  ins_pipe(pipe_cmplt);
%}
10224 
10225 /* If I enable this, I encourage spilling in the inner loop of compress.
10226 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10227 %{
10228   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10229   effect( TEMP tmp, KILL cr );
10230   ins_cost(400);
10231 
10232   format %{ "SUB    $p,$q\n\t"
10233             "SBB    RCX,RCX\n\t"
10234             "AND    RCX,$y\n\t"
10235             "ADD    $p,RCX" %}
10236   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10237 %}
10238 */
10239 
10240 //---------- FP Instructions------------------------------------------------
10241 
// Float compare of two registers into rFlagsRegU.
// ucomiss is followed by the cmpfp_fixup encoding; the format describes it
// as skipping on "no parity" and otherwise masking the pushed flags image.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10258 
// Float compare of two registers into rFlagsRegUCF: a bare ucomiss with no
// flags fixup sequence afterwards.
// The cost is 100, the value used by the other *_CF compare rules in this
// file (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_reg_CF, cmpD_cc_memCF,
// cmpD_cc_immCF); 145 is the cost of the variants that carry the fixup.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10269 
// Float compare of a register with a loaded memory operand into
// rFlagsRegU. ucomiss is followed by the cmpfp_fixup encoding.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10286 
// Float compare of a register with a loaded memory operand into
// rFlagsRegUCF: a bare ucomiss with no flags fixup sequence.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2" %}
  ins_cost(100);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10296 
// Float compare of a register with a constant-table operand into
// rFlagsRegU.
// After ucomiss, the "no parity" case branches straight to the exit label.
// Otherwise (the format's "saw NaN" path) the flags are pushed, the saved
// image is masked in memory, and the result is popped back into the flags.
// The low byte of the mask, 0x2b, keeps bit 0 (CF) and clears bits 2 (PF)
// and 6 (ZF).
instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(145);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_encode %{
    Label L_exit;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    __ jcc(Assembler::noParity, L_exit);
    __ pushf();
    // Mask the flags image that pushf just stored at the top of the stack,
    // as the format above documents. The register form andq(rsp, imm)
    // would instead modify the stack pointer itself and leave the saved
    // flags untouched.
    __ andq(Address(rsp, 0), 0xffffff2b);
    __ popf();
    __ bind(L_exit);
    __ nop();
  %}
  ins_pipe(pipe_slow);
%}
10319 
// Float compare of a register with a constant-table operand into
// rFlagsRegUCF: a bare ucomiss with no flags fixup sequence.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10329 
// Double compare of two registers into rFlagsRegU.
// The 0x66 primary opcode byte is emitted ahead of the REX prefix;
// ucomisd is followed by the cmpfp_fixup encoding.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10346 
// Double compare of two registers into rFlagsRegUCF: a bare ucomisd with no
// flags fixup sequence afterwards.
// The format string no longer ends in a stray " test" word, so the printed
// listing shows only the instruction and its operands.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10357 
// Double compare of a register with a loaded memory operand into
// rFlagsRegU. ucomisd is followed by the cmpfp_fixup encoding.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10374 
// Double compare of a register with a loaded memory operand into
// rFlagsRegUCF: a bare ucomisd with no flags fixup sequence.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2" %}
  ins_cost(100);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10384 
// Double compare of a register with a constant-table operand into
// rFlagsRegU.
// After ucomisd, the "no parity" case branches straight to the exit label.
// Otherwise (the format's "saw NaN" path) the flags are pushed, the saved
// image is masked in memory, and the result is popped back into the flags.
// The low byte of the mask, 0x2b, keeps bit 0 (CF) and clears bits 2 (PF)
// and 6 (ZF).
instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
  match(Set cr (CmpD src con));

  ins_cost(145);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_encode %{
    Label L_exit;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    __ jcc(Assembler::noParity, L_exit);
    __ pushf();
    // Mask the flags image that pushf just stored at the top of the stack,
    // as the format above documents. The register form andq(rsp, imm)
    // would instead modify the stack pointer itself and leave the saved
    // flags untouched.
    __ andq(Address(rsp, 0), 0xffffff2b);
    __ popf();
    __ bind(L_exit);
    __ nop();
  %}
  ins_pipe(pipe_slow);
%}
10407 
// Double compare of a register with a constant-table operand into
// rFlagsRegUCF: a bare ucomisd with no flags fixup sequence.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10417 
// Three-way float compare (CmpF3) of two registers into an int.
// Per the format: dst is preloaded with -1 and kept on the "parity" or
// "below" outcomes; otherwise dst becomes the zero-extended "not equal"
// bit. Declares the flags register killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10438 
// Three-way float compare (CmpF3) of a register with a loaded memory
// operand into an int; same result sequence as the register form.
// Declares the flags register killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  opcode(0x0F, 0x2E);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10459 
// Three-way float compare (CmpF3) of a register with a constant-table
// operand into an int. dst is preloaded with -1 and kept on the "parity"
// or "below" outcomes; otherwise it becomes the zero-extended "not equal"
// bit. Declares the flags register killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    Register result = $dst$$Register;
    Label finished;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10486 
// Three-way double compare (CmpD3) of two registers into an int.
// Per the format: dst is preloaded with -1 and kept on the "parity" or
// "below" outcomes; otherwise dst becomes the zero-extended "not equal"
// bit. Declares the flags register killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10507 
// Three-way double compare (CmpD3) of a register with a loaded memory
// operand into an int; same result sequence as the register form.
// Declares the flags register killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  opcode(0x66, 0x0F, 0x2E);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10528 
// Three-way double compare (CmpD3) of a register with a constant-table
// operand into an int. dst is preloaded with -1 and kept on the "parity"
// or "below" outcomes; otherwise it becomes the zero-extended "not equal"
// bit. Declares the flags register killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    Label finished;
    Register result = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10555 
// Float add, register with register: dst = dst + src (addss).
// The 0xF3 primary opcode byte is emitted ahead of the REX prefix.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  format %{ "addss   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10566 
// Float add, register with a loaded memory operand:
// dst = dst + LoadF(src) (addss).
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x58);
  format %{ "addss   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10577 
// Float add, register with a float constant read from the constant table
// (addss with a memory operand at the constant's address).
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10587 
// Double add, register with register: dst = dst + src (addsd).
// The 0xF2 primary opcode byte is emitted ahead of the REX prefix.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x58);
  format %{ "addsd   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10598 
// Double add, register with a loaded memory operand:
// dst = dst + LoadD(src) (addsd).
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x58);
  format %{ "addsd   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10609 
// Double add, register with a double constant read from the constant table
// (addsd with a memory operand at the constant's address).
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10619 
// Float subtract, register with register: dst = dst - src (subss).
// The 0xF3 primary opcode byte is emitted ahead of the REX prefix.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5C);
  format %{ "subss   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10630 
// Float subtract, register with a loaded memory operand:
// dst = dst - LoadF(src) (subss).
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5C);
  format %{ "subss   $dst, $src" %}
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10641 
// Float subtract of a constant: matches (SubF dst con) and emits subss against
// $constantaddress($con) (constant-table load, per the format string).
10642 instruct subF_imm(regF dst, immF con) %{
10643   match(Set dst (SubF dst con));
10644   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10645   ins_cost(150); // XXX
10646   ins_encode %{
10647     __ subss($dst$$XMMRegister, $constantaddress($con));
10648   %}
10649   ins_pipe(pipe_slow);
10650 %}
10651 
// Double subtract, register-register form: matches (SubD dst src), i.e.
// dst = dst - src, encoded with opcode bytes F2 0F 5C (subsd in the format).
10652 instruct subD_reg(regD dst, regD src)
10653 %{
10654   match(Set dst (SubD dst src));
10655
10656   format %{ "subsd   $dst, $src" %}
10657   ins_cost(150); // XXX
10658   opcode(0xF2, 0x0F, 0x5C);
10659   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10660   ins_pipe(pipe_slow);
10661 %}
10662 
// Double subtract with the subtrahend loaded from memory: matches
// (SubD dst (LoadD src)); the load is folded into subsd (F2 0F 5C).
10663 instruct subD_mem(regD dst, memory src)
10664 %{
10665   match(Set dst (SubD dst (LoadD src)));
10666
10667   format %{ "subsd   $dst, $src" %}
10668   ins_cost(150); // XXX
10669   opcode(0xF2, 0x0F, 0x5C);
10670   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10671   ins_pipe(pipe_slow);
10672 %}
10673 
// Double subtract of a constant: matches (SubD dst con) and emits subsd against
// $constantaddress($con) (constant-table load, per the format string).
10674 instruct subD_imm(regD dst, immD con) %{
10675   match(Set dst (SubD dst con));
10676   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10677   ins_cost(150); // XXX
10678   ins_encode %{
10679     __ subsd($dst$$XMMRegister, $constantaddress($con));
10680   %}
10681   ins_pipe(pipe_slow);
10682 %}
10683 
// Float multiply, register-register form: matches (MulF dst src); opcode bytes
// F3 0F 59 are the mulss named in the format string.
10684 instruct mulF_reg(regF dst, regF src)
10685 %{
10686   match(Set dst (MulF dst src));
10687
10688   format %{ "mulss   $dst, $src" %}
10689   ins_cost(150); // XXX
10690   opcode(0xF3, 0x0F, 0x59);
10691   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10692   ins_pipe(pipe_slow);
10693 %}
10694 
// Float multiply with the right operand loaded from memory: matches
// (MulF dst (LoadF src)); the load is folded into mulss (F3 0F 59).
10695 instruct mulF_mem(regF dst, memory src)
10696 %{
10697   match(Set dst (MulF dst (LoadF src)));
10698
10699   format %{ "mulss   $dst, $src" %}
10700   ins_cost(150); // XXX
10701   opcode(0xF3, 0x0F, 0x59);
10702   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10703   ins_pipe(pipe_slow);
10704 %}
10705 
// Float multiply by a constant: matches (MulF dst con) and emits mulss against
// $constantaddress($con) (constant-table load, per the format string).
10706 instruct mulF_imm(regF dst, immF con) %{
10707   match(Set dst (MulF dst con));
10708   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10709   ins_cost(150); // XXX
10710   ins_encode %{
10711     __ mulss($dst$$XMMRegister, $constantaddress($con));
10712   %}
10713   ins_pipe(pipe_slow);
10714 %}
10715 
// Double multiply, register-register form: matches (MulD dst src); opcode bytes
// F2 0F 59 are the mulsd named in the format string.
10716 instruct mulD_reg(regD dst, regD src)
10717 %{
10718   match(Set dst (MulD dst src));
10719
10720   format %{ "mulsd   $dst, $src" %}
10721   ins_cost(150); // XXX
10722   opcode(0xF2, 0x0F, 0x59);
10723   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10724   ins_pipe(pipe_slow);
10725 %}
10726 
// Double multiply with the right operand loaded from memory: matches
// (MulD dst (LoadD src)); the load is folded into mulsd (F2 0F 59).
10727 instruct mulD_mem(regD dst, memory src)
10728 %{
10729   match(Set dst (MulD dst (LoadD src)));
10730
10731   format %{ "mulsd   $dst, $src" %}
10732   ins_cost(150); // XXX
10733   opcode(0xF2, 0x0F, 0x59);
10734   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10735   ins_pipe(pipe_slow);
10736 %}
10737 
// Double multiply by a constant: matches (MulD dst con) and emits mulsd against
// $constantaddress($con) (constant-table load, per the format string).
10738 instruct mulD_imm(regD dst, immD con) %{
10739   match(Set dst (MulD dst con));
10740   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10741   ins_cost(150); // XXX
10742   ins_encode %{
10743     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10744   %}
10745   ins_pipe(pipe_slow);
10746 %}
10747 
// Float divide, register-register form: matches (DivF dst src), i.e.
// dst = dst / src, encoded with opcode bytes F3 0F 5E (divss in the format).
10748 instruct divF_reg(regF dst, regF src)
10749 %{
10750   match(Set dst (DivF dst src));
10751
10752   format %{ "divss   $dst, $src" %}
10753   ins_cost(150); // XXX
10754   opcode(0xF3, 0x0F, 0x5E);
10755   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10756   ins_pipe(pipe_slow);
10757 %}
10758 
// Float divide with the divisor loaded from memory: matches
// (DivF dst (LoadF src)); the load is folded into divss (F3 0F 5E).
10759 instruct divF_mem(regF dst, memory src)
10760 %{
10761   match(Set dst (DivF dst (LoadF src)));
10762
10763   format %{ "divss   $dst, $src" %}
10764   ins_cost(150); // XXX
10765   opcode(0xF3, 0x0F, 0x5E);
10766   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10767   ins_pipe(pipe_slow);
10768 %}
10769 
// Float divide by a constant: matches (DivF dst con) and emits divss against
// $constantaddress($con) (constant-table load, per the format string).
10770 instruct divF_imm(regF dst, immF con) %{
10771   match(Set dst (DivF dst con));
10772   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10773   ins_cost(150); // XXX
10774   ins_encode %{
10775     __ divss($dst$$XMMRegister, $constantaddress($con));
10776   %}
10777   ins_pipe(pipe_slow);
10778 %}
10779 
// Double divide, register-register form: matches (DivD dst src), i.e.
// dst = dst / src, encoded with opcode bytes F2 0F 5E (divsd in the format).
10780 instruct divD_reg(regD dst, regD src)
10781 %{
10782   match(Set dst (DivD dst src));
10783
10784   format %{ "divsd   $dst, $src" %}
10785   ins_cost(150); // XXX
10786   opcode(0xF2, 0x0F, 0x5E);
10787   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10788   ins_pipe(pipe_slow);
10789 %}
10790 
// Double divide with the divisor loaded from memory: matches
// (DivD dst (LoadD src)); the load is folded into divsd (F2 0F 5E).
10791 instruct divD_mem(regD dst, memory src)
10792 %{
10793   match(Set dst (DivD dst (LoadD src)));
10794
10795   format %{ "divsd   $dst, $src" %}
10796   ins_cost(150); // XXX
10797   opcode(0xF2, 0x0F, 0x5E);
10798   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10799   ins_pipe(pipe_slow);
10800 %}
10801 
// Double divide by a constant: matches (DivD dst con) and emits divsd against
// $constantaddress($con) (constant-table load, per the format string).
10802 instruct divD_imm(regD dst, immD con) %{
10803   match(Set dst (DivD dst con));
10804   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10805   ins_cost(150); // XXX
10806   ins_encode %{
10807     __ divsd($dst$$XMMRegister, $constantaddress($con));
10808   %}
10809   ins_pipe(pipe_slow);
10810 %}
10811 
// Float square root. There is no SqrtF pattern here: the rule matches the
// widen/sqrt/narrow tree (ConvD2F (SqrtD (ConvF2D src))) and replaces all
// three nodes with one single-precision sqrtss (F3 0F 51).
10812 instruct sqrtF_reg(regF dst, regF src)
10813 %{
10814   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10815
10816   format %{ "sqrtss  $dst, $src" %}
10817   ins_cost(150); // XXX
10818   opcode(0xF3, 0x0F, 0x51);
10819   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10820   ins_pipe(pipe_slow);
10821 %}
10822 
// Float square root of a value in memory: matches the widen/sqrt/narrow tree
// rooted at a LoadF and emits one sqrtss (F3 0F 51) with a memory operand.
10823 instruct sqrtF_mem(regF dst, memory src)
10824 %{
10825   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10826
10827   format %{ "sqrtss  $dst, $src" %}
10828   ins_cost(150); // XXX
10829   opcode(0xF3, 0x0F, 0x51);
10830   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10831   ins_pipe(pipe_slow);
10832 %}
10833 
// Float square root of a constant: matches the widen/sqrt/narrow tree over an
// immF and emits sqrtss against $constantaddress($con) (constant-table load,
// per the format string).
10834 instruct sqrtF_imm(regF dst, immF con) %{
10835   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10836   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10837   ins_cost(150); // XXX
10838   ins_encode %{
10839     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10840   %}
10841   ins_pipe(pipe_slow);
10842 %}
10843 
// Double square root, register-register form: matches (SqrtD src). Unlike the
// two-address arithmetic rules, dst and src are separate operands here.
// Opcode bytes F2 0F 51 are the sqrtsd named in the format string.
10844 instruct sqrtD_reg(regD dst, regD src)
10845 %{
10846   match(Set dst (SqrtD src));
10847
10848   format %{ "sqrtsd  $dst, $src" %}
10849   ins_cost(150); // XXX
10850   opcode(0xF2, 0x0F, 0x51);
10851   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10852   ins_pipe(pipe_slow);
10853 %}
10854 
// Double square root of a value in memory: matches (SqrtD (LoadD src)); the
// load is folded into sqrtsd (F2 0F 51).
10855 instruct sqrtD_mem(regD dst, memory src)
10856 %{
10857   match(Set dst (SqrtD (LoadD src)));
10858
10859   format %{ "sqrtsd  $dst, $src" %}
10860   ins_cost(150); // XXX
10861   opcode(0xF2, 0x0F, 0x51);
10862   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10863   ins_pipe(pipe_slow);
10864 %}
10865 
// Double square root of a constant: matches (SqrtD con) and emits sqrtsd
// against $constantaddress($con) (constant-table load, per the format string).
10866 instruct sqrtD_imm(regD dst, immD con) %{
10867   match(Set dst (SqrtD con));
10868   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10869   ins_cost(150); // XXX
10870   ins_encode %{
10871     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10872   %}
10873   ins_pipe(pipe_slow);
10874 %}
10875 
// Float absolute value, in place: matches (AbsF dst). Per the format string it
// is an andps with a 0x7fffffff mask (clears the sign bit); the bytes come from
// the absF_encoding enc_class, which is defined outside this section.
10876 instruct absF_reg(regF dst)
10877 %{
10878   match(Set dst (AbsF dst));
10879
10880   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10881   ins_encode(absF_encoding(dst));
10882   ins_pipe(pipe_slow);
10883 %}
10884 
// Double absolute value, in place: matches (AbsD dst). Per the format string it
// is an andpd with a 0x7fffffffffffffff mask (clears the sign bit); the bytes
// come from the absD_encoding enc_class, which is defined outside this section.
10885 instruct absD_reg(regD dst)
10886 %{
10887   match(Set dst (AbsD dst));
10888
10889   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10890             "# abs double by sign masking" %}
10891   ins_encode(absD_encoding(dst));
10892   ins_pipe(pipe_slow);
10893 %}
10894 
// Float negate, in place: matches (NegF dst). Per the format string it is an
// xorps with a 0x80000000 mask (flips the sign bit); the bytes come from the
// negF_encoding enc_class, which is defined outside this section.
10895 instruct negF_reg(regF dst)
10896 %{
10897   match(Set dst (NegF dst));
10898
10899   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10900   ins_encode(negF_encoding(dst));
10901   ins_pipe(pipe_slow);
10902 %}
10903 
// Double negate, in place: matches (NegD dst). Per the format string it is an
// xorpd with a 0x8000000000000000 mask (flips the sign bit); the bytes come
// from the negD_encoding enc_class, which is defined outside this section.
10904 instruct negD_reg(regD dst)
10905 %{
10906   match(Set dst (NegD dst));
10907
10908   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10909             "# neg double by sign flipping" %}
10910   ins_encode(negD_encoding(dst));
10911   ins_pipe(pipe_slow);
10912 %}
10913 
10914 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place: matches (CosD dst). The two opcode bytes D9 FF
// (x87 fcos) are emitted between Push_SrcXD and Push_ResultXD.
// NOTE(review): those enc_classes are defined elsewhere; presumably they move
// the XMM value onto the x87 stack and the result back -- confirm.
10915 instruct cosD_reg(regD dst) %{
10916   match(Set dst (CosD dst));
10917
10918   format %{ "dcos   $dst\n\t" %}
10919   opcode(0xD9, 0xFF);
10920   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10921   ins_pipe( pipe_slow );
10922 %}
10923 
// Double sine, in place: matches (SinD dst). The two opcode bytes D9 FE
// (x87 fsin) are emitted between Push_SrcXD and Push_ResultXD.
// NOTE(review): those enc_classes are defined elsewhere; presumably they move
// the XMM value onto the x87 stack and the result back -- confirm.
10924 instruct sinD_reg(regD dst) %{
10925   match(Set dst (SinD dst));
10926
10927   format %{ "dsin   $dst\n\t" %}
10928   opcode(0xD9, 0xFE);
10929   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10930   ins_pipe( pipe_slow );
10931 %}
10932 
// Double tangent, in place: matches (TanD dst). Emits fptan followed by
// "fstp st" between Push_SrcXD and Push_ResultXD. The extra fstp pops one x87
// stack entry after fptan (fptan leaves an additional value on the stack).
10933 instruct tanD_reg(regD dst) %{
10934   match(Set dst (TanD dst));
10935
10936   format %{ "dtan   $dst\n\t" %}
10937   ins_encode( Push_SrcXD(dst),
10938               Opcode(0xD9), Opcode(0xF2),   //fptan
10939               Opcode(0xDD), Opcode(0xD8),   //fstp st
10940               Push_ResultXD(dst) );
10941   ins_pipe( pipe_slow );
10942 %}
10943 
// Base-10 logarithm, in place: matches (Log10D dst). Order matters: fldlg2 is
// emitted first, then the argument is pushed, then fyl2x combines the two.
10944 instruct log10D_reg(regD dst) %{
10945   // The source and result Double operands in XMM registers
10946   match(Set dst (Log10D dst));
10947   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10948   // fyl2x        ; compute log_10(2) * log_2(x)
10949   format %{ "fldlg2\t\t\t#Log10\n\t"
10950             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10951          %}
10952    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10953               Push_SrcXD(dst),
10954               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10955               Push_ResultXD(dst));
10956
10957   ins_pipe( pipe_slow );
10958 %}
10959 
// Natural logarithm, in place: matches (LogD dst). Order matters: fldln2 is
// emitted first, then the argument is pushed, then fyl2x combines the two.
10960 instruct logD_reg(regD dst) %{
10961   // The source and result Double operands in XMM registers
10962   match(Set dst (LogD dst));
10963   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10964   // fyl2x        ; compute log_e(2) * log_2(x)
10965   format %{ "fldln2\t\t\t#Log_e\n\t"
10966             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10967          %}
10968   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10969               Push_SrcXD(dst),
10970               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10971               Push_ResultXD(dst));
10972   ins_pipe( pipe_slow );
10973 %}
10974 
10975 
10976 
10977 //----------Arithmetic Conversion Instructions---------------------------------
10978 
// RoundFloat is a no-op here: the rule matches (RoundFloat dst) in place with
// an empty ins_encode() and zero cost, so no bytes are emitted for it.
10979 instruct roundFloat_nop(regF dst)
10980 %{
10981   match(Set dst (RoundFloat dst));
10982
10983   ins_cost(0);
10984   ins_encode();
10985   ins_pipe(empty);
10986 %}
10987 
// RoundDouble is a no-op here: the rule matches (RoundDouble dst) in place with
// an empty ins_encode() and zero cost, so no bytes are emitted for it.
10988 instruct roundDouble_nop(regD dst)
10989 %{
10990   match(Set dst (RoundDouble dst));
10991
10992   ins_cost(0);
10993   ins_encode();
10994   ins_pipe(empty);
10995 %}
10996 
// Float -> double widening, register source: matches (ConvF2D src) and encodes
// cvtss2sd with opcode bytes F3 0F 5A. No ins_cost is given for this rule.
10997 instruct convF2D_reg_reg(regD dst, regF src)
10998 %{
10999   match(Set dst (ConvF2D src));
11000
11001   format %{ "cvtss2sd $dst, $src" %}
11002   opcode(0xF3, 0x0F, 0x5A);
11003   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11004   ins_pipe(pipe_slow); // XXX
11005 %}
11006 
// Float -> double widening, memory source: matches (ConvF2D (LoadF src)) so
// the load is folded into cvtss2sd (F3 0F 5A).
11007 instruct convF2D_reg_mem(regD dst, memory src)
11008 %{
11009   match(Set dst (ConvF2D (LoadF src)));
11010
11011   format %{ "cvtss2sd $dst, $src" %}
11012   opcode(0xF3, 0x0F, 0x5A);
11013   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11014   ins_pipe(pipe_slow); // XXX
11015 %}
11016 
// Double -> float narrowing, register source: matches (ConvD2F src) and encodes
// cvtsd2ss with opcode bytes F2 0F 5A.
11017 instruct convD2F_reg_reg(regF dst, regD src)
11018 %{
11019   match(Set dst (ConvD2F src));
11020
11021   format %{ "cvtsd2ss $dst, $src" %}
11022   opcode(0xF2, 0x0F, 0x5A);
11023   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11024   ins_pipe(pipe_slow); // XXX
11025 %}
11026 
// Double -> float narrowing, memory source: matches (ConvD2F (LoadD src)) so
// the load is folded into cvtsd2ss (F2 0F 5A).
11027 instruct convD2F_reg_mem(regF dst, memory src)
11028 %{
11029   match(Set dst (ConvD2F (LoadD src)));
11030
11031   format %{ "cvtsd2ss $dst, $src" %}
11032   opcode(0xF2, 0x0F, 0x5A);
11033   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11034   ins_pipe(pipe_slow); // XXX
11035 %}
11036 
11037 // XXX do mem variants
// Float -> int conversion: matches (ConvF2I src). Emits cvttss2si (F3 0F 2C)
// followed by the f2i_fixup encoding. Per the format string, the fixup compares
// the result with 0x80000000 and, only when equal, spills src to the stack,
// calls the f2i_fixup stub and pops the corrected value into dst. The compare
// clobbers the flags, hence KILL cr.
11038 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11039 %{
11040   match(Set dst (ConvF2I src));
11041   effect(KILL cr);
11042
11043   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11044             "cmpl    $dst, #0x80000000\n\t"
11045             "jne,s   done\n\t"
11046             "subq    rsp, #8\n\t"
11047             "movss   [rsp], $src\n\t"
11048             "call    f2i_fixup\n\t"
11049             "popq    $dst\n"
11050     "done:   "%}
11051   opcode(0xF3, 0x0F, 0x2C);
11052   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11053              f2i_fixup(dst, src));
11054   ins_pipe(pipe_slow);
11055 %}
11056 
// Float -> long conversion: matches (ConvF2L src). Same opcode bytes as the
// int form (F3 0F 2C) but with the wide REX prefix (REX_reg_reg_wide), giving
// the cvttss2siq of the format string, followed by the f2l_fixup encoding.
// Per the format string, the fixup runs only when the result equals
// 0x8000000000000000. Flags are clobbered (KILL cr).
11057 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11058 %{
11059   match(Set dst (ConvF2L src));
11060   effect(KILL cr);
11061
11062   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11063             "cmpq    $dst, [0x8000000000000000]\n\t"
11064             "jne,s   done\n\t"
11065             "subq    rsp, #8\n\t"
11066             "movss   [rsp], $src\n\t"
11067             "call    f2l_fixup\n\t"
11068             "popq    $dst\n"
11069     "done:   "%}
11070   opcode(0xF3, 0x0F, 0x2C);
11071   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11072              f2l_fixup(dst, src));
11073   ins_pipe(pipe_slow);
11074 %}
11075 
// Double -> int conversion: matches (ConvD2I src). Emits cvttsd2si (F2 0F 2C)
// followed by the d2i_fixup encoding. Per the format string, the fixup runs
// only when the result equals 0x80000000: src is spilled to the stack, the
// d2i_fixup stub is called and the corrected value is popped into dst.
// Flags are clobbered (KILL cr).
11076 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11077 %{
11078   match(Set dst (ConvD2I src));
11079   effect(KILL cr);
11080
11081   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11082             "cmpl    $dst, #0x80000000\n\t"
11083             "jne,s   done\n\t"
11084             "subq    rsp, #8\n\t"
11085             "movsd   [rsp], $src\n\t"
11086             "call    d2i_fixup\n\t"
11087             "popq    $dst\n"
11088     "done:   "%}
11089   opcode(0xF2, 0x0F, 0x2C);
11090   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11091              d2i_fixup(dst, src));
11092   ins_pipe(pipe_slow);
11093 %}
11094 
// Double -> long conversion: matches (ConvD2L src). Same opcode bytes as the
// int form (F2 0F 2C) but with the wide REX prefix (REX_reg_reg_wide), giving
// the cvttsd2siq of the format string, followed by the d2l_fixup encoding.
// Per the format string, the fixup runs only when the result equals
// 0x8000000000000000. Flags are clobbered (KILL cr).
11095 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11096 %{
11097   match(Set dst (ConvD2L src));
11098   effect(KILL cr);
11099
11100   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11101             "cmpq    $dst, [0x8000000000000000]\n\t"
11102             "jne,s   done\n\t"
11103             "subq    rsp, #8\n\t"
11104             "movsd   [rsp], $src\n\t"
11105             "call    d2l_fixup\n\t"
11106             "popq    $dst\n"
11107     "done:   "%}
11108   opcode(0xF2, 0x0F, 0x2C);
11109   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11110              d2l_fixup(dst, src));
11111   ins_pipe(pipe_slow);
11112 %}
11113 
// Int -> float conversion from a general register: matches (ConvI2F src) and
// encodes cvtsi2ss (F3 0F 2A). Selected only when UseXmmI2F is off; the
// convXI2F_reg rule covers the same pattern when the flag is on.
11114 instruct convI2F_reg_reg(regF dst, rRegI src)
11115 %{
11116   predicate(!UseXmmI2F);
11117   match(Set dst (ConvI2F src));
11118
11119   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11120   opcode(0xF3, 0x0F, 0x2A);
11121   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11122   ins_pipe(pipe_slow); // XXX
11123 %}
11124 
// Int -> float conversion with the int loaded from memory: matches
// (ConvI2F (LoadI src)) and folds the load into cvtsi2ss (F3 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2F predicate.
11125 instruct convI2F_reg_mem(regF dst, memory src)
11126 %{
11127   match(Set dst (ConvI2F (LoadI src)));
11128
11129   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11130   opcode(0xF3, 0x0F, 0x2A);
11131   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11132   ins_pipe(pipe_slow); // XXX
11133 %}
11134 
// Int -> double conversion from a general register: matches (ConvI2D src) and
// encodes cvtsi2sd (F2 0F 2A). Selected only when UseXmmI2D is off; the
// convXI2D_reg rule covers the same pattern when the flag is on.
11135 instruct convI2D_reg_reg(regD dst, rRegI src)
11136 %{
11137   predicate(!UseXmmI2D);
11138   match(Set dst (ConvI2D src));
11139
11140   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11141   opcode(0xF2, 0x0F, 0x2A);
11142   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11143   ins_pipe(pipe_slow); // XXX
11144 %}
11145 
// Int -> double conversion with the int loaded from memory: matches
// (ConvI2D (LoadI src)) and folds the load into cvtsi2sd (F2 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2D predicate.
11146 instruct convI2D_reg_mem(regD dst, memory src)
11147 %{
11148   match(Set dst (ConvI2D (LoadI src)));
11149
11150   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11151   opcode(0xF2, 0x0F, 0x2A);
11152   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11153   ins_pipe(pipe_slow); // XXX
11154 %}
11155 
// Int -> float conversion done inside the XMM unit, used when UseXmmI2F is
// set: the int is first copied into dst with movdl, then converted in place
// with cvtdq2ps. Two instructions are emitted.
11156 instruct convXI2F_reg(regF dst, rRegI src)
11157 %{
11158   predicate(UseXmmI2F);
11159   match(Set dst (ConvI2F src));
11160
11161   format %{ "movdl $dst, $src\n\t"
11162             "cvtdq2psl $dst, $dst\t# i2f" %}
11163   ins_encode %{
11164     __ movdl($dst$$XMMRegister, $src$$Register);
11165     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11166   %}
11167   ins_pipe(pipe_slow); // XXX
11168 %}
11169 
// Int -> double conversion done inside the XMM unit, used when UseXmmI2D is
// set: the int is first copied into dst with movdl, then converted in place
// with cvtdq2pd. Two instructions are emitted.
11170 instruct convXI2D_reg(regD dst, rRegI src)
11171 %{
11172   predicate(UseXmmI2D);
11173   match(Set dst (ConvI2D src));
11174
11175   format %{ "movdl $dst, $src\n\t"
11176             "cvtdq2pdl $dst, $dst\t# i2d" %}
11177   ins_encode %{
11178     __ movdl($dst$$XMMRegister, $src$$Register);
11179     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11180   %}
11181   ins_pipe(pipe_slow); // XXX
11182 %}
11183 
// Long -> float conversion from a general register: matches (ConvL2F src).
// Uses the cvtsi2ss opcode bytes (F3 0F 2A) with the wide REX prefix
// (REX_reg_reg_wide), i.e. the cvtsi2ssq of the format string.
11184 instruct convL2F_reg_reg(regF dst, rRegL src)
11185 %{
11186   match(Set dst (ConvL2F src));
11187
11188   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11189   opcode(0xF3, 0x0F, 0x2A);
11190   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11191   ins_pipe(pipe_slow); // XXX
11192 %}
11193 
// Long -> float conversion with the long loaded from memory: matches
// (ConvL2F (LoadL src)); wide-REX cvtsi2ss (F3 0F 2A) with a memory operand.
11194 instruct convL2F_reg_mem(regF dst, memory src)
11195 %{
11196   match(Set dst (ConvL2F (LoadL src)));
11197
11198   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11199   opcode(0xF3, 0x0F, 0x2A);
11200   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11201   ins_pipe(pipe_slow); // XXX
11202 %}
11203 
// Long -> double conversion from a general register: matches (ConvL2D src).
// Uses the cvtsi2sd opcode bytes (F2 0F 2A) with the wide REX prefix
// (REX_reg_reg_wide), i.e. the cvtsi2sdq of the format string.
11204 instruct convL2D_reg_reg(regD dst, rRegL src)
11205 %{
11206   match(Set dst (ConvL2D src));
11207
11208   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11209   opcode(0xF2, 0x0F, 0x2A);
11210   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11211   ins_pipe(pipe_slow); // XXX
11212 %}
11213 
// Long -> double conversion with the long loaded from memory: matches
// (ConvL2D (LoadL src)); wide-REX cvtsi2sd (F2 0F 2A) with a memory operand.
11214 instruct convL2D_reg_mem(regD dst, memory src)
11215 %{
11216   match(Set dst (ConvL2D (LoadL src)));
11217
11218   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11219   opcode(0xF2, 0x0F, 0x2A);
11220   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11221   ins_pipe(pipe_slow); // XXX
11222 %}
11223 
// Int -> long sign extension: matches (ConvI2L src) and emits movslq.
// Cost 125; the zero-extending variants in this file declare no ins_cost.
11224 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11225 %{
11226   match(Set dst (ConvI2L src));
11227
11228   ins_cost(125);
11229   format %{ "movslq  $dst, $src\t# i2l" %}
11230   ins_encode %{
11231     __ movslq($dst$$Register, $src$$Register);
11232   %}
11233   ins_pipe(ialu_reg_reg);
11234 %}
11235 
11236 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11237 // %{
11238 //   match(Set dst (ConvI2L src));
11239 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11240 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11241 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11242 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11243 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11244 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11245 
11246 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11247 //   ins_encode(enc_copy(dst, src));
11248 // //   opcode(0x63); // needs REX.W
11249 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11250 //   ins_pipe(ialu_reg_reg);
11251 // %}
11252 
11253 // Zero-extend convert int to long: (AndL (ConvI2L src) mask) with an
// immL_32bits mask collapses to a single 32-bit movl (enc_copy), replacing
// the sign-extend + and pair.
// NOTE(review): assumes immL_32bits only matches 0xFFFFFFFF -- confirm in the
// operand definition.
11254 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11255 %{
11256   match(Set dst (AndL (ConvI2L src) mask));
11257
11258   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11259   ins_encode(enc_copy(dst, src));
11260   ins_pipe(ialu_reg_reg);
11261 %}
11262 
11263 // Zero-extend convert int to long, memory source: the LoadI, ConvI2L and
// AndL-with-immL_32bits are all covered by one 32-bit load (opcode 8B, movl).
11264 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11265 %{
11266   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11267
11268   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11269   opcode(0x8B);
11270   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11271   ins_pipe(ialu_reg_mem);
11272 %}
11273 
// Zero-extend the low 32 bits of a long: matches (AndL src mask) with an
// immL_32bits mask and emits a 32-bit movl instead of a 64-bit and.
// NOTE(review): uses enc_copy_always rather than enc_copy -- presumably so the
// move is emitted even when dst == src, since the move itself does the
// zeroing; confirm against the enc_class definitions.
11274 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11275 %{
11276   match(Set dst (AndL src mask));
11277
11278   format %{ "movl    $dst, $src\t# zero-extend long" %}
11279   ins_encode(enc_copy_always(dst, src));
11280   ins_pipe(ialu_reg_reg);
11281 %}
11282 
// Long -> int truncation: matches (ConvL2I src) and emits a 32-bit movl via
// enc_copy_always, keeping only the low 32 bits of src.
11283 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11284 %{
11285   match(Set dst (ConvL2I src));
11286
11287   format %{ "movl    $dst, $src\t# l2i" %}
11288   ins_encode(enc_copy_always(dst, src));
11289   ins_pipe(ialu_reg_reg);
11290 %}
11291 
11292 
// Raw bit move float -> int where the float lives in a stack slot: matches
// (MoveF2I src) and emits a plain 32-bit load (opcode 8B, movl) into dst.
11293 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11294   match(Set dst (MoveF2I src));
11295   effect(DEF dst, USE src);
11296
11297   ins_cost(125);
11298   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11299   opcode(0x8B);
11300   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11301   ins_pipe(ialu_reg_mem);
11302 %}
11303 
// Raw bit move int -> float where the int lives in a stack slot: matches
// (MoveI2F src) and loads it into the XMM register with movss (F3 0F 10).
11304 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11305   match(Set dst (MoveI2F src));
11306   effect(DEF dst, USE src);
11307
11308   ins_cost(125);
11309   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11310   opcode(0xF3, 0x0F, 0x10);
11311   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11312   ins_pipe(pipe_slow);
11313 %}
11314 
// Raw bit move double -> long where the double lives in a stack slot: matches
// (MoveD2L src) and emits a 64-bit load (opcode 8B with wide REX, movq).
11315 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11316   match(Set dst (MoveD2L src));
11317   effect(DEF dst, USE src);
11318
11319   ins_cost(125);
11320   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11321   opcode(0x8B);
11322   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11323   ins_pipe(ialu_reg_mem);
11324 %}
11325 
// Raw bit move long -> double from a stack slot, variant chosen when
// UseXmmLoadAndClearUpper is off: loads with movlpd (66 0F 12). As the
// "_partial" name suggests, movlpd writes only the low 64 bits of the XMM
// register and leaves the upper half unchanged.
11326 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11327   predicate(!UseXmmLoadAndClearUpper);
11328   match(Set dst (MoveL2D src));
11329   effect(DEF dst, USE src);
11330
11331   ins_cost(125);
11332   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11333   opcode(0x66, 0x0F, 0x12);
11334   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11335   ins_pipe(pipe_slow);
11336 %}
11337 
// Raw bit move long -> double from a stack slot, variant chosen when
// UseXmmLoadAndClearUpper is on: loads with movsd (F2 0F 10), whose memory
// form also zeroes the upper half of the XMM register.
11338 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11339   predicate(UseXmmLoadAndClearUpper);
11340   match(Set dst (MoveL2D src));
11341   effect(DEF dst, USE src);
11342
11343   ins_cost(125);
11344   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11345   opcode(0xF2, 0x0F, 0x10);
11346   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11347   ins_pipe(pipe_slow);
11348 %}
11349 
11350 
// Raw bit move float -> int where the destination is a stack slot: stores the
// XMM register with movss (F3 0F 11). Note the swapped (src, dst) arguments in
// the encoding: the register is the reg field, the stack slot the memory operand.
11351 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11352   match(Set dst (MoveF2I src));
11353   effect(DEF dst, USE src);
11354
11355   ins_cost(95); // XXX
11356   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11357   opcode(0xF3, 0x0F, 0x11);
11358   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11359   ins_pipe(pipe_slow);
11360 %}
11361 
// Raw bit move int -> float where the destination is a stack slot: a plain
// 32-bit store (opcode 89, movl) of the general register. The encoding takes
// (src, dst) because the register is the reg field of the store.
11362 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11363   match(Set dst (MoveI2F src));
11364   effect(DEF dst, USE src);
11365
11366   ins_cost(100);
11367   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11368   opcode(0x89);
11369   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11370   ins_pipe( ialu_mem_reg );
11371 %}
11372 
// Raw bit move double -> long where the destination is a stack slot: stores the
// XMM register with movsd (F2 0F 11). The encoding takes (src, dst) because the
// register is the reg field of the store and the stack slot the memory operand.
// The format tag names this rule (MoveD2L); it previously carried the sibling
// rule's MoveL2D tag, which mislabelled the instruction in printed assembly.
11373 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11374   match(Set dst (MoveD2L src));
11375   effect(DEF dst, USE src);
11376
11377   ins_cost(95); // XXX
11378   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11379   opcode(0xF2, 0x0F, 0x11);
11380   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11381   ins_pipe(pipe_slow);
11382 %}
11383 
// Raw bit move long -> double where the destination is a stack slot: a 64-bit
// store (opcode 89 with wide REX, movq) of the general register. The encoding
// takes (src, dst) because the register is the reg field of the store.
11384 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11385   match(Set dst (MoveL2D src));
11386   effect(DEF dst, USE src);
11387
11388   ins_cost(100);
11389   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11390   opcode(0x89);
11391   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11392   ins_pipe(ialu_mem_reg);
11393 %}
11394 
// Raw bit move float -> int, register to register: copies the XMM register
// into a general register with movdl. Cost 85, below the stack-based
// MoveF2I rules (95 and 125).
11395 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11396   match(Set dst (MoveF2I src));
11397   effect(DEF dst, USE src);
11398   ins_cost(85);
11399   format %{ "movd    $dst,$src\t# MoveF2I" %}
11400   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11401   ins_pipe( pipe_slow );
11402 %}
11403 
// Raw bit move double -> long, register to register: copies the XMM register
// into a general register with movdq (the format string prints it as "movd").
// Cost 85, below the stack-based MoveD2L rules (95 and 125).
11404 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src);
11407   ins_cost(85);
11408   format %{ "movd    $dst,$src\t# MoveD2L" %}
11409   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11410   ins_pipe( pipe_slow );
11411 %}
11412 
11413 // The next instructions have long latency and use Int unit. Set high cost.
// Raw bit move int -> float, register to register: copies the general register
// into the XMM register with movdl. Cost 300, well above the stack-based
// MoveI2F rules (100 and 125), so the matcher favours those.
11414 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11415   match(Set dst (MoveI2F src));
11416   effect(DEF dst, USE src);
11417   ins_cost(300);
11418   format %{ "movd    $dst,$src\t# MoveI2F" %}
11419   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11420   ins_pipe( pipe_slow );
11421 %}
11422 
// Raw bit move long -> double, register to register: copies the general
// register into the XMM register with movdq (printed as "movd" by the format).
// Cost 300, well above the stack-based MoveL2D rules (100 and 125).
11423 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11424   match(Set dst (MoveL2D src));
11425   effect(DEF dst, USE src);
11426   ins_cost(300);
11427   format %{ "movd    $dst,$src\t# MoveL2D" %}
11428   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11429   ins_pipe( pipe_slow );
11430 %}
11431 
11432 // Replicate scalar to packed byte (1 byte) values in xmm; source already in an
// XMM register. The pshufd_8x8 enc_class (defined elsewhere) emits the
// three-instruction sequence listed in the format string.
11433 instruct Repl8B_reg(regD dst, regD src) %{
11434   match(Set dst (Replicate8B src));
11435   format %{ "MOVDQA  $dst,$src\n\t"
11436             "PUNPCKLBW $dst,$dst\n\t"
11437             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11438   ins_encode( pshufd_8x8(dst, src));
11439   ins_pipe( pipe_slow );
11440 %}
11441 
11442 // Replicate scalar to packed byte (1 byte) values in xmm; source in a general
// register. mov_i2x first copies it into dst, then pshufd_8x8 replicates
// within dst (hence the (dst, dst) arguments).
11443 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11444   match(Set dst (Replicate8B src));
11445   format %{ "MOVD    $dst,$src\n\t"
11446             "PUNPCKLBW $dst,$dst\n\t"
11447             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11448   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11449   ins_pipe( pipe_slow );
11450 %}
11451 
11452 // Replicate scalar zero to packed byte (1 byte) values in xmm: an all-zero
// vector, produced by XOR-ing dst with itself (PXOR), so no source is read.
11453 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11454   match(Set dst (Replicate8B zero));
11455   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11456   ins_encode( pxor(dst, dst));
11457   ins_pipe( fpu_reg_reg );
11458 %}
11459 
11460 // Replicate scalar to packed short (2 byte) values in xmm; source already in
// an XMM register. One PSHUFLW with selector 0x00 (per the format string),
// emitted by the pshufd_4x16 enc_class.
11461 instruct Repl4S_reg(regD dst, regD src) %{
11462   match(Set dst (Replicate4S src));
11463   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11464   ins_encode( pshufd_4x16(dst, src));
11465   ins_pipe( fpu_reg_reg );
11466 %}
11467 
11468 // Replicate scalar to packed short (2 byte) values in xmm; source in a
// general register. mov_i2x copies it into dst, then pshufd_4x16 replicates
// within dst.
11469 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11470   match(Set dst (Replicate4S src));
11471   format %{ "MOVD    $dst,$src\n\t"
11472             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11473   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11474   ins_pipe( fpu_reg_reg );
11475 %}
11476 
11477 // Replicate scalar zero to packed short (2 byte) values in xmm: an all-zero
// vector, produced by XOR-ing dst with itself (PXOR).
11478 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11479   match(Set dst (Replicate4S zero));
11480   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11481   ins_encode( pxor(dst, dst));
11482   ins_pipe( fpu_reg_reg );
11483 %}
11484 
11485 // Replicate scalar to packed char (2 byte) values in xmm; source already in
// an XMM register. Uses the same pshufd_4x16 encoding as the packed-short rule.
11486 instruct Repl4C_reg(regD dst, regD src) %{
11487   match(Set dst (Replicate4C src));
11488   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11489   ins_encode( pshufd_4x16(dst, src));
11490   ins_pipe( fpu_reg_reg );
11491 %}
11492 
11493 // Replicate scalar to packed char (2 byte) values in xmm; source in a general
// register. mov_i2x copies it into dst, then pshufd_4x16 replicates within dst.
11494 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11495   match(Set dst (Replicate4C src));
11496   format %{ "MOVD    $dst,$src\n\t"
11497             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11498   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11499   ins_pipe( fpu_reg_reg );
11500 %}
11501 
11502 // Replicate scalar zero to packed char (2 byte) values in xmm: an all-zero
// vector, produced by XOR-ing dst with itself (PXOR).
11503 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11504   match(Set dst (Replicate4C zero));
11505   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11506   ins_encode( pxor(dst, dst));
11507   ins_pipe( fpu_reg_reg );
11508 %}
11509 
11510 // Replicate scalar to packed integer (4 byte) values in xmm; source already
// in an XMM register. PSHUFD with selector 0x00 copies source dword 0 into
// every destination lane.
11511 instruct Repl2I_reg(regD dst, regD src) %{
11512   match(Set dst (Replicate2I src));
11513   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11514   ins_encode( pshufd(dst, src, 0x00));
11515   ins_pipe( fpu_reg_reg );
11516 %}
11517 
11518 // Replicate scalar to packed integer (4 byte) values in xmm; source in a
// general register. mov_i2x copies it into dst, then PSHUFD 0x00 replicates
// dword 0 within dst.
11519 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11520   match(Set dst (Replicate2I src));
11521   format %{ "MOVD   $dst,$src\n\t"
11522             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11523   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11524   ins_pipe( fpu_reg_reg );
11525 %}
11526 
11527 // Replicate scalar zero to packed integer (4 byte) values in xmm: an all-zero
// vector, produced by XOR-ing dst with itself (PXOR).
11528 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11529   match(Set dst (Replicate2I zero));
11530   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11531   ins_encode( pxor(dst, dst));
11532   ins_pipe( fpu_reg_reg );
11533 %}
11534 
11535 // Replicate scalar to packed single precision floating point values in xmm;
// source typed as regD. PSHUFD selector 0xe0 (binary 11 10 00 00) picks source
// dword 0 for the two low lanes and leaves lanes 2 and 3 in place.
11536 instruct Repl2F_reg(regD dst, regD src) %{
11537   match(Set dst (Replicate2F src));
11538   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11539   ins_encode( pshufd(dst, src, 0xe0));
11540   ins_pipe( fpu_reg_reg );
11541 %}
11542 
11543 // Replicate scalar to packed single precision floating point values in xmm;
// same pattern and encoding as the regD-source rule, but with the source
// operand typed as regF. PSHUFD 0xe0 duplicates dword 0 into the two low lanes.
11544 instruct Repl2F_regF(regD dst, regF src) %{
11545   match(Set dst (Replicate2F src));
11546   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11547   ins_encode( pshufd(dst, src, 0xe0));
11548   ins_pipe( fpu_reg_reg );
11549 %}
11550 
11551 // Replicate scalar zero to packed single precision floating point values in
// xmm: matches an immF0 operand and produces an all-zero vector by XOR-ing dst
// with itself (PXOR).
11552 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11553   match(Set dst (Replicate2F zero));
11554   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11555   ins_encode( pxor(dst, dst));
11556   ins_pipe( fpu_reg_reg );
11557 %}
11558 
11559 
11560 // =======================================================================
11561 // fast clearing of an array
// ClearArray: zero-fills memory with "rep stosq". The operand classes pin the
// registers the string instruction needs: count in rcx, base in rdi, zero in
// rax. rax is cleared with xorl first; the F3 48 AB bytes are then rep +
// REX.W + stos. cnt and base are consumed and destroyed (USE_KILL), while rax
// and the flags are clobbered (KILL).
// NOTE(review): with stosq the count is presumably in 8-byte words -- confirm
// against the ClearArray node contract.
11562 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11563                   rFlagsReg cr)
11564 %{
11565   match(Set dummy (ClearArray cnt base));
11566   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11567
11568   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11569             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11570   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11571              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11572   ins_pipe(pipe_slow);
11573 %}
11574 
11575 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11576                         rax_RegI result, regD tmp1, rFlagsReg cr)
11577 %{
        // Matches StrComp and delegates the code generation to the macro assembler's
        // string_compare. All four inputs sit in fixed registers and are declared
        // USE_KILL, so the generated code is free to overwrite them; tmp1 is an XMM
        // scratch register and the flags are clobbered. The result is produced in RAX.
11578   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11579   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11580 
11581   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11582   ins_encode %{
          // Note the argument order: both string pointers first, then both counts.
11583     __ string_compare($str1$$Register, $str2$$Register,
11584                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11585                       $tmp1$$XMMRegister);
11586   %}
11587   ins_pipe( pipe_slow );
11588 %}
11589 
11590 // fast search of substring with known size.
      // Matches StrIndexOf when the substring length is a compile-time constant
      // (immI int_cnt2); only selected when UseSSE42Intrinsics is set. The constant
      // is passed to the macro assembler as a plain int, while cnt2 (RAX) and tmp
      // (RCX) are pure scratch registers, hence KILL rather than USE_KILL.
11591 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11592                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11593 %{
11594   predicate(UseSSE42Intrinsics);
11595   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11596   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11597 
11598   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11599   ins_encode %{
11600     int icnt2 = (int)$int_cnt2$$constant;
          // Dispatch on the constant length: 8 or more elements use the C8 variant.
11601     if (icnt2 >= 8) {
11602       // IndexOf for constant substrings with size >= 8 elements
11603       // which don't need to be loaded through stack.
11604       __ string_indexofC8($str1$$Register, $str2$$Register,
11605                           $cnt1$$Register, $cnt2$$Register,
11606                           icnt2, $result$$Register,
11607                           $vec$$XMMRegister, $tmp$$Register);
11608     } else {
11609       // Small strings are loaded through stack if they cross page boundary.
11610       __ string_indexof($str1$$Register, $str2$$Register,
11611                         $cnt1$$Register, $cnt2$$Register,
11612                         icnt2, $result$$Register,
11613                         $vec$$XMMRegister, $tmp$$Register);
11614     }
11615   %}
11616   ins_pipe( pipe_slow );
11617 %}
11618 
11619 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11620                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11621 %{
        // Matches StrIndexOf when the substring length is only known at run time
        // (cnt2 is a register input in RAX, so it is USE_KILL here). Only selected
        // when UseSSE42Intrinsics is set. The result is produced in RBX.
11622   predicate(UseSSE42Intrinsics);
11623   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11624   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11625 
11626   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11627   ins_encode %{
          // (-1) fills the constant-count slot used by string_indexof_con;
          // presumably it means "length not constant" -- confirm in the macro assembler.
11628     __ string_indexof($str1$$Register, $str2$$Register,
11629                       $cnt1$$Register, $cnt2$$Register,
11630                       (-1), $result$$Register,
11631                       $vec$$XMMRegister, $tmp$$Register);
11632   %}
11633   ins_pipe( pipe_slow );
11634 %}
11635 
11636 // fast string equals
      // Matches StrEquals and delegates to the macro assembler's char_arrays_equals
      // with its first argument set to false (array_equals passes true to the same
      // routine). Inputs are fixed to RDI/RSI/RCX and declared USE_KILL; tmp1/tmp2
      // are XMM scratch registers, tmp3 (RBX) and the flags are clobbered.
11637 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11638                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11639 %{
11640   match(Set result (StrEquals (Binary str1 str2) cnt));
11641   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11642 
11643   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11644   ins_encode %{
11645     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11646                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11647                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11648   %}
11649   ins_pipe( pipe_slow );
11650 %}
11651 
11652 // fast array equals
      // Matches AryEq and delegates to the macro assembler's char_arrays_equals with
      // its first argument set to true (string_equals passes false). There is no
      // count input: tmp3 (RCX) is a scratch register passed in the argument slot
      // where string_equals passes its cnt operand.
11653 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11654                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11655 %{
11656   match(Set result (AryEq ary1 ary2));
11657   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11658   //ins_cost(300);
11659 
11660   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11661   ins_encode %{
11662     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11663                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11664                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11665   %}
11666   ins_pipe( pipe_slow );
11667 %}
11668 
11669 //----------Control Flow Instructions------------------------------------------
11670 // Signed compare Instructions
11671 
11672 // XXX more variants!!
11673 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11674 %{
        // Signed 32-bit compare of two registers; the flags in cr reflect op1 - op2.
        // The explicit effect list appears to restate what the match rule already
        // says (the sibling compU_rReg has the same shape and omits it).
11675   match(Set cr (CmpI op1 op2));
11676   effect(DEF cr, USE op1, USE op2);
11677 
11678   format %{ "cmpl    $op1, $op2" %}
11679   opcode(0x3B);  /* Opcode 3B /r */
11680   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11681   ins_pipe(ialu_cr_reg_reg);
11682 %}
11683 
11684 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11685 %{
        // Signed 32-bit compare of a register with an int constant (CMP r/m32, imm).
        // NOTE(review): OpcSErm/Con8or32 are defined elsewhere; by name they appear
        // to select the sign-extended imm8 form when the constant fits -- confirm.
11686   match(Set cr (CmpI op1 op2));
11687 
11688   format %{ "cmpl    $op1, $op2" %}
11689   opcode(0x81, 0x07); /* Opcode 81 /7 */
11690   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11691   ins_pipe(ialu_cr_reg_imm);
11692 %}
11693 
11694 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11695 %{
        // Signed 32-bit compare of a register with an int loaded from memory: the
        // LoadI is folded into the CMP's memory operand instead of using a register.
11696   match(Set cr (CmpI op1 (LoadI op2)));
11697 
11698   ins_cost(500); // XXX
11699   format %{ "cmpl    $op1, $op2" %}
11700   opcode(0x3B); /* Opcode 3B /r */
11701   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11702   ins_pipe(ialu_cr_reg_mem);
11703 %}
11704 
11705 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11706 %{
        // Compare of an int register against zero, emitted as TEST src, src
        // (opcode 85 /r). The zero operand only takes part in matching; the
        // encoding never references it.
11707   match(Set cr (CmpI src zero));
11708 
11709   format %{ "testl   $src, $src" %}
11710   opcode(0x85);
11711   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11712   ins_pipe(ialu_cr_reg_imm);
11713 %}
11714 
11715 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11716 %{
        // Folds "(src & con) compared with 0" into a single TEST r/m32, imm32
        // (opcode F7 /0). TEST sets the flags from the AND without writing a
        // result, so src is left unchanged and no temporary is needed.
11717   match(Set cr (CmpI (AndI src con) zero));
11718 
11719   format %{ "testl   $src, $con" %}
11720   opcode(0xF7, 0x00);
11721   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11722   ins_pipe(ialu_cr_reg_imm);
11723 %}
11724 
11725 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11726 %{
        // Folds "(src & load(mem)) compared with 0" into a single TEST with a
        // memory operand (opcode 85 /r); neither the load nor the AND needs a
        // register of its own.
11727   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11728 
11729   format %{ "testl   $src, $mem" %}
11730   opcode(0x85);
11731   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11732   ins_pipe(ialu_cr_reg_mem);
11733 %}
11734 
11735 // Unsigned compare Instructions; really, same as signed except they
11736 // produce an rFlagsRegU instead of rFlagsReg.
      // compU_rReg: unsigned 32-bit compare of two registers. The encoding is the
      // same opcode and encoding classes as compI_rReg; only the flags operand
      // class and the matched ideal node (CmpU) differ.
11737 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11738 %{
11739   match(Set cr (CmpU op1 op2));
11740 
11741   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11742   opcode(0x3B); /* Opcode 3B /r */
11743   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11744   ins_pipe(ialu_cr_reg_reg);
11745 %}
11746 
11747 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11748 %{
        // Unsigned 32-bit compare of a register with an int constant. Uses the same
        // opcode and encoding classes as compI_rReg_imm; the result is typed as
        // rFlagsRegU.
11749   match(Set cr (CmpU op1 op2));
11750 
11751   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11752   opcode(0x81,0x07); /* Opcode 81 /7 */
11753   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11754   ins_pipe(ialu_cr_reg_imm);
11755 %}
11756 
11757 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11758 %{
        // Unsigned 32-bit compare of a register with an int loaded from memory;
        // the LoadI is folded into the CMP's memory operand.
11759   match(Set cr (CmpU op1 (LoadI op2)));
11760 
11761   ins_cost(500); // XXX
11762   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11763   opcode(0x3B); /* Opcode 3B /r */
11764   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11765   ins_pipe(ialu_cr_reg_mem);
11766 %}
11767 
11768 // // // Cisc-spilled version of cmpU_rReg
11769 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11770 // //%{
11771 // //  match(Set cr (CmpU (LoadI op1) op2));
11772 // //
11773 // //  format %{ "CMPu   $op1,$op2" %}
11774 // //  ins_cost(500);
11775 // //  opcode(0x39);  /* Opcode 39 /r */
11776 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11777 // //%}
11778 
11779 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11780 %{
        // Unsigned compare of an int register against zero, emitted as
        // TEST src, src (opcode 85 /r). The zero operand is used for matching only.
11781   match(Set cr (CmpU src zero));
11782 
11783   format %{ "testl  $src, $src\t# unsigned" %}
11784   opcode(0x85);
11785   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11786   ins_pipe(ialu_cr_reg_imm);
11787 %}
11788 
11789 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11790 %{
        // Compare of two pointer registers. The *_wide REX encoding class is used,
        // matching the 64-bit "cmpq" shown in the format, and the result is typed
        // as unsigned flags (rFlagsRegU).
11791   match(Set cr (CmpP op1 op2));
11792 
11793   format %{ "cmpq    $op1, $op2\t# ptr" %}
11794   opcode(0x3B); /* Opcode 3B /r */
11795   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11796   ins_pipe(ialu_cr_reg_reg);
11797 %}
11798 
11799 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11800 %{
        // Compare of a pointer register with a pointer loaded from memory; the
        // LoadP is folded into the 64-bit CMP's memory operand. compP_mem_rReg
        // has the same match rule and encoding but adds a predicate and carries
        // no explicit cost.
11801   match(Set cr (CmpP op1 (LoadP op2)));
11802 
11803   ins_cost(500); // XXX
11804   format %{ "cmpq    $op1, $op2\t# ptr" %}
11805   opcode(0x3B); /* Opcode 3B /r */
11806   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11807   ins_pipe(ialu_cr_reg_mem);
11808 %}
11809 
11810 // // // Cisc-spilled version of cmpP_rReg
11811 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11812 // //%{
11813 // //  match(Set cr (CmpP (LoadP op1) op2));
11814 // //
11815 // //  format %{ "CMPu   $op1,$op2" %}
11816 // //  ins_cost(500);
11817 // //  opcode(0x39);  /* Opcode 39 /r */
11818 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11819 // //%}
11820 
11821 // XXX this is generalized by compP_rReg_mem???
11822 // Compare raw pointer (used in out-of-heap check).
11823 // Only works because non-oop pointers must be raw pointers
11824 // and raw pointers have no anti-dependencies.
      // Same match rule and encoding as compP_rReg_mem, restricted by the predicate
      // and without the ins_cost(500) that compP_rReg_mem carries.
      // NOTE(review): despite the "mem_rReg" name, the operand order is
      // register first, memory second.
11825 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11826 %{
        // The predicate inspects in(2) of the node's second input and rejects oop
        // pointer types; presumably that is the address input of the LoadP -- confirm.
11827   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11828   match(Set cr (CmpP op1 (LoadP op2)));
11829 
11830   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11831   opcode(0x3B); /* Opcode 3B /r */
11832   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11833   ins_pipe(ialu_cr_reg_mem);
11834 %}
11835 
11836 // This will generate a signed flags result. This should be OK since
11837 // any compare to a zero should be eq/neq.
      // testP_reg: null check of a pointer register, emitted as a 64-bit
      // TEST src, src (opcode 85 /r). The immP0 operand is used for matching only.
11838 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11839 %{
11840   match(Set cr (CmpP src zero));
11841 
11842   format %{ "testq   $src, $src\t# ptr" %}
11843   opcode(0x85);
11844   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11845   ins_pipe(ialu_cr_reg_imm);
11846 %}
11847 
11848 // This will generate a signed flags result. This should be OK since
11849 // any compare to a zero should be eq/neq.
      // testP_mem: null check of a pointer loaded from memory, without using a
      // register for the loaded value. The predicate is the exact complement of
      // the one on testP_mem_reg0, so only one of the two rules applies at a time.
11850 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11851 %{
11852   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11853   match(Set cr (CmpP (LoadP op) zero));
11854 
11855   ins_cost(500); // XXX
11856   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11857   opcode(0xF7); /* Opcode F7 /0 */
        // With REX.W, F7 /0 sign-extends its 32-bit immediate, so 0xFFFFFFFF acts
        // as the all-ones 64-bit mask shown in the format string.
11858   ins_encode(REX_mem_wide(op),
11859              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11860   ins_pipe(ialu_cr_reg_imm);
11861 %}
11862 
11863 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11864 %{
        // Null check of a pointer loaded from memory for the configuration where
        // compressed oops are on and the narrow-oop base is NULL. It compares
        // against R12 instead of an immediate; per the format string this relies
        // on R12 (the heap-base register) holding zero in that configuration.
        // Operands are emitted as "R12, mem", so only equal/not-equal is meaningful.
11865   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11866   match(Set cr (CmpP (LoadP mem) zero));
11867 
11868   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11869   ins_encode %{
11870     __ cmpq(r12, $mem$$Address);
11871   %}
11872   ins_pipe(ialu_cr_reg_mem);
11873 %}
11874 
11875 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11876 %{
        // Compare of two compressed-pointer (narrow oop) registers using a 32-bit
        // CMP emitted through the assembler; the result is typed as unsigned flags.
11877   match(Set cr (CmpN op1 op2));
11878 
11879   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11880   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11881   ins_pipe(ialu_cr_reg_reg);
11882 %}
11883 
11884 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11885 %{
        // Compare of a compressed-pointer register with a compressed pointer loaded
        // from memory; the LoadN is folded into the 32-bit CMP's memory operand.
11886   match(Set cr (CmpN src (LoadN mem)));
11887 
11888   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11889   ins_encode %{
11890     __ cmpl($src$$Register, $mem$$Address);
11891   %}
11892   ins_pipe(ialu_cr_reg_mem);
11893 %}
11894 
11895 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a compressed-pointer register with a narrow-oop constant.
        // The constant is handed to cmp_narrow_oop as a jobject rather than as a
        // raw integer immediate.
11896   match(Set cr (CmpN op1 op2));
11897 
11898   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11899   ins_encode %{
11900     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11901   %}
11902   ins_pipe(ialu_cr_reg_imm);
11903 %}
11904 
11905 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11906 %{
        // Compare of a narrow-oop constant with a compressed pointer loaded from
        // memory, without using a register for either side.
        // NOTE(review): the match rule is "CmpN src (LoadN mem)" but the emitted
        // compare has mem as its first operand and src as its second, i.e. the
        // operands are swapped. Equal/not-equal tests are unaffected; an ordered
        // test would see reversed flags -- confirm CmpN is only used for eq/ne.
11907   match(Set cr (CmpN src (LoadN mem)));
11908 
11909   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11910   ins_encode %{
11911     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11912   %}
11913   ins_pipe(ialu_cr_reg_mem);
11914 %}
11915 
11916 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a compressed-pointer register, emitted as a 32-bit
        // TEST src, src. The immN0 operand is used for matching only.
11917   match(Set cr (CmpN src zero));
11918 
11919   format %{ "testl   $src, $src\t# compressed ptr" %}
11920   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11921   ins_pipe(ialu_cr_reg_imm);
11922 %}
11923 
11924 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11925 %{
        // Null check of a compressed pointer loaded from memory, for the case where
        // the narrow-oop base is non-NULL (testN_mem_reg0 covers the NULL-base case).
        // The flags must report "equal" exactly when the loaded 32-bit value is 0,
        // so the memory operand is compared against an immediate zero. A CMP
        // against 0xFFFFFFFF would report "equal" for an all-ones value instead,
        // which is not what CmpN against zero means.
11926   predicate(Universe::narrow_oop_base() != NULL);
11927   match(Set cr (CmpN (LoadN mem) zero));
11928 
11929   ins_cost(500); // XXX
11930   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11931   ins_encode %{
11932     __ cmpl($mem$$Address, (int)0);
11933   %}
11934   ins_pipe(ialu_cr_reg_mem);
11935 %}
11936 
11937 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11938 %{
        // Null check of a compressed pointer loaded from memory for the case where
        // the narrow-oop base is NULL. It compares against the low 32 bits of R12
        // instead of an immediate; per the format string this relies on R12 (the
        // heap-base register) holding zero in that configuration.
        // Operands are emitted as "R12, mem", so only equal/not-equal is meaningful.
11939   predicate(Universe::narrow_oop_base() == NULL);
11940   match(Set cr (CmpN (LoadN mem) zero));
11941 
11942   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11943   ins_encode %{
11944     __ cmpl(r12, $mem$$Address);
11945   %}
11946   ins_pipe(ialu_cr_reg_mem);
11947 %}
11948 
11949 // Yanked all unsigned pointer compare operations.
11950 // Pointer compares are done with CmpP which is already unsigned.
11951 
11952 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11953 %{
        // Signed 64-bit compare of two long registers; same opcode as compI_rReg
        // but with the *_wide REX encoding class (cmpq in the format).
11954   match(Set cr (CmpL op1 op2));
11955 
11956   format %{ "cmpq    $op1, $op2" %}
11957   opcode(0x3B);  /* Opcode 3B /r */
11958   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11959   ins_pipe(ialu_cr_reg_reg);
11960 %}
11961 
11962 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11963 %{
        // Signed 64-bit compare of a long register with a constant. The operand
        // class is immL32, so only long constants accepted by that class match;
        // the 64-bit CMP has no 64-bit immediate form.
11964   match(Set cr (CmpL op1 op2));
11965 
11966   format %{ "cmpq    $op1, $op2" %}
11967   opcode(0x81, 0x07); /* Opcode 81 /7 */
11968   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11969   ins_pipe(ialu_cr_reg_imm);
11970 %}
11971 
11972 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11973 %{
        // Signed 64-bit compare of a long register with a long loaded from memory;
        // the LoadL is folded into the CMP's memory operand.
        // NOTE(review): unlike the int, unsigned and pointer reg/mem compares,
        // this rule has no ins_cost(500) -- confirm that is intentional.
11974   match(Set cr (CmpL op1 (LoadL op2)));
11975 
11976   format %{ "cmpq    $op1, $op2" %}
11977   opcode(0x3B); /* Opcode 3B /r */
11978   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11979   ins_pipe(ialu_cr_reg_mem);
11980 %}
11981 
11982 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11983 %{
        // Compare of a long register against zero, emitted as a 64-bit
        // TEST src, src (opcode 85 /r). The zero operand is used for matching only.
11984   match(Set cr (CmpL src zero));
11985 
11986   format %{ "testq   $src, $src" %}
11987   opcode(0x85);
11988   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11989   ins_pipe(ialu_cr_reg_imm);
11990 %}
11991 
11992 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11993 %{
        // Folds "(src & con) compared with 0" into a single 64-bit TEST r/m64, imm32
        // (opcode F7 /0 with a wide REX prefix). The mask is an immL32 emitted as a
        // 32-bit immediate, which the 64-bit form of the instruction sign-extends.
11994   match(Set cr (CmpL (AndL src con) zero));
11995 
11996   format %{ "testq   $src, $con\t# long" %}
11997   opcode(0xF7, 0x00);
11998   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11999   ins_pipe(ialu_cr_reg_imm);
12000 %}
12001 
12002 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12003 %{
        // Folds "(src & load(mem)) compared with 0" into a single 64-bit TEST with
        // a memory operand (opcode 85 /r); neither the load nor the AND needs a
        // register of its own.
12004   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12005 
12006   format %{ "testq   $src, $mem" %}
12007   opcode(0x85);
12008   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12009   ins_pipe(ialu_cr_reg_mem);
12010 %}
12011 
12012 // Manifest a CmpL result in an integer register.  Very painful.
12013 // This is the test to avoid.
      // Per the format string, the sequence is: compare, preload dst with -1, leave
      // it at -1 when src1 < src2 (signed), otherwise set dst to 1 if the operands
      // differ and 0 if they are equal. The flags are clobbered, hence KILL flags.
      // The actual bytes come from the cmpl3_flag encoding class defined elsewhere.
12014 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12015 %{
12016   match(Set dst (CmpL3 src1 src2));
12017   effect(KILL flags);
12018 
12019   ins_cost(275); // XXX
12020   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12021             "movl    $dst, -1\n\t"
12022             "jl,s    done\n\t"
12023             "setne   $dst\n\t"
12024             "movzbl  $dst, $dst\n\t"
12025     "done:" %}
12026   ins_encode(cmpl3_flag(src1, src2, dst));
12027   ins_pipe(pipe_slow);
12028 %}
12029 
12030 //----------Max and Min--------------------------------------------------------
12031 // Min Instructions
12032 
12033 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12034 %{
        // Helper with no match rule: it is only instantiated from the expand block
        // of minI_rReg. Opcode 0F 4F is CMOVG, so dst is replaced by src when the
        // preceding signed compare found "greater"; after comparing dst with src
        // that leaves the smaller value in dst.
12035   effect(USE_DEF dst, USE src, USE cr);
12036 
12037   format %{ "cmovlgt $dst, $src\t# min" %}
12038   opcode(0x0F, 0x4F);
12039   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12040   ins_pipe(pipe_cmov_reg);
12041 %}
12042 
12043 
12044 instruct minI_rReg(rRegI dst, rRegI src)
12045 %{
        // Integer minimum computed in place in dst. It has no encoding of its own:
        // it expands into a signed compare of dst with src followed by the
        // conditional move cmovI_reg_g, with a fresh flags operand linking the two.
12046   match(Set dst (MinI dst src));
12047 
12048   ins_cost(200);
12049   expand %{
12050     rFlagsReg cr;
12051     compI_rReg(cr, dst, src);
12052     cmovI_reg_g(dst, src, cr);
12053   %}
12054 %}
12055 
12056 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12057 %{
        // Helper with no match rule: it is only instantiated from the expand block
        // of maxI_rReg. Opcode 0F 4C is CMOVL, so dst is replaced by src when the
        // preceding signed compare found "less"; after comparing dst with src that
        // leaves the larger value in dst.
12058   effect(USE_DEF dst, USE src, USE cr);
12059 
12060   format %{ "cmovllt $dst, $src\t# max" %}
12061   opcode(0x0F, 0x4C);
12062   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12063   ins_pipe(pipe_cmov_reg);
12064 %}
12065 
12066 
12067 instruct maxI_rReg(rRegI dst, rRegI src)
12068 %{
        // Integer maximum computed in place in dst. It has no encoding of its own:
        // it expands into a signed compare of dst with src followed by the
        // conditional move cmovI_reg_l, with a fresh flags operand linking the two.
12069   match(Set dst (MaxI dst src));
12070 
12071   ins_cost(200);
12072   expand %{
12073     rFlagsReg cr;
12074     compI_rReg(cr, dst, src);
12075     cmovI_reg_l(dst, src, cr);
12076   %}
12077 %}
12078 
12079 // ============================================================================
12080 // Branch Instructions
12081 
12082 // Jump Direct - Label defines a relative address from JMP+1
      // Long-form unconditional jump for Goto: opcode E9 followed by a 32-bit
      // displacement, which is why size is fixed at 5 bytes. jmpDir_short is the
      // 2-byte replacement used when the target is close enough.
12083 instruct jmpDir(label labl)
12084 %{
12085   match(Goto);
12086   effect(USE labl);
12087 
12088   ins_cost(300);
12089   format %{ "jmp     $labl" %}
12090   size(5);
12091   opcode(0xE9);
12092   ins_encode(OpcP, Lbl(labl));
12093   ins_pipe(pipe_jmp);
12094   ins_pc_relative(1);
12095 %}
12096 
12097 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long-form conditional branch for If on signed flags: the two opcode bytes
      // (0F, 80 combined with the condition by the Jcc encoding class) plus a
      // 32-bit displacement give the fixed size of 6 bytes.
12098 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12099 %{
12100   match(If cop cr);
12101   effect(USE labl);
12102 
12103   ins_cost(300);
12104   format %{ "j$cop     $labl" %}
12105   size(6);
12106   opcode(0x0F, 0x80);
12107   ins_encode(Jcc(cop, labl));
12108   ins_pipe(pipe_jcc);
12109   ins_pc_relative(1);
12110 %}
12111 
12112 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same 6-byte long-form conditional branch as jmpCon, but matching the
      // CountedLoopEnd node (the back-branch of a counted loop) on signed flags.
12113 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12114 %{
12115   match(CountedLoopEnd cop cr);
12116   effect(USE labl);
12117 
12118   ins_cost(300);
12119   format %{ "j$cop     $labl\t# loop end" %}
12120   size(6);
12121   opcode(0x0F, 0x80);
12122   ins_encode(Jcc(cop, labl));
12123   ins_pipe(pipe_jcc);
12124   ins_pc_relative(1);
12125 %}
12126 
12127 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd branch on unsigned flags (cmpOpU / rFlagsRegU); same 6-byte
      // long-form encoding as jmpLoopEnd.
12128 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12129   match(CountedLoopEnd cop cmp);
12130   effect(USE labl);
12131 
12132   ins_cost(300);
12133   format %{ "j$cop,u   $labl\t# loop end" %}
12134   size(6);
12135   opcode(0x0F, 0x80);
12136   ins_encode(Jcc(cop, labl));
12137   ins_pipe(pipe_jcc);
12138   ins_pc_relative(1);
12139 %}
12140 
12141 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand classes.
        // Same 6-byte long-form encoding as jmpLoopEndU, but with a lower cost
        // (200 instead of 300).
12142   match(CountedLoopEnd cop cmp);
12143   effect(USE labl);
12144 
12145   ins_cost(200);
12146   format %{ "j$cop,u   $labl\t# loop end" %}
12147   size(6);
12148   opcode(0x0F, 0x80);
12149   ins_encode(Jcc(cop, labl));
12150   ins_pipe(pipe_jcc);
12151   ins_pc_relative(1);
12152 %}
12153 
12154 // Jump Direct Conditional - using unsigned comparison
      // Long-form (6-byte) conditional branch for If on unsigned flags
      // (cmpOpU / rFlagsRegU); same encoding as jmpCon.
12155 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12156   match(If cop cmp);
12157   effect(USE labl);
12158 
12159   ins_cost(300);
12160   format %{ "j$cop,u  $labl" %}
12161   size(6);
12162   opcode(0x0F, 0x80);
12163   ins_encode(Jcc(cop, labl));
12164   ins_pipe(pipe_jcc);
12165   ins_pc_relative(1);
12166 %}
12167 
12168 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Long-form (6-byte) conditional branch for If with the cmpOpUCF /
        // rFlagsRegUCF operand classes. Same encoding as jmpConU, but with a
        // lower cost (200 instead of 300).
12169   match(If cop cmp);
12170   effect(USE labl);
12171 
12172   ins_cost(200);
12173   format %{ "j$cop,u  $labl" %}
12174   size(6);
12175   opcode(0x0F, 0x80);
12176   ins_encode(Jcc(cop, labl));
12177   ins_pipe(pipe_jcc);
12178   ins_pc_relative(1);
12179 %}
12180 
12181 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch that needs two jumps: a parity jump followed by the
        // jump for $cop. Only equal and notEqual are supported (anything else hits
        // ShouldNotReachHere). Two 6-byte long-form jumps give size(12).
        //   notEqual: the parity jump also goes to the label.
        //   equal:    the parity jump skips over the second jump.
        // NOTE(review): presumably the parity flag marks an unordered floating-point
        // compare here -- confirm against the rFlagsRegUCF producers.
12182   match(If cop cmp);
12183   effect(USE labl);
12184 
12185   ins_cost(200);
12186   format %{ $$template
12187     if ($cop$$cmpcode == Assembler::notEqual) {
12188       $$emit$$"jp,u   $labl\n\t"
12189       $$emit$$"j$cop,u   $labl"
12190     } else {
12191       $$emit$$"jp,u   done\n\t"
12192       $$emit$$"j$cop,u   $labl\n\t"
12193       $$emit$$"done:"
12194     }
12195   %}
12196   size(12);
12197   opcode(0x0F, 0x80);
12198   ins_encode %{
12199     Label* l = $labl$$label;
          // First jump: primary opcode byte, then secondary byte combined with the
          // parity condition.
12200     $$$emit8$primary;
12201     emit_cc(cbuf, $secondary, Assembler::parity);
12202     int parity_disp = -1;
12203     if ($cop$$cmpcode == Assembler::notEqual) {
12204        // the two jumps 6 bytes apart so the jump distances are too
             // Displacement is relative to the end of the 4-byte field about to be
             // emitted; a NULL label yields a displacement of 0.
12205        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12206     } else if ($cop$$cmpcode == Assembler::equal) {
             // Skip exactly the 6-byte conditional jump that follows.
12207        parity_disp = 6;
12208     } else {
12209        ShouldNotReachHere();
12210     }
12211     emit_d32(cbuf, parity_disp);
          // Second jump: the requested condition, always targeting the label.
12212     $$$emit8$primary;
12213     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12214     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12215     emit_d32(cbuf, disp);
12216   %}
12217   ins_pipe(pipe_jcc);
12218   ins_pc_relative(1);
12219 %}
12220 
12221 // ============================================================================
12222 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12223 // superklass array for an instance of the superklass.  Set a hidden
12224 // internal cache on a hit (cache is checked with exposed code in
12225 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12226 // encoding ALSO sets flags.
12227 
12228 instruct partialSubtypeCheck(rdi_RegP result,
12229                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12230                              rFlagsReg cr)
12231 %{
        // Slow-path subtype check that produces its answer as a value in RDI:
        // per the format string, RDI is zeroed on a hit and left non-zero on a miss.
        // sub, super and the scan counter are pinned to RSI, RAX and RCX because the
        // scan uses REPNE SCAS; RCX and the flags are clobbered.
        // opcode(0x1) is read by the shared enc_PartialSubtypeCheck encoding class to
        // request the final XOR of RDI (the _vs_Zero variant passes 0x0 to omit it).
12232   match(Set result (PartialSubtypeCheck sub super));
12233   effect(KILL rcx, KILL cr);
12234 
12235   ins_cost(1100);  // slightly larger than the next version
12236   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12237             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12238             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12239             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12240             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12241             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12242             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12243     "miss:\t" %}
12244 
12245   opcode(0x1); // Force a XOR of RDI
12246   ins_encode(enc_PartialSubtypeCheck());
12247   ins_pipe(pipe_slow);
12248 %}
12249 
12250 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12251                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12252                                      immP0 zero,
12253                                      rdi_RegP result)
12254 %{
        // Variant of partialSubtypeCheck for the case where the result is only
        // compared against NULL: the answer is delivered in the flags (cr), so RDI
        // is merely a clobbered scratch register and the final XOR is omitted.
        // It is slightly cheaper (1000 vs 1100) than the value-producing variant.
12255   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12256   effect(KILL rcx, KILL result);
12257 
12258   ins_cost(1000);
12259   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12260             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12261             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12262             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx-- != 0\n\t"
12263             "jne,s   miss\t\t# Missed: flags nz\n\t"
12264             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12265     "miss:\t" %}
12266 
12267   opcode(0x0); // No need to XOR RDI
12268   ins_encode(enc_PartialSubtypeCheck());
12269   ins_pipe(pipe_slow);
12270 %}
12271 
12272 // ============================================================================
12273 // Branch Instructions -- short offset versions
12274 //
12275 // These instructions are used to replace jumps of a long offset (the default
12276 // match) with jumps of a shorter offset.  These instructions are all tagged
12277 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12278 // match rules in general matching.  Instead, the ADLC generates a conversion
12279 // method in the MachNode which can be used to do in-place replacement of the
12280 // long variant with the shorter variant.  The compiler will determine if a
12281 // branch can be taken by the is_short_branch_offset() predicate in the machine
12282 // specific code section of the file.
12283 
12284 // Jump Direct - Label defines a relative address from JMP+1
      // Short-form replacement for jmpDir: opcode EB followed by an 8-bit
      // displacement (2 bytes total). ins_short_branch(1) marks it as a short
      // variant, so it is substituted for the long form rather than matched directly.
12285 instruct jmpDir_short(label labl) %{
12286   match(Goto);
12287   effect(USE labl);
12288 
12289   ins_cost(300);
12290   format %{ "jmp,s   $labl" %}
12291   size(2);
12292   opcode(0xEB);
12293   ins_encode(OpcP, LblShort(labl));
12294   ins_pipe(pipe_jmp);
12295   ins_pc_relative(1);
12296   ins_short_branch(1);
12297 %}
12298 
12299 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-form replacement for jmpCon: one opcode byte (0x70 combined with the
      // condition by JccShort) plus an 8-bit displacement, 2 bytes total.
12300 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12301   match(If cop cr);
12302   effect(USE labl);
12303 
12304   ins_cost(300);
12305   format %{ "j$cop,s   $labl" %}
12306   size(2);
12307   opcode(0x70);
12308   ins_encode(JccShort(cop, labl));
12309   ins_pipe(pipe_jcc);
12310   ins_pc_relative(1);
12311   ins_short_branch(1);
12312 %}
12313 
12314 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-form (2-byte) replacement for jmpLoopEnd: CountedLoopEnd branch on
      // signed flags with an 8-bit displacement.
12315 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12316   match(CountedLoopEnd cop cr);
12317   effect(USE labl);
12318 
12319   ins_cost(300);
12320   format %{ "j$cop,s   $labl\t# loop end" %}
12321   size(2);
12322   opcode(0x70);
12323   ins_encode(JccShort(cop, labl));
12324   ins_pipe(pipe_jcc);
12325   ins_pc_relative(1);
12326   ins_short_branch(1);
12327 %}
12328 
12329 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-form (2-byte) replacement for jmpLoopEndU: CountedLoopEnd branch on
      // unsigned flags with an 8-bit displacement.
12330 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12331   match(CountedLoopEnd cop cmp);
12332   effect(USE labl);
12333 
12334   ins_cost(300);
12335   format %{ "j$cop,us  $labl\t# loop end" %}
12336   size(2);
12337   opcode(0x70);
12338   ins_encode(JccShort(cop, labl));
12339   ins_pipe(pipe_jcc);
12340   ins_pc_relative(1);
12341   ins_short_branch(1);
12342 %}
12343 
12344 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-form (2-byte) replacement for jmpLoopEndUCF.
        // NOTE(review): the cost here is 300 while the long form jmpLoopEndUCF
        // uses 200 -- confirm whether the difference is intentional.
12345   match(CountedLoopEnd cop cmp);
12346   effect(USE labl);
12347 
12348   ins_cost(300);
12349   format %{ "j$cop,us  $labl\t# loop end" %}
12350   size(2);
12351   opcode(0x70);
12352   ins_encode(JccShort(cop, labl));
12353   ins_pipe(pipe_jcc);
12354   ins_pc_relative(1);
12355   ins_short_branch(1);
12356 %}
12357 
12358 // Jump Direct Conditional - using unsigned comparison
      // Short-form (2-byte) replacement for jmpConU: If branch on unsigned flags
      // with an 8-bit displacement.
12359 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12360   match(If cop cmp);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "j$cop,us  $labl" %}
12365   size(2);
12366   opcode(0x70);
12367   ins_encode(JccShort(cop, labl));
12368   ins_pipe(pipe_jcc);
12369   ins_pc_relative(1);
12370   ins_short_branch(1);
12371 %}
12372 
12373 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-form (2-byte) replacement for jmpConUCF.
        // NOTE(review): the cost here is 300 while the long form jmpConUCF
        // uses 200 -- confirm whether the difference is intentional.
12374   match(If cop cmp);
12375   effect(USE labl);
12376 
12377   ins_cost(300);
12378   format %{ "j$cop,us  $labl" %}
12379   size(2);
12380   opcode(0x70);
12381   ins_encode(JccShort(cop, labl));
12382   ins_pipe(pipe_jcc);
12383   ins_pc_relative(1);
12384   ins_short_branch(1);
12385 %}
12386 
12387 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-form replacement for jmpConUCF2: the same parity-jump + condition-jump
        // pair, but each jump is 2 bytes (opcode byte + 8-bit displacement), giving
        // size(4). Only equal and notEqual are supported.
        //   notEqual: the parity jump also goes to the label.
        //   equal:    the parity jump skips over the second jump.
12388   match(If cop cmp);
12389   effect(USE labl);
12390 
12391   ins_cost(300);
12392   format %{ $$template
12393     if ($cop$$cmpcode == Assembler::notEqual) {
12394       $$emit$$"jp,u,s   $labl\n\t"
12395       $$emit$$"j$cop,u,s   $labl"
12396     } else {
12397       $$emit$$"jp,u,s   done\n\t"
12398       $$emit$$"j$cop,u,s  $labl\n\t"
12399       $$emit$$"done:"
12400     }
12401   %}
12402   size(4);
12403   opcode(0x70);
12404   ins_encode %{
12405     Label* l = $labl$$label;
12406     emit_cc(cbuf, $primary, Assembler::parity);
12407     int parity_disp = -1;
12408     if ($cop$$cmpcode == Assembler::notEqual) {
            // Displacement is relative to the end of the 1-byte field about to be
            // emitted; a NULL label yields a displacement of 0.
12409       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12410     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip exactly the 2-byte conditional jump that follows.
12411       parity_disp = 2;
12412     } else {
12413       ShouldNotReachHere();
12414     }
12415     emit_d8(cbuf, parity_disp);
12416     emit_cc(cbuf, $primary, $cop$$cmpcode);
12417     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12418     emit_d8(cbuf, disp);
          // Both displacements must fit in a signed byte; checked after emission.
12419     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12420     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12421   %}
12422   ins_pipe(pipe_jcc);
12423   ins_pc_relative(1);
12424   ins_short_branch(1);
12425 %}
12426 
12427 // ============================================================================
12428 // inlined locking and unlocking
12429 
12430 instruct cmpFastLock(rFlagsReg cr,
12431                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12432 %{
        // Inlined monitor-enter fast path: matches FastLock and reports its outcome
        // in the flags (cr). tmp is pinned to RAX and, like scr, is a TEMP scratch
        // register. The code itself comes from the Fast_Lock encoding class.
12433   match(Set cr (FastLock object box));
12434   effect(TEMP tmp, TEMP scr);
12435 
12436   ins_cost(300);
12437   format %{ "fastlock $object,$box,$tmp,$scr" %}
12438   ins_encode(Fast_Lock(object, box, tmp, scr));
12439   ins_pipe(pipe_slow);
12440   ins_pc_relative(1);
12441 %}
12442 
12443 instruct cmpFastUnlock(rFlagsReg cr,
12444                        rRegP object, rax_RegP box, rRegP tmp)
12445 %{
        // Inlined monitor-exit fast path: matches FastUnlock and reports its outcome
        // in the flags (cr). Here the box operand is the one pinned to RAX; tmp is a
        // TEMP scratch register. The code comes from the Fast_Unlock encoding class.
12446   match(Set cr (FastUnlock object box));
12447   effect(TEMP tmp);
12448 
12449   ins_cost(300);
12450   format %{ "fastunlock $object, $box, $tmp" %}
12451   ins_encode(Fast_Unlock(object, box, tmp));
12452   ins_pipe(pipe_slow);
12453   ins_pc_relative(1);
12454 %}
12455 
12456 
12457 // ============================================================================
12458 // Safepoint Instructions
12459 instruct safePoint_poll(rFlagsReg cr)
12460 %{
        // Safepoint poll used when the polling page is not "far": a TEST of RAX
        // against the polling page, addressed through an AddressLiteral tagged with
        // a poll_type relocation. Only the memory read matters; the flags it
        // produces are declared killed.
12461   predicate(!Assembler::is_polling_page_far());
12462   match(SafePoint);
12463   effect(KILL cr);
12464 
12465   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12466             "# Safepoint: poll for GC" %}
12467   ins_cost(125);
12468   ins_encode %{
12469     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12470     __ testl(rax, addr);
12471   %}
12472   ins_pipe(ialu_reg_mem);
12473 %}
12474 
12475 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12476 %{
12477   predicate(Assembler::is_polling_page_far());  // complement of safePoint_poll's predicate
12478   match(SafePoint poll);                        // here the SafePoint node carries the poll operand as an input
12479   effect(KILL cr, USE poll);                    // flags are overwritten by the testl; poll is only read
12480 
12481   format %{ "testl  rax, [$poll]\t"
12482             "# Safepoint: poll for GC" %}
12483   ins_cost(125);
12484   ins_encode %{
12485     __ relocate(relocInfo::poll_type);           // attach a poll_type relocation before emitting the test
12486     __ testl(rax, Address($poll$$Register, 0));  // test rax against memory at [$poll + 0]
12487   %}
12488   ins_pipe(ialu_reg_mem);
12489 %}
12490 
12491 // ============================================================================
12492 // Procedure Call/Return Instructions
12493 // Call Java Static Instruction: matches CallStaticJava nodes that are NOT method-handle invokes.
12494 // Note: If this code changes, the corresponding ret_addr_offset() and
12495 //       compute_padding() functions will have to be adjusted.
12496 instruct CallStaticJavaDirect(method meth) %{
12497   match(CallStaticJava);
12498   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());  // method-handle invokes go to CallStaticJavaHandle
12499   effect(USE meth);
12500 
12501   ins_cost(300);
12502   format %{ "call,static " %}
12503   opcode(0xE8); /* E8 cd */
12504   ins_encode(Java_Static_Call(meth), call_epilog);  // both enc_classes are defined outside this section
12505   ins_pipe(pipe_slow);
12506   ins_pc_relative(1);
12507   ins_alignment(4);
12508 %}
12509 
12510 // Call Java Static Instruction (method handle version): matches CallStaticJava nodes that ARE method-handle invokes.
12511 // Note: If this code changes, the corresponding ret_addr_offset() and
12512 //       compute_padding() functions will have to be adjusted.
12513 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12514   match(CallStaticJava);
12515   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());  // complement of CallStaticJavaDirect's predicate
12516   effect(USE meth);
12517   // RBP is saved by all callees (for interpreter stack correction).
12518   // We use it here for a similar purpose, in {preserve,restore}_SP.
12519 
12520   ins_cost(300);
12521   format %{ "call,static/MethodHandle " %}
12522   opcode(0xE8); /* E8 cd */
12523   ins_encode(preserve_SP,              // the call is bracketed by preserve_SP / restore_SP
12524              Java_Static_Call(meth),
12525              restore_SP,
12526              call_epilog);
12527   ins_pipe(pipe_slow);
12528   ins_pc_relative(1);
12529   ins_alignment(4);
12530 %}
12531 
12532 // Call Java Dynamic Instruction: matches CallDynamicJava nodes.
12533 // Note: If this code changes, the corresponding ret_addr_offset() and
12534 //       compute_padding() functions will have to be adjusted.
12535 instruct CallDynamicJavaDirect(method meth)
12536 %{
12537   match(CallDynamicJava);
12538   effect(USE meth);
12539 
12540   ins_cost(300);
12541   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12542             "call,dynamic " %}
12543   opcode(0xE8); /* E8 cd */
12544   ins_encode(Java_Dynamic_Call(meth), call_epilog);  // per the format, presumably also emits the movq into rax ahead of the call; enc_class defined outside this section
12545   ins_pipe(pipe_slow);
12546   ins_pc_relative(1);
12547   ins_alignment(4);
12548 %}
12549 
12550 // Call Runtime Instruction: matches CallRuntime nodes.
12551 instruct CallRuntimeDirect(method meth)
12552 %{
12553   match(CallRuntime);
12554   effect(USE meth);
12555 
12556   ins_cost(300);
12557   format %{ "call,runtime " %}
12558   opcode(0xE8); /* E8 cd */
12559   ins_encode(Java_To_Runtime(meth));  // same encoding as CallLeafDirect and CallLeafNoFPDirect; no call_epilog here
12560   ins_pipe(pipe_slow);
12561   ins_pc_relative(1);
12562 %}
12563 
12564 // Call runtime without safepoint: matches CallLeaf nodes.
12565 instruct CallLeafDirect(method meth)
12566 %{
12567   match(CallLeaf);
12568   effect(USE meth);
12569 
12570   ins_cost(300);
12571   format %{ "call_leaf,runtime " %}
12572   opcode(0xE8); /* E8 cd */
12573   ins_encode(Java_To_Runtime(meth));  // same encoding as CallRuntimeDirect
12574   ins_pipe(pipe_slow);
12575   ins_pc_relative(1);
12576 %}
12577 
12578 // Call runtime without safepoint (NoFP variant): matches CallLeafNoFP nodes.
12579 instruct CallLeafNoFPDirect(method meth)
12580 %{
12581   match(CallLeafNoFP);
12582   effect(USE meth);
12583 
12584   ins_cost(300);
12585   format %{ "call_leaf_nofp,runtime " %}
12586   opcode(0xE8); /* E8 cd */
12587   ins_encode(Java_To_Runtime(meth));  // identical encoding to CallLeafDirect; only the matched node and format differ
12588   ins_pipe(pipe_slow);
12589   ins_pc_relative(1);
12590 %}
12591 
12592 // Return Instruction: matches Return nodes.
12593 // Remove the return address & jump to it.
12594 // Notice: We always emit a nop after a ret to make sure there is room
12595 // for safepoint patching
12596 instruct Ret()
12597 %{
12598   match(Return);
12599 
12600   format %{ "ret" %}
12601   opcode(0xC3);
12602   ins_encode(OpcP);  // NOTE(review): only OpcP (presumably just the 0xC3 opcode byte) is listed; no nop is visible here, so the notice above may be stale -- confirm
12603   ins_pipe(pipe_jmp);
12604 %}
12605 
12606 // Tail Call; Jump from runtime stub to Java code.
12607 // Also known as an 'interprocedural jump'.
12608 // Target of jump will eventually return to caller.
12609 // Unlike tailjmpInd below, this leaves the return address in place.
12610 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12611 %{
12612   match(TailCall jump_target method_oop);
12613 
12614   ins_cost(300);
12615   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12616   opcode(0xFF, 0x4); /* Opcode FF /4 */
12617   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));  // indirect jmp through the jump_target register; method_oop is not referenced by the encoding
12618   ins_pipe(pipe_jmp);
12619 %}
12620 
12621 // Tail Jump; remove the return address; jump to target.
12622 // Unlike TailCalljmpInd above, the return address is popped (into rdx) before the jump.
12623 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12624 %{
12625   match(TailJump jump_target ex_oop);
12626 
12627   ins_cost(300);
12628   format %{ "popq    rdx\t# pop return address\n\t"
12629             "jmp     $jump_target" %}
12630   opcode(0xFF, 0x4); /* Opcode FF /4 */
12631   ins_encode(Opcode(0x5a), // popq rdx
12632              REX_reg(jump_target), OpcP, reg_opc(jump_target));  // then the same indirect jmp encoding as TailCalljmpInd
12633   ins_pipe(pipe_jmp);
12634 %}
12635 
12636 // Create exception oop: created by stack-crawling runtime code.
12637 // Created exception is now available to this handler, and is setup
12638 // just prior to jumping to this handler.  No code emitted.
12639 instruct CreateException(rax_RegP ex_oop)
12640 %{
12641   match(Set ex_oop (CreateEx));
12642 
12643   size(0);  // zero bytes of code, consistent with the empty ins_encode() below
12644   // the format string is descriptive only; nothing is encoded for this instruct
12645   format %{ "# exception oop is in rax; no code emitted" %}
12646   ins_encode();
12647   ins_pipe(empty);
12648 %}
12649 
12650 // Rethrow exception: matches Rethrow nodes.
12651 // The exception oop will come in the first argument position.
12652 // Then JUMP (not call) to the rethrow stub code.
12653 instruct RethrowException()
12654 %{
12655   match(Rethrow);
12656 
12657   // no operands; the jump itself is produced by the enc_rethrow enc_class (defined outside this section)
12658   format %{ "jmp     rethrow_stub" %}
12659   ins_encode(enc_rethrow);
12660   ins_pipe(pipe_jmp);
12661 %}
12662 
12663 
12664 //----------PEEPHOLE RULES-----------------------------------------------------
12665 // These must follow all instruction definitions as they use the names
12666 // defined in the instruction definitions.
12667 //
12668 // peepmatch ( root_instr_name [preceding_instruction]* );
12669 //
12670 // peepconstraint %{
12671 // (instruction_number.operand_name relational_op instruction_number.operand_name
12672 //  [, ...] );
12673 // // instruction numbers are zero-based using left to right order in peepmatch
12674 //
12675 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12676 // // provide an instruction_number.operand_name for each operand that appears
12677 // // in the replacement instruction's match rule
12678 //
12679 // ---------VM FLAGS---------------------------------------------------------
12680 //
12681 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12682 //
12683 // Each peephole rule is given an identifying number starting with zero and
12684 // increasing by one in the order seen by the parser.  An individual peephole
12685 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12686 // on the command-line.
12687 //
12688 // ---------CURRENT LIMITATIONS----------------------------------------------
12689 //
12690 // Only match adjacent instructions in same basic block
12691 // Only equality constraints
12692 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12693 // Only one replacement instruction
12694 //
12695 // ---------EXAMPLE----------------------------------------------------------
12696 //
12697 // // pertinent parts of existing instructions in architecture description
12698 // instruct movI(rRegI dst, rRegI src)
12699 // %{
12700 //   match(Set dst (CopyI src));
12701 // %}
12702 //
12703 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12704 // %{
12705 //   match(Set dst (AddI dst src));
12706 //   effect(KILL cr);
12707 // %}
12708 //
12709 // // Change (inc mov) to lea
12710 // peephole %{
12711 //   // increment preceded by register-register move
12712 //   peepmatch ( incI_rReg movI );
12713 //   // require that the destination register of the increment
12714 //   // match the destination register of the move
12715 //   peepconstraint ( 0.dst == 1.dst );
12716 //   // construct a replacement instruction that sets
12717 //   // the destination to ( move's source register + one )
12718 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12719 // %}
12720 //
12721 
12722 // Implementation no longer uses movX instructions since
12723 // machine-independent system no longer uses CopyX nodes.
12724 //
12725 // peephole
12726 // %{
12727 //   peepmatch (incI_rReg movI);
12728 //   peepconstraint (0.dst == 1.dst);
12729 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12730 // %}
12731 
12732 // peephole
12733 // %{
12734 //   peepmatch (decI_rReg movI);
12735 //   peepconstraint (0.dst == 1.dst);
12736 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12737 // %}
12738 
12739 // peephole
12740 // %{
12741 //   peepmatch (addI_rReg_imm movI);
12742 //   peepconstraint (0.dst == 1.dst);
12743 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12744 // %}
12745 
12746 // peephole
12747 // %{
12748 //   peepmatch (incL_rReg movL);
12749 //   peepconstraint (0.dst == 1.dst);
12750 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12751 // %}
12752 
12753 // peephole
12754 // %{
12755 //   peepmatch (decL_rReg movL);
12756 //   peepconstraint (0.dst == 1.dst);
12757 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12758 // %}
12759 
12760 // peephole
12761 // %{
12762 //   peepmatch (addL_rReg_imm movL);
12763 //   peepconstraint (0.dst == 1.dst);
12764 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12765 // %}
12766 
12767 // peephole
12768 // %{
12769 //   peepmatch (addP_rReg_imm movP);
12770 //   peepconstraint (0.dst == 1.dst);
12771 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12772 // %}
12773 
12774 // // Change load of spilled value to only a spill
12775 // instruct storeI(memory mem, rRegI src)
12776 // %{
12777 //   match(Set mem (StoreI mem src));
12778 // %}
12779 //
12780 // instruct loadI(rRegI dst, memory mem)
12781 // %{
12782 //   match(Set dst (LoadI mem));
12783 // %}
12784 //
12785 
12786 peephole  // int case of "change load of spilled value to only a spill"
12787 %{
12788   peepmatch (loadI storeI);                         // instruction 0 = loadI (root), instruction 1 = the storeI preceding it
12789   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // load reads back the register just stored, from the same memory operand
12790   peepreplace (storeI(1.mem 1.mem 1.src));          // replacement is a single storeI built from the store's own operands
12791 %}
12792 
12793 peephole  // long case of "change load of spilled value to only a spill"
12794 %{
12795   peepmatch (loadL storeL);                         // instruction 0 = loadL (root), instruction 1 = the storeL preceding it
12796   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // load reads back the register just stored, from the same memory operand
12797   peepreplace (storeL(1.mem 1.mem 1.src));          // replacement is a single storeL built from the store's own operands
12798 %}
12799 
12800 //----------SMARTSPILL RULES---------------------------------------------------
12801 // These must follow all instruction definitions as they use the names
12802 // defined in the instruction definitions.