1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP (R12 and R15 are also left out)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP; R12 and R15 are also left out)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP; R12 and R15 are also left out)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
 613 #ifndef PRODUCT
 614 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 615 {
 616   st->print("INT3");
 617 }
 618 #endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
 650 // EMIT_D16()
 651 void emit_d16(CodeBuffer &cbuf, int d16) {
 652   cbuf.insts()->emit_int16(d16);
 653 }
 654 
 655 // EMIT_D32()
 656 void emit_d32(CodeBuffer &cbuf, int d32) {
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // EMIT_D64()
 661 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 662   cbuf.insts()->emit_int64(d64);
 663 }
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739 
 740   // There is no index & no scale, use form without SIB byte
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   if (dstenc != srcenc) {
 803     if (dstenc < 8) {
 804       if (srcenc >= 8) {
 805         emit_opcode(cbuf, Assembler::REX_B);
 806         srcenc -= 8;
 807       }
 808     } else {
 809       if (srcenc < 8) {
 810         emit_opcode(cbuf, Assembler::REX_R);
 811       } else {
 812         emit_opcode(cbuf, Assembler::REX_RB);
 813         srcenc -= 8;
 814       }
 815       dstenc -= 8;
 816     }
 817 
 818     emit_opcode(cbuf, 0x8B);
 819     emit_rm(cbuf, 0x3, dstenc, srcenc);
 820   }
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   if( dst_encoding == src_encoding ) {
 825     // reg-reg copy, use an empty encoding
 826   } else {
 827     MacroAssembler _masm(&cbuf);
 828 
 829     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 830   }
 831 }
 832 
 833 
 834 //=============================================================================
 835 const bool Matcher::constant_table_absolute_addressing = true;
 836 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 837 
 838 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 839   // Empty encoding
 840 }
 841 
 842 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 843   return 0;
 844 }
 845 
 846 #ifndef PRODUCT
 847 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 848   st->print("# MachConstantBaseNode (empty encoding)");
 849 }
 850 #endif
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.insts_size());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq   rbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print("\t");
1001     if (Assembler::is_polling_page_far()) {
1002       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
1003                    "testl  rax, [rscratch1]\t"
1004                    "# Safepoint: poll for GC");
1005     } else {
1006       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
1007                    "# Safepoint: poll for GC");
1008     }
1009   }
1010 }
1011 #endif
1012 
1013 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1014 {
1015   Compile* C = ra_->C;
1016   int framesize = C->frame_slots() << LogBytesPerInt;
1017   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1018   // Remove word for return adr already pushed
1019   // and RBP
1020   framesize -= 2*wordSize;
1021 
1022   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1023 
1024   if (framesize) {
1025     emit_opcode(cbuf, Assembler::REX_W);
1026     if (framesize < 0x80) {
1027       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1028       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1029       emit_d8(cbuf, framesize);
1030     } else {
1031       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1032       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1033       emit_d32(cbuf, framesize);
1034     }
1035   }
1036 
1037   // popq rbp
1038   emit_opcode(cbuf, 0x58 | RBP_enc);
1039 
1040   if (do_polling() && C->is_method_compilation()) {
1041     MacroAssembler _masm(&cbuf);
1042     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1043     if (Assembler::is_polling_page_far()) {
1044       __ lea(rscratch1, polling_page);
1045       __ relocate(relocInfo::poll_return_type);
1046       __ testl(rax, Address(rscratch1, 0));
1047     } else {
1048       __ testl(rax, polling_page);
1049     }
1050   }
1051 }
1052 
1053 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1054 {
1055   return MachNode::size(ra_); // too many variables; just compute it
1056                               // the hard way
1057 }
1058 
1059 int MachEpilogNode::reloc() const
1060 {
1061   return 2; // a large enough number
1062 }
1063 
1064 const Pipeline* MachEpilogNode::pipeline() const
1065 {
1066   return MachNode::pipeline_class();
1067 }
1068 
1069 int MachEpilogNode::safepoint_offset() const
1070 {
1071   return 0;
1072 }
1073 
1074 //=============================================================================
1075 
// Register class of a spill-copy operand, as determined by rc_class().
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1082 
1083 static enum RC rc_class(OptoReg::Name reg)
1084 {
1085   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1086 
1087   if (OptoReg::is_stack(reg)) return rc_stack;
1088 
1089   VMReg r = OptoReg::as_VMReg(reg);
1090 
1091   if (r->is_Register()) return rc_int;
1092 
1093   assert(r->is_XMMRegister(), "must be");
1094   return rc_float;
1095 }
1096 
1097 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1098                                        PhaseRegAlloc* ra_,
1099                                        bool do_size,
1100                                        outputStream* st) const
1101 {
1102 
1103   // Get registers to move
1104   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1105   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1106   OptoReg::Name dst_second = ra_->get_reg_second(this);
1107   OptoReg::Name dst_first = ra_->get_reg_first(this);
1108 
1109   enum RC src_second_rc = rc_class(src_second);
1110   enum RC src_first_rc = rc_class(src_first);
1111   enum RC dst_second_rc = rc_class(dst_second);
1112   enum RC dst_first_rc = rc_class(dst_first);
1113 
1114   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1115          "must move at least 1 register" );
1116 
1117   if (src_first == dst_first && src_second == dst_second) {
1118     // Self copy, no move
1119     return 0;
1120   } else if (src_first_rc == rc_stack) {
1121     // mem ->
1122     if (dst_first_rc == rc_stack) {
1123       // mem -> mem
1124       assert(src_second != dst_first, "overlap");
1125       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1126           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1127         // 64-bit
1128         int src_offset = ra_->reg2offset(src_first);
1129         int dst_offset = ra_->reg2offset(dst_first);
1130         if (cbuf) {
1131           emit_opcode(*cbuf, 0xFF);
1132           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1133 
1134           emit_opcode(*cbuf, 0x8F);
1135           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1136 
1137 #ifndef PRODUCT
1138         } else if (!do_size) {
1139           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1140                      "popq    [rsp + #%d]",
1141                      src_offset,
1142                      dst_offset);
1143 #endif
1144         }
1145         return
1146           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1147           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1148       } else {
1149         // 32-bit
1150         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1151         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1152         // No pushl/popl, so:
1153         int src_offset = ra_->reg2offset(src_first);
1154         int dst_offset = ra_->reg2offset(dst_first);
1155         if (cbuf) {
1156           emit_opcode(*cbuf, Assembler::REX_W);
1157           emit_opcode(*cbuf, 0x89);
1158           emit_opcode(*cbuf, 0x44);
1159           emit_opcode(*cbuf, 0x24);
1160           emit_opcode(*cbuf, 0xF8);
1161 
1162           emit_opcode(*cbuf, 0x8B);
1163           encode_RegMem(*cbuf,
1164                         RAX_enc,
1165                         RSP_enc, 0x4, 0, src_offset,
1166                         false);
1167 
1168           emit_opcode(*cbuf, 0x89);
1169           encode_RegMem(*cbuf,
1170                         RAX_enc,
1171                         RSP_enc, 0x4, 0, dst_offset,
1172                         false);
1173 
1174           emit_opcode(*cbuf, Assembler::REX_W);
1175           emit_opcode(*cbuf, 0x8B);
1176           emit_opcode(*cbuf, 0x44);
1177           emit_opcode(*cbuf, 0x24);
1178           emit_opcode(*cbuf, 0xF8);
1179 
1180 #ifndef PRODUCT
1181         } else if (!do_size) {
1182           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1183                      "movl    rax, [rsp + #%d]\n\t"
1184                      "movl    [rsp + #%d], rax\n\t"
1185                      "movq    rax, [rsp - #8]",
1186                      src_offset,
1187                      dst_offset);
1188 #endif
1189         }
1190         return
1191           5 + // movq
1192           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1193           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1194           5; // movq
1195       }
1196     } else if (dst_first_rc == rc_int) {
1197       // mem -> gpr
1198       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1199           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1200         // 64-bit
1201         int offset = ra_->reg2offset(src_first);
1202         if (cbuf) {
1203           if (Matcher::_regEncode[dst_first] < 8) {
1204             emit_opcode(*cbuf, Assembler::REX_W);
1205           } else {
1206             emit_opcode(*cbuf, Assembler::REX_WR);
1207           }
1208           emit_opcode(*cbuf, 0x8B);
1209           encode_RegMem(*cbuf,
1210                         Matcher::_regEncode[dst_first],
1211                         RSP_enc, 0x4, 0, offset,
1212                         false);
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    %s, [rsp + #%d]\t# spill",
1216                      Matcher::regName[dst_first],
1217                      offset);
1218 #endif
1219         }
1220         return
1221           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1222       } else {
1223         // 32-bit
1224         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1225         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1226         int offset = ra_->reg2offset(src_first);
1227         if (cbuf) {
1228           if (Matcher::_regEncode[dst_first] >= 8) {
1229             emit_opcode(*cbuf, Assembler::REX_R);
1230           }
1231           emit_opcode(*cbuf, 0x8B);
1232           encode_RegMem(*cbuf,
1233                         Matcher::_regEncode[dst_first],
1234                         RSP_enc, 0x4, 0, offset,
1235                         false);
1236 #ifndef PRODUCT
1237         } else if (!do_size) {
1238           st->print("movl    %s, [rsp + #%d]\t# spill",
1239                      Matcher::regName[dst_first],
1240                      offset);
1241 #endif
1242         }
1243         return
1244           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1245           ((Matcher::_regEncode[dst_first] < 8)
1246            ? 3
1247            : 4); // REX
1248       }
1249     } else if (dst_first_rc == rc_float) {
1250       // mem-> xmm
1251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1253         // 64-bit
1254         int offset = ra_->reg2offset(src_first);
1255         if (cbuf) {
1256           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1257           if (Matcher::_regEncode[dst_first] >= 8) {
1258             emit_opcode(*cbuf, Assembler::REX_R);
1259           }
1260           emit_opcode(*cbuf, 0x0F);
1261           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1262           encode_RegMem(*cbuf,
1263                         Matcher::_regEncode[dst_first],
1264                         RSP_enc, 0x4, 0, offset,
1265                         false);
1266 #ifndef PRODUCT
1267         } else if (!do_size) {
1268           st->print("%s  %s, [rsp + #%d]\t# spill",
1269                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1270                      Matcher::regName[dst_first],
1271                      offset);
1272 #endif
1273         }
1274         return
1275           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1276           ((Matcher::_regEncode[dst_first] < 8)
1277            ? 5
1278            : 6); // REX
1279       } else {
1280         // 32-bit
1281         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1282         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1283         int offset = ra_->reg2offset(src_first);
1284         if (cbuf) {
1285           emit_opcode(*cbuf, 0xF3);
1286           if (Matcher::_regEncode[dst_first] >= 8) {
1287             emit_opcode(*cbuf, Assembler::REX_R);
1288           }
1289           emit_opcode(*cbuf, 0x0F);
1290           emit_opcode(*cbuf, 0x10);
1291           encode_RegMem(*cbuf,
1292                         Matcher::_regEncode[dst_first],
1293                         RSP_enc, 0x4, 0, offset,
1294                         false);
1295 #ifndef PRODUCT
1296         } else if (!do_size) {
1297           st->print("movss   %s, [rsp + #%d]\t# spill",
1298                      Matcher::regName[dst_first],
1299                      offset);
1300 #endif
1301         }
1302         return
1303           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1304           ((Matcher::_regEncode[dst_first] < 8)
1305            ? 5
1306            : 6); // REX
1307       }
1308     }
1309   } else if (src_first_rc == rc_int) {
1310     // gpr ->
1311     if (dst_first_rc == rc_stack) {
1312       // gpr -> mem
1313       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1314           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1315         // 64-bit
1316         int offset = ra_->reg2offset(dst_first);
1317         if (cbuf) {
1318           if (Matcher::_regEncode[src_first] < 8) {
1319             emit_opcode(*cbuf, Assembler::REX_W);
1320           } else {
1321             emit_opcode(*cbuf, Assembler::REX_WR);
1322           }
1323           emit_opcode(*cbuf, 0x89);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[src_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movq    [rsp + #%d], %s\t# spill",
1331                      offset,
1332                      Matcher::regName[src_first]);
1333 #endif
1334         }
1335         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1336       } else {
1337         // 32-bit
1338         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1339         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1340         int offset = ra_->reg2offset(dst_first);
1341         if (cbuf) {
1342           if (Matcher::_regEncode[src_first] >= 8) {
1343             emit_opcode(*cbuf, Assembler::REX_R);
1344           }
1345           emit_opcode(*cbuf, 0x89);
1346           encode_RegMem(*cbuf,
1347                         Matcher::_regEncode[src_first],
1348                         RSP_enc, 0x4, 0, offset,
1349                         false);
1350 #ifndef PRODUCT
1351         } else if (!do_size) {
1352           st->print("movl    [rsp + #%d], %s\t# spill",
1353                      offset,
1354                      Matcher::regName[src_first]);
1355 #endif
1356         }
1357         return
1358           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1359           ((Matcher::_regEncode[src_first] < 8)
1360            ? 3
1361            : 4); // REX
1362       }
1363     } else if (dst_first_rc == rc_int) {
1364       // gpr -> gpr
1365       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1366           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1367         // 64-bit
1368         if (cbuf) {
1369           if (Matcher::_regEncode[dst_first] < 8) {
1370             if (Matcher::_regEncode[src_first] < 8) {
1371               emit_opcode(*cbuf, Assembler::REX_W);
1372             } else {
1373               emit_opcode(*cbuf, Assembler::REX_WB);
1374             }
1375           } else {
1376             if (Matcher::_regEncode[src_first] < 8) {
1377               emit_opcode(*cbuf, Assembler::REX_WR);
1378             } else {
1379               emit_opcode(*cbuf, Assembler::REX_WRB);
1380             }
1381           }
1382           emit_opcode(*cbuf, 0x8B);
1383           emit_rm(*cbuf, 0x3,
1384                   Matcher::_regEncode[dst_first] & 7,
1385                   Matcher::_regEncode[src_first] & 7);
1386 #ifndef PRODUCT
1387         } else if (!do_size) {
1388           st->print("movq    %s, %s\t# spill",
1389                      Matcher::regName[dst_first],
1390                      Matcher::regName[src_first]);
1391 #endif
1392         }
1393         return 3; // REX
1394       } else {
1395         // 32-bit
1396         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1397         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1398         if (cbuf) {
1399           if (Matcher::_regEncode[dst_first] < 8) {
1400             if (Matcher::_regEncode[src_first] >= 8) {
1401               emit_opcode(*cbuf, Assembler::REX_B);
1402             }
1403           } else {
1404             if (Matcher::_regEncode[src_first] < 8) {
1405               emit_opcode(*cbuf, Assembler::REX_R);
1406             } else {
1407               emit_opcode(*cbuf, Assembler::REX_RB);
1408             }
1409           }
1410           emit_opcode(*cbuf, 0x8B);
1411           emit_rm(*cbuf, 0x3,
1412                   Matcher::_regEncode[dst_first] & 7,
1413                   Matcher::_regEncode[src_first] & 7);
1414 #ifndef PRODUCT
1415         } else if (!do_size) {
1416           st->print("movl    %s, %s\t# spill",
1417                      Matcher::regName[dst_first],
1418                      Matcher::regName[src_first]);
1419 #endif
1420         }
1421         return
1422           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1423           ? 2
1424           : 3; // REX
1425       }
1426     } else if (dst_first_rc == rc_float) {
1427       // gpr -> xmm
1428       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1429           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1430         // 64-bit
1431         if (cbuf) {
1432           emit_opcode(*cbuf, 0x66);
1433           if (Matcher::_regEncode[dst_first] < 8) {
1434             if (Matcher::_regEncode[src_first] < 8) {
1435               emit_opcode(*cbuf, Assembler::REX_W);
1436             } else {
1437               emit_opcode(*cbuf, Assembler::REX_WB);
1438             }
1439           } else {
1440             if (Matcher::_regEncode[src_first] < 8) {
1441               emit_opcode(*cbuf, Assembler::REX_WR);
1442             } else {
1443               emit_opcode(*cbuf, Assembler::REX_WRB);
1444             }
1445           }
1446           emit_opcode(*cbuf, 0x0F);
1447           emit_opcode(*cbuf, 0x6E);
1448           emit_rm(*cbuf, 0x3,
1449                   Matcher::_regEncode[dst_first] & 7,
1450                   Matcher::_regEncode[src_first] & 7);
1451 #ifndef PRODUCT
1452         } else if (!do_size) {
1453           st->print("movdq   %s, %s\t# spill",
1454                      Matcher::regName[dst_first],
1455                      Matcher::regName[src_first]);
1456 #endif
1457         }
1458         return 5; // REX
1459       } else {
1460         // 32-bit
1461         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1462         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1463         if (cbuf) {
1464           emit_opcode(*cbuf, 0x66);
1465           if (Matcher::_regEncode[dst_first] < 8) {
1466             if (Matcher::_regEncode[src_first] >= 8) {
1467               emit_opcode(*cbuf, Assembler::REX_B);
1468             }
1469           } else {
1470             if (Matcher::_regEncode[src_first] < 8) {
1471               emit_opcode(*cbuf, Assembler::REX_R);
1472             } else {
1473               emit_opcode(*cbuf, Assembler::REX_RB);
1474             }
1475           }
1476           emit_opcode(*cbuf, 0x0F);
1477           emit_opcode(*cbuf, 0x6E);
1478           emit_rm(*cbuf, 0x3,
1479                   Matcher::_regEncode[dst_first] & 7,
1480                   Matcher::_regEncode[src_first] & 7);
1481 #ifndef PRODUCT
1482         } else if (!do_size) {
1483           st->print("movdl   %s, %s\t# spill",
1484                      Matcher::regName[dst_first],
1485                      Matcher::regName[src_first]);
1486 #endif
1487         }
1488         return
1489           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1490           ? 4
1491           : 5; // REX
1492       }
1493     }
1494   } else if (src_first_rc == rc_float) {
1495     // xmm ->
1496     if (dst_first_rc == rc_stack) {
1497       // xmm -> mem
1498       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1499           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1500         // 64-bit
1501         int offset = ra_->reg2offset(dst_first);
1502         if (cbuf) {
1503           emit_opcode(*cbuf, 0xF2);
1504           if (Matcher::_regEncode[src_first] >= 8) {
1505               emit_opcode(*cbuf, Assembler::REX_R);
1506           }
1507           emit_opcode(*cbuf, 0x0F);
1508           emit_opcode(*cbuf, 0x11);
1509           encode_RegMem(*cbuf,
1510                         Matcher::_regEncode[src_first],
1511                         RSP_enc, 0x4, 0, offset,
1512                         false);
1513 #ifndef PRODUCT
1514         } else if (!do_size) {
1515           st->print("movsd   [rsp + #%d], %s\t# spill",
1516                      offset,
1517                      Matcher::regName[src_first]);
1518 #endif
1519         }
1520         return
1521           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1522           ((Matcher::_regEncode[src_first] < 8)
1523            ? 5
1524            : 6); // REX
1525       } else {
1526         // 32-bit
1527         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1528         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1529         int offset = ra_->reg2offset(dst_first);
1530         if (cbuf) {
1531           emit_opcode(*cbuf, 0xF3);
1532           if (Matcher::_regEncode[src_first] >= 8) {
1533               emit_opcode(*cbuf, Assembler::REX_R);
1534           }
1535           emit_opcode(*cbuf, 0x0F);
1536           emit_opcode(*cbuf, 0x11);
1537           encode_RegMem(*cbuf,
1538                         Matcher::_regEncode[src_first],
1539                         RSP_enc, 0x4, 0, offset,
1540                         false);
1541 #ifndef PRODUCT
1542         } else if (!do_size) {
1543           st->print("movss   [rsp + #%d], %s\t# spill",
1544                      offset,
1545                      Matcher::regName[src_first]);
1546 #endif
1547         }
1548         return
1549           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1550           ((Matcher::_regEncode[src_first] < 8)
1551            ? 5
1552            : 6); // REX
1553       }
1554     } else if (dst_first_rc == rc_int) {
1555       // xmm -> gpr
1556       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1557           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1558         // 64-bit
1559         if (cbuf) {
1560           emit_opcode(*cbuf, 0x66);
1561           if (Matcher::_regEncode[dst_first] < 8) {
1562             if (Matcher::_regEncode[src_first] < 8) {
1563               emit_opcode(*cbuf, Assembler::REX_W);
1564             } else {
1565               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1566             }
1567           } else {
1568             if (Matcher::_regEncode[src_first] < 8) {
1569               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1570             } else {
1571               emit_opcode(*cbuf, Assembler::REX_WRB);
1572             }
1573           }
1574           emit_opcode(*cbuf, 0x0F);
1575           emit_opcode(*cbuf, 0x7E);
1576           emit_rm(*cbuf, 0x3,
1577                   Matcher::_regEncode[src_first] & 7,
1578                   Matcher::_regEncode[dst_first] & 7);
1579 #ifndef PRODUCT
1580         } else if (!do_size) {
1581           st->print("movdq   %s, %s\t# spill",
1582                      Matcher::regName[dst_first],
1583                      Matcher::regName[src_first]);
1584 #endif
1585         }
1586         return 5; // REX
1587       } else {
1588         // 32-bit
1589         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1590         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1591         if (cbuf) {
1592           emit_opcode(*cbuf, 0x66);
1593           if (Matcher::_regEncode[dst_first] < 8) {
1594             if (Matcher::_regEncode[src_first] >= 8) {
1595               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1596             }
1597           } else {
1598             if (Matcher::_regEncode[src_first] < 8) {
1599               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1600             } else {
1601               emit_opcode(*cbuf, Assembler::REX_RB);
1602             }
1603           }
1604           emit_opcode(*cbuf, 0x0F);
1605           emit_opcode(*cbuf, 0x7E);
1606           emit_rm(*cbuf, 0x3,
1607                   Matcher::_regEncode[src_first] & 7,
1608                   Matcher::_regEncode[dst_first] & 7);
1609 #ifndef PRODUCT
1610         } else if (!do_size) {
1611           st->print("movdl   %s, %s\t# spill",
1612                      Matcher::regName[dst_first],
1613                      Matcher::regName[src_first]);
1614 #endif
1615         }
1616         return
1617           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1618           ? 4
1619           : 5; // REX
1620       }
1621     } else if (dst_first_rc == rc_float) {
1622       // xmm -> xmm
1623       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1624           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1625         // 64-bit
1626         if (cbuf) {
1627           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1628           if (Matcher::_regEncode[dst_first] < 8) {
1629             if (Matcher::_regEncode[src_first] >= 8) {
1630               emit_opcode(*cbuf, Assembler::REX_B);
1631             }
1632           } else {
1633             if (Matcher::_regEncode[src_first] < 8) {
1634               emit_opcode(*cbuf, Assembler::REX_R);
1635             } else {
1636               emit_opcode(*cbuf, Assembler::REX_RB);
1637             }
1638           }
1639           emit_opcode(*cbuf, 0x0F);
1640           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1641           emit_rm(*cbuf, 0x3,
1642                   Matcher::_regEncode[dst_first] & 7,
1643                   Matcher::_regEncode[src_first] & 7);
1644 #ifndef PRODUCT
1645         } else if (!do_size) {
1646           st->print("%s  %s, %s\t# spill",
1647                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1648                      Matcher::regName[dst_first],
1649                      Matcher::regName[src_first]);
1650 #endif
1651         }
1652         return
1653           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1654           ? 4
1655           : 5; // REX
1656       } else {
1657         // 32-bit
1658         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1659         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1660         if (cbuf) {
1661           if (!UseXmmRegToRegMoveAll)
1662             emit_opcode(*cbuf, 0xF3);
1663           if (Matcher::_regEncode[dst_first] < 8) {
1664             if (Matcher::_regEncode[src_first] >= 8) {
1665               emit_opcode(*cbuf, Assembler::REX_B);
1666             }
1667           } else {
1668             if (Matcher::_regEncode[src_first] < 8) {
1669               emit_opcode(*cbuf, Assembler::REX_R);
1670             } else {
1671               emit_opcode(*cbuf, Assembler::REX_RB);
1672             }
1673           }
1674           emit_opcode(*cbuf, 0x0F);
1675           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1676           emit_rm(*cbuf, 0x3,
1677                   Matcher::_regEncode[dst_first] & 7,
1678                   Matcher::_regEncode[src_first] & 7);
1679 #ifndef PRODUCT
1680         } else if (!do_size) {
1681           st->print("%s  %s, %s\t# spill",
1682                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1683                      Matcher::regName[dst_first],
1684                      Matcher::regName[src_first]);
1685 #endif
1686         }
1687         return
1688           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1689           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1690           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1691       }
1692     }
1693   }
1694 
1695   assert(0," foo ");
1696   Unimplemented();
1697 
1698   return 0;
1699 }
1700 
1701 #ifndef PRODUCT
// Debug-only printer for a spill copy.  Delegates to implementation() with no
// code buffer (NULL) and the output stream 'st'; with the third argument
// (presumably the do_size flag -- signature not visible here) false, the
// implementation takes its print path instead of emitting bytes.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
{
  implementation(NULL, ra_, false, st);
}
1706 #endif
1707 
// Emits the machine code for this spill copy into 'cbuf'.  Delegates to
// implementation() with a real code buffer and no output stream, so only the
// byte-emitting path runs; the returned size is ignored here.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
{
  implementation(&cbuf, ra_, false, NULL);
}
1712 
// Returns the value computed by implementation() when called with neither a
// code buffer nor an output stream and the third argument true (presumably
// the do_size flag), i.e. nothing is emitted and nothing is printed.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
{
  return implementation(NULL, ra_, true, NULL);
}
1717 
1718 //=============================================================================
1719 #ifndef PRODUCT
// Debug-only printer: shows the nop together with _count, described in the
// message as the number of padding bytes.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
1724 #endif
1725 
// Emits the padding by asking the macro assembler for a nop of _count.
// The register allocator argument is unused.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
{
  // The '__' shorthand used below operates on this local assembler.
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1731 
// Size of the padding: exactly _count, the same value passed to nop() in
// emit().
uint MachNopNode::size(PhaseRegAlloc*) const
{
  return _count;
}
1736 
1737 
1738 //=============================================================================
1739 #ifndef PRODUCT
1740 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1741 {
1742   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1743   int reg = ra_->get_reg_first(this);
1744   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1745             Matcher::regName[reg], offset);
1746 }
1747 #endif
1748 
1749 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1750 {
1751   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1752   int reg = ra_->get_encode(this);
1753   if (offset >= 0x80) {
1754     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1755     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1756     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1757     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1758     emit_d32(cbuf, offset);
1759   } else {
1760     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1761     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1762     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1763     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1764     emit_d8(cbuf, offset);
1765   }
1766 }
1767 
1768 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1769 {
1770   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1771   return (offset < 0x80) ? 5 : 8; // REX
1772 }
1773 
1774 //=============================================================================
1775 
1776 // emit call stub, compiled java to interpreter
1777 void emit_java_to_interp(CodeBuffer& cbuf)
1778 {
1779   // Stub is fixed up when the corresponding call is converted from
1780   // calling compiled code to calling interpreted code.
1781   // movq rbx, 0
1782   // jmp -5 # to self
1783 
1784   address mark = cbuf.insts_mark();  // get mark within main instrs section
1785 
1786   // Note that the code buffer's insts_mark is always relative to insts.
1787   // That's why we must use the macroassembler to generate a stub.
1788   MacroAssembler _masm(&cbuf);
1789 
1790   address base =
1791   __ start_a_stub(Compile::MAX_stubs_size);
1792   if (base == NULL)  return;  // CodeBuffer::expand failed
1793   // static stub relocation stores the instruction address of the call
1794   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1795   // static stub relocation also tags the methodOop in the code-stream.
1796   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1797   // This is recognized as unresolved by relocs/nativeinst/ic code
1798   __ jump(RuntimeAddress(__ pc()));
1799 
1800   // Update current stubs pointer and restore insts_end.
1801   __ end_a_stub();
1802 }
1803 
1804 // size of call stub, compiled java to interpretor
1805 uint size_java_to_interp()
1806 {
1807   return 15;  // movq (1+1+8); jmp (1+4)
1808 }
1809 
1810 // relocation entries for call stub, compiled java to interpretor
1811 uint reloc_java_to_interp()
1812 {
1813   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1814 }
1815 
1816 //=============================================================================
1817 #ifndef PRODUCT
1818 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1819 {
1820   if (UseCompressedOops) {
1821     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1822     if (Universe::narrow_oop_shift() != 0) {
1823       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1824     }
1825     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1826   } else {
1827     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1828                  "# Inline cache check");
1829   }
1830   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1831   st->print_cr("\tnop\t# nops to align entry point");
1832 }
1833 #endif
1834 
1835 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1836 {
1837   MacroAssembler masm(&cbuf);
1838   uint insts_size = cbuf.insts_size();
1839   if (UseCompressedOops) {
1840     masm.load_klass(rscratch1, j_rarg0);
1841     masm.cmpptr(rax, rscratch1);
1842   } else {
1843     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1844   }
1845 
1846   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1847 
1848   /* WARNING these NOPs are critical so that verified entry point is properly
1849      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1850   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1851   if (OptoBreakpoint) {
1852     // Leave space for int3
1853     nops_cnt -= 1;
1854   }
1855   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1856   if (nops_cnt > 0)
1857     masm.nop(nops_cnt);
1858 }
1859 
// Size of the unverified entry point.  The emitted length depends on several
// runtime settings (see emit()), so no closed form is given here; the generic
// MachNode::size() is used instead.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1865 
1866 
1867 //=============================================================================
// Number of bytes reserved for the exception handler stub; used by
// emit_exception_handler() both to size the stub and to assert that the
// emitted jump fits.
uint size_exception_handler()
{
  // NativeCall instruction size is the same as NativeJump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1875 
1876 // Emit exception handler code.
1877 int emit_exception_handler(CodeBuffer& cbuf)
1878 {
1879 
1880   // Note that the code buffer's insts_mark is always relative to insts.
1881   // That's why we must use the macroassembler to generate a handler.
1882   MacroAssembler _masm(&cbuf);
1883   address base =
1884   __ start_a_stub(size_exception_handler());
1885   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1886   int offset = __ offset();
1887   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1888   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1889   __ end_a_stub();
1890   return offset;
1891 }
1892 
1893 uint size_deopt_handler()
1894 {
1895   // three 5 byte instructions
1896   return 15;
1897 }
1898 
1899 // Emit deopt handler code.
1900 int emit_deopt_handler(CodeBuffer& cbuf)
1901 {
1902 
1903   // Note that the code buffer's insts_mark is always relative to insts.
1904   // That's why we must use the macroassembler to generate a handler.
1905   MacroAssembler _masm(&cbuf);
1906   address base =
1907   __ start_a_stub(size_deopt_handler());
1908   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1909   int offset = __ offset();
1910   address the_pc = (address) __ pc();
1911   Label next;
1912   // push a "the_pc" on the stack without destroying any registers
1913   // as they all may be live.
1914 
1915   // push address of "next"
1916   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1917   __ bind(next);
1918   // adjust it so it matches "the_pc"
1919   __ subptr(Address(rsp, 0), __ offset() - offset);
1920   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1921   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1922   __ end_a_stub();
1923   return offset;
1924 }
1925 
1926 
1927 const bool Matcher::match_rule_supported(int opcode) {
1928   if (!has_match_rule(opcode))
1929     return false;
1930 
1931   return true;  // Per default match rules are supported.
1932 }
1933 
// Converts a register number into an offset relative to the start of the FP
// registers by subtracting 32.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  return regnum - 32; // The FP registers are in the second chunk
}
1938 
// This query exists for UltraSparc; returning true here just means that a
// fast long-to-float (l2f) conversion is available.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1943 
// Vector width in bytes: fixed at 8.
const uint Matcher::vector_width_in_bytes(void) {
  return 8;
}
1948 
// Ideal register type used for vectors: Op_RegD.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1953 
1954 // Is this branch offset short enough that a short branch can be used?
1955 //
1956 // NOTE: If the platform does not provide any short branch variants, then
1957 //       this method should return false for offset 0.
1958 bool Matcher::is_short_branch_offset(int rule, int offset) {
1959   // the short version of jmpConUCF2 contains multiple branches,
1960   // making the reach slightly less
1961   if (rule == jmpConUCF2_rule)
1962     return (-126 <= offset && offset <= 125);
1963   return (-128 <= offset && offset <= 127);
1964 }
1965 
// Answers whether a 64-bit constant counts as "simple".  The value argument
// is currently ignored: the answer is always true.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Alternative kept for reference (restrict to values that fit in 32 bits):
  //return value == (int) value;  // Cf. storeImmL and immL32.

  // Probably always true, even if a temp register is required.
  return true;
}
1973 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// not bytes, so this flag is false.
const bool Matcher::init_array_count_is_in_bytes = false;
1976 
// Threshold size for ClearArray, expressed in bytes as eight longs
// (8 * BytesPerLong).
const int Matcher::init_array_short_size = 8 * BytesPerLong;
1979 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;
1984 
// Only meaningful with compressed oops (asserted).  Returns true when
// LogMinObjAlignmentInBytes is at most 3.
// NOTE(review): presumably 3 is the largest shift an x86 scaled-index address
// (scale factor 8) can absorb -- confirm.
bool Matcher::narrow_oop_use_complex_address() {
  assert(UseCompressedOops, "only for compressed oops code");
  return (LogMinObjAlignmentInBytes <= 3);
}
1989 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from the stack.
const bool Matcher::rematerialize_float_constants = true; // XXX
1996 
// If the CPU can load and store misaligned doubles directly then no
// fixup is needed.  Otherwise the double is split into 2 integer pieces
// and moved piece by piece.  This only happens when passing doubles into
// C code, as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2002 
// No-op on amd64: both arguments are ignored.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2005 
// Advertise here whether the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
2009 
// Are floats converted to double when stored to the stack during
// deoptimization?  On x64 they are stored without conversion, so normal
// access can be used.
bool Matcher::float_in_double() { return false; }
2013 
// Do ints take an entire long register or just half?  Here: the entire
// register.
const bool Matcher::int_in_long = true;
2016 
2017 // Return whether or not this register is ever used as an argument.
2018 // This function is used on startup to build the trampoline stubs in
2019 // generateOptoStub.  Registers not mentioned will be killed by the VM
2020 // call in the trampoline, and arguments in those registers not be
2021 // available to the callee.
2022 bool Matcher::can_be_java_arg(int reg)
2023 {
2024   return
2025     reg ==  RDI_num || reg ==  RDI_H_num ||
2026     reg ==  RSI_num || reg ==  RSI_H_num ||
2027     reg ==  RDX_num || reg ==  RDX_H_num ||
2028     reg ==  RCX_num || reg ==  RCX_H_num ||
2029     reg ==   R8_num || reg ==   R8_H_num ||
2030     reg ==   R9_num || reg ==   R9_H_num ||
2031     reg ==  R12_num || reg ==  R12_H_num ||
2032     reg == XMM0_num || reg == XMM0_H_num ||
2033     reg == XMM1_num || reg == XMM1_H_num ||
2034     reg == XMM2_num || reg == XMM2_H_num ||
2035     reg == XMM3_num || reg == XMM3_H_num ||
2036     reg == XMM4_num || reg == XMM4_H_num ||
2037     reg == XMM5_num || reg == XMM5_H_num ||
2038     reg == XMM6_num || reg == XMM6_H_num ||
2039     reg == XMM7_num || reg == XMM7_H_num;
2040 }
2041 
// A register is a spillable argument exactly when it can be a Java argument
// register; this simply forwards to can_be_java_arg().
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2046 
// Should long division by a constant use the assembly (hardware divide)
// path?  Always false here; the divisor argument is ignored.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
  return false;
}
2053 
// Register for the DIVI projection of divmodI: the int quotient is taken
// from the INT_RAX_REG register mask.
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask;
}
2058 
// Register for the MODI projection of divmodI: the int remainder is taken
// from the INT_RDX_REG register mask.
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask;
}
2063 
// Register for the DIVL projection of divmodL: the long quotient is taken
// from the LONG_RAX_REG register mask.
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask;
}
2068 
// Register for the MODL projection of divmodL: the long remainder is taken
// from the LONG_RDX_REG register mask.
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask;
}
2073 
// Register mask used to save SP across a method handle invoke: the
// PTR_RBP_REG mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return PTR_RBP_REG_mask;
}
2077 
2078 static Address build_address(int b, int i, int s, int d) {
2079   Register index = as_Register(i);
2080   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2081   if (index == rsp) {
2082     index = noreg;
2083     scale = Address::no_scale;
2084   }
2085   Address addr(as_Register(b), index, scale, d);
2086   return addr;
2087 }
2088 
2089 %}
2090 
2091 //----------ENCODING BLOCK-----------------------------------------------------
2092 // This block specifies the encoding classes used by the compiler to
2093 // output byte streams.  Encoding classes are parameterized macros
2094 // used by Machine Instruction Nodes in order to generate the bit
2095 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2098 // COND_INTER.  REG_INTER causes an operand to generate a function
2099 // which returns its register number when queried.  CONST_INTER causes
2100 // an operand to generate a function which returns the value of the
2101 // constant when queried.  MEMORY_INTER causes an operand to generate
2102 // four functions which return the Base Register, the Index Register,
2103 // the Scale Value, and the Offset Value of the operand when queried.
2104 // COND_INTER causes an operand to generate six functions which return
2105 // the encoding code (ie - encoding bits for the instruction)
2106 // associated with each basic boolean condition for a conditional
2107 // instruction.
2108 //
2109 // Instructions specify two basic values for encoding.  Again, a
2110 // function is available to check if the constant displacement is an
2111 // oop. They use the ins_encode keyword to specify their encoding
2112 // classes (which must be a sequence of enc_class names, and their
2113 // parameters, specified in the encoding block), and they use the
2114 // opcode keyword to specify, in order, their primary, secondary, and
2115 // tertiary opcode.  Only the opcode sections which a particular
2116 // instruction needs for encoding need to be specified.
2117 encode %{
2118   // Build emit functions for each basic byte or larger field in the
2119   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2120   // from C++ code in the enc_class source block.  Emit functions will
2121   // live in the main source block for now.  In future, we can
2122   // generalize this by adding a syntax that specifies the sizes of
2123   // fields in an order, so that the adlc can build the emit functions
2124   // automagically
2125 
  // Emit primary opcode ($primary is the first value given to the
  // instruction's opcode keyword).
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}
2131 
  // Emit secondary opcode ($secondary is the second value given to the
  // instruction's opcode keyword).
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}
2137 
  // Emit tertiary opcode ($tertiary is the third value given to the
  // instruction's opcode keyword).
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}
2143 
  // Emit opcode directly: the opcode byte is supplied as the constant
  // operand d8 rather than through the opcode keyword.
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}
2149 
  // Emit size prefix: the single byte 0x66 (the x86 operand-size override).
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}
2155 
  // Emit a register-direct ModRM byte (mod = 3) with a zero reg field and
  // the low three bits of the register encoding in the r/m field.  No REX
  // prefix is emitted here.
  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}
2160 
  // Emit a register-direct ModRM byte (mod = 3): dst goes into the reg field
  // and src into the r/m field, each reduced to its low three bits.  No REX
  // prefix is emitted here.
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2165 
  // Emit the given opcode byte followed by a register-direct ModRM byte
  // (mod = 3, reg = dst, r/m = src, low three bits of each).  No REX prefix
  // is emitted here.
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2171 
  // Fix-up sequence emitted after a floating-point compare.  When the parity
  // flag is set, the saved flags word on the stack is ANDed with 0xffffff2b
  // and reloaded; otherwise the whole fix-up is skipped.
  // NOTE(review): parity set presumably signals an unordered (NaN) compare
  // result -- confirm against the compare instructions that use this class.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // Displacement 0x0A skips pushfq (1 byte) + andq (8 bytes) + popfq
    // (1 byte) and lands on the trailing nop.
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2194 
2195   enc_class cmpfp3(rRegI dst)
2196   %{
2197     int dstenc = $dst$$reg;
2198 
2199     // movl $dst, -1
2200     if (dstenc >= 8) {
2201       emit_opcode(cbuf, Assembler::REX_B);
2202     }
2203     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2204     emit_d32(cbuf, -1);
2205 
2206     // jp,s done
2207     emit_opcode(cbuf, 0x7A);
2208     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2209 
2210     // jb,s done
2211     emit_opcode(cbuf, 0x72);
2212     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2213 
2214     // setne $dst
2215     if (dstenc >= 4) {
2216       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2217     }
2218     emit_opcode(cbuf, 0x0F);
2219     emit_opcode(cbuf, 0x95);
2220     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2221 
2222     // movzbl $dst, $dst
2223     if (dstenc >= 4) {
2224       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2225     }
2226     emit_opcode(cbuf, 0x0F);
2227     emit_opcode(cbuf, 0xB6);
2228     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2229   %}
2230 
  // Prologue for 32-bit signed division.  Emits everything up to and
  // including cltd; the idivl itself must be emitted by the instruction that
  // uses this class, and the branch displacements below already account for
  // its length.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // Skips xor (2) + cmp (3, or 4 with REX_B) + je (2).
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // Skips cltd (1) + the caller's idiv (2, or 3 with REX_B).
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
2294 
  // Prologue for 64-bit signed division.  Emits everything up to and
  // including cqto; the idivq itself must be emitted by the instruction that
  // uses this class, and the branch displacements below already account for
  // its length.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    // Skips xor (2) + cmp (4) + je (2).
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    // A REX prefix is always present here, so the length does not depend on
    // the divisor register.
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    // Skips cqto (2) + the caller's idiv (3).
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2365 
2366   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2367   enc_class OpcSE(immI imm)
2368   %{
2369     // Emit primary opcode and set sign-extend bit
2370     // Check for 8-bit immediate, and set sign extend bit in opcode
2371     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2372       emit_opcode(cbuf, $primary | 0x02);
2373     } else {
2374       // 32-bit immediate
2375       emit_opcode(cbuf, $primary);
2376     }
2377   %}
2378 
2379   enc_class OpcSErm(rRegI dst, immI imm)
2380   %{
2381     // OpcSEr/m
2382     int dstenc = $dst$$reg;
2383     if (dstenc >= 8) {
2384       emit_opcode(cbuf, Assembler::REX_B);
2385       dstenc -= 8;
2386     }
2387     // Emit primary opcode and set sign-extend bit
2388     // Check for 8-bit immediate, and set sign extend bit in opcode
2389     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2390       emit_opcode(cbuf, $primary | 0x02);
2391     } else {
2392       // 32-bit immediate
2393       emit_opcode(cbuf, $primary);
2394     }
2395     // Emit r/m byte with secondary opcode, after primary opcode.
2396     emit_rm(cbuf, 0x3, $secondary, dstenc);
2397   %}
2398 
2399   enc_class OpcSErm_wide(rRegL dst, immI imm)
2400   %{
2401     // OpcSEr/m
2402     int dstenc = $dst$$reg;
2403     if (dstenc < 8) {
2404       emit_opcode(cbuf, Assembler::REX_W);
2405     } else {
2406       emit_opcode(cbuf, Assembler::REX_WB);
2407       dstenc -= 8;
2408     }
2409     // Emit primary opcode and set sign-extend bit
2410     // Check for 8-bit immediate, and set sign extend bit in opcode
2411     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2412       emit_opcode(cbuf, $primary | 0x02);
2413     } else {
2414       // 32-bit immediate
2415       emit_opcode(cbuf, $primary);
2416     }
2417     // Emit r/m byte with secondary opcode, after primary opcode.
2418     emit_rm(cbuf, 0x3, $secondary, dstenc);
2419   %}
2420 
2421   enc_class Con8or32(immI imm)
2422   %{
2423     // Check for 8-bit immediate, and set sign extend bit in opcode
2424     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2425       $$$emit8$imm$$constant;
2426     } else {
2427       // 32-bit immediate
2428       $$$emit32$imm$$constant;
2429     }
2430   %}
2431 
2432   enc_class Lbl(label labl)
2433   %{
2434     // JMP, CALL
2435     Label* l = $labl$$label;
2436     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2437   %}
2438 
2439   enc_class LblShort(label labl)
2440   %{
2441     // JMP, CALL
2442     Label* l = $labl$$label;
2443     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2444     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2445     emit_d8(cbuf, disp);
2446   %}
2447 
  // Emit the secondary opcode combined with the register encoding via
  // emit_cc, for instructions that carry the register inside the opcode byte.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
2453 
  // Emit the tertiary opcode combined with the register encoding via
  // emit_cc, for instructions that carry the register inside the opcode byte.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
2459 
  // Emit a register-direct ModRM byte (mod = 3) whose reg field holds the
  // secondary opcode and whose r/m field holds the low three bits of the
  // register.  No REX prefix is emitted here.
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
2465 
2466   enc_class Jcc(cmpOp cop, label labl)
2467   %{
2468     // JCC
2469     Label* l = $labl$$label;
2470     $$$emit8$primary;
2471     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2472     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2473   %}
2474 
2475   enc_class JccShort (cmpOp cop, label labl)
2476   %{
2477   // JCC
2478     Label *l = $labl$$label;
2479     emit_cc(cbuf, $primary, $cop$$cmpcode);
2480     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2481     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2482     emit_d8(cbuf, disp);
2483   %}
2484 
  // Emit the two opcode bytes of a conditional move: the primary opcode,
  // then the secondary opcode combined with the condition code.  Any prefix
  // and the ModRM byte are left to other encoding classes.
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
2491 
2492   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2493   %{
2494     // Conditional float move done as a short branch around a reg-reg move.
2495     const int  dstenc   = $dst$$reg;
2496     const int  srcenc   = $src$$reg;
2497     const bool need_rex = (dstenc >= 8) || (srcenc >= 8);
2498     // movaps is [REX] 0F 28 /r; movss carries an extra F3 prefix byte
2499     const int  move_len = (UseXmmRegToRegMoveAll ? 3 : 4) + (need_rex ? 1 : 0);
2500 
2501     // Invert sense of branch from sense of cmov; the branch skips the move
2502     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2503     emit_d8(cbuf, move_len);
2504 
2505     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2506     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2507     if (dstenc >= 8) {
2508       emit_opcode(cbuf, srcenc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
2509     } else if (srcenc >= 8) {
2510       emit_opcode(cbuf, Assembler::REX_B);
2511     }
2512     emit_opcode(cbuf, 0x0F);
2513     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2514     emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2515   %}
2516 
2517   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2518   %{
2519     // Conditional double move done as a short branch around a reg-reg move.
2520     const int  dstenc   = $dst$$reg;
2521     const int  srcenc   = $src$$reg;
2522     const bool need_rex = (dstenc >= 8) || (srcenc >= 8);
2523 
2524     // Invert sense of branch from sense of cmov.  The move is a prefix
2525     // byte, 0F, opcode and ModRM (4 bytes), plus one more for a REX prefix.
2526     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2527     emit_d8(cbuf, need_rex ? 5 : 4);
2528 
2529     // UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2530     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2531     if (dstenc >= 8) {
2532       emit_opcode(cbuf, srcenc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
2533     } else if (srcenc >= 8) {
2534       emit_opcode(cbuf, Assembler::REX_B);
2535     }
2536     emit_opcode(cbuf, 0x0F);
2537     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2538     emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2539   %}
2540 
2541   enc_class enc_PartialSubtypeCheck()
2542   %{
2543     // Emits the slow-path subtype check through the macro assembler.  When
2544     // the primary opcode is non-zero, RDI is cleared on the path that falls
2545     // out of the check; the miss label is bound after that clearing code.
2546     Register Rrdi = as_Register(RDI_enc); // result register
2547     Register Rrax = as_Register(RAX_enc); // super class
2548     Register Rrcx = as_Register(RCX_enc); // killed
2549     Register Rrsi = as_Register(RSI_enc); // sub class
2550     Label miss;
2551     const bool set_cond_codes = true;
2552 
2553     MacroAssembler _masm(&cbuf);
2554     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2555                                      NULL, &miss, set_cond_codes);
2556     if ($primary) { __ xorptr(Rrdi, Rrdi); }
2557     __ bind(miss);
2558   %}
2559 
2560   enc_class Java_To_Interpreter(method meth)
2561   %{
2562     // CALL Java_To_Interpreter: direct call into the runtime.
2563     // Mark the instruction start; this is the address used for relocation info.
2564     cbuf.set_insts_mark();
2565     $$$emit8$primary;
2566     // Displacement is measured from the end of the 4-byte field emitted next.
2567     const intptr_t after_disp = ((intptr_t) cbuf.insts_end()) + 4;
2568     emit_d32_reloc(cbuf, (int) ($meth$$method - after_disp),
2569                    runtime_call_Relocation::spec(),
2570                    RELOC_DISP32);
2571   %}
2572 
2573   enc_class preserve_SP %{
2574     // RBP is preserved across all calls, even compiled calls.
2575     // Use it to preserve RSP in places where the callee might change the SP.
2576     debug_only(int size_before = cbuf.insts_size());
2577     MacroAssembler _masm(&cbuf);
2578     __ movptr(rbp_mh_SP_save, rsp);
2579     debug_only(int size_after = cbuf.insts_size());
2580     assert(size_after - size_before == preserve_SP_size(), "correct size prediction");
2581   %}
2582 
2583   enc_class restore_SP %{
2584     MacroAssembler _masm(&cbuf);
2585     __ movptr(rsp, rbp_mh_SP_save); // reload RSP from rbp_mh_SP_save (reverse of the move in preserve_SP)
2586   %}
2587 
2588   enc_class Java_Static_Call(method meth)
2589   %{
2590     // JAVA STATIC CALL
2591     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2592     // determine who we intended to call.
2593     cbuf.set_insts_mark();
2594     $$$emit8$primary;
2595 
2596     // rel32 is relative to the end of the 4-byte displacement emitted below.
2597     const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4);
2598 
2599     if (_method) {
2600       // _method is set: pick the relocation type for the call site
2601       if (_optimized_virtual) {
2602         emit_d32_reloc(cbuf, rel32,
2603                        opt_virtual_call_Relocation::spec(), RELOC_DISP32);
2604       } else {
2605         emit_d32_reloc(cbuf, rel32,
2606                        static_call_Relocation::spec(), RELOC_DISP32);
2607       }
2608 
2609       // Emit stub for static call
2610       emit_java_to_interp(cbuf);
2611     } else {
2612       // No method: relocate as a runtime call, and emit no stub
2613       emit_d32_reloc(cbuf, rel32,
2614                      runtime_call_Relocation::spec(), RELOC_DISP32);
2615     }
2616   %}
2617 
2618   enc_class Java_Dynamic_Call(method meth)
2619   %{
2620     // JAVA DYNAMIC CALL
2621     // Emits two instructions: a 64-bit immediate load into RAX, then the call.
2622     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2623     // emit_call_dynamic_prologue( cbuf );
2624     cbuf.set_insts_mark(); // mark the start of the movq; read back below
2625 
2626     // movq rax, -1
2627     emit_opcode(cbuf, Assembler::REX_W);
2628     emit_opcode(cbuf, 0xB8 | RAX_enc);
2629     emit_d64_reloc(cbuf,
2630                    (int64_t) Universe::non_oop_word(),
2631                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2632     address virtual_call_oop_addr = cbuf.insts_mark(); // the mark set before the movq
2633     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2634     // who we intended to call.
2635     cbuf.set_insts_mark(); // move the mark to the call instruction itself
2636     $$$emit8$primary;
2637     emit_d32_reloc(cbuf,
2638                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2639                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2640                    RELOC_DISP32);
2641   %}
2642 
2643   enc_class Java_Compiled_Call(method meth)
2644   %{
2645     // JAVA COMPILED CALL
2646     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2647 
2648     // The offset is not guaranteed to fit in a signed byte (an older note here
2649     // reported 128 in 1.5 NON-PRODUCT), so pick the displacement width below.
2650 
2651     // callq *disp(%rax)
2652     cbuf.set_insts_mark();
2653     $$$emit8$primary;
2654     if (-0x80 <= disp && disp < 0x80) {
2655       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte, 8-bit displacement form
2656       emit_d8(cbuf, disp); // Displacement
2657     } else {
2658       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte, 32-bit displacement form
2659       emit_d32(cbuf, disp); // Displacement
2660     }
2661   %}
2662 
2663   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2664   %{
2665     // SAL, SAR, SHR by an 8-bit immediate
2666     const int enc = $dst$$reg;
2667     // Registers with encoding >= 8 need a REX.B prefix
2668     if (enc >= 8) {
2669       emit_opcode(cbuf, Assembler::REX_B);
2670     }
2671     $$$emit8$primary;
2672     emit_rm(cbuf, 0x3, $secondary, enc & 7);
2673     $$$emit8$shift$$constant;
2674   %}
2675 
2676   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2677   %{
2678     // SAL, SAR, SHR by an 8-bit immediate, REX.W form
2679     const int enc = $dst$$reg;
2680     // REX.W is always emitted; the B bit is added for encodings >= 8
2681     if (enc >= 8) {
2682       emit_opcode(cbuf, Assembler::REX_WB);
2683     } else {
2684       emit_opcode(cbuf, Assembler::REX_W);
2685     }
2686     $$$emit8$primary;
2687     emit_rm(cbuf, 0x3, $secondary, enc & 7);
2688     $$$emit8$shift$$constant;
2689   %}
2690 
2691   enc_class load_immI(rRegI dst, immI src)
2692   %{
2693     // Opcode B8+r followed by a 32-bit immediate; REX.B for encodings >= 8
2694     const int enc = $dst$$reg;
2695     if (enc >= 8) {
2696       emit_opcode(cbuf, Assembler::REX_B);
2697     }
2698     emit_opcode(cbuf, 0xB8 | (enc & 7));
2699     $$$emit32$src$$constant;
2700   %}
2701 
2702   enc_class load_immL(rRegL dst, immL src)
2703   %{
2704     // REX.W form of opcode B8+r, followed by the full 64-bit immediate
2705     const int enc = $dst$$reg;
2706     if (enc >= 8) {
2707       emit_opcode(cbuf, Assembler::REX_WB);
2708     } else {
2709       emit_opcode(cbuf, Assembler::REX_W);
2710     }
2711     emit_opcode(cbuf, 0xB8 | (enc & 7));
2712     emit_d64(cbuf, $src$$constant);
2713   %}
2714 
2715   enc_class load_immUL32(rRegL dst, immUL32 src)
2716   %{
2717     // same as load_immI, but this time we care about zeroes in the high word
2718     const int enc = $dst$$reg;
2719     // No REX.W here: only REX.B, and only for encodings >= 8
2720     if (enc >= 8) {
2721       emit_opcode(cbuf, Assembler::REX_B);
2722     }
2723     emit_opcode(cbuf, 0xB8 | (enc & 7));
2724     $$$emit32$src$$constant;
2725   %}
2726 
2727   enc_class load_immL32(rRegL dst, immL32 src)
2728   %{
2729     // REX.W, opcode C7 with /0 in the ModRM byte, then a 32-bit immediate
2730     const int enc = $dst$$reg;
2731     if (enc >= 8) {
2732       emit_opcode(cbuf, Assembler::REX_WB);
2733     } else {
2734       emit_opcode(cbuf, Assembler::REX_W);
2735     }
2736     emit_opcode(cbuf, 0xC7);
2737     emit_rm(cbuf, 0x03, 0x00, enc & 7);
2738     $$$emit32$src$$constant;
2739   %}
2740 
2741   enc_class load_immP31(rRegP dst, immP32 src)
2742   %{
2743     // same as load_immI, but this time we care about zeroes in the high word
2744     const int enc = $dst$$reg;
2745     // No REX.W here: only REX.B, and only for encodings >= 8
2746     if (enc >= 8) {
2747       emit_opcode(cbuf, Assembler::REX_B);
2748     }
2749     emit_opcode(cbuf, 0xB8 | (enc & 7));
2750     $$$emit32$src$$constant;
2751   %}
2752 
2753   enc_class load_immP(rRegP dst, immP src)
2754   %{
2755     // REX.W form of opcode B8+r with a 64-bit pointer immediate
2756     const int enc = $dst$$reg;
2757     if (enc >= 8) {
2758       emit_opcode(cbuf, Assembler::REX_WB);
2759     } else {
2760       emit_opcode(cbuf, Assembler::REX_W);
2761     }
2762     emit_opcode(cbuf, 0xB8 | (enc & 7));
2763     // This next line should be generated from ADLC
2764     if ($src->constant_is_oop()) {
2765       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2766     } else {
2767       emit_d64(cbuf, $src$$constant);
2768     }
2769   %}
2770 
2771   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2772   enc_class enc_copy(rRegI dst, rRegI src)
2773   %{
2774     encode_copy(cbuf, $dst$$reg, $src$$reg); // all of the work is done by encode_copy()
2775   %}
2776 
2777   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2778   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2779     encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); // all of the work is done by encode_CopyXD()
2780   %}
2781 
2782   enc_class enc_copy_always(rRegI dst, rRegI src)
2783   %{
2784     // Unconditional reg-reg move.  Unlike enc_copy_wide, this emits the
2785     // instruction even when dst and src are the same register.
2786     const int dstenc = $dst$$reg;
2787     const int srcenc = $src$$reg;
2788     const bool dst_hi = dstenc >= 8;
2789     const bool src_hi = srcenc >= 8;
2790 
2791     // dst >= 8 contributes the R bit, src >= 8 the B bit; none means no prefix
2792     if (dst_hi && src_hi) {
2793       emit_opcode(cbuf, Assembler::REX_RB);
2794     } else if (dst_hi) {
2795       emit_opcode(cbuf, Assembler::REX_R);
2796     } else if (src_hi) {
2797       emit_opcode(cbuf, Assembler::REX_B);
2798     }
2799 
2800     // Opcode 8B, then a register-direct ModRM byte carrying the low three
2801     // bits of each encoding (dst first, src second).
2802     emit_opcode(cbuf, 0x8B);
2803     emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2804   %}
2805 
2806   enc_class enc_copy_wide(rRegL dst, rRegL src)
2807   %{
2808     // REX.W reg-reg move (opcode 8B).  Nothing at all is emitted when dst
2809     // and src have the same encoding.
2810     const int dstenc = $dst$$reg;
2811     const int srcenc = $src$$reg;
2812 
2813     if (dstenc != srcenc) {
2814       const bool dst_hi = dstenc >= 8;
2815       const bool src_hi = srcenc >= 8;
2816 
2817       // REX.W is always present; dst >= 8 adds R, src >= 8 adds B
2818       if (dst_hi && src_hi) {
2819         emit_opcode(cbuf, Assembler::REX_WRB);
2820       } else if (dst_hi) {
2821         emit_opcode(cbuf, Assembler::REX_WR);
2822       } else if (src_hi) {
2823         emit_opcode(cbuf, Assembler::REX_WB);
2824       } else {
2825         emit_opcode(cbuf, Assembler::REX_W);
2826       }
2827 
2828       emit_opcode(cbuf, 0x8B);
2829       emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2830     }
2831   %}
2832 
2833   enc_class Con32(immI src)
2834   %{
2835     // Output immediate: the constant operand goes through the ADLC emit32 form
2836     $$$emit32$src$$constant;
2837   %}
2838 
2839   enc_class Con64(immL src)
2840   %{
2841     // Output the 64-bit immediate into the code buffer
2842     emit_d64(cbuf, $src$$constant);
2843   %}
2844 
2845   enc_class Con32F_as_bits(immF src)
2846   %{
2847     // Output Float immediate bits
2848     const jfloat value = $src$$constant;
2849     // jint_cast yields the 32-bit integer that is written to the buffer
2850     emit_d32(cbuf, jint_cast(value));
2851   %}
2852 
2853   enc_class Con16(immI src)
2854   %{
2855     // Output immediate: the constant operand goes through the ADLC emit16 form
2856     $$$emit16$src$$constant;
2857   %}
2858 
2859   // How is this different from Con32??? XXX
2860   enc_class Con_d32(immI src)
2861   %{
2862     emit_d32(cbuf,$src$$constant); // calls emit_d32() directly; Con32 uses the emit32 form instead
2863   %}
2864 
2865   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2866     // Output immediate memory reference
2867     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); // fields passed: 0x00, the t1 register, 0x05
2868     emit_d32(cbuf, 0x00);                 // followed by a 32-bit zero
2869   %}
2870 
2871   enc_class lock_prefix()
2872   %{
2873     if (os::is_MP()) { // nothing is emitted when os::is_MP() is false
2874       emit_opcode(cbuf, 0xF0); // lock
2875     }
2876   %}
2877 
2878   enc_class REX_mem(memory mem)
2879   %{
2880     // REX prefix for a memory operand: B when base >= 8, X when index >= 8.
2881     // Nothing is emitted when both are below 8.
2882     const bool base_hi  = $mem$$base >= 8;
2883     const bool index_hi = $mem$$index >= 8;
2884     if (base_hi && index_hi) {
2885       emit_opcode(cbuf, Assembler::REX_XB);
2886     } else if (base_hi) {
2887       emit_opcode(cbuf, Assembler::REX_B);
2888     } else if (index_hi) {
2889       emit_opcode(cbuf, Assembler::REX_X);
2890     }
2891   %}
2892 
2893   enc_class REX_mem_wide(memory mem)
2894   %{
2895     // REX.W prefix for a memory operand, adding B when base >= 8 and
2896     // X when index >= 8.  A prefix byte is emitted in every case.
2897     const bool base_hi  = $mem$$base >= 8;
2898     const bool index_hi = $mem$$index >= 8;
2899     if (base_hi && index_hi) {
2900       emit_opcode(cbuf, Assembler::REX_WXB);
2901     } else if (base_hi) {
2902       emit_opcode(cbuf, Assembler::REX_WB);
2903     } else if (index_hi) {
2904       emit_opcode(cbuf, Assembler::REX_WX);
2905     } else {
2906       emit_opcode(cbuf, Assembler::REX_W);
2907     }
2908   %}
2909 
2910   // for byte regs: encodings 4..7 get a plain REX, 8 and above get REX.B
2911   enc_class REX_breg(rRegI reg)
2912   %{
2913     const int enc = $reg$$reg;
2914     if (enc >= 8)      { emit_opcode(cbuf, Assembler::REX_B); }
2915     else if (enc >= 4) { emit_opcode(cbuf, Assembler::REX);   }
2916   %}
2917 
2918   // for byte regs
2919   enc_class REX_reg_breg(rRegI dst, rRegI src)
2920   %{
2921     // dst >= 8 always needs R (plus B when src >= 8).  Otherwise only src
2922     // decides: B for 8 and above, a plain REX for 4..7, nothing below 4.
2923     const int dstenc = $dst$$reg;
2924     const int srcenc = $src$$reg;
2925     if (dstenc >= 8) {
2926       emit_opcode(cbuf, srcenc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
2927     } else if (srcenc >= 8) {
2928       emit_opcode(cbuf, Assembler::REX_B);
2929     } else if (srcenc >= 4) {
2930       emit_opcode(cbuf, Assembler::REX);
2931     }
2932   %}
2933 
2934   // for byte regs
2935   enc_class REX_breg_mem(rRegI reg, memory mem)
2936   %{
2937     const int ext = (($reg$$reg >= 8) ? 4 : 0)     // R: register operand
2938                   | (($mem$$index >= 8) ? 2 : 0)   // X: index register
2939                   | (($mem$$base >= 8) ? 1 : 0);   // B: base register
2940     switch (ext) {
2941       case 0:
2942         if ($reg$$reg >= 4) { emit_opcode(cbuf, Assembler::REX); } // byte regs 4..7
2943         break;
2944       case 1:
2945         emit_opcode(cbuf, Assembler::REX_B);
2946         break;
2947       case 2:
2948         emit_opcode(cbuf, Assembler::REX_X);
2949         break;
2950       case 3:
2951         emit_opcode(cbuf, Assembler::REX_XB);
2952         break;
2953       case 4:
2954         emit_opcode(cbuf, Assembler::REX_R);
2955         break;
2956       case 5:
2957         emit_opcode(cbuf, Assembler::REX_RB);
2958         break;
2959       case 6:
2960         emit_opcode(cbuf, Assembler::REX_RX);
2961         break;
2962       case 7:
2963         emit_opcode(cbuf, Assembler::REX_RXB);
2964         break;
2965     }
2966   %}
2967 
2968   enc_class REX_reg(rRegI reg)
2969   %{
2970     // Only encodings >= 8 need a prefix here, and it is REX.B
2971     const bool extended = $reg$$reg >= 8;
2972     if (extended) { emit_opcode(cbuf, Assembler::REX_B); }
2973   %}
2974 
2975   enc_class REX_reg_wide(rRegI reg)
2976   %{
2977     // A REX.W prefix is always emitted here; encodings >= 8 use the
2978     // variant that also has the B bit.
2979     const bool extended = $reg$$reg >= 8;
2980     const int  prefix   = extended ? Assembler::REX_WB : Assembler::REX_W;
2981     emit_opcode(cbuf, prefix);
2982   %}
2983 
2984   enc_class REX_reg_reg(rRegI dst, rRegI src)
2985   %{
2986     // Optional REX prefix for two register operands:
2987     // dst >= 8 adds the R bit, src >= 8 adds the B bit.
2988     const bool dst_hi = $dst$$reg >= 8;
2989     const bool src_hi = $src$$reg >= 8;
2990     if (dst_hi && src_hi) {
2991       emit_opcode(cbuf, Assembler::REX_RB);
2992     } else if (dst_hi) {
2993       emit_opcode(cbuf, Assembler::REX_R);
2994     } else if (src_hi) {
2995       emit_opcode(cbuf, Assembler::REX_B);
2996     }
2997   %}
2998 
2999   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3000   %{
3001     // REX.W prefix for two register operands; one byte is always emitted.
3002     // dst >= 8 adds the R bit, src >= 8 adds the B bit.
3003     const bool dst_hi = $dst$$reg >= 8;
3004     const bool src_hi = $src$$reg >= 8;
3005     if (dst_hi && src_hi) {
3006       emit_opcode(cbuf, Assembler::REX_WRB);
3007     } else if (dst_hi) {
3008       emit_opcode(cbuf, Assembler::REX_WR);
3009     } else if (src_hi) {
3010       emit_opcode(cbuf, Assembler::REX_WB);
3011     } else {
3012       emit_opcode(cbuf, Assembler::REX_W);
3013     }
3014   %}
3015 
3016   enc_class REX_reg_mem(rRegI reg, memory mem)
3017   %{
3018     // Bit 2: reg >= 8 (R), bit 1: index >= 8 (X), bit 0: base >= 8 (B)
3019     const int ext = (($reg$$reg >= 8) ? 4 : 0)
3020                   | (($mem$$index >= 8) ? 2 : 0)
3021                   | (($mem$$base >= 8) ? 1 : 0);
3022     switch (ext) {  // ext == 0 matches no case: no prefix is emitted
3023       case 1:
3024         emit_opcode(cbuf, Assembler::REX_B);
3025         break;
3026       case 2:
3027         emit_opcode(cbuf, Assembler::REX_X);
3028         break;
3029       case 3:
3030         emit_opcode(cbuf, Assembler::REX_XB);
3031         break;
3032       case 4:
3033         emit_opcode(cbuf, Assembler::REX_R);
3034         break;
3035       case 5:
3036         emit_opcode(cbuf, Assembler::REX_RB);
3037         break;
3038       case 6:
3039         emit_opcode(cbuf, Assembler::REX_RX);
3040         break;
3041       case 7:
3042         emit_opcode(cbuf, Assembler::REX_RXB);
3043         break;
3044     }
3045   %}
3046 
3047   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3048   %{
3049     const int ext = (($reg$$reg >= 8) ? 4 : 0)     // R: register operand
3050                   | (($mem$$index >= 8) ? 2 : 0)   // X: index register
3051                   | (($mem$$base >= 8) ? 1 : 0);   // B: base register
3052     switch (ext) {  // every case includes REX.W, so a prefix is always emitted
3053       case 0:
3054         emit_opcode(cbuf, Assembler::REX_W);
3055         break;
3056       case 1:
3057         emit_opcode(cbuf, Assembler::REX_WB);
3058         break;
3059       case 2:
3060         emit_opcode(cbuf, Assembler::REX_WX);
3061         break;
3062       case 3:
3063         emit_opcode(cbuf, Assembler::REX_WXB);
3064         break;
3065       case 4:
3066         emit_opcode(cbuf, Assembler::REX_WR);
3067         break;
3068       case 5:
3069         emit_opcode(cbuf, Assembler::REX_WRB);
3070         break;
3071       case 6:
3072         emit_opcode(cbuf, Assembler::REX_WRX);
3073         break;
3074       case 7:
3075         emit_opcode(cbuf, Assembler::REX_WRXB);
3076         break;
3077     }
3078   %}
3079 
3080   enc_class reg_mem(rRegI ereg, memory mem)
3081   %{
3082     // Hand the register and every memory-operand component to encode_RegMem.
3083     // High registers handle in encode_RegMem
3084     encode_RegMem(cbuf,
3085                   $ereg$$reg,               // register
3086                   $mem$$base,               // base
3087                   $mem$$index,              // index
3088                   $mem$$scale,              // scale
3089                   $mem$$disp,               // displacement
3090                   $mem->disp_is_oop());     // whether the displacement is an oop
3091   %}
3092 
3093   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3094   %{
3095     const int rm_byte_opcode = $rm_opcode$$constant;
3096 
3097     // Same call as a register/memory encoding, except that the constant
3098     // rm_opcode is passed in the position where a register would go.
3099     // High registers handle in encode_RegMem
3100     encode_RegMem(cbuf,
3101                   rm_byte_opcode,
3102                   $mem$$base,
3103                   $mem$$index,
3104                   $mem$$scale,
3105                   $mem$$disp,
3106                   // disp-as-oop when working with static globals
3107                   $mem->disp_is_oop());
3108   %}
3109 
3110   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3111   %{
3112     // Address built from base register src0 plus constant src1, no index.
3113     encode_RegMem(cbuf,
3114                   $dst$$reg,
3115                   $src0$$reg,        // base (0xFFFFFFFF would indicate no base)
3116                   0x04,              // 0x04 indicates no index
3117                   0x00,              // 0x00 indicates no scale
3118                   $src1$$constant,   // 0x00 indicates no displacement
3119                   false);            // the displacement is not an oop here
3120   %}
3121 
3122   enc_class neg_reg(rRegI dst)
3123   %{
3124     // NEG $dst: optional REX.B, opcode F7, then a register-direct ModRM
3125     // byte with 0x03 in the middle field.
3126     const int enc = $dst$$reg;
3127     if (enc >= 8) {
3128       emit_opcode(cbuf, Assembler::REX_B);
3129     }
3130     emit_opcode(cbuf, 0xF7);
3131     emit_rm(cbuf, 0x3, 0x03, enc & 7);
3132   %}
3133 
3134   enc_class neg_reg_wide(rRegI dst)
3135   %{
3136     // NEG $dst, REX.W form: the prefix is always emitted and gains the
3137     // B bit for encodings >= 8.
3138     const int enc = $dst$$reg;
3139     if (enc >= 8) {
3140       emit_opcode(cbuf, Assembler::REX_WB);
3141     } else {
3142       emit_opcode(cbuf, Assembler::REX_W);
3143     }
3144     emit_opcode(cbuf, 0xF7);
3145     emit_rm(cbuf, 0x3, 0x03, enc & 7);
3146   %}
3147 
3148   enc_class setLT_reg(rRegI dst)
3149   %{
3150     // SETLT $dst: optional REX, then 0F 9C and a register-direct ModRM byte
3151     const int enc = $dst$$reg;
3152     if (enc >= 8) {
3153       emit_opcode(cbuf, Assembler::REX_B);
3154     } else if (enc >= 4) {
3155       // encodings 4..7 get a plain REX prefix
3156       emit_opcode(cbuf, Assembler::REX);
3157     }
3158     emit_opcode(cbuf, 0x0F);
3159     emit_opcode(cbuf, 0x9C);
3160     emit_rm(cbuf, 0x3, 0x0, enc & 7);
3161   %}
3162 
3163   enc_class setNZ_reg(rRegI dst)
3164   %{
3165     // SETNZ $dst: optional REX, then 0F 95 and a register-direct ModRM byte
3166     const int enc = $dst$$reg;
3167     if (enc >= 8) {
3168       emit_opcode(cbuf, Assembler::REX_B);
3169     } else if (enc >= 4) {
3170       // encodings 4..7 get a plain REX prefix
3171       emit_opcode(cbuf, Assembler::REX);
3172     }
3173     emit_opcode(cbuf, 0x0F);
3174     emit_opcode(cbuf, 0x95);
3175     emit_rm(cbuf, 0x3, 0x0, enc & 7);
3176   %}
3177 
3178   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3179                        rcx_RegI tmp)
3180   %{
3181     // cadd_cmpLT, emitted as four instructions:
3182     //   subl p, q ; sbbl tmp, tmp ; andl tmp, y ; addl p, tmp
3183 
3184     const int penc   = $p$$reg;
3185     const int qenc   = $q$$reg;
3186     const int yenc   = $y$$reg;
3187     const int tmpenc = $tmp$$reg;
3188 
3189     const bool p_hi = penc >= 8;
3190     const bool q_hi = qenc >= 8;
3191     const bool y_hi = yenc >= 8;
3192 
3193     // subl $p,$q
3194     if (p_hi && q_hi) {
3195       emit_opcode(cbuf, Assembler::REX_RB);
3196     } else if (p_hi) {
3197       emit_opcode(cbuf, Assembler::REX_R);
3198     } else if (q_hi) {
3199       emit_opcode(cbuf, Assembler::REX_B);
3200     }
3201     emit_opcode(cbuf, 0x2B);
3202     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3203 
3204     // sbbl $tmp, $tmp  (tmp is used unmasked and never gets a REX prefix)
3205     emit_opcode(cbuf, 0x1B);
3206     emit_rm(cbuf, 0x3, tmpenc, tmpenc);
3207 
3208     // andl $tmp, $y
3209     if (y_hi) {
3210       emit_opcode(cbuf, Assembler::REX_B);
3211     }
3212     emit_opcode(cbuf, 0x23);
3213     emit_rm(cbuf, 0x3, tmpenc, yenc & 7);
3214 
3215     // addl $p,$tmp
3216     if (p_hi) {
3217       emit_opcode(cbuf, Assembler::REX_R);
3218     }
3219     emit_opcode(cbuf, 0x03);
3220     emit_rm(cbuf, 0x3, penc & 7, tmpenc);
3221   %}
3222 
3223   // Compare the longs and set -1, 0, or 1 into dst
3224   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3225   %{
3226     const int src1enc = $src1$$reg;
3227     const int src2enc = $src2$$reg;
3228     const int dstenc  = $dst$$reg;
3229     const bool src1_hi  = src1enc >= 8;
3230     const bool src2_hi  = src2enc >= 8;
3231     const bool dst_hi   = dstenc >= 8;
3232     const bool dst_brex = dstenc >= 4; // setne and movzbl then each carry a REX byte
3233 
3234     // cmpq $src1, $src2
3235     if (src1_hi && src2_hi) {
3236       emit_opcode(cbuf, Assembler::REX_WRB);
3237     } else if (src1_hi) {
3238       emit_opcode(cbuf, Assembler::REX_WR);
3239     } else if (src2_hi) {
3240       emit_opcode(cbuf, Assembler::REX_WB);
3241     } else {
3242       emit_opcode(cbuf, Assembler::REX_W);
3243     }
3244     emit_opcode(cbuf, 0x3B);
3245     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3246 
3247     // movl $dst, -1
3248     if (dst_hi) {
3249       emit_opcode(cbuf, Assembler::REX_B);
3250     }
3251     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3252     emit_d32(cbuf, -1);
3253 
3254     // jl,s done -- jumps over setne + movzbl: 3 bytes each, plus a REX byte each
3255     emit_opcode(cbuf, 0x7C);
3256     emit_d8(cbuf, dst_brex ? 0x08 : 0x06);
3257 
3258     // setne $dst
3259     if (dst_brex) {
3260       emit_opcode(cbuf, dst_hi ? Assembler::REX_B : Assembler::REX);
3261     }
3262     emit_opcode(cbuf, 0x0F);
3263     emit_opcode(cbuf, 0x95);
3264     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3265 
3266     // movzbl $dst, $dst
3267     if (dst_brex) {
3268       emit_opcode(cbuf, dst_hi ? Assembler::REX_RB : Assembler::REX);
3269     }
3270     emit_opcode(cbuf, 0x0F);
3271     emit_opcode(cbuf, 0xB6);
3272     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3273   %}
3274 
3275   enc_class Push_ResultXD(regD dst) %{
3276     // Stores via FSTP to [RSP], loads that slot into XMM dst, then adds 8
3277     // to RSP.
3278     const int dstenc = $dst$$reg;
3279     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3280     const int load_prefix = UseXmmLoadAndClearUpper ? 0xF2 : 0x66;
3281     const int load_opcode = UseXmmLoadAndClearUpper ? 0x10 : 0x12;
3282     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3283     emit_opcode(cbuf, load_prefix);
3284     if (dstenc >= 8) { emit_opcode(cbuf, Assembler::REX_R); }
3285     emit_opcode(cbuf, 0x0F);
3286     emit_opcode(cbuf, load_opcode);
3287     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3288 
3289     // add rsp,8
3290     emit_opcode(cbuf, Assembler::REX_W);
3291     emit_opcode(cbuf, 0x83);
3292     emit_rm(cbuf, 0x3, 0x0, RSP_enc);
3293     emit_d8(cbuf, 0x08);
3294   %}
3295 
3296   enc_class Push_SrcXD(regD src) %{
3297     int srcenc = $src$$reg;
3298     // Three steps: subtract 8 from RSP, store src at [RSP], then load [RSP] with opcode DD.
3299     // subq rsp,#8
3300     emit_opcode(cbuf, Assembler::REX_W);
3301     emit_opcode(cbuf, 0x83);
3302     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3303     emit_d8(cbuf, 0x8);
3304 
3305     // movsd [rsp],src
3306     emit_opcode(cbuf, 0xF2);
3307     if (srcenc >= 8) {
3308       emit_opcode(cbuf, Assembler::REX_R); // only src can need a REX bit; the address is [RSP]
3309     }
3310     emit_opcode(cbuf, 0x0F);
3311     emit_opcode(cbuf, 0x11);
3312     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3313 
3314     // fldd [rsp]
3315     emit_opcode(cbuf, 0x66); // NOTE(review): 0x66 prefix ahead of the DD opcode -- confirm it is intended
3316     emit_opcode(cbuf, 0xDD);
3317     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3318   %}
3319 
3320 
3321   enc_class movq_ld(regD dst, memory mem) %{
3322     MacroAssembler _masm(&cbuf);
3323     __ movq($dst$$XMMRegister, $mem$$Address); // movq with XMM dst first, the memory operand second
3324   %}
3325 
3326   enc_class movq_st(memory mem, regD src) %{
3327     MacroAssembler _masm(&cbuf);
3328     __ movq($mem$$Address, $src$$XMMRegister); // movq with the memory operand first, XMM src second
3329   %}
3330 
3331   enc_class pshufd_8x8(regF dst, regF src) %{
3332     MacroAssembler _masm(&cbuf);
3333     // encode_CopyXD(dst, src) runs first; both shuffles below take dst as source and destination.
3334     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3335     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3336     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3337   %}
3338 
3339   enc_class pshufd_4x16(regF dst, regF src) %{
3340     MacroAssembler _masm(&cbuf);
3341     // A single pshuflw with selector 0x00, from src into dst.
3342     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3343   %}
3344 
3345   enc_class pshufd(regD dst, regD src, int mode) %{
3346     MacroAssembler _masm(&cbuf);
3347     // A single pshufd from src into dst; the selector is the mode argument.
3348     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3349   %}
3350 
3351   enc_class pxor(regD dst, regD src) %{
3352     MacroAssembler _masm(&cbuf);
3353     // A single pxor with dst as the first operand and src as the second.
3354     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3355   %}
3356 
3357   enc_class mov_i2x(regD dst, rRegI src) %{
3358     MacroAssembler _masm(&cbuf);
3359     // A single movdl with XMM dst first and general register src second.
3360     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3361   %}
3362 
3363   // obj: object to lock
3364   // box: box address (header location) -- killed
3365   // tmp: rax -- killed
3366   // scr: rbx -- killed
3367   //
3368   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3369   // from i486.ad.  See that file for comments.
3370   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3371   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3372   // EmitSync & 1 and EmitSync & 2 select reduced variants of the code below;
3373   // with neither set, the full path (including inflated monitors) is emitted.
3374   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3375   %{
3376     Register objReg = as_Register((int)$obj$$reg);
3377     Register boxReg = as_Register((int)$box$$reg);
3378     Register tmpReg = as_Register($tmp$$reg);
3379     Register scrReg = as_Register($scr$$reg);
3380     MacroAssembler masm(&cbuf);
3381 
3382     // Verify uniqueness of register assignments -- necessary but not sufficient
3383     assert (objReg != boxReg && objReg != tmpReg &&
3384             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3385 
3386     if (_counters != NULL) {
3387       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3388     }
3389     if (EmitSync & 1) { // variant: only a store to the box and a compare of rsp with NULL_WORD
3390         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3391         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3392         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
3393     } else
3394     if (EmitSync & 2) { // variant: no IsInflated branch in this path
3395         Label DONE_LABEL;
3396         if (UseBiasedLocking) {
3397            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3398           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3399         }
3400         // QQQ was movl...
3401         masm.movptr(tmpReg, 0x1);
3402         masm.orptr(tmpReg, Address(objReg, 0));
3403         masm.movptr(Address(boxReg, 0), tmpReg);
3404         if (os::is_MP()) {
3405           masm.lock();
3406         }
3407         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3408         masm.jcc(Assembler::equal, DONE_LABEL);
3409 
3410         // Recursive locking
3411         masm.subptr(tmpReg, rsp);
3412         masm.andptr(tmpReg, 7 - os::vm_page_size());
3413         masm.movptr(Address(boxReg, 0), tmpReg);
3414 
3415         masm.bind(DONE_LABEL);
3416         masm.nop(); // avoid branch to branch
3417     } else { // default: bit 0x02 of the word at [obj] selects the IsInflated path
3418         Label DONE_LABEL, IsInflated, Egress;
3419 
3420         masm.movptr(tmpReg, Address(objReg, 0)) ;
3421         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3422         masm.jcc   (Assembler::notZero, IsInflated) ;
3423 
3424         // it's stack-locked, biased or neutral
3425         // TODO: optimize markword triage order to reduce the number of
3426         // conditional branches in the most common cases.
3427         // Beware -- there's a subtle invariant that fetch of the markword
3428         // at [FETCH], below, will never observe a biased encoding (*101b).
3429         // If this invariant is not held we'll suffer exclusion (safety) failure.
3430 
3431         if (UseBiasedLocking && !UseOptoBiasInlining) {
3432           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3433           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3434         }
3435 
3436         // was q will it destroy high?
3437         masm.orl   (tmpReg, 1) ;
3438         masm.movptr(Address(boxReg, 0), tmpReg) ;
3439         if (os::is_MP()) { masm.lock(); }
3440         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3441         if (_counters != NULL) {
3442            masm.cond_inc32(Assembler::equal,
3443                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3444         }
3445         masm.jcc   (Assembler::equal, DONE_LABEL);
3446 
3447         // Recursive locking
3448         masm.subptr(tmpReg, rsp);
3449         masm.andptr(tmpReg, 7 - os::vm_page_size());
3450         masm.movptr(Address(boxReg, 0), tmpReg);
3451         if (_counters != NULL) {
3452            masm.cond_inc32(Assembler::equal,
3453                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3454         }
3455         masm.jmp   (DONE_LABEL) ;
3456 
3457         masm.bind  (IsInflated) ;
3458         // It's inflated
3459 
3460         // TODO: someday avoid the ST-before-CAS penalty by
3461         // relocating (deferring) the following ST.
3462         // We should also think about trying a CAS without having
3463         // fetched _owner.  If the CAS is successful we may
3464         // avoid an RTO->RTS upgrade on the $line.
3465         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3466         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3467 
3468         masm.mov    (boxReg, tmpReg) ; // box now holds the word loaded from [obj]; used as the CAS base below
3469         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // the -2 presumably cancels the 0x02 bit tested above -- TODO confirm
3470         masm.testptr(tmpReg, tmpReg) ;
3471         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
3472 
3473         // It's inflated and appears unlocked
3474         if (os::is_MP()) { masm.lock(); }
3475         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3476         // Intentional fall-through into DONE_LABEL ...
3477 
3478         masm.bind  (DONE_LABEL) ;
3479         masm.nop   () ;                 // avoid jmp to jmp
3480     }
3481   %}
3482 
3483   // obj: object to unlock
3484   // box: box address (displaced header location), killed
3485   // RBX: killed tmp; cannot be obj nor box
       // NOTE(review): tmp is declared rRegP below, so nothing in this signature
       // pins it to RBX; presumably the instruct using this encoding does - confirm.
       //
       // Emits the inline monitor-exit sequence.  The outcome is left in the
       // condition codes: per the LGoSlowPath/LSuccess comments in the body,
       // ZF=1 reports success and ZF=0 reports that the slow path is required.
       // EmitSync selects one of three code shapes:
       //   EmitSync & 4 : a single cmpptr(rsp, 0)
       //   EmitSync & 8 : displaced-header CAS only, no inflated-monitor path
       //   otherwise    : full version (recursive, stack-locked and inflated cases)
3486   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3487   %{
3488
3489     Register objReg = as_Register($obj$$reg);
3490     Register boxReg = as_Register($box$$reg);
3491     Register tmpReg = as_Register($tmp$$reg);
3492     MacroAssembler masm(&cbuf);
3493
3494     if (EmitSync & 4) {
            // Presumably forces ZF=0 (rsp is never 0) so that the slow path is
            // always taken - confirm against the consumer of this encoding.
3495        masm.cmpptr(rsp, 0) ;
3496     } else
3497     if (EmitSync & 8) {
3498        Label DONE_LABEL;
3499        if (UseBiasedLocking) {
3500          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3501        }
3502
3503        // Check whether the displaced header is 0
3504        //(=> recursive unlock)
3505        masm.movptr(tmpReg, Address(boxReg, 0));
3506        masm.testptr(tmpReg, tmpReg);
3507        masm.jcc(Assembler::zero, DONE_LABEL);
3508
3509        // If not recursive lock, reset the header to displaced header
3510        if (os::is_MP()) {
3511          masm.lock();
3512        }
3513        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3514        masm.bind(DONE_LABEL);
3515        masm.nop(); // avoid branch to branch
3516     } else {
3517        Label DONE_LABEL, Stacked, CheckSucc ;
3518
            // Unlike the EmitSync & 8 variant, the biased-locking exit is only
            // emitted here when UseOptoBiasInlining is off.
3519        if (UseBiasedLocking && !UseOptoBiasInlining) {
3520          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3521        }
3522
            // tmpReg = word at offset 0 of the object (its header word).
3523        masm.movptr(tmpReg, Address(objReg, 0)) ;
            // A zero displaced header in the box means a recursive stack lock:
            // nothing to undo, leave with ZF=1 from the compare.
3524        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
3525        masm.jcc   (Assembler::zero, DONE_LABEL) ;
            // Bit 0x02 clear in the header word => not inflated, take the
            // stack-lock path.
3526        masm.testl (tmpReg, 0x02) ;
3527        masm.jcc   (Assembler::zero, Stacked) ;
3528
3529        // It's inflated
            // tmpReg still holds the header word.  Every ObjectMonitor offset
            // below is biased by -2, presumably to cancel the 0x02 tag tested
            // above so that tmpReg can be used as the monitor address - confirm.
            // boxReg becomes (owner ^ this thread) | recursions; a non-zero
            // result (not the owner, or a recursive enter) exits with ZF=0.
3530        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3531        masm.xorptr(boxReg, r15_thread) ;
3532        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3533        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
            // boxReg = cxq | EntryList; non-zero means at least one of the two
            // fields is set, handled at CheckSucc.
3534        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3535        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3536        masm.jcc   (Assembler::notZero, CheckSucc) ;
            // Both fields zero: clear the owner field.  The store does not
            // modify flags, so DONE_LABEL sees ZF=1 from the orptr above.
3537        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3538        masm.jmp   (DONE_LABEL) ;
3539
3540        if ((EmitSync & 65536) == 0) {
3541          Label LSuccess, LGoSlowPath ;
3542          masm.bind  (CheckSucc) ;
              // succ field is NULL => go to the slow path.
3543          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3544          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3545
3546          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3547          // explicit ST;MEMBAR combination, but masm doesn't currently support
3548          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3549          // are all faster when the write buffer is populated.
3550          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3551          if (os::is_MP()) {
3552             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3553          }
              // Re-check succ after the fence: still non-NULL => success.
3554          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3555          masm.jcc   (Assembler::notZero, LSuccess) ;
3556
              // succ became NULL: try to CAS the owner field from NULL (RAX)
              // back to this thread.  If the CAS fails (notEqual) the owner
              // field was no longer NULL, which is treated as success;
              // if it succeeds we own the monitor again and fall into the
              // slow path.
3557          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX
3558          if (os::is_MP()) { masm.lock(); }
3559          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3560          masm.jcc   (Assembler::notEqual, LSuccess) ;
3561          // Intentional fall-through into slow-path
3562
3563          masm.bind  (LGoSlowPath) ;
3564          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3565          masm.jmp   (DONE_LABEL) ;
3566
3567          masm.bind  (LSuccess) ;
3568          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3569          masm.jmp   (DONE_LABEL) ;
3570        }
3571
            // Stack-locked: reload the displaced header from the box and CAS it
            // into the object header, with RAX (box) as the expected value.
            // The cmpxchg result in ZF is the outcome seen at DONE_LABEL.
3572        masm.bind  (Stacked) ;
3573        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3574        if (os::is_MP()) { masm.lock(); }
3575        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3576
            // With EmitSync & 65536 the succ-checking block above is not emitted,
            // so CheckSucc is bound here and that branch arrives with ZF=0 from
            // the orptr that preceded it.
3577        if (EmitSync & 65536) {
3578           masm.bind (CheckSucc) ;
3579        }
3580        masm.bind(DONE_LABEL);
3581        if (EmitSync & 32768) {
3582           masm.nop();                      // avoid branch to branch
3583        }
3584     }
3585   %}
3586 
3587 
3588   enc_class enc_rethrow()
3589   %{
         // Emits a 5-byte jump (opcode 0xE9 + 32-bit displacement) to
         // OptoRuntime::rethrow_stub(), recorded as a runtime-call relocation.
         // insts_end() is read after the opcode byte has been emitted, so the
         // extra "- 4" makes the displacement relative to the end of the
         // 4-byte displacement field.
3590     cbuf.set_insts_mark();
3591     emit_opcode(cbuf, 0xE9); // jmp entry
3592     emit_d32_reloc(cbuf,
3593                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3594                    runtime_call_Relocation::spec(),
3595                    RELOC_DISP32);
3596   %}
3597 
3598   enc_class absF_encoding(regF dst)
3599   %{
         // Emits [REX.R] 0F 54 /r with a disp32 memory operand (mod=00, rm=101)
         // that refers to StubRoutines::x86::float_sign_mask().  0F 54 is the
         // ANDPS opcode, i.e. dst &= sign mask (presumably the mask clears the
         // sign bit - the mask contents are not visible here).
3600     int dstenc = $dst$$reg;
3601     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3602
3603     cbuf.set_insts_mark();
         // Registers 8..15 need REX.R; after that only the low 3 bits go in ModRM.
3604     if (dstenc >= 8) {
3605       emit_opcode(cbuf, Assembler::REX_R);
3606       dstenc -= 8;
3607     }
3608     // XXX reg_mem doesn't support RIP-relative addressing yet
3609     emit_opcode(cbuf, 0x0F);
3610     emit_opcode(cbuf, 0x54);
3611     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3612     emit_d32_reloc(cbuf, signmask_address);
3613   %}
3614 
3615   enc_class absD_encoding(regD dst)
3616   %{
         // Emits 66 [REX.R] 0F 54 /r with a disp32 memory operand (mod=00,
         // rm=101) that refers to StubRoutines::x86::double_sign_mask().
         // 66 0F 54 is the ANDPD opcode, i.e. dst &= sign mask.
3617     int dstenc = $dst$$reg;
3618     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3619
3620     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted first; the optional REX byte follows it,
         // directly in front of the 0F opcode escape.
3621     emit_opcode(cbuf, 0x66);
3622     if (dstenc >= 8) {
3623       emit_opcode(cbuf, Assembler::REX_R);
3624       dstenc -= 8;
3625     }
3626     // XXX reg_mem doesn't support RIP-relative addressing yet
3627     emit_opcode(cbuf, 0x0F);
3628     emit_opcode(cbuf, 0x54);
3629     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3630     emit_d32_reloc(cbuf, signmask_address);
3631   %}
3632 
3633   enc_class negF_encoding(regF dst)
3634   %{
         // Emits [REX.R] 0F 57 /r with a disp32 memory operand (mod=00, rm=101)
         // that refers to StubRoutines::x86::float_sign_flip().  0F 57 is the
         // XORPS opcode, i.e. dst ^= sign-flip constant.
3635     int dstenc = $dst$$reg;
3636     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3637
3638     cbuf.set_insts_mark();
         // Registers 8..15 need REX.R; after that only the low 3 bits go in ModRM.
3639     if (dstenc >= 8) {
3640       emit_opcode(cbuf, Assembler::REX_R);
3641       dstenc -= 8;
3642     }
3643     // XXX reg_mem doesn't support RIP-relative addressing yet
3644     emit_opcode(cbuf, 0x0F);
3645     emit_opcode(cbuf, 0x57);
3646     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3647     emit_d32_reloc(cbuf, signflip_address);
3648   %}
3649 
3650   enc_class negD_encoding(regD dst)
3651   %{
         // Emits 66 [REX.R] 0F 57 /r with a disp32 memory operand (mod=00,
         // rm=101) that refers to StubRoutines::x86::double_sign_flip().
         // 66 0F 57 is the XORPD opcode, i.e. dst ^= sign-flip constant.
3652     int dstenc = $dst$$reg;
3653     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3654
3655     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted first; the optional REX byte follows it,
         // directly in front of the 0F opcode escape.
3656     emit_opcode(cbuf, 0x66);
3657     if (dstenc >= 8) {
3658       emit_opcode(cbuf, Assembler::REX_R);
3659       dstenc -= 8;
3660     }
3661     // XXX reg_mem doesn't support RIP-relative addressing yet
3662     emit_opcode(cbuf, 0x0F);
3663     emit_opcode(cbuf, 0x57);
3664     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3665     emit_d32_reloc(cbuf, signflip_address);
3666   %}
3667 
3668   enc_class f2i_fixup(rRegI dst, regF src)
3669   %{
         // Emitted after a float->int conversion has left its result in $dst.
         // If $dst != 0x80000000 nothing else executes.  Otherwise $src is
         // stored in a fresh 8-byte stack slot, StubRoutines::x86::f2i_fixup()
         // is called, and the slot is popped into $dst.  (0x80000000 is
         // presumably the value the conversion yields for NaN/out-of-range
         // inputs, and the stub presumably rewrites the slot - confirm.)
3670     int dstenc = $dst$$reg;
3671     int srcenc = $src$$reg;
3672
3673     // cmpl $dst, #0x80000000
3674     if (dstenc >= 8) {
3675       emit_opcode(cbuf, Assembler::REX_B);
3676     }
3677     emit_opcode(cbuf, 0x81);
3678     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3679     emit_d32(cbuf, 0x80000000);
3680
3681     // jne,s done
         // The rel8 operand is the number of bytes emitted below:
         //   subq (4) + movss (5) + call (5) + popq (1) = 0xF,
         // plus one for the movss REX byte when srcenc >= 8 and one for the
         // popq REX byte when dstenc >= 8.  Keep in sync with the code below.
3682     emit_opcode(cbuf, 0x75);
3683     if (srcenc < 8 && dstenc < 8) {
3684       emit_d8(cbuf, 0xF);
3685     } else if (srcenc >= 8 && dstenc >= 8) {
3686       emit_d8(cbuf, 0x11);
3687     } else {
3688       emit_d8(cbuf, 0x10);
3689     }
3690
3691     // subq rsp, #8
3692     emit_opcode(cbuf, Assembler::REX_W);
3693     emit_opcode(cbuf, 0x83);
3694     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3695     emit_d8(cbuf, 8);
3696
3697     // movss [rsp], $src
         // The F3 prefix comes first; the optional REX byte sits directly in
         // front of the 0F opcode escape.
3698     emit_opcode(cbuf, 0xF3);
3699     if (srcenc >= 8) {
3700       emit_opcode(cbuf, Assembler::REX_R);
3701     }
3702     emit_opcode(cbuf, 0x0F);
3703     emit_opcode(cbuf, 0x11);
3704     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3705
3706     // call f2i_fixup
         // insts_end() is read after the E8 byte was emitted; "- 4" accounts
         // for the displacement field itself.
3707     cbuf.set_insts_mark();
3708     emit_opcode(cbuf, 0xE8);
3709     emit_d32_reloc(cbuf,
3710                    (int)
3711                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3712                    runtime_call_Relocation::spec(),
3713                    RELOC_DISP32);
3714
3715     // popq $dst
3716     if (dstenc >= 8) {
3717       emit_opcode(cbuf, Assembler::REX_B);
3718     }
3719     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3720
3721     // done:
3722   %}
3723 
3724   enc_class f2l_fixup(rRegL dst, regF src)
3725   %{
         // Emitted after a float->long conversion has left its result in $dst.
         // $dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip() (0x8000000000000000 according
         // to the comment below).  On a mismatch nothing else executes;
         // otherwise $src is stored in a fresh 8-byte stack slot,
         // StubRoutines::x86::f2l_fixup() is called, and the slot is popped
         // into $dst.  (The stub presumably rewrites the slot - confirm.)
3726     int dstenc = $dst$$reg;
3727     int srcenc = $src$$reg;
3728     address const_address = (address) StubRoutines::x86::double_sign_flip();
3729
3730     // cmpq $dst, [0x8000000000000000]
         // $dst goes in the ModRM reg field, hence REX.R (not REX.B) for
         // registers 8..15.
3731     cbuf.set_insts_mark();
3732     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3733     emit_opcode(cbuf, 0x39);
3734     // XXX reg_mem doesn't support RIP-relative addressing yet
3735     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3736     emit_d32_reloc(cbuf, const_address);
3737
3738
3739     // jne,s done
         // The rel8 operand is the number of bytes emitted below:
         //   subq (4) + movss (5) + call (5) + popq (1) = 0xF,
         // plus one for the movss REX byte when srcenc >= 8 and one for the
         // popq REX byte when dstenc >= 8.  Keep in sync with the code below.
3740     emit_opcode(cbuf, 0x75);
3741     if (srcenc < 8 && dstenc < 8) {
3742       emit_d8(cbuf, 0xF);
3743     } else if (srcenc >= 8 && dstenc >= 8) {
3744       emit_d8(cbuf, 0x11);
3745     } else {
3746       emit_d8(cbuf, 0x10);
3747     }
3748
3749     // subq rsp, #8
3750     emit_opcode(cbuf, Assembler::REX_W);
3751     emit_opcode(cbuf, 0x83);
3752     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3753     emit_d8(cbuf, 8);
3754
3755     // movss [rsp], $src
3756     emit_opcode(cbuf, 0xF3);
3757     if (srcenc >= 8) {
3758       emit_opcode(cbuf, Assembler::REX_R);
3759     }
3760     emit_opcode(cbuf, 0x0F);
3761     emit_opcode(cbuf, 0x11);
3762     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3763
3764     // call f2l_fixup
         // insts_end() is read after the E8 byte was emitted; "- 4" accounts
         // for the displacement field itself.
3765     cbuf.set_insts_mark();
3766     emit_opcode(cbuf, 0xE8);
3767     emit_d32_reloc(cbuf,
3768                    (int)
3769                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3770                    runtime_call_Relocation::spec(),
3771                    RELOC_DISP32);
3772
3773     // popq $dst
3774     if (dstenc >= 8) {
3775       emit_opcode(cbuf, Assembler::REX_B);
3776     }
3777     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3778
3779     // done:
3780   %}
3781 
3782   enc_class d2i_fixup(rRegI dst, regD src)
3783   %{
         // Emitted after a double->int conversion has left its result in $dst.
         // If $dst != 0x80000000 nothing else executes.  Otherwise $src is
         // stored in a fresh 8-byte stack slot, StubRoutines::x86::d2i_fixup()
         // is called, and the slot is popped into $dst.  (0x80000000 is
         // presumably the value the conversion yields for NaN/out-of-range
         // inputs, and the stub presumably rewrites the slot - confirm.)
3784     int dstenc = $dst$$reg;
3785     int srcenc = $src$$reg;
3786
3787     // cmpl $dst, #0x80000000
3788     if (dstenc >= 8) {
3789       emit_opcode(cbuf, Assembler::REX_B);
3790     }
3791     emit_opcode(cbuf, 0x81);
3792     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3793     emit_d32(cbuf, 0x80000000);
3794
3795     // jne,s done
         // The rel8 operand is the number of bytes emitted below:
         //   subq (4) + movsd (5) + call (5) + popq (1) = 0xF,
         // plus one for the movsd REX byte when srcenc >= 8 and one for the
         // popq REX byte when dstenc >= 8.  Keep in sync with the code below.
3796     emit_opcode(cbuf, 0x75);
3797     if (srcenc < 8 && dstenc < 8) {
3798       emit_d8(cbuf, 0xF);
3799     } else if (srcenc >= 8 && dstenc >= 8) {
3800       emit_d8(cbuf, 0x11);
3801     } else {
3802       emit_d8(cbuf, 0x10);
3803     }
3804
3805     // subq rsp, #8
3806     emit_opcode(cbuf, Assembler::REX_W);
3807     emit_opcode(cbuf, 0x83);
3808     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3809     emit_d8(cbuf, 8);
3810
3811     // movsd [rsp], $src
         // The F2 prefix comes first; the optional REX byte sits directly in
         // front of the 0F opcode escape.
3812     emit_opcode(cbuf, 0xF2);
3813     if (srcenc >= 8) {
3814       emit_opcode(cbuf, Assembler::REX_R);
3815     }
3816     emit_opcode(cbuf, 0x0F);
3817     emit_opcode(cbuf, 0x11);
3818     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3819
3820     // call d2i_fixup
         // insts_end() is read after the E8 byte was emitted; "- 4" accounts
         // for the displacement field itself.
3821     cbuf.set_insts_mark();
3822     emit_opcode(cbuf, 0xE8);
3823     emit_d32_reloc(cbuf,
3824                    (int)
3825                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3826                    runtime_call_Relocation::spec(),
3827                    RELOC_DISP32);
3828
3829     // popq $dst
3830     if (dstenc >= 8) {
3831       emit_opcode(cbuf, Assembler::REX_B);
3832     }
3833     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3834
3835     // done:
3836   %}
3837 
3838   enc_class d2l_fixup(rRegL dst, regD src)
3839   %{
         // Emitted after a double->long conversion has left its result in $dst.
         // $dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip() (0x8000000000000000 according
         // to the comment below).  On a mismatch nothing else executes;
         // otherwise $src is stored in a fresh 8-byte stack slot,
         // StubRoutines::x86::d2l_fixup() is called, and the slot is popped
         // into $dst.  (The stub presumably rewrites the slot - confirm.)
3840     int dstenc = $dst$$reg;
3841     int srcenc = $src$$reg;
3842     address const_address = (address) StubRoutines::x86::double_sign_flip();
3843
3844     // cmpq $dst, [0x8000000000000000]
         // $dst goes in the ModRM reg field, hence REX.R (not REX.B) for
         // registers 8..15.
3845     cbuf.set_insts_mark();
3846     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3847     emit_opcode(cbuf, 0x39);
3848     // XXX reg_mem doesn't support RIP-relative addressing yet
3849     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3850     emit_d32_reloc(cbuf, const_address);
3851
3852
3853     // jne,s done
         // The rel8 operand is the number of bytes emitted below:
         //   subq (4) + movsd (5) + call (5) + popq (1) = 0xF,
         // plus one for the movsd REX byte when srcenc >= 8 and one for the
         // popq REX byte when dstenc >= 8.  Keep in sync with the code below.
3854     emit_opcode(cbuf, 0x75);
3855     if (srcenc < 8 && dstenc < 8) {
3856       emit_d8(cbuf, 0xF);
3857     } else if (srcenc >= 8 && dstenc >= 8) {
3858       emit_d8(cbuf, 0x11);
3859     } else {
3860       emit_d8(cbuf, 0x10);
3861     }
3862
3863     // subq rsp, #8
3864     emit_opcode(cbuf, Assembler::REX_W);
3865     emit_opcode(cbuf, 0x83);
3866     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3867     emit_d8(cbuf, 8);
3868
3869     // movsd [rsp], $src
3870     emit_opcode(cbuf, 0xF2);
3871     if (srcenc >= 8) {
3872       emit_opcode(cbuf, Assembler::REX_R);
3873     }
3874     emit_opcode(cbuf, 0x0F);
3875     emit_opcode(cbuf, 0x11);
3876     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3877
3878     // call d2l_fixup
         // insts_end() is read after the E8 byte was emitted; "- 4" accounts
         // for the displacement field itself.
3879     cbuf.set_insts_mark();
3880     emit_opcode(cbuf, 0xE8);
3881     emit_d32_reloc(cbuf,
3882                    (int)
3883                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3884                    runtime_call_Relocation::spec(),
3885                    RELOC_DISP32);
3886
3887     // popq $dst
3888     if (dstenc >= 8) {
3889       emit_opcode(cbuf, Assembler::REX_B);
3890     }
3891     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3892
3893     // done:
3894   %}
3895 %}
3896 
3897 
3898 
3899 //----------FRAME--------------------------------------------------------------
3900 // Definition of frame structure and management information.
3901 //
3902 //  S T A C K   L A Y O U T    Allocators stack-slot number
3903 //                             |   (to get allocators register number
3904 //  G  Owned by    |        |  v    add OptoReg::stack0())
3905 //  r   CALLER     |        |
3906 //  o     |        +--------+      pad to even-align allocators stack-slot
3907 //  w     V        |  pad0  |        numbers; owned by CALLER
3908 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3909 //  h     ^        |   in   |  5
3910 //        |        |  args  |  4   Holes in incoming args owned by SELF
3911 //  |     |        |        |  3
3912 //  |     |        +--------+
3913 //  V     |        | old out|      Empty on Intel, window on Sparc
3914 //        |    old |preserve|      Must be even aligned.
3915 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3916 //        |        |   in   |  3   area for Intel ret address
3917 //     Owned by    |preserve|      Empty on Sparc.
3918 //       SELF      +--------+
3919 //        |        |  pad2  |  2   pad to align old SP
3920 //        |        +--------+  1
3921 //        |        | locks  |  0
3922 //        |        +--------+----> OptoReg::stack0(), even aligned
3923 //        |        |  pad1  | 11   pad to align new SP
3924 //        |        +--------+
3925 //        |        |        | 10
3926 //        |        | spills |  9   spills
3927 //        V        |        |  8   (pad0 slot for callee)
3928 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3929 //        ^        |  out   |  7
3930 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3931 //     Owned by    +--------+
3932 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3933 //        |    new |preserve|      Must be even-aligned.
3934 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3935 //        |        |        |
3936 //
3937 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3938 //         known from SELF's arguments and the Java calling convention.
3939 //         Region 6-7 is determined per call site.
3940 // Note 2: If the calling convention leaves holes in the incoming argument
3941 //         area, those holes are owned by SELF.  Holes in the outgoing area
3942 //         are owned by the CALLEE.  Holes should not be necessary in the
3943 //         incoming area, as the Java calling convention is completely under
3944 //         the control of the AD file.  Doubles can be sorted and packed to
3945 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3946 //         varargs C calling conventions.
3947 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3948 //         even aligned with pad0 as needed.
3949 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3950 //         region 6-11 is even aligned; it may be padded out more so that
3951 //         the region from SP to FP meets the minimum stack alignment.
3952 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3953 //         alignment.  Region 11, pad1, may be dynamically extended so that
3954 //         SP meets the minimum alignment.
3955 
3956 frame
3957 %{
       // Frame description: stack direction, registers reserved by the
       // calling convention, slot counts, return-address location, and the
       // hooks that map argument/return types to registers or stack slots.

3958   // What direction does stack grow in (assumed to be same for C & Java)
3959   stack_direction(TOWARDS_LOW);
3960
3961   // These three registers define part of the calling convention
3962   // between compiled code and the interpreter.
       // NOTE(review): only two registers are declared below (inline cache
       // and method oop); the "three" above looks stale - confirm.
3963   inline_cache_reg(RAX);                // Inline Cache Register
3964   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3965                                         // calling interpreter
3966
3967   // Optional: name the operand used by cisc-spilling to access
3968   // [stack_pointer + offset]
3969   cisc_spilling_operand_name(indOffset32);
3970
3971   // Number of stack slots consumed by locking an object
3972   sync_stack_slots(2);
3973
3974   // Compiled code's Frame Pointer
3975   frame_pointer(RSP);
3976
3977   // Interpreter stores its frame pointer in a register which is
3978   // stored to the stack by I2CAdaptors.
3979   // I2CAdaptors convert from interpreted java to compiled java.
3980   interpreter_frame_pointer(RBP);
3981
3982   // Stack alignment requirement
3983   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3984
3985   // Number of stack slots between incoming argument block and the start of
3986   // a new frame.  The PROLOG must add this many slots to the stack.  The
3987   // EPILOG must remove this many slots.  amd64 needs two slots for
3988   // return address.
       // NOTE(review): the expression below yields 4 slots, plus 2 more when
       // VerifyStackAtCalls is set; the text above accounts for only two of
       // them (the rest presumably hold the saved frame pointer - confirm).
3989   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3990
3991   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3992   // for calls to C.  Supports the var-args backing area for register parms.
3993   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3994
3995   // The after-PROLOG location of the return address.  Location of
3996   // return address specifies a type (REG or STACK) and a number
3997   // representing the register number (i.e. - use a register name) or
3998   // stack slot.
3999   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4000   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + fixed slots) is rounded up
       // to a multiple of WordsPerLong * 2 before the 2 is subtracted again.
4001   return_addr(STACK - 2 +
4002               round_to(2 + 2 * VerifyStackAtCalls +
4003                        Compile::current()->fixed_slots(),
4004                        WordsPerLong * 2));
4005
4006   // Body of function which returns an integer array locating
4007   // arguments either in registers or in stack slots.  Passed an array
4008   // of ideal registers called "sig" and a "length" count.  Stack-slot
4009   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4010   // arguments for a CALLEE.  Incoming stack arguments are
4011   // automatically biased by the preserve_stack_slots field above.
4012
4013   calling_convention
4014   %{
4015     // No difference between ingoing/outgoing just pass false
4016     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4017   %}
4018
4019   c_calling_convention
4020   %{
4021     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
4022     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4023   %}
4024
4025   // Location of compiled Java return values.  Same as C for now.
4026   return_value
4027   %{
4028     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4029            "only return normal values");
4030
         // lo[] and hi[] are indexed by ideal_reg and give the two halves of
         // the OptoRegPair returned below.  The first two entries are unused
         // placeholders; OptoReg::Bad in hi[] presumably marks a value that
         // occupies a single slot - confirm.
4031     static const int lo[Op_RegL + 1] = {
4032       0,
4033       0,
4034       RAX_num,  // Op_RegN
4035       RAX_num,  // Op_RegI
4036       RAX_num,  // Op_RegP
4037       XMM0_num, // Op_RegF
4038       XMM0_num, // Op_RegD
4039       RAX_num   // Op_RegL
4040     };
4041     static const int hi[Op_RegL + 1] = {
4042       0,
4043       0,
4044       OptoReg::Bad, // Op_RegN
4045       OptoReg::Bad, // Op_RegI
4046       RAX_H_num,    // Op_RegP
4047       OptoReg::Bad, // Op_RegF
4048       XMM0_H_num,   // Op_RegD
4049       RAX_H_num     // Op_RegL
4050     };
         // Guards against a new ideal register type being added without
         // extending the tables above.
4051     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4052     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4053   %}
4054 %}
4055 
4056 //----------ATTRIBUTES---------------------------------------------------------
     // Each op_attrib/ins_attrib statement below declares an attribute name
     // together with the value in parentheses (presumably the default used
     // when an operand or instruct does not set the attribute itself -
     // confirm against the ADLC documentation).
4057 //----------Operand Attributes-------------------------------------------------
4058 op_attrib op_cost(0);        // Required cost attribute
4059
4060 //----------Instruction Attributes---------------------------------------------
4061 ins_attrib ins_cost(100);       // Required cost attribute
4062 ins_attrib ins_size(8);         // Required size attribute (in bits)
4063 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4064 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4065                                 // a non-matching short branch variant
4066                                 // of some long branch?
4067 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4068                                 // be a power of 2) specifies the
4069                                 // alignment that some part of the
4070                                 // instruction (not necessarily the
4071                                 // start) requires.  If > 1, a
4072                                 // compute_padding() function must be
4073                                 // provided for the instruction
4074 
4075 //----------OPERANDS-----------------------------------------------------------
4076 // Operand definitions must precede instruction definitions for correct parsing
4077 // in the ADLC because operands constitute user defined types which are used in
4078 // instruction definitions.
4079 
4080 //----------Simple Operands----------------------------------------------------
4081 // Immediate Operands
4082 // Integer Immediate
     // Matches any ConI node (no predicate); op_cost 10.
4083 operand immI()
4084 %{
4085   match(ConI);
4086
4087   op_cost(10);
4088   format %{ %}
4089   interface(CONST_INTER);
4090 %}
4091 
4092 // Constant for test vs zero
     // Matches a ConI node only when its value is 0; op_cost 0.
4093 operand immI0()
4094 %{
4095   predicate(n->get_int() == 0);
4096   match(ConI);
4097
4098   op_cost(0);
4099   format %{ %}
4100   interface(CONST_INTER);
4101 %}
4102 
4103 // Constant for increment
     // Matches a ConI node only when its value is 1; op_cost 0.
4104 operand immI1()
4105 %{
4106   predicate(n->get_int() == 1);
4107   match(ConI);
4108
4109   op_cost(0);
4110   format %{ %}
4111   interface(CONST_INTER);
4112 %}
4113 
4114 // Constant for decrement
     // Matches a ConI node only when its value is -1; op_cost 0.
4115 operand immI_M1()
4116 %{
4117   predicate(n->get_int() == -1);
4118   match(ConI);
4119
4120   op_cost(0);
4121   format %{ %}
4122   interface(CONST_INTER);
4123 %}
4124 
4125 // Valid scale values for addressing modes
     // Matches a ConI node whose value is in 0..3 inclusive; no op_cost is
     // set in this operand.
4126 operand immI2()
4127 %{
4128   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4129   match(ConI);
4130
4131   format %{ %}
4132   interface(CONST_INTER);
4133 %}
4134 
4135 operand immI8()
4136 %{
       // Integer immediate in the signed 8-bit range: -0x80 <= value < 0x80.
4137   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4138   match(ConI);
4139
4140   op_cost(5);
4141   format %{ %}
4142   interface(CONST_INTER);
4143 %}
4144 
4145 operand immI16()
4146 %{
       // Integer immediate in the signed 16-bit range: -32768..32767.
4147   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4148   match(ConI);
4149
4150   op_cost(10);
4151   format %{ %}
4152   interface(CONST_INTER);
4153 %}
4154 
4155 // Constant for long shifts
     // Matches a ConI node only when its value is 32; op_cost 0.
4156 operand immI_32()
4157 %{
4158   predicate( n->get_int() == 32 );
4159   match(ConI);
4160
4161   op_cost(0);
4162   format %{ %}
4163   interface(CONST_INTER);
4164 %}
4165 
4166 // Constant for long shifts
     // Matches a ConI node only when its value is 64; op_cost 0.
4167 operand immI_64()
4168 %{
4169   predicate( n->get_int() == 64 );
4170   match(ConI);
4171
4172   op_cost(0);
4173   format %{ %}
4174   interface(CONST_INTER);
4175 %}
4176 
4177 // Pointer Immediate
     // Matches any ConP node (no predicate); op_cost 10.
4178 operand immP()
4179 %{
4180   match(ConP);
4181
4182   op_cost(10);
4183   format %{ %}
4184   interface(CONST_INTER);
4185 %}
4186 
4187 // NULL Pointer Immediate
     // Matches a ConP node only when get_ptr() is 0; op_cost 5.
4188 operand immP0()
4189 %{
4190   predicate(n->get_ptr() == 0);
4191   match(ConP);
4192
4193   op_cost(5);
4194   format %{ %}
4195   interface(CONST_INTER);
4196 %}
4197 
4198 operand immP_poll() %{
       // Pointer immediate that is non-NULL and equal to the address returned
       // by os::get_polling_page(); no op_cost is set in this operand.
4199   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
4200   match(ConP);
4201
4202   // formats are generated automatically for constants and base registers
4203   format %{ %}
4204   interface(CONST_INTER);
4205 %}
4206 
4207 // Narrow Pointer Immediate
     // Matches any ConN node (no predicate); op_cost 10.
4208 operand immN() %{
4209   match(ConN);
4210
4211   op_cost(10);
4212   format %{ %}
4213   interface(CONST_INTER);
4214 %}
4215 
4216 // NULL Narrow Pointer Immediate
     // Matches a ConN node only when get_narrowcon() is 0; op_cost 5.
4217 operand immN0() %{
4218   predicate(n->get_narrowcon() == 0);
4219   match(ConN);
4220
4221   op_cost(5);
4222   format %{ %}
4223   interface(CONST_INTER);
4224 %}
4225 
4226 operand immP31()
4227 %{
       // Pointer immediate that is not an oop pointer and whose value has no
       // bits set at or above bit 31 (value >> 31 == 0), i.e. it fits in 31
       // bits; op_cost 5.
4228   predicate(!n->as_Type()->type()->isa_oopptr()
4229             && (n->get_ptr() >> 31) == 0);
4230   match(ConP);
4231
4232   op_cost(5);
4233   format %{ %}
4234   interface(CONST_INTER);
4235 %}
4236 
4237 
4238 // Long Immediate
     // Matches any ConL node (no predicate); op_cost 20.
4239 operand immL()
4240 %{
4241   match(ConL);
4242
4243   op_cost(20);
4244   format %{ %}
4245   interface(CONST_INTER);
4246 %}
4247 
4248 // Long Immediate 8-bit
     // Matches a ConL node whose value is in the signed 8-bit range
     // (-0x80 <= value < 0x80); op_cost 5.
4249 operand immL8()
4250 %{
4251   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4252   match(ConL);
4253
4254   op_cost(5);
4255   format %{ %}
4256   interface(CONST_INTER);
4257 %}
4258 
4259 // Long Immediate 32-bit unsigned
     // Matches a ConL node whose value survives a round trip through
     // unsigned int, i.e. the upper 32 bits are zero; op_cost 10.
4260 operand immUL32()
4261 %{
4262   predicate(n->get_long() == (unsigned int) (n->get_long()));
4263   match(ConL);
4264
4265   op_cost(10);
4266   format %{ %}
4267   interface(CONST_INTER);
4268 %}
4269 
4270 // Long Immediate 32-bit signed
     // Matches a ConL node whose value survives a round trip through int,
     // i.e. it is representable as a sign-extended 32-bit value; op_cost 15.
4271 operand immL32()
4272 %{
4273   predicate(n->get_long() == (int) (n->get_long()));
4274   match(ConL);
4275
4276   op_cost(15);
4277   format %{ %}
4278   interface(CONST_INTER);
4279 %}
4280 
4281 // Long Immediate zero
     // Matches a ConL node only when its value is 0; op_cost 10.
4282 operand immL0()
4283 %{
4284   predicate(n->get_long() == 0L);
4285   match(ConL);
4286
4287   op_cost(10);
4288   format %{ %}
4289   interface(CONST_INTER);
4290 %}
4291 
4292 // Constant for increment
     // Matches a ConL node only when its value is 1; no op_cost is set in
     // this operand.
4293 operand immL1()
4294 %{
4295   predicate(n->get_long() == 1);
4296   match(ConL);
4297
4298   format %{ %}
4299   interface(CONST_INTER);
4300 %}
4301 
4302 // Constant for decrement
     // Matches a ConL node only when its value is -1; no op_cost is set in
     // this operand.
4303 operand immL_M1()
4304 %{
4305   predicate(n->get_long() == -1);
4306   match(ConL);
4307
4308   format %{ %}
4309   interface(CONST_INTER);
4310 %}
4311 
4312 // Long Immediate: the value 10
     // Matches a ConL node only when its value is 10; no op_cost is set in
     // this operand.
4313 operand immL10()
4314 %{
4315   predicate(n->get_long() == 10);
4316   match(ConL);
4317
4318   format %{ %}
4319   interface(CONST_INTER);
4320 %}
4321 
4322 // Long immediate from 0 to 127.
4323 // Used for a shorter form of long mul by 10.
     // Matches a ConL node with 0 <= value < 0x80; op_cost 10.
4324 operand immL_127()
4325 %{
4326   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4327   match(ConL);
4328
4329   op_cost(10);
4330   format %{ %}
4331   interface(CONST_INTER);
4332 %}
4333 
4334 // Long Immediate: low 32-bit mask
     // Matches a ConL node only when its value is exactly 0xFFFFFFFF;
     // op_cost 20.
4335 operand immL_32bits()
4336 %{
4337   predicate(n->get_long() == 0xFFFFFFFFL);
4338   match(ConL);
4339   op_cost(20);
4340
4341   format %{ %}
4342   interface(CONST_INTER);
4343 %}
4344 
4345 // Float Immediate zero
     // Matches a ConF node when jint_cast() of its value is 0.  jint_cast is
     // presumably a bit-pattern reinterpretation, in which case only +0.0f
     // matches and -0.0f does not - confirm.  op_cost 5.
4346 operand immF0()
4347 %{
4348   predicate(jint_cast(n->getf()) == 0);
4349   match(ConF);
4350
4351   op_cost(5);
4352   format %{ %}
4353   interface(CONST_INTER);
4354 %}
4355 
4356 // Float Immediate
     // Matches any ConF node (no predicate); op_cost 15.
4357 operand immF()
4358 %{
4359   match(ConF);
4360
4361   op_cost(15);
4362   format %{ %}
4363   interface(CONST_INTER);
4364 %}
4365 
4366 // Double Immediate zero
     // Matches a ConD node when jlong_cast() of its value is 0.  jlong_cast
     // is presumably a bit-pattern reinterpretation, in which case only +0.0
     // matches and -0.0 does not - confirm.  op_cost 5.
4367 operand immD0()
4368 %{
4369   predicate(jlong_cast(n->getd()) == 0);
4370   match(ConD);
4371
4372   op_cost(5);
4373   format %{ %}
4374   interface(CONST_INTER);
4375 %}
4376 
4377 // Double Immediate
     // Matches any ConD node (no predicate); op_cost 15.
4378 operand immD()
4379 %{
4380   match(ConD);
4381
4382   op_cost(15);
4383   format %{ %}
4384   interface(CONST_INTER);
4385 %}
4386 
4387 // Immediates for special shifts (sign extend)
4388 
4389 // Integer immediate: the value 16
     // Matches a ConI node only when its value is 16; no op_cost is set in
     // this operand.
4390 operand immI_16()
4391 %{
4392   predicate(n->get_int() == 16);
4393   match(ConI);
4394
4395   format %{ %}
4396   interface(CONST_INTER);
4397 %}
4398 
4399 operand immI_24()
4400 %{
       // Integer immediate: matches a ConI node only when its value is 24.
4401   predicate(n->get_int() == 24);
4402   match(ConI);
4403
4404   format %{ %}
4405   interface(CONST_INTER);
4406 %}
4407 
4408 // Constant for byte-wide masking
     // Matches a ConI node only when its value is 255 (0xFF).
4409 operand immI_255()
4410 %{
4411   predicate(n->get_int() == 255);
4412   match(ConI);
4413
4414   format %{ %}
4415   interface(CONST_INTER);
4416 %}
4417 
4418 // Constant for short-wide masking
     // Matches a ConI node only when its value is 65535 (0xFFFF).
4419 operand immI_65535()
4420 %{
4421   predicate(n->get_int() == 65535);
4422   match(ConI);
4423
4424   format %{ %}
4425   interface(CONST_INTER);
4426 %}
4427 
4428 // Constant for byte-wide masking
     // Long variant: matches a ConL node only when its value is 255 (0xFF).
4429 operand immL_255()
4430 %{
4431   predicate(n->get_long() == 255);
4432   match(ConL);
4433
4434   format %{ %}
4435   interface(CONST_INTER);
4436 %}
4437 
4438 // Constant for short-wide masking
     // Long variant: matches a ConL node only when its value is 65535 (0xFFFF).
4439 operand immL_65535()
4440 %{
4441   predicate(n->get_long() == 65535);
4442   match(ConL);
4443
4444   format %{ %}
4445   interface(CONST_INTER);
4446 %}
4447 
4448 // Register Operands
4449 // Integer Register
     // Allocated from register class int_reg.  Besides RegI it also accepts
     // the fixed-register operands rax/rbx/rcx/rdx/rdi_RegI listed below.
4450 operand rRegI()
4451 %{
4452   constraint(ALLOC_IN_RC(int_reg));
4453   match(RegI);
4454
4455   match(rax_RegI);
4456   match(rbx_RegI);
4457   match(rcx_RegI);
4458   match(rdx_RegI);
4459   match(rdi_RegI);
4460
4461   format %{ %}
4462   interface(REG_INTER);
4463 %}
4464 
4465 // Special Registers
     // Integer operand constrained to register class int_rax_reg; also
     // matches rRegI.  Printed as "RAX".
4466 operand rax_RegI()
4467 %{
4468   constraint(ALLOC_IN_RC(int_rax_reg));
4469   match(RegI);
4470   match(rRegI);
4471
4472   format %{ "RAX" %}
4473   interface(REG_INTER);
4474 %}
4475 
4476 // Special Registers
     // Integer operand constrained to register class int_rbx_reg; also
     // matches rRegI.  Printed as "RBX".
4477 operand rbx_RegI()
4478 %{
4479   constraint(ALLOC_IN_RC(int_rbx_reg));
4480   match(RegI);
4481   match(rRegI);
4482
4483   format %{ "RBX" %}
4484   interface(REG_INTER);
4485 %}
4486 
4487 operand rcx_RegI()
4488 %{
       // Integer operand constrained to register class int_rcx_reg; also
       // matches rRegI.  Printed as "RCX".
4489   constraint(ALLOC_IN_RC(int_rcx_reg));
4490   match(RegI);
4491   match(rRegI);
4492
4493   format %{ "RCX" %}
4494   interface(REG_INTER);
4495 %}
4496 
4497 operand rdx_RegI()
4498 %{
       // Integer operand constrained to register class int_rdx_reg; also
       // matches rRegI.  Printed as "RDX".
4499   constraint(ALLOC_IN_RC(int_rdx_reg));
4500   match(RegI);
4501   match(rRegI);
4502
4503   format %{ "RDX" %}
4504   interface(REG_INTER);
4505 %}
4506 
4507 operand rdi_RegI()
4508 %{
4509   constraint(ALLOC_IN_RC(int_rdi_reg));
4510   match(RegI);
4511   match(rRegI);
4512 
4513   format %{ "RDI" %}
4514   interface(REG_INTER);
4515 %}
4516 
     // Int operand allocated in class int_no_rcx_reg.  Its match rules list
     // RegI and rax/rbx/rdx/rdi_RegI, but not rcx_RegI.
4517 operand no_rcx_RegI()
4518 %{
4519   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4520   match(RegI);
4521   match(rax_RegI);
4522   match(rbx_RegI);
4523   match(rdx_RegI);
4524   match(rdi_RegI);
4525 
4526   format %{ %}
4527   interface(REG_INTER);
4528 %}
4529 
     // Int operand allocated in class int_no_rax_rdx_reg.  Its match rules list
     // RegI and rbx/rcx/rdi_RegI, but neither rax_RegI nor rdx_RegI.
4530 operand no_rax_rdx_RegI()
4531 %{
4532   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4533   match(RegI);
4534   match(rbx_RegI);
4535   match(rcx_RegI);
4536   match(rdi_RegI);
4537 
4538   format %{ %}
4539   interface(REG_INTER);
4540 %}
4541 
4542 // Pointer Register
     // Pointer operand allocated in class any_reg.  It matches the ideal RegP,
     // the single-register pointer operands rax/rbx/rdi/rsi/rbp/r15_RegP, and
     // the general rRegP operand.
4543 operand any_RegP()
4544 %{
4545   constraint(ALLOC_IN_RC(any_reg));
4546   match(RegP);
4547   match(rax_RegP);
4548   match(rbx_RegP);
4549   match(rdi_RegP);
4550   match(rsi_RegP);
4551   match(rbp_RegP);
4552   match(r15_RegP);
4553   match(rRegP);
4554 
4555   format %{ %}
4556   interface(REG_INTER);
4557 %}
4558 
     // General pointer register operand, allocated in class ptr_reg.  It matches
     // the ideal RegP and the single-register pointer operands listed below.
4559 operand rRegP()
4560 %{
4561   constraint(ALLOC_IN_RC(ptr_reg));
4562   match(RegP);
4563   match(rax_RegP);
4564   match(rbx_RegP);
4565   match(rdi_RegP);
4566   match(rsi_RegP);
4567   match(rbp_RegP);
4568   match(r15_RegP);  // See Q&A below about r15_RegP.
4569 
4570   format %{ %}
4571   interface(REG_INTER);
4572 %}
4573 
     // RegN (narrow pointer) register operand.  Note that it is allocated in the
     // int register class int_reg, the same class rRegI uses.
4574 operand rRegN() %{
4575   constraint(ALLOC_IN_RC(int_reg));
4576   match(RegN);
4577 
4578   format %{ %}
4579   interface(REG_INTER);
4580 %}
4581 
4582 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4583 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4584 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4585 // The output of an instruction is controlled by the allocator, which respects
4586 // register class masks, not match rules.  Unless an instruction mentions
4587 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4588 // by the allocator as an input.
4589 
     // Pointer operand allocated in class ptr_no_rax_reg.  Its match rules list
     // RegP and rbx/rsi/rdi_RegP.
4590 operand no_rax_RegP()
4591 %{
4592   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4593   match(RegP);
4594   match(rbx_RegP);
4595   match(rsi_RegP);
4596   match(rdi_RegP);
4597 
4598   format %{ %}
4599   interface(REG_INTER);
4600 %}
4601 
     // Pointer operand allocated in class ptr_no_rbp_reg.  Its match rules list
     // RegP and rbx/rsi/rdi_RegP.
4602 operand no_rbp_RegP()
4603 %{
4604   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4605   match(RegP);
4606   match(rbx_RegP);
4607   match(rsi_RegP);
4608   match(rdi_RegP);
4609 
4610   format %{ %}
4611   interface(REG_INTER);
4612 %}
4613 
     // Pointer operand allocated in class ptr_no_rax_rbx_reg.  Its match rules
     // list RegP and rsi/rdi_RegP.
4614 operand no_rax_rbx_RegP()
4615 %{
4616   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4617   match(RegP);
4618   match(rsi_RegP);
4619   match(rdi_RegP);
4620 
4621   format %{ %}
4622   interface(REG_INTER);
4623 %}
4624 
4625 // Special Registers
4626 // Return a pointer value: pointer operand allocated in class ptr_rax_reg.
     // Matches the ideal RegP and the general rRegP operand.
4627 operand rax_RegP()
4628 %{
4629   constraint(ALLOC_IN_RC(ptr_rax_reg));
4630   match(RegP);
4631   match(rRegP);
4632 
4633   format %{ %}
4634   interface(REG_INTER);
4635 %}
4636 
4637 // Special Registers
4638 // Return a compressed pointer value: RegN operand allocated in the int
     // register class int_rax_reg.  Matches the ideal RegN and rRegN.
4639 operand rax_RegN()
4640 %{
4641   constraint(ALLOC_IN_RC(int_rax_reg));
4642   match(RegN);
4643   match(rRegN);
4644 
4645   format %{ %}
4646   interface(REG_INTER);
4647 %}
4648 
4649 // Used in AtomicAdd: pointer operand allocated in class ptr_rbx_reg.
4650 operand rbx_RegP()
4651 %{
4652   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4653   match(RegP);
4654   match(rRegP);
4655 
4656   format %{ %}
4657   interface(REG_INTER);
4658 %}
4659 
     // Pointer operand allocated in class ptr_rsi_reg.
4660 operand rsi_RegP()
4661 %{
4662   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4663   match(RegP);
4664   match(rRegP);
4665 
4666   format %{ %}
4667   interface(REG_INTER);
4668 %}
4669 
4670 // Used in rep stosq: pointer operand allocated in class ptr_rdi_reg.
4671 operand rdi_RegP()
4672 %{
4673   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4674   match(RegP);
4675   match(rRegP);
4676 
4677   format %{ %}
4678   interface(REG_INTER);
4679 %}
4680 
     // Pointer operand allocated in class ptr_rbp_reg.
4681 operand rbp_RegP()
4682 %{
4683   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4684   match(RegP);
4685   match(rRegP);
4686 
4687   format %{ %}
4688   interface(REG_INTER);
4689 %}
4690 
     // Pointer operand allocated in class ptr_r15_reg (r15 is the read-only TLS
     // register).
4691 operand r15_RegP()
4692 %{
4693   constraint(ALLOC_IN_RC(ptr_r15_reg));
4694   match(RegP);
4695   match(rRegP);
4696 
4697   format %{ %}
4698   interface(REG_INTER);
4699 %}
4700 
     // General long register operand, allocated in class long_reg.  Matches the
     // ideal RegL and the single-register long operands rax_RegL and rdx_RegL.
     // NOTE(review): rcx_RegL is not listed here although rcx_RegL itself
     // matches rRegL -- confirm whether the omission is intentional.
4701 operand rRegL()
4702 %{
4703   constraint(ALLOC_IN_RC(long_reg));
4704   match(RegL);
4705   match(rax_RegL);
4706   match(rdx_RegL);
4707 
4708   format %{ %}
4709   interface(REG_INTER);
4710 %}
4711 
4712 // Special Registers
     // Long operand allocated in class long_no_rax_rdx_reg.  Matches the ideal
     // RegL and the general rRegL operand.
4713 operand no_rax_rdx_RegL()
4714 %{
4715   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4716   match(RegL);
4717   match(rRegL);
4718 
4719   format %{ %}
4720   interface(REG_INTER);
4721 %}
4722 
     // Long operand that, going by its name, is meant to exclude only RAX.
     // NOTE(review): the constraint below names long_no_rax_rdx_reg, the same
     // class used by no_rax_rdx_RegL, even though rdx_RegL is listed as a match.
     // Presumably RDX is therefore never allocated for this operand -- confirm
     // whether a separate "no rax" long class was intended.
4723 operand no_rax_RegL()
4724 %{
4725   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4726   match(RegL);
4727   match(rRegL);
4728   match(rdx_RegL);
4729 
4730   format %{ %}
4731   interface(REG_INTER);
4732 %}
4733 
     // Long operand allocated in class long_no_rcx_reg.  Matches the ideal RegL
     // and the general rRegL operand.
4734 operand no_rcx_RegL()
4735 %{
4736   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4737   match(RegL);
4738   match(rRegL);
4739 
4740   format %{ %}
4741   interface(REG_INTER);
4742 %}
4743 
     // Long operand allocated in class long_rax_reg; prints as "RAX".
4744 operand rax_RegL()
4745 %{
4746   constraint(ALLOC_IN_RC(long_rax_reg));
4747   match(RegL);
4748   match(rRegL);
4749 
4750   format %{ "RAX" %}
4751   interface(REG_INTER);
4752 %}
4753 
     // Long operand allocated in class long_rcx_reg (empty format string).
4754 operand rcx_RegL()
4755 %{
4756   constraint(ALLOC_IN_RC(long_rcx_reg));
4757   match(RegL);
4758   match(rRegL);
4759 
4760   format %{ %}
4761   interface(REG_INTER);
4762 %}
4763 
     // Long operand allocated in class long_rdx_reg (empty format string).
4764 operand rdx_RegL()
4765 %{
4766   constraint(ALLOC_IN_RC(long_rdx_reg));
4767   match(RegL);
4768   match(rRegL);
4769 
4770   format %{ %}
4771   interface(REG_INTER);
4772 %}
4773 
4774 // Flags register, used as output of compare instructions.
     // Allocated in class int_flags; prints as "RFLAGS".
4775 operand rFlagsReg()
4776 %{
4777   constraint(ALLOC_IN_RC(int_flags));
4778   match(RegFlags);
4779 
4780   format %{ "RFLAGS" %}
4781   interface(REG_INTER);
4782 %}
4783 
4784 // Flags register, used as output of FLOATING POINT compare instructions.
     // Same register class and match rule as rFlagsReg; only the operand name
     // and the format string ("RFLAGS_U") differ.
4785 operand rFlagsRegU()
4786 %{
4787   constraint(ALLOC_IN_RC(int_flags));
4788   match(RegFlags);
4789 
4790   format %{ "RFLAGS_U" %}
4791   interface(REG_INTER);
4792 %}
4793 
     // Flags register variant that prints as "RFLAGS_U_CF".  Its predicate is the
     // constant false, so the RegFlags match rule below can never succeed.
     // NOTE(review): presumably this operand is only produced by instructions
     // that name it explicitly as their result -- confirm against its users.
4794 operand rFlagsRegUCF() %{
4795   constraint(ALLOC_IN_RC(int_flags));
4796   match(RegFlags);
4797   predicate(false);
4798 
4799   format %{ "RFLAGS_U_CF" %}
4800   interface(REG_INTER);
4801 %}
4802 
4803 // Float register operands: RegF values allocated in class float_reg.
4804 operand regF()
4805 %{
4806   constraint(ALLOC_IN_RC(float_reg));
4807   match(RegF);
4808 
4809   format %{ %}
4810   interface(REG_INTER);
4811 %}
4812 
4813 // Double register operands: RegD values allocated in class double_reg.
4814 operand regD()
4815 %{
4816   constraint(ALLOC_IN_RC(double_reg));
4817   match(RegD);
4818 
4819   format %{ %}
4820   interface(REG_INTER);
4821 %}
4822 
4823 
4824 //----------Memory Operands----------------------------------------------------
4825 // Direct Memory Operand
4826 // operand direct(immP addr)
4827 // %{
4828 //   match(addr);
4829 
4830 //   format %{ "[$addr]" %}
4831 //   interface(MEMORY_INTER) %{
4832 //     base(0xFFFFFFFF);
4833 //     index(0x4);
4834 //     scale(0x0);
4835 //     disp($addr);
4836 //   %}
4837 // %}
4838 
4839 // Indirect Memory Operand: the address is just the base register, [reg].
     // The index field is 0x4 (the value the stack-slot operands in this file
     // label "No Index"); scale and displacement are zero.
4840 operand indirect(any_RegP reg)
4841 %{
4842   constraint(ALLOC_IN_RC(ptr_reg));
4843   match(reg);
4844 
4845   format %{ "[$reg]" %}
4846   interface(MEMORY_INTER) %{
4847     base($reg);
4848     index(0x4);
4849     scale(0x0);
4850     disp(0x0);
4851   %}
4852 %}
4853 
4854 // Indirect Memory Plus Short Offset Operand: [reg + off], where off is an
     // immL8 constant.  No index register and no scaling.
4855 operand indOffset8(any_RegP reg, immL8 off)
4856 %{
4857   constraint(ALLOC_IN_RC(ptr_reg));
4858   match(AddP reg off);
4859 
4860   format %{ "[$reg + $off (8-bit)]" %}
4861   interface(MEMORY_INTER) %{
4862     base($reg);
4863     index(0x4);
4864     scale(0x0);
4865     disp($off);
4866   %}
4867 %}
4868 
4869 // Indirect Memory Plus Long Offset Operand: [reg + off], where off is an
     // immL32 constant.  No index register and no scaling.
4870 operand indOffset32(any_RegP reg, immL32 off)
4871 %{
4872   constraint(ALLOC_IN_RC(ptr_reg));
4873   match(AddP reg off);
4874 
4875   format %{ "[$reg + $off (32-bit)]" %}
4876   interface(MEMORY_INTER) %{
4877     base($reg);
4878     index(0x4);
4879     scale(0x0);
4880     disp($off);
4881   %}
4882 %}
4883 
4884 // Indirect Memory Plus Index Register Plus Offset Operand:
     // [reg + lreg + off], with an unscaled long index and an immL32 offset.
4885 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4886 %{
4887   constraint(ALLOC_IN_RC(ptr_reg));
4888   match(AddP (AddP reg lreg) off);
4889 
4890   op_cost(10);
4891   format %{"[$reg + $off + $lreg]" %}
4892   interface(MEMORY_INTER) %{
4893     base($reg);
4894     index($lreg);
4895     scale(0x0);
4896     disp($off);
4897   %}
4898 %}
4899 
4900 // Indirect Memory Plus Index Register Operand: [reg + lreg], with an
     // unscaled long index and no offset (disp is zero).
4901 operand indIndex(any_RegP reg, rRegL lreg)
4902 %{
4903   constraint(ALLOC_IN_RC(ptr_reg));
4904   match(AddP reg lreg);
4905 
4906   op_cost(10);
4907   format %{"[$reg + $lreg]" %}
4908   interface(MEMORY_INTER) %{
4909     base($reg);
4910     index($lreg);
4911     scale(0x0);
4912     disp(0x0);
4913   %}
4914 %}
4915 
4916 // Indirect Memory Times Scale Plus Index Register:
     // [reg + (lreg << scale)], where scale is an immI2 constant; no offset.
4917 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4918 %{
4919   constraint(ALLOC_IN_RC(ptr_reg));
4920   match(AddP reg (LShiftL lreg scale));
4921 
4922   op_cost(10);
4923   format %{"[$reg + $lreg << $scale]" %}
4924   interface(MEMORY_INTER) %{
4925     base($reg);
4926     index($lreg);
4927     scale($scale);
4928     disp(0x0);
4929   %}
4930 %}
4931 
4932 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
     // [reg + (lreg << scale) + off], with an immI2 scale and an immL32 offset.
4933 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4934 %{
4935   constraint(ALLOC_IN_RC(ptr_reg));
4936   match(AddP (AddP reg (LShiftL lreg scale)) off);
4937 
4938   op_cost(10);
4939   format %{"[$reg + $off + $lreg << $scale]" %}
4940   interface(MEMORY_INTER) %{
4941     base($reg);
4942     index($lreg);
4943     scale($scale);
4944     disp($off);
4945   %}
4946 %}
4947 
4948 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
     // Same address shape as indIndexScaleOffset, but the index is an int
     // register (rRegI) that the ideal graph widens with ConvI2L.  The predicate
     // walks from the outer AddP to that ConvI2L node and accepts the match only
     // when the lower bound (_lo) of its long type is >= 0, i.e. the index is
     // known to be non-negative.
4949 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4950 %{
4951   constraint(ALLOC_IN_RC(ptr_reg));
4952   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4953   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4954 
4955   op_cost(10);
4956   format %{"[$reg + $off + $idx << $scale]" %}
4957   interface(MEMORY_INTER) %{
4958     base($reg);
4959     index($idx);
4960     scale($scale);
4961     disp($off);
4962   %}
4963 %}
4964 
4965 // Indirect Narrow Oop Plus Offset Operand
4966 // Note: the x86 architecture doesn't support "scale * index + offset" without
4967 // a base, so we can't free r12 even with Universe::narrow_oop_base() == NULL.
     // Selected only when UseCompressedOops is set and the narrow oop shift
     // equals Address::times_8.  The DecodeN is absorbed into the addressing
     // mode: base is R12 (encoding 0xc), the narrow register is the index with
     // scale 3, and off is the displacement.
4968 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4969   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
4970   constraint(ALLOC_IN_RC(ptr_reg));
4971   match(AddP (DecodeN reg) off);
4972 
4973   op_cost(10);
4974   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4975   interface(MEMORY_INTER) %{
4976     base(0xc); // R12
4977     index($reg);
4978     scale(0x3);
4979     disp($off);
4980   %}
4981 %}
4982 
4983 // Indirect Memory Operand with a narrow-pointer base: [reg].
     // Selected only when Universe::narrow_oop_shift() == 0.  The operand
     // matches the DecodeN itself and uses the narrow register directly as the
     // base, with no index, scale or displacement.
4984 operand indirectNarrow(rRegN reg)
4985 %{
4986   predicate(Universe::narrow_oop_shift() == 0);
4987   constraint(ALLOC_IN_RC(ptr_reg));
4988   match(DecodeN reg);
4989 
4990   format %{ "[$reg]" %}
4991   interface(MEMORY_INTER) %{
4992     base($reg);
4993     index(0x4);
4994     scale(0x0);
4995     disp(0x0);
4996   %}
4997 %}
4998 
4999 // Indirect Memory Plus Short Offset Operand with a narrow-pointer base:
     // [reg + off], immL8 offset.  Selected only when the narrow oop shift is 0.
5000 operand indOffset8Narrow(rRegN reg, immL8 off)
5001 %{
5002   predicate(Universe::narrow_oop_shift() == 0);
5003   constraint(ALLOC_IN_RC(ptr_reg));
5004   match(AddP (DecodeN reg) off);
5005 
5006   format %{ "[$reg + $off (8-bit)]" %}
5007   interface(MEMORY_INTER) %{
5008     base($reg);
5009     index(0x4);
5010     scale(0x0);
5011     disp($off);
5012   %}
5013 %}
5014 
5015 // Indirect Memory Plus Long Offset Operand with a narrow-pointer base:
     // [reg + off], immL32 offset.  Selected only when the narrow oop shift is 0.
5016 operand indOffset32Narrow(rRegN reg, immL32 off)
5017 %{
5018   predicate(Universe::narrow_oop_shift() == 0);
5019   constraint(ALLOC_IN_RC(ptr_reg));
5020   match(AddP (DecodeN reg) off);
5021 
5022   format %{ "[$reg + $off (32-bit)]" %}
5023   interface(MEMORY_INTER) %{
5024     base($reg);
5025     index(0x4);
5026     scale(0x0);
5027     disp($off);
5028   %}
5029 %}
5030 
5031 // Indirect Memory Plus Index Register Plus Offset Operand with a
     // narrow-pointer base: [reg + lreg + off].  Selected only when the narrow
     // oop shift is 0.
5032 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5033 %{
5034   predicate(Universe::narrow_oop_shift() == 0);
5035   constraint(ALLOC_IN_RC(ptr_reg));
5036   match(AddP (AddP (DecodeN reg) lreg) off);
5037 
5038   op_cost(10);
5039   format %{"[$reg + $off + $lreg]" %}
5040   interface(MEMORY_INTER) %{
5041     base($reg);
5042     index($lreg);
5043     scale(0x0);
5044     disp($off);
5045   %}
5046 %}
5047 
5048 // Indirect Memory Plus Index Register Operand with a narrow-pointer base:
     // [reg + lreg], no offset.  Selected only when the narrow oop shift is 0.
5049 operand indIndexNarrow(rRegN reg, rRegL lreg)
5050 %{
5051   predicate(Universe::narrow_oop_shift() == 0);
5052   constraint(ALLOC_IN_RC(ptr_reg));
5053   match(AddP (DecodeN reg) lreg);
5054 
5055   op_cost(10);
5056   format %{"[$reg + $lreg]" %}
5057   interface(MEMORY_INTER) %{
5058     base($reg);
5059     index($lreg);
5060     scale(0x0);
5061     disp(0x0);
5062   %}
5063 %}
5064 
5065 // Indirect Memory Times Scale Plus Index Register with a narrow-pointer
     // base: [reg + (lreg << scale)].  Selected only when the narrow oop shift
     // is 0.
5066 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5067 %{
5068   predicate(Universe::narrow_oop_shift() == 0);
5069   constraint(ALLOC_IN_RC(ptr_reg));
5070   match(AddP (DecodeN reg) (LShiftL lreg scale));
5071 
5072   op_cost(10);
5073   format %{"[$reg + $lreg << $scale]" %}
5074   interface(MEMORY_INTER) %{
5075     base($reg);
5076     index($lreg);
5077     scale($scale);
5078     disp(0x0);
5079   %}
5080 %}
5081 
5082 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand with a
     // narrow-pointer base: [reg + (lreg << scale) + off].  Selected only when
     // the narrow oop shift is 0.
5083 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5084 %{
5085   predicate(Universe::narrow_oop_shift() == 0);
5086   constraint(ALLOC_IN_RC(ptr_reg));
5087   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5088 
5089   op_cost(10);
5090   format %{"[$reg + $off + $lreg << $scale]" %}
5091   interface(MEMORY_INTER) %{
5092     base($reg);
5093     index($lreg);
5094     scale($scale);
5095     disp($off);
5096   %}
5097 %}
5098 
5099 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
     // with a narrow-pointer base.  The predicate requires both that the narrow
     // oop shift is 0 and that the long type of the ConvI2L node feeding the
     // shift has a lower bound (_lo) >= 0, i.e. the int index is known to be
     // non-negative.
5100 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5101 %{
5102   constraint(ALLOC_IN_RC(ptr_reg));
5103   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5104   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5105 
5106   op_cost(10);
5107   format %{"[$reg + $off + $idx << $scale]" %}
5108   interface(MEMORY_INTER) %{
5109     base($reg);
5110     index($idx);
5111     scale($scale);
5112     disp($off);
5113   %}
5114 %}
5115 
5116 
5117 //----------Special Memory Operands--------------------------------------------
5118 // Stack Slot Operand - This operand is used for loading and storing temporary
5119 //                      values on the stack where a match requires a value to
5120 //                      flow through memory.
     // The five variants below are identical except for the stack-register
     // operand type they wrap (sRegP, sRegI, sRegF, sRegD, sRegL).  Each is
     // allocated in class stack_slots and addresses memory with base 0x4 (RSP),
     // no index, no scale, and the slot's stack offset as displacement.
5121 operand stackSlotP(sRegP reg)
5122 %{
5123   constraint(ALLOC_IN_RC(stack_slots));
5124   // No match rule because this operand is only generated in matching
5125 
5126   format %{ "[$reg]" %}
5127   interface(MEMORY_INTER) %{
5128     base(0x4);   // RSP
5129     index(0x4);  // No Index
5130     scale(0x0);  // No Scale
5131     disp($reg);  // Stack Offset
5132   %}
5133 %}
5134 
     // Stack slot holding an int value.
5135 operand stackSlotI(sRegI reg)
5136 %{
5137   constraint(ALLOC_IN_RC(stack_slots));
5138   // No match rule because this operand is only generated in matching
5139 
5140   format %{ "[$reg]" %}
5141   interface(MEMORY_INTER) %{
5142     base(0x4);   // RSP
5143     index(0x4);  // No Index
5144     scale(0x0);  // No Scale
5145     disp($reg);  // Stack Offset
5146   %}
5147 %}
5148 
     // Stack slot holding a float value.
5149 operand stackSlotF(sRegF reg)
5150 %{
5151   constraint(ALLOC_IN_RC(stack_slots));
5152   // No match rule because this operand is only generated in matching
5153 
5154   format %{ "[$reg]" %}
5155   interface(MEMORY_INTER) %{
5156     base(0x4);   // RSP
5157     index(0x4);  // No Index
5158     scale(0x0);  // No Scale
5159     disp($reg);  // Stack Offset
5160   %}
5161 %}
5162 
     // Stack slot holding a double value.
5163 operand stackSlotD(sRegD reg)
5164 %{
5165   constraint(ALLOC_IN_RC(stack_slots));
5166   // No match rule because this operand is only generated in matching
5167 
5168   format %{ "[$reg]" %}
5169   interface(MEMORY_INTER) %{
5170     base(0x4);   // RSP
5171     index(0x4);  // No Index
5172     scale(0x0);  // No Scale
5173     disp($reg);  // Stack Offset
5174   %}
5175 %}
     // Stack slot holding a long value.
5176 operand stackSlotL(sRegL reg)
5177 %{
5178   constraint(ALLOC_IN_RC(stack_slots));
5179   // No match rule because this operand is only generated in matching
5180 
5181   format %{ "[$reg]" %}
5182   interface(MEMORY_INTER) %{
5183     base(0x4);   // RSP
5184     index(0x4);  // No Index
5185     scale(0x0);  // No Scale
5186     disp($reg);  // Stack Offset
5187   %}
5188 %}
5189 
5190 //----------Conditional Branch Operands----------------------------------------
5191 // Comparison Op  - This is the operation of the comparison, and is limited to
5192 //                  the following set of codes:
5193 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5194 //
5195 // Other attributes of the comparison, such as unsignedness, are specified
5196 // by the comparison instruction that sets a condition code flags register.
5197 // That result is represented by a flags operand whose subtype is appropriate
5198 // to the unsignedness (etc.) of the comparison.
5199 //
5200 // Later, the instruction which matches both the Comparison Op (a Bool) and
5201 // the flags (produced by the Cmp) specifies the coding of the comparison op
5202 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5203 
5204 // Comparison Code: matches any Bool and maps each of the six tests to an
     // encoding value and a mnemonic suffix.  The ordering tests use the signed
     // suffixes l / ge / le / g.
5205 operand cmpOp()
5206 %{
5207   match(Bool);
5208 
5209   format %{ "" %}
5210   interface(COND_INTER) %{
5211     equal(0x4, "e");
5212     not_equal(0x5, "ne");
5213     less(0xC, "l");
5214     greater_equal(0xD, "ge");
5215     less_equal(0xE, "le");
5216     greater(0xF, "g");
5217   %}
5218 %}
5219 
5220 // Comparison Code, unsigned compare.  Used by FP also, with
5221 // C2 (unordered) turned into GT or LT already.  The other bits
5222 // C0 and C3 are turned into Carry & Zero flags.
     // Matches any Bool; the ordering tests use the unsigned suffixes
     // b / nb / be / nbe, while equal / not_equal are the same as in cmpOp.
5223 operand cmpOpU()
5224 %{
5225   match(Bool);
5226 
5227   format %{ "" %}
5228   interface(COND_INTER) %{
5229     equal(0x4, "e");
5230     not_equal(0x5, "ne");
5231     less(0x2, "b");
5232     greater_equal(0x3, "nb");
5233     less_equal(0x6, "be");
5234     greater(0x7, "nbe");
5235   %}
5236 %}
5237 
5238 
5239 // Floating comparisons that don't require any fixup for the unordered case.
     // The predicate restricts the match to Bool nodes whose test is lt, ge, le
     // or gt; the condition table itself is identical to cmpOpU.
5240 operand cmpOpUCF() %{
5241   match(Bool);
5242   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5243             n->as_Bool()->_test._test == BoolTest::ge ||
5244             n->as_Bool()->_test._test == BoolTest::le ||
5245             n->as_Bool()->_test._test == BoolTest::gt);
5246   format %{ "" %}
5247   interface(COND_INTER) %{
5248     equal(0x4, "e");
5249     not_equal(0x5, "ne");
5250     less(0x2, "b");
5251     greater_equal(0x3, "nb");
5252     less_equal(0x6, "be");
5253     greater(0x7, "nbe");
5254   %}
5255 %}
5256 
5257 
5258 // Floating comparisons that can be fixed up with extra conditional jumps.
     // The predicate restricts the match to Bool nodes whose test is ne or eq;
     // the condition table itself is identical to cmpOpU.
5259 operand cmpOpUCF2() %{
5260   match(Bool);
5261   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5262             n->as_Bool()->_test._test == BoolTest::eq);
5263   format %{ "" %}
5264   interface(COND_INTER) %{
5265     equal(0x4, "e");
5266     not_equal(0x5, "ne");
5267     less(0x2, "b");
5268     greater_equal(0x3, "nb");
5269     less_equal(0x6, "be");
5270     greater(0x7, "nbe");
5271   %}
5272 %}
5273 
5274 
5275 //----------OPERAND CLASSES----------------------------------------------------
5276 // Operand Classes are groups of operands that are used to simplify
5277 // instruction definitions by not requiring the AD writer to specify separate
5278 // instructions for every form of operand when the instruction accepts
5279 // multiple operand types with the same basic encoding and format.  The classic
5280 // case of this is memory operands.
5281 
     // "memory" groups every addressing-mode operand defined above: the plain
     // pointer-based forms, the compressed-oop form, and the *Narrow forms.
5282 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5283                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5284                indCompressedOopOffset,
5285                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5286                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5287                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5288 
5289 //----------PIPELINE-----------------------------------------------------------
5290 // Rules which define the behavior of the target architectures pipeline.
5291 pipeline %{
5292 
5293 //----------ATTRIBUTES---------------------------------------------------------
5294 attributes %{
5295   variable_size_instructions;        // Instructions are variable size
5296   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5297   instruction_unit_size = 1;         // Instruction size is counted in 1-byte units
5298   instruction_fetch_unit_size = 16;  // The processor fetches one line
5299   instruction_fetch_units = 1;       // of 16 bytes
5300 
5301   // List of nop instructions
5302   nops( MachNop );
5303 %}
5304 
5305 //----------RESOURCES----------------------------------------------------------
5306 // Resources are the functional units available to the machine
5307 
5308 // Generic P2/P3 pipeline
5309 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5310 // 3 instructions decoded per cycle.
5311 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5312 // 3 ALU op, only ALU0 handles mul instructions.
     // DECODE, MEM and ALU are unions of their member units; the pipeline
     // classes use them to mean "any decoder", "any mem" and "any alu".
     // NOTE(review): the text above says 2 load/store ops per cycle, but three
     // memory units (MS0, MS1, MS2) are declared below -- confirm which is
     // intended.
5313 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5314            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5315            BR, FPU,
5316            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5317 
5318 //----------PIPELINE DESCRIPTION-----------------------------------------------
5319 // Pipeline Description specifies the stages in the machine's pipeline
5320 
5321 // Generic P2/P3 pipeline: six stages, S0 through S5.  The pipeline classes
     // below place decoder use in S0/S1 and operand reads/writes in S3..S5.
5322 pipe_desc(S0, S1, S2, S3, S4, S5);
5323 
5324 //----------PIPELINE CLASSES---------------------------------------------------
5325 // Pipeline Classes describe the stages in which input and output are
5326 // referenced by the hardware pipeline.
5327 
5328 // Naming convention: ialu or fpu
5329 // Then: _reg
5330 // Then: _reg if there is a 2nd register
5331 // Then: _long if it's a pair of instructions implementing a long
5332 // Then: _fat if it requires the big decoder
5333 //   Or: _mem if it requires the big decoder and a memory unit.
5334 
5335 // Integer ALU reg operation: dst is read in S3 and written in S4.
5336 pipe_class ialu_reg(rRegI dst)
5337 %{
5338     single_instruction;
5339     dst    : S4(write);
5340     dst    : S3(read);
5341     DECODE : S0;        // any decoder
5342     ALU    : S3;        // any alu
5343 %}
5344 
5345 // Long ALU reg operation: same stages as ialu_reg, counted as 2 instructions.
5346 pipe_class ialu_reg_long(rRegL dst)
5347 %{
5348     instruction_count(2);
5349     dst    : S4(write);
5350     dst    : S3(read);
5351     DECODE : S0(2);     // any 2 decoders
5352     ALU    : S3(2);     // both alus
5353 %}
5354 
5355 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
5356 pipe_class ialu_reg_fat(rRegI dst)
5357 %{
5358     single_instruction;
5359     dst    : S4(write);
5360     dst    : S3(read);
5361     D0     : S0;        // big decoder only
5362     ALU    : S3;        // any alu
5363 %}
5364 
5365 // Long ALU reg operation using big decoder; counted as 2 instructions.
5366 pipe_class ialu_reg_long_fat(rRegL dst)
5367 %{
5368     instruction_count(2);
5369     dst    : S4(write);
5370     dst    : S3(read);
5371     D0     : S0(2);     // big decoder only; twice
5372     ALU    : S3(2);     // any 2 alus
5373 %}
5374 
5375 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
5376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5377 %{
5378     single_instruction;
5379     dst    : S4(write);
5380     src    : S3(read);
5381     DECODE : S0;        // any decoder
5382     ALU    : S3;        // any alu
5383 %}
5384 
5385 // Long ALU reg-reg operation: counted as 2 instructions.
5386 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5387 %{
5388     instruction_count(2);
5389     dst    : S4(write);
5390     src    : S3(read);
5391     DECODE : S0(2);     // any 2 decoders
5392     ALU    : S3(2);     // both alus
5393 %}
5394 
5395 // Integer ALU reg-reg operation using big decoder.  Note that src is declared
     // as a memory operand here, although no MEM unit is reserved.
5396 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5397 %{
5398     single_instruction;
5399     dst    : S4(write);
5400     src    : S3(read);
5401     D0     : S0;        // big decoder only
5402     ALU    : S3;        // any alu
5403 %}
5404 
5405 // Long ALU reg-reg operation using big decoder; counted as 2 instructions.
5406 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5407 %{
5408     instruction_count(2);
5409     dst    : S4(write);
5410     src    : S3(read);
5411     D0     : S0(2);     // big decoder only; twice
5412     ALU    : S3(2);     // both alus
5413 %}
5414 
5415 // Integer ALU reg-mem operation: mem is read in S3 (with a MEM unit), the
     // ALU is used in S4, and dst is written in S5.
5416 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5417 %{
5418     single_instruction;
5419     dst    : S5(write);
5420     mem    : S3(read);
5421     D0     : S0;        // big decoder only
5422     ALU    : S4;        // any alu
5423     MEM    : S3;        // any mem
5424 %}
5425 
5426 // Integer mem operation (prefetch): only reads mem; no ALU is reserved.
5427 pipe_class ialu_mem(memory mem)
5428 %{
5429     single_instruction;
5430     mem    : S3(read);
5431     D0     : S0;        // big decoder only
5432     MEM    : S3;        // any mem
5433 %}
5434 
5435 // Integer Store to Memory: the address (mem) is read in S3 and the stored
     // value (src) in S5.
5436 pipe_class ialu_mem_reg(memory mem, rRegI src)
5437 %{
5438     single_instruction;
5439     mem    : S3(read);
5440     src    : S5(read);
5441     D0     : S0;        // big decoder only
5442     ALU    : S4;        // any alu
5443     MEM    : S3;
5444 %}
5445 
5446 // // Long Store to Memory
5447 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5448 // %{
5449 //     instruction_count(2);
5450 //     mem    : S3(read);
5451 //     src    : S5(read);
5452 //     D0     : S0(2);          // big decoder only; twice
5453 //     ALU    : S4(2);     // any 2 alus
5454 //     MEM    : S3(2);  // Both mems
5455 // %}
5456 
5457 // Integer Store to Memory with no register source: only the memory operand
     // is tracked (read in S3); otherwise the same units as ialu_mem_reg.
5458 pipe_class ialu_mem_imm(memory mem)
5459 %{
5460     single_instruction;
5461     mem    : S3(read);
5462     D0     : S0;        // big decoder only
5463     ALU    : S4;        // any alu
5464     MEM    : S3;
5465 %}
5466 
5467 // Integer ALU0 reg-reg operation: like ialu_reg_reg, but restricted to the
     // big decoder (D0) and to ALU0.
5468 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5469 %{
5470     single_instruction;
5471     dst    : S4(write);
5472     src    : S3(read);
5473     D0     : S0;        // Big decoder only
5474     ALU0   : S3;        // only alu0
5475 %}
5476 
5477 // Integer ALU0 reg-mem operation: like ialu_reg_mem, but restricted to ALU0.
5478 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5479 %{
5480     single_instruction;
5481     dst    : S5(write);
5482     mem    : S3(read);
5483     D0     : S0;        // big decoder only
5484     ALU0   : S4;        // ALU0 only
5485     MEM    : S3;        // any mem
5486 %}
5487 
5488 // Integer ALU reg-reg operation that writes the flags register: src1 and
     // src2 are read in S3, cr is written in S4.
5489 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5490 %{
5491     single_instruction;
5492     cr     : S4(write);
5493     src1   : S3(read);
5494     src2   : S3(read);
5495     DECODE : S0;        // any decoder
5496     ALU    : S3;        // any alu
5497 %}
5498 
5499 // Integer ALU reg-imm operation that writes the flags register: only src1 is
     // tracked as an input.
5500 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5501 %{
5502     single_instruction;
5503     cr     : S4(write);
5504     src1   : S3(read);
5505     DECODE : S0;        // any decoder
5506     ALU    : S3;        // any alu
5507 %}
5508 
5509 // Integer ALU reg-mem operation that writes the flags register: src2 is a
     // memory operand, so the big decoder and a MEM unit are reserved.
5510 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5511 %{
5512     single_instruction;
5513     cr     : S4(write);
5514     src1   : S3(read);
5515     src2   : S3(read);
5516     D0     : S0;        // big decoder only
5517     ALU    : S4;        // any alu
5518     MEM    : S3;
5519 %}
5520 
5521 // Conditional move reg-reg, as a 4-instruction sequence.  p, q and y are all
     // declared as reads only, and no unit other than the decoders is reserved.
5522 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5523 %{
5524     instruction_count(4);
5525     y      : S4(read);
5526     q      : S3(read);
5527     p      : S3(read);
5528     DECODE : S0(4);     // any decoder
5529 %}
5530 
5531 // Conditional move reg-reg: src and cr are read in S3, dst is written in S4.
5532 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5533 %{
5534     single_instruction;
5535     dst    : S4(write);
5536     src    : S3(read);
5537     cr     : S3(read);
5538     DECODE : S0;        // any decoder
5539 %}
5540 
5541 // Conditional move reg-mem: as pipe_cmov_reg, plus a MEM unit in S3.
5542 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5543 %{
5544     single_instruction;
5545     dst    : S4(write);
5546     src    : S3(read);
5547     cr     : S3(read);
5548     DECODE : S0;        // any decoder
5549     MEM    : S3;
5550 %}
5551 
5552 // Conditional move reg-reg long.  Declared single_instruction, yet DECODE is
     // reserved with a count of 2.
5553 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5554 %{
5555     single_instruction;
5556     dst    : S4(write);
5557     src    : S3(read);
5558     cr     : S3(read);
5559     DECODE : S0(2);     // any 2 decoders
5560 %}
5561 
5562 // XXX
5563 // // Conditional move double reg-reg
5564 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5565 // %{
5566 //     single_instruction;
5567 //     dst    : S4(write);
5568 //     src    : S3(read);
5569 //     cr     : S3(read);
5570 //     DECODE : S0;     // any decoder
5571 // %}
5572 
5573 // Float reg operation: dst is only declared as read (S3); 2 instructions.
5574 pipe_class fpu_reg(regD dst)
5575 %{
5576     instruction_count(2);
5577     dst    : S3(read);
5578     DECODE : S0(2);     // any 2 decoders
5579     FPU    : S3;
5580 %}
5581 
5582 // Float reg-reg operation: src is read in S3, dst is written in S4.
5583 pipe_class fpu_reg_reg(regD dst, regD src)
5584 %{
5585     instruction_count(2);
5586     dst    : S4(write);
5587     src    : S3(read);
5588     DECODE : S0(2);     // any 2 decoders
5589     FPU    : S3;
5590 %}
5591 
5592 // Float reg-reg-reg operation: two sources, counted as 3 instructions.
5593 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5594 %{
5595     instruction_count(3);
5596     dst    : S4(write);
5597     src1   : S3(read);
5598     src2   : S3(read);
5599     DECODE : S0(3);     // any 3 decoders
5600     FPU    : S3(2);
5601 %}
5602 
5603 // Float reg-reg-reg-reg operation: three sources, counted as 4 instructions.
5604 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5605 %{
5606     instruction_count(4);
5607     dst    : S4(write);
5608     src1   : S3(read);
5609     src2   : S3(read);
5610     src3   : S3(read);
5611     DECODE : S0(4);     // any decoder; count 4, matching instruction_count
5612     FPU    : S3(2);
5613 %}
5614 
5615 // Float reg-mem-reg-reg operation: src1 is a memory operand, so the big
     // decoder (D0, in S0) and a MEM unit are reserved in addition to the
     // DECODE entry in S1.
5616 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5617 %{
5618     instruction_count(4);
5619     dst    : S4(write);
5620     src1   : S3(read);
5621     src2   : S3(read);
5622     src3   : S3(read);
5623     DECODE : S1(3);     // any 3 decoders
5624     D0     : S0;        // Big decoder only
5625     FPU    : S3(2);
5626     MEM    : S3;
5627 %}
5628 
5629 // Float reg-mem operation: memory source read at S3, register destination written at S5
5630 pipe_class fpu_reg_mem(regD dst, memory mem)
5631 %{
5632     instruction_count(2);
5633     dst    : S5(write);
5634     mem    : S3(read);
5635     D0     : S0;        // big decoder only
5636     DECODE : S1;        // any decoder for FPU POP
5637     FPU    : S4;
5638     MEM    : S3;        // any mem
5639 %}
5640 
5641 // Float reg-reg-mem operation: one register source plus one memory source
5642 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5643 %{
5644     instruction_count(3);
5645     dst    : S5(write);
5646     src1   : S3(read);
5647     mem    : S3(read);
5648     D0     : S0;        // big decoder only
5649     DECODE : S1(2);     // any 2 decoders for FPU POP
5650     FPU    : S4;
5651     MEM    : S3;        // any mem
5652 %}
5653 
5654 // Float mem-reg operation: register source, memory operand as the target
5655 pipe_class fpu_mem_reg(memory mem, regD src)
5656 %{
5657     instruction_count(2);
5658     src    : S5(read);
5659     mem    : S3(read);  // the memory operand is declared as read (address use) at S3
5660     DECODE : S0;        // any decoder for FPU PUSH
5661     D0     : S1;        // big decoder only
5662     FPU    : S4;
5663     MEM    : S3;        // any mem
5664 %}
5665 
5666 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5667 %{
5668     instruction_count(3);   // Float mem-reg-reg operation: two register sources, memory target
5669     src1   : S3(read);
5670     src2   : S3(read);
5671     mem    : S3(read);
5672     DECODE : S0(2);     // any 2 decoders for FPU PUSH
5673     D0     : S1;        // big decoder only
5674     FPU    : S4;
5675     MEM    : S3;        // any mem
5676 %}
5677 
5678 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5679 %{
5680     instruction_count(3);   // Float mem-reg-mem operation: register + memory sources, memory target
5681     src1   : S3(read);
5682     src2   : S3(read);
5683     mem    : S4(read);
5684     DECODE : S0;        // any decoder for FPU PUSH
5685     D0     : S0(2);     // big decoder only
5686     FPU    : S4;
5687     MEM    : S3(2);     // any mem (two memory operands)
5688 %}
5689 
5690 pipe_class fpu_mem_mem(memory dst, memory src1)
5691 %{
5692     instruction_count(2);   // Float mem-mem operation; note that no FPU resource is requested
5693     src1   : S3(read);
5694     dst    : S4(read);
5695     D0     : S0(2);     // big decoder only
5696     MEM    : S3(2);     // any mem (two memory operands)
5697 %}
5698 
5699 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5700 %{
5701     instruction_count(3);   // Float mem-mem-mem operation: two memory sources, memory target
5702     src1   : S3(read);
5703     src2   : S3(read);
5704     dst    : S4(read);
5705     D0     : S0(3);     // big decoder only
5706     FPU    : S4;
5707     MEM    : S3(3);     // any mem (three memory operands)
5708 %}
5709 
5710 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5711 %{
5712     instruction_count(3);   // Float mem-reg-constant operation (the constant is not an operand here)
5713     src1   : S4(read);
5714     mem    : S4(read);
5715     DECODE : S0;        // any decoder for FPU PUSH
5716     D0     : S0(2);     // big decoder only
5717     FPU    : S4;
5718     MEM    : S3(2);     // any mem; presumably one slot is for the constant load -- TODO confirm
5719 %}
5720 
5721 // Float load constant into a register (the constant is fetched through MEM)
5722 pipe_class fpu_reg_con(regD dst)
5723 %{
5724     instruction_count(2);
5725     dst    : S5(write);
5726     D0     : S0;        // big decoder only for the load
5727     DECODE : S1;        // any decoder for FPU POP
5728     FPU    : S4;
5729     MEM    : S3;        // any mem
5730 %}
5731 
5732 // Float operation on a register source and a loaded constant
5733 pipe_class fpu_reg_reg_con(regD dst, regD src)
5734 %{
5735     instruction_count(3);
5736     dst    : S5(write);
5737     src    : S3(read);
5738     D0     : S0;        // big decoder only for the load
5739     DECODE : S1(2);     // any 2 decoders for FPU POP
5740     FPU    : S4;
5741     MEM    : S3;        // any mem
5742 %}
5743 
5744 // Unconditional branch: a single instruction that only needs the branch unit
5745 pipe_class pipe_jmp(label labl)
5746 %{
5747     single_instruction;
5748     BR   : S3;          // branch resource at stage S3
5749 %}
5750 
5751 // Conditional branch: reads the flags register, then uses the branch unit
5752 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5753 %{
5754     single_instruction;
5755     cr    : S1(read);   // condition flags are consumed at stage S1
5756     BR    : S3;
5757 %}
5758 
5759 // Allocation idiom (compare-and-exchange): serialized, with a fixed latency of 6
5760 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5761 %{
5762     instruction_count(1); force_serialization;
5763     fixed_latency(6);
5764     heap_ptr : S3(read);
5765     DECODE   : S0(3);   // three decoders
5766     D0       : S2;
5767     MEM      : S3;
5768     ALU      : S3(2);
5769     dst      : S5(write);
5770     BR       : S5;
5771 %}
5772 
5773 // Generic big/slow expanded idiom: catch-all class for instructions without a precise model
5774 pipe_class pipe_slow()
5775 %{
5776     instruction_count(10); multiple_bundles; force_serialization;
5777     fixed_latency(100);     // deliberately large latency value
5778     D0  : S0(2);
5779     MEM : S3(2);
5780 %}
5781 
5782 // The real do-nothing guy: zero instructions, no operands, no resources
5783 pipe_class empty()
5784 %{
5785     instruction_count(0);
5786 %}
5787 
5788 // Define the class for the Nop node
5789 define
5790 %{
5791    MachNop = empty;     // Nop nodes are scheduled with the zero-instruction 'empty' class
5792 %}
5793 
5794 %}
5795 
5796 //----------INSTRUCTIONS-------------------------------------------------------
5797 //
5798 // match      -- States which machine-independent subtree may be replaced
5799 //               by this instruction.
5800 // ins_cost   -- The estimated cost of this instruction is used by instruction
5801 //               selection to identify a minimum cost tree of machine
5802 //               instructions that matches a tree of machine-independent
5803 //               instructions.
5804 // format     -- A string providing the disassembly for this instruction.
5805 //               The value of an instruction's operand may be inserted
5806 //               by referring to it with a '$' prefix.
5807 // opcode     -- Three instruction opcodes may be provided.  These are referred
5808 //               to within an encode class as $primary, $secondary, and $tertiary
5809 //               respectively.  The primary opcode is commonly used to
5810 //               indicate the type of machine instruction, while secondary
5811 //               and tertiary are often used for prefix options or addressing
5812 //               modes.
5813 // ins_encode -- A list of encode classes with parameters. The encode class
5814 //               name must have been defined in an 'enc_class' specification
5815 //               in the encode section of the architecture description.
5816 
5817 
5818 //----------Load/Store/Move Instructions---------------------------------------
5819 //----------Load Instructions--------------------------------------------------
5820 
5821 // Load Byte (8 bit signed): sign-extending byte load into an int register
5822 instruct loadB(rRegI dst, memory mem)
5823 %{
5824   match(Set dst (LoadB mem));
5825 
5826   ins_cost(125);
5827   format %{ "movsbl  $dst, $mem\t# byte" %}
5828 
5829   ins_encode %{
5830     __ movsbl($dst$$Register, $mem$$Address);
5831   %}
5832 
5833   ins_pipe(ialu_reg_mem);
5834 %}
5835 
5836 // Load Byte (8 bit signed) into Long Register: folds the ConvI2L into the load
5837 instruct loadB2L(rRegL dst, memory mem)
5838 %{
5839   match(Set dst (ConvI2L (LoadB mem)));
5840 
5841   ins_cost(125);
5842   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5843 
5844   ins_encode %{
5845     __ movsbq($dst$$Register, $mem$$Address);
5846   %}
5847 
5848   ins_pipe(ialu_reg_mem);
5849 %}
5850 
5851 // Load Unsigned Byte (8 bit UNsigned): zero-extending byte load into an int register
5852 instruct loadUB(rRegI dst, memory mem)
5853 %{
5854   match(Set dst (LoadUB mem));
5855 
5856   ins_cost(125);
5857   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5858 
5859   ins_encode %{
5860     __ movzbl($dst$$Register, $mem$$Address);
5861   %}
5862 
5863   ins_pipe(ialu_reg_mem);
5864 %}
5865 
5866 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds the ConvI2L into the load
5867 instruct loadUB2L(rRegL dst, memory mem)
5868 %{
5869   match(Set dst (ConvI2L (LoadUB mem)));
5870 
5871   ins_cost(125);
5872   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5873 
5874   ins_encode %{
5875     __ movzbq($dst$$Register, $mem$$Address);
5876   %}
5877 
5878   ins_pipe(ialu_reg_mem);
5879 %}
5880 
5881 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register (no explicit ins_cost)
5882 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5883   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5884   effect(KILL cr);  // the andl below modifies the condition flags
5885 
5886   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5887             "andl    $dst, $mask" %}
5888   ins_encode %{
5889     Register Rdst = $dst$$Register;
5890     __ movzbq(Rdst, $mem$$Address);    // zero-extending load, then mask in place
5891     __ andl(Rdst, $mask$$constant);
5892   %}
5893   ins_pipe(ialu_reg_mem);
5894 %}
5895 
5896 // Load Short (16 bit signed): sign-extending 16-bit load into an int register
5897 instruct loadS(rRegI dst, memory mem)
5898 %{
5899   match(Set dst (LoadS mem));
5900 
5901   ins_cost(125);
5902   format %{ "movswl $dst, $mem\t# short" %}
5903 
5904   ins_encode %{
5905     __ movswl($dst$$Register, $mem$$Address);
5906   %}
5907 
5908   ins_pipe(ialu_reg_mem);
5909 %}
5910 
5911 // Load Short (16 bit signed) to Byte (8 bit signed): the <<24 / >>24 pair becomes a byte load
5912 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5913   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5914 
5915   ins_cost(125);
5916   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5917   ins_encode %{
5918     __ movsbl($dst$$Register, $mem$$Address);  // sign-extend just the byte at $mem
5919   %}
5920   ins_pipe(ialu_reg_mem);
5921 %}
5922 
5923 // Load Short (16 bit signed) into Long Register: folds the ConvI2L into the load
5924 instruct loadS2L(rRegL dst, memory mem)
5925 %{
5926   match(Set dst (ConvI2L (LoadS mem)));
5927 
5928   ins_cost(125);
5929   format %{ "movswq $dst, $mem\t# short -> long" %}
5930 
5931   ins_encode %{
5932     __ movswq($dst$$Register, $mem$$Address);
5933   %}
5934 
5935   ins_pipe(ialu_reg_mem);
5936 %}
5937 
5938 // Load Unsigned Short/Char (16 bit UNsigned): zero-extending 16-bit load into an int register
5939 instruct loadUS(rRegI dst, memory mem)
5940 %{
5941   match(Set dst (LoadUS mem));
5942 
5943   ins_cost(125);
5944   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5945 
5946   ins_encode %{
5947     __ movzwl($dst$$Register, $mem$$Address);
5948   %}
5949 
5950   ins_pipe(ialu_reg_mem);
5951 %}
5952 
5953 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): shift pair becomes a byte load
5954 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5955   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5956 
5957   ins_cost(125);
5958   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5959   ins_encode %{
5960     __ movsbl($dst$$Register, $mem$$Address);  // sign-extend just the byte at $mem
5961   %}
5962   ins_pipe(ialu_reg_mem);
5963 %}
5964 
5965 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds the ConvI2L into the load
5966 instruct loadUS2L(rRegL dst, memory mem)
5967 %{
5968   match(Set dst (ConvI2L (LoadUS mem)));
5969 
5970   ins_cost(125);
5971   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5972 
5973   ins_encode %{
5974     __ movzwq($dst$$Register, $mem$$Address);
5975   %}
5976 
5977   ins_pipe(ialu_reg_mem);
5978 %}
5979 
5980 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register (no explicit ins_cost)
5981 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5982   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5983 
5984   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5985   ins_encode %{
5986     __ movzbq($dst$$Register, $mem$$Address);  // the 0xFF mask is absorbed by a zero-extending byte load
5987   %}
5988   ins_pipe(ialu_reg_mem);
5989 %}
5990 
5991 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register (no explicit ins_cost)
5992 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5993   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5994   effect(KILL cr);  // the andl below modifies the condition flags
5995 
5996   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5997             "andl    $dst, $mask" %}
5998   ins_encode %{
5999     Register Rdst = $dst$$Register;
6000     __ movzwq(Rdst, $mem$$Address);    // zero-extending load, then mask in place
6001     __ andl(Rdst, $mask$$constant);
6002   %}
6003   ins_pipe(ialu_reg_mem);
6004 %}
6005 
6006 // Load Integer: plain 32-bit load into an int register
6007 instruct loadI(rRegI dst, memory mem)
6008 %{
6009   match(Set dst (LoadI mem));
6010 
6011   ins_cost(125);
6012   format %{ "movl    $dst, $mem\t# int" %}
6013 
6014   ins_encode %{
6015     __ movl($dst$$Register, $mem$$Address);
6016   %}
6017 
6018   ins_pipe(ialu_reg_mem);
6019 %}
6020 
6021 // Load Integer (32 bit signed) to Byte (8 bit signed): the <<24 / >>24 pair becomes a byte load
6022 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6023   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6024 
6025   ins_cost(125);
6026   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6027   ins_encode %{
6028     __ movsbl($dst$$Register, $mem$$Address);  // sign-extend just the byte at $mem
6029   %}
6030   ins_pipe(ialu_reg_mem);
6031 %}
6032 
6033 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): the 0xFF mask becomes a byte load
6034 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6035   match(Set dst (AndI (LoadI mem) mask));
6036 
6037   ins_cost(125);
6038   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6039   ins_encode %{
6040     __ movzbl($dst$$Register, $mem$$Address);  // zero-extend just the byte at $mem
6041   %}
6042   ins_pipe(ialu_reg_mem);
6043 %}
6044 
6045 // Load Integer (32 bit signed) to Short (16 bit signed): the <<16 / >>16 pair becomes a 16-bit load
6046 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6047   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6048 
6049   ins_cost(125);
6050   format %{ "movswl  $dst, $mem\t# int -> short" %}
6051   ins_encode %{
6052     __ movswl($dst$$Register, $mem$$Address);  // sign-extend just the 16 bits at $mem
6053   %}
6054   ins_pipe(ialu_reg_mem);
6055 %}
6056 
6057 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): 0xFFFF mask becomes a 16-bit load
6058 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6059   match(Set dst (AndI (LoadI mem) mask));
6060 
6061   ins_cost(125);
6062   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6063   ins_encode %{
6064     __ movzwl($dst$$Register, $mem$$Address);  // zero-extend just the 16 bits at $mem
6065   %}
6066   ins_pipe(ialu_reg_mem);
6067 %}
6068 
6069 // Load Integer into Long Register: sign-extending 32-bit load, folds the ConvI2L
6070 instruct loadI2L(rRegL dst, memory mem)
6071 %{
6072   match(Set dst (ConvI2L (LoadI mem)));
6073 
6074   ins_cost(125);
6075   format %{ "movslq  $dst, $mem\t# int -> long" %}
6076 
6077   ins_encode %{
6078     __ movslq($dst$$Register, $mem$$Address);
6079   %}
6080 
6081   ins_pipe(ialu_reg_mem);
6082 %}
6083 
6084 // Load Integer with mask 0xFF into Long Register (no explicit ins_cost)
6085 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6086   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6087 
6088   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6089   ins_encode %{
6090     __ movzbq($dst$$Register, $mem$$Address);  // mask and widening absorbed by a zero-extending byte load
6091   %}
6092   ins_pipe(ialu_reg_mem);
6093 %}
6094 
6095 // Load Integer with mask 0xFFFF into Long Register (no explicit ins_cost)
6096 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6097   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6098 
6099   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6100   ins_encode %{
6101     __ movzwq($dst$$Register, $mem$$Address);  // mask and widening absorbed by a zero-extending 16-bit load
6102   %}
6103   ins_pipe(ialu_reg_mem);
6104 %}
6105 
6106 // Load Integer with a 32-bit mask into Long Register; result is the sign-extended (LoadI & mask)
6107 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6108   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6109   effect(KILL cr);  // andl/andq modify the condition flags
6110   // NOTE: the format shows the non-negative-mask path; a negative mask emits movslq/andq instead.
6111   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6112             "andl    $dst, $mask" %}
6113   ins_encode %{
6114     Register Rdst = $dst$$Register;  // mask with bit 31 set: AndI may be negative, so ConvI2L must sign-extend
6115     if ($mask$$constant < 0) { __ movslq(Rdst, $mem$$Address); __ andq(Rdst, $mask$$constant); }
6116     else                     { __ movl(Rdst, $mem$$Address);   __ andl(Rdst, $mask$$constant); }
6117   %}
6118   ins_pipe(ialu_reg_mem);
6119 %}
6120 
6121 // Load Unsigned Integer into Long Register: matches the dedicated LoadUI2L node
6122 instruct loadUI2L(rRegL dst, memory mem)
6123 %{
6124   match(Set dst (LoadUI2L mem));
6125 
6126   ins_cost(125);
6127   format %{ "movl    $dst, $mem\t# uint -> long" %}
6128 
6129   ins_encode %{
6130     __ movl($dst$$Register, $mem$$Address);  // a 32-bit mov clears the upper half of the 64-bit register
6131   %}
6132 
6133   ins_pipe(ialu_reg_mem);
6134 %}
6135 
6136 // Load Long: plain 64-bit load into a long register
6137 instruct loadL(rRegL dst, memory mem)
6138 %{
6139   match(Set dst (LoadL mem));
6140 
6141   ins_cost(125);
6142   format %{ "movq    $dst, $mem\t# long" %}
6143 
6144   ins_encode %{
6145     __ movq($dst$$Register, $mem$$Address);
6146   %}
6147 
6148   ins_pipe(ialu_reg_mem); // XXX
6149 %}
6150 
6151 // Load Range: 32-bit load of the LoadRange value into an int register
6152 instruct loadRange(rRegI dst, memory mem)
6153 %{
6154   match(Set dst (LoadRange mem));
6155 
6156   ins_cost(125); // XXX
6157   format %{ "movl    $dst, $mem\t# range" %}
6158   opcode(0x8B);  // MOV r, r/m
6159   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6160   ins_pipe(ialu_reg_mem);
6161 %}
6162 
6163 // Load Pointer: 64-bit load into a pointer register
6164 instruct loadP(rRegP dst, memory mem)
6165 %{
6166   match(Set dst (LoadP mem));
6167 
6168   ins_cost(125); // XXX
6169   format %{ "movq    $dst, $mem\t# ptr" %}
6170   opcode(0x8B);  // MOV r, r/m; REX_reg_mem_wide is defined elsewhere, presumably adds REX.W
6171   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6172   ins_pipe(ialu_reg_mem); // XXX
6173 %}
6174 
6175 // Load Compressed Pointer: 32-bit load of a narrow oop into an rRegN register
6176 instruct loadN(rRegN dst, memory mem)
6177 %{
6178    match(Set dst (LoadN mem));
6179 
6180    ins_cost(125); // XXX
6181    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6182    ins_encode %{
6183      __ movl($dst$$Register, $mem$$Address);  // no decoding is done here; the value stays compressed
6184    %}
6185    ins_pipe(ialu_reg_mem); // XXX
6186 %}
6187 
6188 
6189 // Load Klass Pointer: 64-bit load, same encoding as loadP but matching LoadKlass
6190 instruct loadKlass(rRegP dst, memory mem)
6191 %{
6192   match(Set dst (LoadKlass mem));
6193 
6194   ins_cost(125); // XXX
6195   format %{ "movq    $dst, $mem\t# class" %}
6196   opcode(0x8B);  // MOV r, r/m
6197   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6198   ins_pipe(ialu_reg_mem); // XXX
6199 %}
6200 
6201 // Load narrow Klass Pointer: 32-bit load of a compressed klass pointer into an rRegN register
6202 instruct loadNKlass(rRegN dst, memory mem)
6203 %{
6204   match(Set dst (LoadNKlass mem));
6205 
6206   ins_cost(125); // XXX
6207   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6208   ins_encode %{
6209     __ movl($dst$$Register, $mem$$Address);  // no decoding is done here; the value stays compressed
6210   %}
6211   ins_pipe(ialu_reg_mem); // XXX
6212 %}
6213 
6214 // Load Float: scalar single-precision load into an XMM register
6215 instruct loadF(regF dst, memory mem)
6216 %{
6217   match(Set dst (LoadF mem));
6218 
6219   ins_cost(145); // XXX
6220   format %{ "movss   $dst, $mem\t# float" %}
6221   opcode(0xF3, 0x0F, 0x10);  // F3 prefix is emitted before REX, then 0F 10 (MOVSS xmm, m32)
6222   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6223   ins_pipe(pipe_slow); // XXX
6224 %}
6225 
6226 // Load Double (variant selected when UseXmmLoadAndClearUpper is off): uses movlpd
6227 instruct loadD_partial(regD dst, memory mem)
6228 %{
6229   predicate(!UseXmmLoadAndClearUpper);
6230   match(Set dst (LoadD mem));
6231 
6232   ins_cost(145); // XXX
6233   format %{ "movlpd  $dst, $mem\t# double" %}
6234   opcode(0x66, 0x0F, 0x12);  // 66 prefix is emitted before REX, then 0F 12 (MOVLPD xmm, m64)
6235   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6236   ins_pipe(pipe_slow); // XXX
6237 %}
6238 
6239 instruct loadD(regD dst, memory mem)
6240 %{
6241   predicate(UseXmmLoadAndClearUpper);  // complements loadD_partial: exactly one LoadD rule is active
6242   match(Set dst (LoadD mem));
6243 
6244   ins_cost(145); // XXX
6245   format %{ "movsd   $dst, $mem\t# double" %}
6246   opcode(0xF2, 0x0F, 0x10);  // F2 prefix is emitted before REX, then 0F 10 (MOVSD xmm, m64)
6247   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6248   ins_pipe(pipe_slow); // XXX
6249 %}
6250 
6251 // Load Aligned Packed Byte to XMM register (Load8B) via the movq_ld encoding class defined elsewhere
6252 instruct loadA8B(regD dst, memory mem) %{
6253   match(Set dst (Load8B mem));
6254   ins_cost(125);
6255   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6256   ins_encode( movq_ld(dst, mem));
6257   ins_pipe( pipe_slow );
6258 %}
6259 
6260 // Load Aligned Packed Short to XMM register (Load4S) via the movq_ld encoding class defined elsewhere
6261 instruct loadA4S(regD dst, memory mem) %{
6262   match(Set dst (Load4S mem));
6263   ins_cost(125);
6264   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6265   ins_encode( movq_ld(dst, mem));
6266   ins_pipe( pipe_slow );
6267 %}
6268 
6269 // Load Aligned Packed Char to XMM register (Load4C) via the movq_ld encoding class defined elsewhere
6270 instruct loadA4C(regD dst, memory mem) %{
6271   match(Set dst (Load4C mem));
6272   ins_cost(125);
6273   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6274   ins_encode( movq_ld(dst, mem));
6275   ins_pipe( pipe_slow );
6276 %}
6277 
6278 // Load Aligned Packed Integer to XMM register (Load2I) via the movq_ld encoding class defined elsewhere
6279 instruct load2IU(regD dst, memory mem) %{
6280   match(Set dst (Load2I mem));
6281   ins_cost(125);
6282   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6283   ins_encode( movq_ld(dst, mem));
6284   ins_pipe( pipe_slow );
6285 %}
6286 
6287 // Load Aligned Packed Single to XMM (Load2F); costed at 145, unlike the 125 of the integer packed loads
6288 instruct loadA2F(regD dst, memory mem) %{
6289   match(Set dst (Load2F mem));
6290   ins_cost(145);
6291   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6292   ins_encode( movq_ld(dst, mem));
6293   ins_pipe( pipe_slow );
6294 %}
6295 
6296 // Load Effective Address: materialize an indOffset8 address expression into a pointer register
6297 instruct leaP8(rRegP dst, indOffset8 mem)
6298 %{
6299   match(Set dst mem);
6300 
6301   ins_cost(110); // XXX
6302   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6303   opcode(0x8D);  // LEA r, m
6304   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6305   ins_pipe(ialu_reg_reg_fat);
6306 %}
6307 
6308 instruct leaP32(rRegP dst, indOffset32 mem)
6309 %{
6310   match(Set dst mem);  // materialize an indOffset32 address expression into a pointer register
6311 
6312   ins_cost(110);
6313   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6314   opcode(0x8D);  // LEA r, m
6315   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6316   ins_pipe(ialu_reg_reg_fat);
6317 %}
6318 
6319 // instruct leaPIdx(rRegP dst, indIndex mem)
6320 // %{
6321 //   match(Set dst mem);
6322 
6323 //   ins_cost(110);
6324 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6325 //   opcode(0x8D);
6326 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6327 //   ins_pipe(ialu_reg_reg_fat);
6328 // %}
6329 
6330 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6331 %{
6332   match(Set dst mem);  // materialize an indIndexOffset address expression into a pointer register
6333 
6334   ins_cost(110);
6335   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6336   opcode(0x8D);  // LEA r, m
6337   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6338   ins_pipe(ialu_reg_reg_fat);
6339 %}
6340 
6341 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6342 %{
6343   match(Set dst mem);  // materialize an indIndexScale address expression into a pointer register
6344 
6345   ins_cost(110);
6346   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6347   opcode(0x8D);  // LEA r, m
6348   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6349   ins_pipe(ialu_reg_reg_fat);
6350 %}
6351 
6352 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6353 %{
6354   match(Set dst mem);  // materialize an indIndexScaleOffset address expression into a pointer register
6355 
6356   ins_cost(110);
6357   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6358   opcode(0x8D);  // LEA r, m
6359   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6360   ins_pipe(ialu_reg_reg_fat);
6361 %}
6362 
6363 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6364 %{
6365   match(Set dst mem);  // materialize an indPosIndexScaleOffset address expression into a pointer register
6366 
6367   ins_cost(110);
6368   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6369   opcode(0x8D);  // LEA r, m
6370   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6371   ins_pipe(ialu_reg_reg_fat);
6372 %}
6373 
6374 // Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero shift
6375 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6376 %{
6377   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6378   match(Set dst mem);
6379 
6380   ins_cost(110);
6381   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6382   opcode(0x8D);  // LEA r, m
6383   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6384   ins_pipe(ialu_reg_reg_fat);
6385 %}
6386 
6387 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6388 %{
6389   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6390   match(Set dst mem);
6391 
6392   ins_cost(110); // XXX
6393   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6394   opcode(0x8D);  // LEA r, m
6395   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6396   ins_pipe(ialu_reg_reg_fat);
6397 %}
6398 
6399 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6400 %{
6401   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6402   match(Set dst mem);
6403 
6404   ins_cost(110);
6405   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6406   opcode(0x8D);  // LEA r, m
6407   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6408   ins_pipe(ialu_reg_reg_fat);
6409 %}
6410 
6411 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6412 %{
6413   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6414   match(Set dst mem);
6415 
6416   ins_cost(110);
6417   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6418   opcode(0x8D);  // LEA r, m
6419   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6420   ins_pipe(ialu_reg_reg_fat);
6421 %}
6422 
6423 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6424 %{
6425   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6426   match(Set dst mem);
6427 
6428   ins_cost(110);
6429   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6430   opcode(0x8D);  // LEA r, m
6431   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6432   ins_pipe(ialu_reg_reg_fat);
6433 %}
6434 
6435 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6436 %{
6437   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6438   match(Set dst mem);
6439 
6440   ins_cost(110);
6441   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6442   opcode(0x8D);  // LEA r, m
6443   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6444   ins_pipe(ialu_reg_reg_fat);
6445 %}
6446 
6447 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6448 %{
6449   predicate(Universe::narrow_oop_shift() == 0);  // narrow-oop address forms apply only when the shift is zero
6450   match(Set dst mem);
6451 
6452   ins_cost(110);
6453   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6454   opcode(0x8D);  // LEA r, m
6455   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6456   ins_pipe(ialu_reg_reg_fat);
6457 %}
6458 
6459 instruct loadConI(rRegI dst, immI src)
6460 %{
6461   match(Set dst src);  // general int constant load; no explicit ins_cost, encoding via load_immI (defined elsewhere)
6462 
6463   format %{ "movl    $dst, $src\t# int" %}
6464   ins_encode(load_immI(dst, src));
6465   ins_pipe(ialu_reg_fat); // XXX
6466 %}
6467 
6468 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6469 %{
6470   match(Set dst src);  // int zero: cleared with xor instead of loading an immediate
6471   effect(KILL cr);     // xor modifies the condition flags
6472 
6473   ins_cost(50);
6474   format %{ "xorl    $dst, $dst\t# int" %}
6475   opcode(0x33); /* + rd */
6476   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6477   ins_pipe(ialu_reg);
6478 %}
6479 
6480 instruct loadConL(rRegL dst, immL src)
6481 %{
6482   match(Set dst src);  // general long constant load; costlier (150) than the 32-bit and zero special cases
6483 
6484   ins_cost(150);
6485   format %{ "movq    $dst, $src\t# long" %}
6486   ins_encode(load_immL(dst, src));
6487   ins_pipe(ialu_reg);
6488 %}
6489 
6490 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6491 %{
6492   match(Set dst src);  // long zero: a 32-bit xor suffices because it also clears the upper half
6493   effect(KILL cr);     // xor modifies the condition flags
6494 
6495   ins_cost(50);
6496   format %{ "xorl    $dst, $dst\t# long" %}
6497   opcode(0x33); /* + rd */
6498   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6499   ins_pipe(ialu_reg); // XXX
6500 %}
6501 
6502 instruct loadConUL32(rRegL dst, immUL32 src)
6503 %{
6504   match(Set dst src);  // long constant that fits in unsigned 32 bits; cheaper (60) than loadConL32 and loadConL
6505 
6506   ins_cost(60);
6507   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6508   ins_encode(load_immUL32(dst, src));
6509   ins_pipe(ialu_reg);
6510 %}
6511 
6512 instruct loadConL32(rRegL dst, immL32 src)
6513 %{
6514   match(Set dst src);  // long constant that fits in signed 32 bits; cost 70 sits between loadConUL32 and loadConL
6515 
6516   ins_cost(70);
6517   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6518   ins_encode(load_immL32(dst, src));
6519   ins_pipe(ialu_reg);
6520 %}
6521 
6522 instruct loadConP(rRegP dst, immP con) %{
6523   match(Set dst con);  // general pointer constant load; no explicit ins_cost, encoding via load_immP (defined elsewhere)
6524 
6525   format %{ "movq    $dst, $con\t# ptr" %}
6526   ins_encode(load_immP(dst, con));
6527   ins_pipe(ialu_reg_fat); // XXX
6528 %}
6529 
6530 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6531 %{
6532   match(Set dst src);  // NULL pointer: a 32-bit xor suffices because it also clears the upper half
6533   effect(KILL cr);     // xor modifies the condition flags
6534 
6535   ins_cost(50);
6536   format %{ "xorl    $dst, $dst\t# ptr" %}
6537   opcode(0x33); /* + rd */
6538   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6539   ins_pipe(ialu_reg);
6540 %}
6541 
6542 instruct loadConP_poll(rRegP dst, immP_poll src) %{
6543   match(Set dst src);  // loads the polling page address; format prints movq but the encoding emits lea
6544   format %{ "movq    $dst, $src\t!ptr" %}
6545   ins_encode %{
6546     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);  // address carries a poll_type relocation
6547     __ lea($dst$$Register, polling_page);
6548   %}
6549   ins_pipe(ialu_reg_fat);
6550 %}
6551 
6552 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6553 %{
6554   match(Set dst src);  // pointer constant that fits in a positive 32-bit value (per the format text)
6555   effect(KILL cr);     // NOTE(review): flags declared killed; load_immP31 is defined elsewhere -- confirm it needs this
6556 
6557   ins_cost(60);
6558   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6559   ins_encode(load_immP31(dst, src));
6560   ins_pipe(ialu_reg);
6561 %}
6562 
6563 instruct loadConF(regF dst, immF con) %{
6564   match(Set dst con);  // float constant: loaded from the constant table rather than encoded inline
6565   ins_cost(125);
6566   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6567   ins_encode %{
6568     __ movflt($dst$$XMMRegister, $constantaddress($con));
6569   %}
6570   ins_pipe(pipe_slow);
6571 %}
6572 
6573 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6574   match(Set dst src);  // compressed NULL: cleared with xor instead of loading an immediate
6575   effect(KILL cr);     // xor modifies the condition flags
6576   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6577   ins_encode %{
6578     __ xorq($dst$$Register, $dst$$Register);  // both operands are dst; the format above now says so too
6579   %}
6580   ins_pipe(ialu_reg);
6581 %}
6582 
6583 instruct loadConN(rRegN dst, immN src) %{
6584   match(Set dst src);  // non-NULL compressed oop constant
6585 
6586   ins_cost(125);
6587   format %{ "movl    $dst, $src\t# compressed ptr" %}
6588   ins_encode %{
6589     address con = (address)$src$$constant;
6590     if (con == NULL) {
6591       ShouldNotReachHere();  // a NULL constant is not expected to reach this rule
6592     } else {
6593       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6594     }
6595   %}
6596   ins_pipe(ialu_reg_fat); // XXX
6597 %}
6598 
6599 instruct loadConF0(regF dst, immF0 src)
6600 %{
6601   match(Set dst src);  // float 0.0: cleared with xorps, cheaper (100) than the constant-table load
6602   ins_cost(100);
6603 
6604   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6605   opcode(0x0F, 0x57);  // XORPS xmm, xmm
6606   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6607   ins_pipe(pipe_slow);
6608 %}
6609 
6610 // Double constant from the constant table. Use the same format since predicate() can not be used here.
6611 instruct loadConD(regD dst, immD con) %{
6612   match(Set dst con);
6613   ins_cost(125);
6614   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6615   ins_encode %{
6616     __ movdbl($dst$$XMMRegister, $constantaddress($con));  // movdbl picks the instruction; format always prints movsd
6617   %}
6618   ins_pipe(pipe_slow);
6619 %}
6620 
6621 instruct loadConD0(regD dst, immD0 src)
6622 %{
6623   match(Set dst src);  // double 0.0: cleared with xorpd, cheaper (100) than the constant-table load
6624   ins_cost(100);
6625 
6626   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6627   opcode(0x66, 0x0F, 0x57);  // 66 prefix is emitted before REX, then 0F 57 (XORPD xmm, xmm)
6628   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6629   ins_pipe(pipe_slow);
6630 %}
6631 
6632 instruct loadSSI(rRegI dst, stackSlotI src)
6633 %{
6634   match(Set dst src);  // reload an int from a stack slot into a register
6635 
6636   ins_cost(125);
6637   format %{ "movl    $dst, $src\t# int stk" %}
6638   opcode(0x8B);  // MOV r, r/m
6639   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6640   ins_pipe(ialu_reg_mem);
6641 %}
6642 
6643 instruct loadSSL(rRegL dst, stackSlotL src)
6644 %{
6645   match(Set dst src);  // reload a long from a stack slot into a register
6646 
6647   ins_cost(125);
6648   format %{ "movq    $dst, $src\t# long stk" %}
6649   opcode(0x8B);  // MOV r, r/m (wide REX variant for the 64-bit form)
6650   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6651   ins_pipe(ialu_reg_mem);
6652 %}
6653 
6654 instruct loadSSP(rRegP dst, stackSlotP src)
6655 %{
6656   match(Set dst src);  // reload a pointer from a stack slot into a register
6657 
6658   ins_cost(125);
6659   format %{ "movq    $dst, $src\t# ptr stk" %}
6660   opcode(0x8B);  // MOV r, r/m (wide REX variant for the 64-bit form)
6661   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6662   ins_pipe(ialu_reg_mem);
6663 %}
6664 
6665 instruct loadSSF(regF dst, stackSlotF src)
6666 %{
6667   match(Set dst src);  // reload a float from a stack slot into an XMM register
6668 
6669   ins_cost(125);
6670   format %{ "movss   $dst, $src\t# float stk" %}
6671   opcode(0xF3, 0x0F, 0x10);  // F3 prefix is emitted before REX, then 0F 10 (MOVSS xmm, m32)
6672   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6673   ins_pipe(pipe_slow); // XXX
6674 %}
6675 
6676 // Reload a double from a stack slot. Use the same format since predicate() can not be used here.
6677 instruct loadSSD(regD dst, stackSlotD src)
6678 %{
6679   match(Set dst src);
6680 
6681   ins_cost(125);
6682   format %{ "movsd   $dst, $src\t# double stk" %}
6683   ins_encode  %{
6684     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot is addressed as rsp + displacement
6685   %}
6686   ins_pipe(pipe_slow); // XXX
6687 %}
6688 
6689 // Prefetch instructions.
6690 // Must be safe to execute with invalid address (cannot fault).
6691 
6692 instruct prefetchr( memory mem ) %{
6693   predicate(ReadPrefetchInstr==3);  // read-prefetch flavor is chosen by the ReadPrefetchInstr flag value
6694   match(PrefetchRead mem);
6695   ins_cost(125);
6696 
6697   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6698   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6699   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6700   ins_pipe(ialu_mem);
6701 %}
6702 
6703 instruct prefetchrNTA( memory mem ) %{
6704   predicate(ReadPrefetchInstr==0);  // read-prefetch flavor is chosen by the ReadPrefetchInstr flag value
6705   match(PrefetchRead mem);
6706   ins_cost(125);
6707 
6708   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6709   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6710   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6711   ins_pipe(ialu_mem);
6712 %}
6713 
// Read prefetch variant chosen when ReadPrefetchInstr == 1.
// Encodes 0F 18 with /1 in the ModRM reg field ("PREFETCHT0" per the format).
6714 instruct prefetchrT0( memory mem ) %{
6715   predicate(ReadPrefetchInstr==1);
6716   match(PrefetchRead mem);
6717   ins_cost(125);
6718 
6719   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6720   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6721   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6722   ins_pipe(ialu_mem);
6723 %}
6724 
// Read prefetch variant chosen when ReadPrefetchInstr == 2.
// Encodes 0F 18 with /3 in the ModRM reg field ("PREFETCHT2" per the format).
6725 instruct prefetchrT2( memory mem ) %{
6726   predicate(ReadPrefetchInstr==2);
6727   match(PrefetchRead mem);
6728   ins_cost(125);
6729 
6730   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6731   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6732   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6733   ins_pipe(ialu_mem);
6734 %}
6735 
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 3.
// Encodes 0F 0D with /1 in the ModRM reg field ("PREFETCHW" per the format).
6736 instruct prefetchw( memory mem ) %{
6737   predicate(AllocatePrefetchInstr==3);
6738   match(PrefetchWrite mem);
6739   ins_cost(125);
6740 
6741   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6742   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6743   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6744   ins_pipe(ialu_mem);
6745 %}
6746 
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 0.
// Same bytes as the read NTA form: 0F 18 /0.
6747 instruct prefetchwNTA( memory mem ) %{
6748   predicate(AllocatePrefetchInstr==0);
6749   match(PrefetchWrite mem);
6750   ins_cost(125);
6751 
6752   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6753   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6754   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6755   ins_pipe(ialu_mem);
6756 %}
6757 
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 1.
// Same bytes as the read T0 form: 0F 18 /1.
6758 instruct prefetchwT0( memory mem ) %{
6759   predicate(AllocatePrefetchInstr==1);
6760   match(PrefetchWrite mem);
6761   ins_cost(125);
6762 
6763   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6764   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6765   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6766   ins_pipe(ialu_mem);
6767 %}
6768 
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 2.
// Same bytes as the read T2 form: 0F 18 /3.
6769 instruct prefetchwT2( memory mem ) %{
6770   predicate(AllocatePrefetchInstr==2);
6771   match(PrefetchWrite mem);
6772   ins_cost(125);
6773 
6774   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6775   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6776   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6777   ins_pipe(ialu_mem);
6778 %}
6779 
6780 //----------Store Instructions-------------------------------------------------
6781 
6782 // Store Byte
// StoreB from an int register: opcode 0x88 ("movb" per the format string).
// Uses the byte-register flavour of the REX encoder (REX_breg_mem) rather
// than the REX_reg_mem used by the wider stores.
6783 instruct storeB(memory mem, rRegI src)
6784 %{
6785   match(Set mem (StoreB mem src));
6786 
6787   ins_cost(125); // XXX
6788   format %{ "movb    $mem, $src\t# byte" %}
6789   opcode(0x88);
6790   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6791   ins_pipe(ialu_mem_reg);
6792 %}
6793 
6794 // Store Char/Short
// StoreC (16-bit) from an int register: same opcode 0x89 as the int store,
// preceded by SizePrefix, which is emitted ahead of the REX prefix.
6795 instruct storeC(memory mem, rRegI src)
6796 %{
6797   match(Set mem (StoreC mem src));
6798 
6799   ins_cost(125); // XXX
6800   format %{ "movw    $mem, $src\t# char/short" %}
6801   opcode(0x89);
6802   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6803   ins_pipe(ialu_mem_reg);
6804 %}
6805 
6806 // Store Integer
// StoreI from an int register: opcode 0x89 with the non-wide REX encoding
// ("movl" per the format string).
6807 instruct storeI(memory mem, rRegI src)
6808 %{
6809   match(Set mem (StoreI mem src));
6810 
6811   ins_cost(125); // XXX
6812   format %{ "movl    $mem, $src\t# int" %}
6813   opcode(0x89);
6814   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6815   ins_pipe(ialu_mem_reg);
6816 %}
6817 
6818 // Store Long
// StoreL from a long register: opcode 0x89 with the wide REX encoding
// ("movq" per the format string).
6819 instruct storeL(memory mem, rRegL src)
6820 %{
6821   match(Set mem (StoreL mem src));
6822 
6823   ins_cost(125); // XXX
6824   format %{ "movq    $mem, $src\t# long" %}
6825   opcode(0x89);
6826   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6827   ins_pipe(ialu_mem_reg); // XXX
6828 %}
6829 
6830 // Store Pointer
// StoreP from a pointer register: opcode 0x89 with the wide REX encoding.
// The source operand class is any_RegP, not the rRegP used by the
// stack-slot pointer moves.
6831 instruct storeP(memory mem, any_RegP src)
6832 %{
6833   match(Set mem (StoreP mem src));
6834 
6835   ins_cost(125); // XXX
6836   format %{ "movq    $mem, $src\t# ptr" %}
6837   opcode(0x89);
6838   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6839   ins_pipe(ialu_mem_reg);
6840 %}
6841 
// Stores a NULL pointer (immP0) by writing R12 instead of an immediate.
// Only eligible when compressed oops are enabled and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0
// in that configuration. Cheaper (125) than the immediate form (150).
6842 instruct storeImmP0(memory mem, immP0 zero)
6843 %{
6844   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6845   match(Set mem (StoreP mem zero));
6846 
6847   ins_cost(125); // XXX
6848   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6849   ins_encode %{
6850     __ movq($mem$$Address, r12);
6851   %}
6852   ins_pipe(ialu_mem_reg);
6853 %}
6854 
6855 // Store NULL Pointer, mark word, or other simple pointer constant.
// StoreP of a pointer constant of operand class immP31: C7 /0 with the wide
// REX encoding, followed by a 32-bit immediate (Con32).
6856 instruct storeImmP(memory mem, immP31 src)
6857 %{
6858   match(Set mem (StoreP mem src));
6859 
6860   ins_cost(150); // XXX
6861   format %{ "movq    $mem, $src\t# ptr" %}
6862   opcode(0xC7); /* C7 /0 */
6863   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6864   ins_pipe(ialu_mem_imm);
6865 %}
6866 
6867 // Store Compressed Pointer
// StoreN (compressed pointer) from a narrow-oop register: a 32-bit movl
// emitted through the macro assembler.
6868 instruct storeN(memory mem, rRegN src)
6869 %{
6870   match(Set mem (StoreN mem src));
6871 
6872   ins_cost(125); // XXX
6873   format %{ "movl    $mem, $src\t# compressed ptr" %}
6874   ins_encode %{
6875     __ movl($mem$$Address, $src$$Register);
6876   %}
6877   ins_pipe(ialu_mem_reg);
6878 %}
6879 
// Stores a compressed NULL (immN0) by writing the low 32 bits of R12.
// Eligible only when the narrow-oop base is NULL. Unlike the sibling
// R12-based rules, the predicate does not test UseCompressedOops
// (presumably because StoreN nodes only exist with compressed oops —
// TODO confirm).
6880 instruct storeImmN0(memory mem, immN0 zero)
6881 %{
6882   predicate(Universe::narrow_oop_base() == NULL);
6883   match(Set mem (StoreN mem zero));
6884 
6885   ins_cost(125); // XXX
6886   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6887   ins_encode %{
6888     __ movl($mem$$Address, r12);
6889   %}
6890   ins_pipe(ialu_mem_reg);
6891 %}
6892 
// StoreN of a compressed-pointer constant. A NULL constant is written as a
// plain 32-bit zero; any other constant goes through set_narrow_oop, which
// takes the constant as a jobject.
6893 instruct storeImmN(memory mem, immN src)
6894 %{
6895   match(Set mem (StoreN mem src));
6896 
6897   ins_cost(150); // XXX
6898   format %{ "movl    $mem, $src\t# compressed ptr" %}
6899   ins_encode %{
6900     address con = (address)$src$$constant;
6901     if (con == NULL) {
6902       __ movl($mem$$Address, (int32_t)0);
6903     } else {
6904       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6905     }
6906   %}
6907   ins_pipe(ialu_mem_imm);
6908 %}
6909 
6910 // Store Integer Immediate
// Stores int zero by writing the low 32 bits of R12 instead of an immediate.
// Only eligible when compressed oops are on and the narrow-oop base is NULL
// (format string: R12_heapbase==0). Cost 125 vs. 150 for the immediate form.
6911 instruct storeImmI0(memory mem, immI0 zero)
6912 %{
6913   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6914   match(Set mem (StoreI mem zero));
6915 
6916   ins_cost(125); // XXX
6917   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6918   ins_encode %{
6919     __ movl($mem$$Address, r12);
6920   %}
6921   ins_pipe(ialu_mem_reg);
6922 %}
6923 
// StoreI of an arbitrary int constant: C7 /0 with a 32-bit immediate.
6924 instruct storeImmI(memory mem, immI src)
6925 %{
6926   match(Set mem (StoreI mem src));
6927 
6928   ins_cost(150);
6929   format %{ "movl    $mem, $src\t# int" %}
6930   opcode(0xC7); /* C7 /0 */
6931   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6932   ins_pipe(ialu_mem_imm);
6933 %}
6934 
6935 // Store Long Immediate
// Stores long zero by writing all 64 bits of R12 instead of an immediate.
// Only eligible when compressed oops are on and the narrow-oop base is NULL
// (format string: R12_heapbase==0).
6936 instruct storeImmL0(memory mem, immL0 zero)
6937 %{
6938   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6939   match(Set mem (StoreL mem zero));
6940 
6941   ins_cost(125); // XXX
6942   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6943   ins_encode %{
6944     __ movq($mem$$Address, r12);
6945   %}
6946   ins_pipe(ialu_mem_reg);
6947 %}
6948 
// StoreL of a long constant of operand class immL32: C7 /0 with the wide
// REX encoding; only a 32-bit immediate (Con32) is emitted.
6949 instruct storeImmL(memory mem, immL32 src)
6950 %{
6951   match(Set mem (StoreL mem src));
6952 
6953   ins_cost(150);
6954   format %{ "movq    $mem, $src\t# long" %}
6955   opcode(0xC7); /* C7 /0 */
6956   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6957   ins_pipe(ialu_mem_imm);
6958 %}
6959 
6960 // Store Short/Char Immediate
// Stores a 16-bit zero by writing the low word of R12 instead of an
// immediate. Only eligible when compressed oops are on and the narrow-oop
// base is NULL (format string: R12_heapbase==0).
6961 instruct storeImmC0(memory mem, immI0 zero)
6962 %{
6963   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6964   match(Set mem (StoreC mem zero));
6965 
6966   ins_cost(125); // XXX
6967   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6968   ins_encode %{
6969     __ movw($mem$$Address, r12);
6970   %}
6971   ins_pipe(ialu_mem_reg);
6972 %}
6973 
// StoreC of a 16-bit constant, gated on the UseStoreImmI16 flag.
// Same C7 /0 opcode as the 32-bit immediate store, but preceded by
// SizePrefix and followed by a 16-bit immediate (Con16).
6974 instruct storeImmI16(memory mem, immI16 src)
6975 %{
6976   predicate(UseStoreImmI16);
6977   match(Set mem (StoreC mem src));
6978 
6979   ins_cost(150);
6980   format %{ "movw    $mem, $src\t# short/char" %}
6981   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6982   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6983   ins_pipe(ialu_mem_imm);
6984 %}
6985 
6986 // Store Byte Immediate
// Stores a zero byte by writing the low byte of R12 instead of an
// immediate. Only eligible when compressed oops are on and the narrow-oop
// base is NULL (format string: R12_heapbase==0).
// The disassembly comment previously said "short/char" (copied from the
// 16-bit rule); it now says "byte" to match the StoreB being emitted.
6987 instruct storeImmB0(memory mem, immI0 zero)
6988 %{
6989   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6990   match(Set mem (StoreB mem zero));
6991 
6992   ins_cost(125); // XXX
6993   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6994   ins_encode %{
6995     __ movb($mem$$Address, r12);
6996   %}
6997   ins_pipe(ialu_mem_reg);
6998 %}
6999 
// StoreB of an 8-bit constant (immI8): opcode C6 /0, with the immediate
// emitted by the Con8or32 encoding.
7000 instruct storeImmB(memory mem, immI8 src)
7001 %{
7002   match(Set mem (StoreB mem src));
7003 
7004   ins_cost(150); // XXX
7005   format %{ "movb    $mem, $src\t# byte" %}
7006   opcode(0xC6); /* C6 /0 */
7007   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7008   ins_pipe(ialu_mem_imm);
7009 %}
7010 
7011 // Store Aligned Packed Byte XMM register to memory
// Store8B: writes a packed-byte value held in a regD register to memory
// using the shared movq_st encoding ("MOVQ" per the format string).
7012 instruct storeA8B(memory mem, regD src) %{
7013   match(Set mem (Store8B mem src));
7014   ins_cost(145);
7015   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7016   ins_encode( movq_st(mem, src));
7017   ins_pipe( pipe_slow );
7018 %}
7019 
7020 // Store Aligned Packed Char/Short XMM register to memory
// Store4C: writes a packed char/short value held in a regD register to
// memory using the shared movq_st encoding.
7021 instruct storeA4C(memory mem, regD src) %{
7022   match(Set mem (Store4C mem src));
7023   ins_cost(145);
7024   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7025   ins_encode( movq_st(mem, src));
7026   ins_pipe( pipe_slow );
7027 %}
7028 
7029 // Store Aligned Packed Integer XMM register to memory
// Store2I: writes a packed two-int value held in a regD register to memory
// using the shared movq_st encoding.
7030 instruct storeA2I(memory mem, regD src) %{
7031   match(Set mem (Store2I mem src));
7032   ins_cost(145);
7033   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7034   ins_encode( movq_st(mem, src));
7035   ins_pipe( pipe_slow );
7036 %}
7037 
7038 // Store CMS card-mark Immediate
// Card-mark store of zero (StoreCM) done by writing the low byte of R12.
// Only eligible when compressed oops are on and the narrow-oop base is NULL
// (format string: R12_heapbase==0). Cost 125 vs. 150 for the immediate form.
7039 instruct storeImmCM0_reg(memory mem, immI0 zero)
7040 %{
7041   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7042   match(Set mem (StoreCM mem zero));
7043 
7044   ins_cost(125); // XXX
7045   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7046   ins_encode %{
7047     __ movb($mem$$Address, r12);
7048   %}
7049   ins_pipe(ialu_mem_reg);
7050 %}
7051 
// Card-mark store of zero (StoreCM) with an immediate: opcode C6 /0, the
// same encoding as the generic byte-immediate store. No predicate, so it is
// always available as the fallback to the R12-based rule.
7052 instruct storeImmCM0(memory mem, immI0 src)
7053 %{
7054   match(Set mem (StoreCM mem src));
7055 
7056   ins_cost(150); // XXX
7057   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7058   opcode(0xC6); /* C6 /0 */
7059   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7060   ins_pipe(ialu_mem_imm);
7061 %}
7062 
7063 // Store Aligned Packed Single Float XMM register to memory
// Store2F: writes a packed two-float value held in a regD register to
// memory using the shared movq_st encoding.
7064 instruct storeA2F(memory mem, regD src) %{
7065   match(Set mem (Store2F mem src));
7066   ins_cost(145);
7067   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7068   ins_encode( movq_st(mem, src));
7069   ins_pipe( pipe_slow );
7070 %}
7071 
7072 // Store Float
// StoreF from a regF register ("movss" per the format string). Opcode bytes
// are F3 0F 11; the first byte (OpcP) is emitted before the REX prefix and
// the other two (OpcS, OpcT) after it.
7073 instruct storeF(memory mem, regF src)
7074 %{
7075   match(Set mem (StoreF mem src));
7076 
7077   ins_cost(95); // XXX
7078   format %{ "movss   $mem, $src\t# float" %}
7079   opcode(0xF3, 0x0F, 0x11);
7080   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7081   ins_pipe(pipe_slow); // XXX
7082 %}
7083 
7084 // Store immediate Float value (it is faster than store from XMM register)
// Stores float 0.0 (immF0) as a 32-bit integer move from R12. Only eligible
// when compressed oops are on and the narrow-oop base is NULL (format
// string: R12_heapbase==0). Cost 25 is lower than both the immediate form
// (50) and the register store (95).
7085 instruct storeF0(memory mem, immF0 zero)
7086 %{
7087   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7088   match(Set mem (StoreF mem zero));
7089 
7090   ins_cost(25); // XXX
7091   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7092   ins_encode %{
7093     __ movl($mem$$Address, r12);
7094   %}
7095   ins_pipe(ialu_mem_reg);
7096 %}
7097 
// StoreF of a float constant done as an integer store: C7 /0 with the
// constant's bit pattern as the 32-bit immediate (Con32F_as_bits).
7098 instruct storeF_imm(memory mem, immF src)
7099 %{
7100   match(Set mem (StoreF mem src));
7101 
7102   ins_cost(50);
7103   format %{ "movl    $mem, $src\t# float" %}
7104   opcode(0xC7); /* C7 /0 */
7105   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7106   ins_pipe(ialu_mem_imm);
7107 %}
7108 
7109 // Store Double
// StoreD from a regD register ("movsd" per the format string). Opcode bytes
// are F2 0F 11; the first byte (OpcP) is emitted before the REX prefix and
// the other two (OpcS, OpcT) after it.
7110 instruct storeD(memory mem, regD src)
7111 %{
7112   match(Set mem (StoreD mem src));
7113 
7114   ins_cost(95); // XXX
7115   format %{ "movsd   $mem, $src\t# double" %}
7116   opcode(0xF2, 0x0F, 0x11);
7117   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7118   ins_pipe(pipe_slow); // XXX
7119 %}
7120 
7121 // Store immediate double 0.0 (it is faster than store from XMM register)
// StoreD of double 0.0 (immD0) done as a 64-bit integer immediate store:
// C7 /0 with the wide REX encoding and a 32-bit immediate.
// The predicate is the exact negation of the one on storeD0, so this rule
// applies only when the R12-based form is not eligible.
7122 instruct storeD0_imm(memory mem, immD0 src)
7123 %{
7124   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7125   match(Set mem (StoreD mem src));
7126 
7127   ins_cost(50);
7128   format %{ "movq    $mem, $src\t# double 0." %}
7129   opcode(0xC7); /* C7 /0 */
7130   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7131   ins_pipe(ialu_mem_imm);
7132 %}
7133 
// Stores double 0.0 (immD0) as a 64-bit integer move from R12. Only
// eligible when compressed oops are on and the narrow-oop base is NULL
// (format string: R12_heapbase==0).
7134 instruct storeD0(memory mem, immD0 zero)
7135 %{
7136   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7137   match(Set mem (StoreD mem zero));
7138 
7139   ins_cost(25); // XXX
7140   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7141   ins_encode %{
7142     __ movq($mem$$Address, r12);
7143   %}
7144   ins_pipe(ialu_mem_reg);
7145 %}
7146 
// Stack-slot spill of an int: matches a bare `Set dst src` whose
// destination is a stackSlotI; opcode 0x89 ("movl" per the format string).
7147 instruct storeSSI(stackSlotI dst, rRegI src)
7148 %{
7149   match(Set dst src);
7150 
7151   ins_cost(100);
7152   format %{ "movl    $dst, $src\t# int stk" %}
7153   opcode(0x89);
7154   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7155   ins_pipe( ialu_mem_reg );
7156 %}
7157 
// Stack-slot spill of a long: opcode 0x89 with the wide REX encoding.
7158 instruct storeSSL(stackSlotL dst, rRegL src)
7159 %{
7160   match(Set dst src);
7161 
7162   ins_cost(100);
7163   format %{ "movq    $dst, $src\t# long stk" %}
7164   opcode(0x89);
7165   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7166   ins_pipe(ialu_mem_reg);
7167 %}
7168 
// Stack-slot spill of a pointer: opcode 0x89 with the wide REX encoding.
7169 instruct storeSSP(stackSlotP dst, rRegP src)
7170 %{
7171   match(Set dst src);
7172 
7173   ins_cost(100);
7174   format %{ "movq    $dst, $src\t# ptr stk" %}
7175   opcode(0x89);
7176   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7177   ins_pipe(ialu_mem_reg);
7178 %}
7179 
// Stack-slot spill of a float: same F3 0F 11 encoding as the regular float
// store, with the stack slot as the memory operand.
7180 instruct storeSSF(stackSlotF dst, regF src)
7181 %{
7182   match(Set dst src);
7183 
7184   ins_cost(95); // XXX
7185   format %{ "movss   $dst, $src\t# float stk" %}
7186   opcode(0xF3, 0x0F, 0x11);
7187   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7188   ins_pipe(pipe_slow); // XXX
7189 %}
7190 
// Stack-slot spill of a double: same F2 0F 11 encoding as the regular
// double store, with the stack slot as the memory operand.
7191 instruct storeSSD(stackSlotD dst, regD src)
7192 %{
7193   match(Set dst src);
7194 
7195   ins_cost(95); // XXX
7196   format %{ "movsd   $dst, $src\t# double stk" %}
7197   opcode(0xF2, 0x0F, 0x11);
7198   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7199   ins_pipe(pipe_slow); // XXX
7200 %}
7201 
7202 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI in place (dst is both input and result): "bswapl" per the
// format string, encoded as 0F followed by C8 combined with the register
// number by opc2_reg.
7203 instruct bytes_reverse_int(rRegI dst) %{
7204   match(Set dst (ReverseBytesI dst));
7205 
7206   format %{ "bswapl  $dst" %}
7207   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7208   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7209   ins_pipe( ialu_reg );
7210 %}
7211 
// ReverseBytesL in place: same 0F C8+reg encoding as the int form, but with
// the wide REX prefix ("bswapq" per the format string).
7212 instruct bytes_reverse_long(rRegL dst) %{
7213   match(Set dst (ReverseBytesL dst));
7214 
7215   format %{ "bswapq  $dst" %}
7216 
7217   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7218   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7219   ins_pipe( ialu_reg);
7220 %}
7221 
// ReverseBytesUS in place: byte-swap the full 32-bit register, then shift
// right logically by 16 so the swapped halfword ends up zero-extended in
// the low 16 bits.
// The shrl changes the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7222 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
7223   match(Set dst (ReverseBytesUS dst));
7224   effect(KILL cr);
7225   format %{ "bswapl  $dst\n\t"
7226             "shrl    $dst,16\n\t" %}
7227   ins_encode %{
7228     __ bswapl($dst$$Register);
7229     __ shrl($dst$$Register, 16);
7230   %}
7231   ins_pipe( ialu_reg );
7232 %}
7233 
// ReverseBytesS in place: byte-swap the full 32-bit register, then shift
// right arithmetically by 16 so the swapped halfword ends up sign-extended.
// The sarl changes the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7234 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
7235   match(Set dst (ReverseBytesS dst));
7236   effect(KILL cr);
7237   format %{ "bswapl  $dst\n\t"
7238             "sar     $dst,16\n\t" %}
7239   ins_encode %{
7240     __ bswapl($dst$$Register);
7241     __ sarl($dst$$Register, 16);
7242   %}
7243   ins_pipe( ialu_reg );
7244 %}
7245 
7246 //---------- Zeros Count Instructions ------------------------------------------
7247 
// CountLeadingZerosI using a single lzcntl, selected when
// UseCountLeadingZerosInstruction is set. The flags register is killed.
7248 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7249   predicate(UseCountLeadingZerosInstruction);
7250   match(Set dst (CountLeadingZerosI src));
7251   effect(KILL cr);
7252 
7253   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7254   ins_encode %{
7255     __ lzcntl($dst$$Register, $src$$Register);
7256   %}
7257   ins_pipe(ialu_reg);
7258 %}
7259 
// CountLeadingZerosI fallback used when UseCountLeadingZerosInstruction is
// off. Computes (BitsPerInt - 1) - bsr(src); when the jump on notZero is
// not taken (presumably the src == 0 case, where BSR yields no bit index —
// confirm against the ISA manual) dst is forced to -1 so the final result
// becomes BitsPerInt.
7260 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7261   predicate(!UseCountLeadingZerosInstruction);
7262   match(Set dst (CountLeadingZerosI src));
7263   effect(KILL cr);
7264 
7265   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7266             "jnz     skip\n\t"
7267             "movl    $dst, -1\n"
7268       "skip:\n\t"
7269             "negl    $dst\n\t"
7270             "addl    $dst, 31" %}
7271   ins_encode %{
7272     Register Rdst = $dst$$Register;
7273     Register Rsrc = $src$$Register;
7274     Label skip;
7275     __ bsrl(Rdst, Rsrc);
7276     __ jccb(Assembler::notZero, skip);
7277     __ movl(Rdst, -1);               // fall-through case: -(-1) + 31 == 32
7278     __ bind(skip);
7279     __ negl(Rdst);
7280     __ addl(Rdst, BitsPerInt - 1);   // dst = 31 - bit index
7281   %}
7282   ins_pipe(ialu_reg);
7283 %}
7284 
// CountLeadingZerosL using a single lzcntq, selected when
// UseCountLeadingZerosInstruction is set. The result is an int register
// even though the source is a long. The flags register is killed.
7285 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7286   predicate(UseCountLeadingZerosInstruction);
7287   match(Set dst (CountLeadingZerosL src));
7288   effect(KILL cr);
7289 
7290   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7291   ins_encode %{
7292     __ lzcntq($dst$$Register, $src$$Register);
7293   %}
7294   ins_pipe(ialu_reg);
7295 %}
7296 
// CountLeadingZerosL fallback used when UseCountLeadingZerosInstruction is
// off. Computes (BitsPerLong - 1) - bsr(src) with a 64-bit bsrq; the
// follow-up arithmetic is 32-bit because the result is an int. When the
// jump on notZero is not taken (presumably src == 0 — confirm against the
// ISA manual) dst is forced to -1 so the final result becomes BitsPerLong.
7297 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7298   predicate(!UseCountLeadingZerosInstruction);
7299   match(Set dst (CountLeadingZerosL src));
7300   effect(KILL cr);
7301 
7302   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7303             "jnz     skip\n\t"
7304             "movl    $dst, -1\n"
7305       "skip:\n\t"
7306             "negl    $dst\n\t"
7307             "addl    $dst, 63" %}
7308   ins_encode %{
7309     Register Rdst = $dst$$Register;
7310     Register Rsrc = $src$$Register;
7311     Label skip;
7312     __ bsrq(Rdst, Rsrc);
7313     __ jccb(Assembler::notZero, skip);
7314     __ movl(Rdst, -1);               // fall-through case: -(-1) + 63 == 64
7315     __ bind(skip);
7316     __ negl(Rdst);
7317     __ addl(Rdst, BitsPerLong - 1);  // dst = 63 - bit index
7318   %}
7319   ins_pipe(ialu_reg);
7320 %}
7321 
// CountTrailingZerosI via bsfl. When the jump on notZero is not taken
// (presumably src == 0 — confirm against the ISA manual) the result is set
// to BitsPerInt explicitly. No predicate: this is the only rule for the
// node in this section. The flags register is killed.
7322 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7323   match(Set dst (CountTrailingZerosI src));
7324   effect(KILL cr);
7325 
7326   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7327             "jnz     done\n\t"
7328             "movl    $dst, 32\n"
7329       "done:" %}
7330   ins_encode %{
7331     Register Rdst = $dst$$Register;
7332     Label done;
7333     __ bsfl(Rdst, $src$$Register);
7334     __ jccb(Assembler::notZero, done);
7335     __ movl(Rdst, BitsPerInt);
7336     __ bind(done);
7337   %}
7338   ins_pipe(ialu_reg);
7339 %}
7340 
// CountTrailingZerosL via bsfq, producing an int result. When the jump on
// notZero is not taken (presumably src == 0 — confirm against the ISA
// manual) the result is set to BitsPerLong explicitly. The flags register
// is killed.
7341 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7342   match(Set dst (CountTrailingZerosL src));
7343   effect(KILL cr);
7344 
7345   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7346             "jnz     done\n\t"
7347             "movl    $dst, 64\n"
7348       "done:" %}
7349   ins_encode %{
7350     Register Rdst = $dst$$Register;
7351     Label done;
7352     __ bsfq(Rdst, $src$$Register);
7353     __ jccb(Assembler::notZero, done);
7354     __ movl(Rdst, BitsPerLong);
7355     __ bind(done);
7356   %}
7357   ins_pipe(ialu_reg);
7358 %}
7359 
7360 
7361 //---------- Population Count Instructions -------------------------------------
7362 
// PopCountI of an int register via popcntl, selected when
// UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7363 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
7364   predicate(UsePopCountInstruction);
7365   match(Set dst (PopCountI src));
7366   effect(KILL cr);
7367   format %{ "popcnt  $dst, $src" %}
7368   ins_encode %{
7369     __ popcntl($dst$$Register, $src$$Register);
7370   %}
7371   ins_pipe(ialu_reg);
7372 %}
7373 
// PopCountI with the LoadI folded into the instruction's memory operand,
// selected when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7374 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7375   predicate(UsePopCountInstruction);
7376   match(Set dst (PopCountI (LoadI mem)));
7377   effect(KILL cr);
7378   format %{ "popcnt  $dst, $mem" %}
7379   ins_encode %{
7380     __ popcntl($dst$$Register, $mem$$Address);
7381   %}
7382   ins_pipe(ialu_reg);
7383 %}
7384 
7385 // Note: Long.bitCount(long) returns an int.
// PopCountL of a long register via popcntq; the result lands in an int
// register. Selected when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7386 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7387   predicate(UsePopCountInstruction);
7388   match(Set dst (PopCountL src));
7389   effect(KILL cr);
7390   format %{ "popcnt  $dst, $src" %}
7391   ins_encode %{
7392     __ popcntq($dst$$Register, $src$$Register);
7393   %}
7394   ins_pipe(ialu_reg);
7395 %}
7396 
7397 // Note: Long.bitCount(long) returns an int.
// PopCountL with the LoadL folded into the instruction's memory operand;
// the result lands in an int register. Selected when
// UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared
// as an operand and marked KILL; without that the allocator may assume a
// previously computed condition is still live across this instruction.
7398 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7399   predicate(UsePopCountInstruction);
7400   match(Set dst (PopCountL (LoadL mem)));
7401   effect(KILL cr);
7402   format %{ "popcnt  $dst, $mem" %}
7403   ins_encode %{
7404     __ popcntq($dst$$Register, $mem$$Address);
7405   %}
7406   ins_pipe(ialu_reg);
7407 %}
7408 
7409 
7410 //----------MemBar Instructions-----------------------------------------------
7411 // Memory barrier flavors
7412 
// MemBarAcquire: matched but emits nothing (empty ins_encode, size 0,
// cost 0).
7413 instruct membar_acquire()
7414 %{
7415   match(MemBarAcquire);
7416   ins_cost(0);
7417 
7418   size(0);
7419   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7420   ins_encode();
7421   ins_pipe(empty);
7422 %}
7423 
// MemBarAcquire for nodes where Matcher::prior_fast_lock(n) holds. Emits
// nothing; the format string gives the reason (a CMPXCHG in the preceding
// FastLock).
7424 instruct membar_acquire_lock()
7425 %{
7426   match(MemBarAcquire);
7427   predicate(Matcher::prior_fast_lock(n));
7428   ins_cost(0);
7429 
7430   size(0);
7431   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7432   ins_encode();
7433   ins_pipe(empty);
7434 %}
7435 
// MemBarRelease: matched but emits nothing (empty ins_encode, size 0,
// cost 0).
7436 instruct membar_release()
7437 %{
7438   match(MemBarRelease);
7439   ins_cost(0);
7440 
7441   size(0);
7442   format %{ "MEMBAR-release ! (empty encoding)" %}
7443   ins_encode();
7444   ins_pipe(empty);
7445 %}
7446 
// MemBarRelease for nodes where Matcher::post_fast_unlock(n) holds. Emits
// nothing; the format string gives the reason (a FastUnlock follows).
7447 instruct membar_release_lock()
7448 %{
7449   match(MemBarRelease);
7450   predicate(Matcher::post_fast_unlock(n));
7451   ins_cost(0);
7452 
7453   size(0);
7454   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7455   ins_encode();
7456   ins_pipe(empty);
7457 %}
7458 
// MemBarVolatile: the only barrier in this group with a real encoding. It
// asks the macro assembler for a StoreLoad barrier and kills the flags
// register. The format template prints a locked addl on [rsp] when
// os::is_MP() is true and an "empty encoding" note otherwise. Cost 400, so
// the zero-cost rule for unnecessary barriers wins whenever its predicate
// holds.
7459 instruct membar_volatile(rFlagsReg cr) %{
7460   match(MemBarVolatile);
7461   effect(KILL cr);
7462   ins_cost(400);
7463 
7464   format %{
7465     $$template
7466     if (os::is_MP()) {
7467       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7468     } else {
7469       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7470     }
7471   %}
7472   ins_encode %{
7473     __ membar(Assembler::StoreLoad);
7474   %}
7475   ins_pipe(pipe_slow);
7476 %}
7477 
// MemBarVolatile for nodes where Matcher::post_store_load_barrier(n) holds:
// emits nothing (size 0, cost 0).
7478 instruct unnecessary_membar_volatile()
7479 %{
7480   match(MemBarVolatile);
7481   predicate(Matcher::post_store_load_barrier(n));
7482   ins_cost(0);
7483 
7484   size(0);
7485   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7486   ins_encode();
7487   ins_pipe(empty);
7488 %}
7489 
7490 //----------Move Instructions--------------------------------------------------
7491 
// CastX2P: reinterprets a long as a pointer with a 64-bit register copy
// (enc_copy_wide).
7492 instruct castX2P(rRegP dst, rRegL src)
7493 %{
7494   match(Set dst (CastX2P src));
7495 
7496   format %{ "movq    $dst, $src\t# long->ptr" %}
7497   ins_encode(enc_copy_wide(dst, src));
7498   ins_pipe(ialu_reg_reg); // XXX
7499 %}
7500 
// CastP2X: reinterprets a pointer as a long with a 64-bit register copy
// (enc_copy_wide).
7501 instruct castP2X(rRegL dst, rRegP src)
7502 %{
7503   match(Set dst (CastP2X src));
7504 
7505   format %{ "movq    $dst, $src\t# ptr -> long" %}
7506   ins_encode(enc_copy_wide(dst, src));
7507   ins_pipe(ialu_reg_reg); // XXX
7508 %}
7509 
7510 
7511 // Convert oop pointer into compressed form
// EncodeP for values whose pointer type is anything other than NotNull.
// Copies src into dst when they are different registers, then compresses
// dst in place with encode_heap_oop. The flags register is killed.
7512 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7513   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7514   match(Set dst (EncodeP src));
7515   effect(KILL cr);
7516   format %{ "encode_heap_oop $dst,$src" %}
7517   ins_encode %{
7518     Register s = $src$$Register;
7519     Register d = $dst$$Register;
7520     if (s != d) {
7521       __ movq(d, s);
7522     }
7523     __ encode_heap_oop(d);
7524   %}
7525   ins_pipe(ialu_reg_long);
7526 %}
7527 
// EncodeP for values whose pointer type is NotNull: delegates to the
// two-register encode_heap_oop_not_null. The flags register is killed.
7528 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7529   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7530   match(Set dst (EncodeP src));
7531   effect(KILL cr);
7532   format %{ "encode_heap_oop_not_null $dst,$src" %}
7533   ins_encode %{
7534     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7535   %}
7536   ins_pipe(ialu_reg_long);
7537 %}
7538 
// DecodeN for values whose oop pointer type is neither NotNull nor
// Constant. Copies src into dst when they are different registers, then
// decompresses dst in place with decode_heap_oop. The flags register is
// killed.
7539 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7540   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7541             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7542   match(Set dst (DecodeN src));
7543   effect(KILL cr);
7544   format %{ "decode_heap_oop $dst,$src" %}
7545   ins_encode %{
7546     Register s = $src$$Register;
7547     Register d = $dst$$Register;
7548     if (s != d) {
7549       __ movq(d, s);
7550     }
7551     __ decode_heap_oop(d);
7552   %}
7553   ins_pipe(ialu_reg_long);
7554 %}
7555 
// DecodeN for values whose oop pointer type is NotNull or Constant (the
// complement of the predicate on decodeHeapOop). Picks the two-register
// form of decode_heap_oop_not_null when src and dst differ and the
// in-place one-register form when they are the same.
7556 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7557   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7558             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7559   match(Set dst (DecodeN src));
7560   effect(KILL cr);
7561   format %{ "decode_heap_oop_not_null $dst,$src" %}
7562   ins_encode %{
7563     Register s = $src$$Register;
7564     Register d = $dst$$Register;
7565     if (s != d) {
7566       __ decode_heap_oop_not_null(d, s);
7567     } else {
7568       __ decode_heap_oop_not_null(d);
7569     }
7570   %}
7571   ins_pipe(ialu_reg_long);
7572 %}
7573 
7574 
7575 //----------Conditional Move---------------------------------------------------
7576 // Jump
7577 // dummy instruction for generating temp registers
// Jump-table dispatch for `Jump (LShiftL switch_val shift)`: loads the
// table address ($constantaddress) into the TEMP register dest and jumps
// through [dest + switch_val << shift].
// NOTE: predicate(false) means this rule is never selected as written.
7578 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7579   match(Jump (LShiftL switch_val shift));
7580   ins_cost(350);
7581   predicate(false);
7582   effect(TEMP dest);
7583 
7584   format %{ "leaq    $dest, [$constantaddress]\n\t"
7585             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7586   ins_encode %{
7587     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7588     // to do that and the compiler is using that register as one it can allocate.
7589     // So we build it all by hand.
7590     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7591     // ArrayAddress dispatch(table, index);
7592     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7593     __ lea($dest$$Register, $constantaddress);
7594     __ jmp(dispatch);
7595   %}
7596   ins_pipe(pipe_jmp);
7597   ins_pc_relative(1);
7598 %}
7599 
// Jump-table dispatch for `Jump (AddL (LShiftL switch_val shift) offset)`:
// loads the table address into the TEMP register dest and jumps through
// [dest + switch_val << shift + offset]. The immL32 offset is narrowed to
// int for the address displacement.
7600 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7601   match(Jump (AddL (LShiftL switch_val shift) offset));
7602   ins_cost(350);
7603   effect(TEMP dest);
7604 
7605   format %{ "leaq    $dest, [$constantaddress]\n\t"
7606             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7607   ins_encode %{
7608     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7609     // to do that and the compiler is using that register as one it can allocate.
7610     // So we build it all by hand.
7611     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7612     // ArrayAddress dispatch(table, index);
7613     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7614     __ lea($dest$$Register, $constantaddress);
7615     __ jmp(dispatch);
7616   %}
7617   ins_pipe(pipe_jmp);
7618   ins_pc_relative(1);
7619 %}
7620 
// Jump-table dispatch for a plain `Jump switch_val`: loads the table
// address into the TEMP register dest and jumps through
// [dest + switch_val] (scale times_1, no displacement).
7621 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7622   match(Jump switch_val);
7623   ins_cost(350);
7624   effect(TEMP dest);
7625 
7626   format %{ "leaq    $dest, [$constantaddress]\n\t"
7627             "jmp     [$dest + $switch_val]\n\t" %}
7628   ins_encode %{
7629     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7630     // to do that and the compiler is using that register as one it can allocate.
7631     // So we build it all by hand.
7632     // Address index(noreg, switch_reg, Address::times_1);
7633     // ArrayAddress dispatch(table, index);
7634     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7635     __ lea($dest$$Register, $constantaddress);
7636     __ jmp(dispatch);
7637   %}
7638   ins_pipe(pipe_jmp);
7639   ins_pc_relative(1);
7640 %}
7641 
7642 // Conditional move
// CMoveI between int registers under a cmpOp / rFlagsReg condition
// ("signed" per the format string). dst is both an input and the result of
// the match. Base opcode is 0F 40; enc_cmov(cop) emits the opcode bytes
// (presumably merging the condition code into 0x40 — defined elsewhere).
7643 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7644 %{
7645   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7646 
7647   ins_cost(200); // XXX
7648   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7649   opcode(0x0F, 0x40);
7650   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7651   ins_pipe(pipe_cmov_reg);
7652 %}
7653 
// CMoveI between int registers under a cmpOpU / rFlagsRegU condition
// ("unsigned" per the format string). Same encoding as the signed form;
// only the operand classes differ.
7654 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7655   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7656 
7657   ins_cost(200); // XXX
7658   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7659   opcode(0x0F, 0x40);
7660   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7661   ins_pipe(pipe_cmov_reg);
7662 %}
7663 
// CMoveI between int registers for the cmpOpUCF / rFlagsRegUCF operand
// classes. Has no encoding of its own: it expands to cmovI_regU with the
// same operands.
7664 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7665   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7666   ins_cost(200);
7667   expand %{
7668     cmovI_regU(cop, cr, dst, src);
7669   %}
7670 %}
7671 
7672 // Conditional move
// CMoveI whose second value is a LoadI folded into the instruction's
// memory operand; cmpOp / rFlagsReg condition ("signed" per the format
// string). Costs 250 versus 200 for the register form.
7673 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7674   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7675 
7676   ins_cost(250); // XXX
7677   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7678   opcode(0x0F, 0x40);
7679   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7680   ins_pipe(pipe_cmov_mem);
7681 %}
7682 
7683 // Conditional move
// CMoveI with a folded LoadI under a cmpOpU / rFlagsRegU condition
// ("unsigned" per the format string). Same encoding as the signed memory
// form.
7684 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7685 %{
7686   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7687 
7688   ins_cost(250); // XXX
7689   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7690   opcode(0x0F, 0x40);
7691   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7692   ins_pipe(pipe_cmov_mem);
7693 %}
7694 
// CMoveI with a folded LoadI for the cmpOpUCF / rFlagsRegUCF operand
// classes. Has no encoding of its own: it expands to cmovI_memU with the
// same operands.
7695 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7696   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7697   ins_cost(250);
7698   expand %{
7699     cmovI_memU(cop, cr, dst, src);
7700   %}
7701 %}
7702 
7703 // Conditional move
// CMoveN between compressed-pointer registers under a cmpOp / rFlagsReg
// condition. Uses the same 32-bit cmov encoding as the int form (non-wide
// REX, base opcode 0F 40).
7704 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7705 %{
7706   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7707 
7708   ins_cost(200); // XXX
7709   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7710   opcode(0x0F, 0x40);
7711   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7712   ins_pipe(pipe_cmov_reg);
7713 %}
7714 
7715 // Conditional move
// Conditional move of a compressed pointer (rRegN), unsigned-condition form
// (cmpOpU / rFlagsRegU).  Same encoding sequence as cmovN_reg.
7716 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7717 %{
7718   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7719 
7720   ins_cost(200); // XXX
7721   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7722   opcode(0x0F, 0x40);
7723   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7724   ins_pipe(pipe_cmov_reg);
7725 %}
7726 
// cmpOpUCF / rFlagsRegUCF variant of the compressed-pointer cmov; expands to
// cmovN_regU rather than providing its own encoding.
7727 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7728   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7729   ins_cost(200);
7730   expand %{
7731     cmovN_regU(cop, cr, dst, src);
7732   %}
7733 %}
7734 
7735 // Conditional move
// Conditional move of a full-width pointer (rRegP), signed-condition form.
// Formatted as "cmovq"; the encoding uses the wide REX_reg_reg_wide prefix.
7736 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7737 %{
7738   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7739 
7740   ins_cost(200); // XXX
7741   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7742   opcode(0x0F, 0x40);
7743   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7744   ins_pipe(pipe_cmov_reg);  // XXX
7745 %}
7746 
7747 // Conditional move
// Conditional move of a full-width pointer (rRegP), unsigned-condition form
// (cmpOpU / rFlagsRegU).  Same wide encoding sequence as cmovP_reg.
7748 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7749 %{
7750   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7751 
7752   ins_cost(200); // XXX
7753   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7754   opcode(0x0F, 0x40);
7755   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7756   ins_pipe(pipe_cmov_reg); // XXX
7757 %}
7758 
// cmpOpUCF / rFlagsRegUCF variant of the pointer cmov; expands to cmovP_regU
// rather than providing its own encoding.
7759 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7760   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7761   ins_cost(200);
7762   expand %{
7763     cmovP_regU(cop, cr, dst, src);
7764   %}
7765 %}
7766 
7767 // DISABLED: Requires the ADLC to emit a bottom_type call that
7768 // correctly meets the two pointer arguments; one is an incoming
7769 // register but the other is a memory operand.  ALSO appears to
7770 // be buggy with implicit null checks.
7771 //
7772 //// Conditional move
7773 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7774 //%{
7775 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7776 //  ins_cost(250);
7777 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7778 //  opcode(0x0F,0x40);
7779 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7780 //  ins_pipe( pipe_cmov_mem );
7781 //%}
7782 //
7783 //// Conditional move
7784 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7785 //%{
7786 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7787 //  ins_cost(250);
7788 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7789 //  opcode(0x0F,0x40);
7790 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7791 //  ins_pipe( pipe_cmov_mem );
7792 //%}
7793 
// Long conditional move, register source, signed-condition form.
// Formatted as "cmovq"; encoded with the wide REX_reg_reg_wide prefix.
7794 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7795 %{
7796   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7797 
7798   ins_cost(200); // XXX
7799   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7800   opcode(0x0F, 0x40);
7801   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7802   ins_pipe(pipe_cmov_reg);  // XXX
7803 %}
7804 
// Long conditional move with a folded LoadL memory source, signed-condition
// form.
7805 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7806 %{
7807   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7808 
  // NOTE(review): cost is 200 here, whereas the int memory-source cmov
  // rules use 250 -- confirm whether the difference is intentional.
7809   ins_cost(200); // XXX
7810   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7811   opcode(0x0F, 0x40);
7812   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7813   ins_pipe(pipe_cmov_mem);  // XXX
7814 %}
7815 
// Long conditional move, register source, unsigned-condition form
// (cmpOpU / rFlagsRegU).
7816 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7817 %{
7818   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7819 
7820   ins_cost(200); // XXX
7821   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7822   opcode(0x0F, 0x40);
7823   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7824   ins_pipe(pipe_cmov_reg); // XXX
7825 %}
7826 
// cmpOpUCF / rFlagsRegUCF variant of the long register cmov; expands to
// cmovL_regU rather than providing its own encoding.
7827 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7828   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7829   ins_cost(200);
7830   expand %{
7831     cmovL_regU(cop, cr, dst, src);
7832   %}
7833 %}
7834 
// Long conditional move with a folded LoadL memory source, unsigned-condition
// form (cmpOpU / rFlagsRegU).
7835 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7836 %{
7837   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7838 
7839   ins_cost(200); // XXX
7840   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7841   opcode(0x0F, 0x40);
7842   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7843   ins_pipe(pipe_cmov_mem); // XXX
7844 %}
7845 
// cmpOpUCF / rFlagsRegUCF variant of the long memory-source cmov; expands to
// cmovL_memU rather than providing its own encoding.
7846 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7847   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7848   ins_cost(200);
7849   expand %{
7850     cmovL_memU(cop, cr, dst, src);
7851   %}
7852 %}
7853 
// Float conditional move, signed-condition form.  Per the format string this
// is not a cmov instruction: it is a branch on the negated condition around
// a movss, emitted by enc_cmovf_branch (defined elsewhere).
7854 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7855 %{
7856   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7857 
7858   ins_cost(200); // XXX
7859   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7860             "movss     $dst, $src\n"
7861     "skip:" %}
7862   ins_encode(enc_cmovf_branch(cop, dst, src));
7863   ins_pipe(pipe_slow);
7864 %}
7865 
7866 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7867 // %{
7868 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7869 
7870 //   ins_cost(200); // XXX
7871 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7872 //             "movss     $dst, $src\n"
7873 //     "skip:" %}
7874 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7875 //   ins_pipe(pipe_slow);
7876 // %}
7877 
// Float conditional move, unsigned-condition form (cmpOpU / rFlagsRegU).
// Per the format string: a branch on the negated condition around a movss,
// emitted by enc_cmovf_branch (defined elsewhere).
7878 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7879 %{
7880   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7881 
7882   ins_cost(200); // XXX
7883   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7884             "movss     $dst, $src\n"
7885     "skip:" %}
7886   ins_encode(enc_cmovf_branch(cop, dst, src));
7887   ins_pipe(pipe_slow);
7888 %}
7889 
// cmpOpUCF / rFlagsRegUCF variant of the float cmov; expands to cmovF_regU
// rather than providing its own encoding.
7890 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7891   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7892   ins_cost(200);
7893   expand %{
7894     cmovF_regU(cop, cr, dst, src);
7895   %}
7896 %}
7897 
// Double conditional move, signed-condition form.  Per the format string:
// a branch on the negated condition around a movsd, emitted by
// enc_cmovd_branch (defined elsewhere).
7898 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7899 %{
7900   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7901 
7902   ins_cost(200); // XXX
7903   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7904             "movsd     $dst, $src\n"
7905     "skip:" %}
7906   ins_encode(enc_cmovd_branch(cop, dst, src));
7907   ins_pipe(pipe_slow);
7908 %}
7909 
// Double conditional move, unsigned-condition form (cmpOpU / rFlagsRegU).
// Per the format string: a branch on the negated condition around a movsd,
// emitted by enc_cmovd_branch (defined elsewhere).
7910 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7911 %{
7912   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7913 
7914   ins_cost(200); // XXX
7915   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7916             "movsd     $dst, $src\n"
7917     "skip:" %}
7918   ins_encode(enc_cmovd_branch(cop, dst, src));
7919   ins_pipe(pipe_slow);
7920 %}
7921 
// cmpOpUCF / rFlagsRegUCF variant of the double cmov; expands to cmovD_regU
// rather than providing its own encoding.
7922 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7923   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7924   ins_cost(200);
7925   expand %{
7926     cmovD_regU(cop, cr, dst, src);
7927   %}
7928 %}
7929 
7930 //----------Arithmetic Instructions--------------------------------------------
7931 //----------Addition Instructions----------------------------------------------
7932 
// Int add, register += register ("addl $dst, $src", opcode 0x03).
// The flags register is declared killed.
7933 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7934 %{
7935   match(Set dst (AddI dst src));
7936   effect(KILL cr);
7937 
7938   format %{ "addl    $dst, $src\t# int" %}
7939   opcode(0x03);
7940   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7941   ins_pipe(ialu_reg_reg);
7942 %}
7943 
// Int add, register += immediate ("addl $dst, $src", opcode 0x81 /0).
// The flags register is declared killed.
7944 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7945 %{
7946   match(Set dst (AddI dst src));
7947   effect(KILL cr);
7948 
7949   format %{ "addl    $dst, $src\t# int" %}
7950   opcode(0x81, 0x00); /* /0 id */
  // Con8or32 emits the immediate; judging by its name it picks an 8- or
  // 32-bit form -- encoding classes are defined elsewhere, confirm.
7951   ins_encode(OpcSErm(dst, src), Con8or32(src));
7952   ins_pipe( ialu_reg );
7953 %}
7954 
// Int add, register += memory: folds the LoadI of the second operand into
// the add ("addl $dst, $src", opcode 0x03).  Flags declared killed.
7955 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7956 %{
7957   match(Set dst (AddI dst (LoadI src)));
7958   effect(KILL cr);
7959 
7960   ins_cost(125); // XXX
7961   format %{ "addl    $dst, $src\t# int" %}
7962   opcode(0x03);
7963   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7964   ins_pipe(ialu_reg_mem);
7965 %}
7966 
// Int read-modify-write add: matches a StoreI to dst of (LoadI dst) + src
// and emits a single memory-destination add (opcode 0x01).  Flags killed.
7967 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7968 %{
7969   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7970   effect(KILL cr);
7971 
7972   ins_cost(150); // XXX
7973   format %{ "addl    $dst, $src\t# int" %}
7974   opcode(0x01); /* Opcode 01 /r */
  // Operand order is (src, dst): the register goes in the reg field and the
  // memory operand in r/m.
7975   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7976   ins_pipe(ialu_mem_reg);
7977 %}
7978 
// Int read-modify-write add of an immediate: matches a StoreI to dst of
// (LoadI dst) + src and emits a memory-destination add (0x81 /0).
7979 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7980 %{
7981   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7982   effect(KILL cr);
7983 
7984   ins_cost(125); // XXX
7985   format %{ "addl    $dst, $src\t# int" %}
7986   opcode(0x81); /* Opcode 81 /0 id */
  // The /0 opcode extension is passed explicitly to RM_opc_mem.
7987   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7988   ins_pipe(ialu_mem_imm);
7989 %}
7990 
// Int increment of a register: AddI with the immI1 operand, emitted as
// "incl $dst" (FF /0).  Only eligible when UseIncDec is set.  Flags killed.
7991 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7992 %{
7993   predicate(UseIncDec);
7994   match(Set dst (AddI dst src));
7995   effect(KILL cr);
7996 
7997   format %{ "incl    $dst\t# int" %}
7998   opcode(0xFF, 0x00); // FF /0
7999   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8000   ins_pipe(ialu_reg);
8001 %}
8002 
// Int increment in memory: StoreI to dst of (LoadI dst) + immI1, emitted as
// "incl $dst" (FF /0).  Only eligible when UseIncDec is set.  Flags killed.
8003 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8004 %{
8005   predicate(UseIncDec);
8006   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8007   effect(KILL cr);
8008 
8009   ins_cost(125); // XXX
8010   format %{ "incl    $dst\t# int" %}
8011   opcode(0xFF); /* Opcode FF /0 */
8012   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8013   ins_pipe(ialu_mem_imm);
8014 %}
8015 
8016 // XXX why does that use AddI
// Int decrement of a register, emitted as "decl $dst" (FF /1).  The rule
// matches AddI with the immI_M1 operand rather than a SubI node.
// Only eligible when UseIncDec is set.  Flags killed.
8017 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8018 %{
8019   predicate(UseIncDec);
8020   match(Set dst (AddI dst src));
8021   effect(KILL cr);
8022 
8023   format %{ "decl    $dst\t# int" %}
8024   opcode(0xFF, 0x01); // FF /1
8025   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8026   ins_pipe(ialu_reg);
8027 %}
8028 
8029 // XXX why does that use AddI
// Int decrement in memory: StoreI to dst of (LoadI dst) + immI_M1, emitted
// as "decl $dst" (FF /1).  Only eligible when UseIncDec is set.  Flags killed.
8030 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8031 %{
8032   predicate(UseIncDec);
8033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8034   effect(KILL cr);
8035 
8036   ins_cost(125); // XXX
8037   format %{ "decl    $dst\t# int" %}
8038   opcode(0xFF); /* Opcode FF /1 */
8039   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8040   ins_pipe(ialu_mem_imm);
8041 %}
8042 
// Three-operand int add (dst = src0 + src1) implemented with lea (0x8D).
// Unlike the addl rules it declares no flags operand or KILL effect, and
// dst need not equal src0.
8043 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8044 %{
8045   match(Set dst (AddI src0 src1));
8046 
8047   ins_cost(110);
8048   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8049   opcode(0x8D); /* 0x8D /r */
  // The leading 0x67 byte is the "addr32" prefix shown in the format string.
8050   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8051   ins_pipe(ialu_reg_reg);
8052 %}
8053 
// Long add, register += register ("addq $dst, $src", opcode 0x03 with the
// wide REX prefix).  Flags declared killed.
8054 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8055 %{
8056   match(Set dst (AddL dst src));
8057   effect(KILL cr);
8058 
8059   format %{ "addq    $dst, $src\t# long" %}
8060   opcode(0x03);
8061   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8062   ins_pipe(ialu_reg_reg);
8063 %}
8064 
// Long add, register += immediate ("addq $dst, $src", opcode 0x81 /0).
// The immediate operand class is immL32.  Flags declared killed.
8065 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8066 %{
8067   match(Set dst (AddL dst src));
8068   effect(KILL cr);
8069 
8070   format %{ "addq    $dst, $src\t# long" %}
8071   opcode(0x81, 0x00); /* /0 id */
8072   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8073   ins_pipe( ialu_reg );
8074 %}
8075 
// Long add, register += memory: folds the LoadL of the second operand into
// the add ("addq $dst, $src", opcode 0x03).  Flags declared killed.
8076 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8077 %{
8078   match(Set dst (AddL dst (LoadL src)));
8079   effect(KILL cr);
8080 
8081   ins_cost(125); // XXX
8082   format %{ "addq    $dst, $src\t# long" %}
8083   opcode(0x03);
8084   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8085   ins_pipe(ialu_reg_mem);
8086 %}
8087 
// Long read-modify-write add: matches a StoreL to dst of (LoadL dst) + src
// and emits a single memory-destination addq (opcode 0x01).  Flags killed.
8088 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8089 %{
8090   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8091   effect(KILL cr);
8092 
8093   ins_cost(150); // XXX
8094   format %{ "addq    $dst, $src\t# long" %}
8095   opcode(0x01); /* Opcode 01 /r */
  // Operand order is (src, dst): register in the reg field, memory in r/m.
8096   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8097   ins_pipe(ialu_mem_reg);
8098 %}
8099 
// Long read-modify-write add of an immL32 immediate: matches a StoreL to
// dst of (LoadL dst) + src, emitted as a memory-destination addq (0x81 /0).
8100 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8101 %{
8102   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8103   effect(KILL cr);
8104 
8105   ins_cost(125); // XXX
8106   format %{ "addq    $dst, $src\t# long" %}
8107   opcode(0x81); /* Opcode 81 /0 id */
8108   ins_encode(REX_mem_wide(dst),
8109              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8110   ins_pipe(ialu_mem_imm);
8111 %}
8112 
// Long increment of a register: AddL with the immL1 operand, emitted as
// "incq $dst" (FF /0 with the wide REX prefix).  Only eligible when
// UseIncDec is set.  Flags declared killed.
//
// dst is a long value (the rule matches AddL and encodes with REX_reg_wide),
// so it is declared with the long register class rRegL, consistent with
// decL_rReg; it was previously mis-declared as the int class rRegI.
8113 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8114 %{
8115   predicate(UseIncDec);
8116   match(Set dst (AddL dst src));
8117   effect(KILL cr);
8118 
8119   format %{ "incq    $dst\t# long" %}
8120   opcode(0xFF, 0x00); // FF /0
8121   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8122   ins_pipe(ialu_reg);
8123 %}
8124 
// Long increment in memory: StoreL to dst of (LoadL dst) + immL1, emitted as
// "incq $dst" (FF /0).  Only eligible when UseIncDec is set.  Flags killed.
8125 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8126 %{
8127   predicate(UseIncDec);
8128   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8129   effect(KILL cr);
8130 
8131   ins_cost(125); // XXX
8132   format %{ "incq    $dst\t# long" %}
8133   opcode(0xFF); /* Opcode FF /0 */
8134   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8135   ins_pipe(ialu_mem_imm);
8136 %}
8137 
8138 // XXX why does that use AddL
// Long decrement of a register, emitted as "decq $dst" (FF /1).  The rule
// matches AddL with the immL_M1 operand rather than a SubL node.
// Only eligible when UseIncDec is set.  Flags killed.
8139 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8140 %{
8141   predicate(UseIncDec);
8142   match(Set dst (AddL dst src));
8143   effect(KILL cr);
8144 
8145   format %{ "decq    $dst\t# long" %}
8146   opcode(0xFF, 0x01); // FF /1
8147   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8148   ins_pipe(ialu_reg);
8149 %}
8150 
8151 // XXX why does that use AddL
// Long decrement in memory: StoreL to dst of (LoadL dst) + immL_M1, emitted
// as "decq $dst" (FF /1).  Only eligible when UseIncDec is set.  Flags killed.
8152 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8153 %{
8154   predicate(UseIncDec);
8155   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8156   effect(KILL cr);
8157 
8158   ins_cost(125); // XXX
8159   format %{ "decq    $dst\t# long" %}
8160   opcode(0xFF); /* Opcode FF /1 */
8161   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8162   ins_pipe(ialu_mem_imm);
8163 %}
8164 
// Three-operand long add (dst = src0 + src1) implemented with leaq (0x8D).
// Declares no flags operand or KILL effect, and dst need not equal src0.
8165 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8166 %{
8167   match(Set dst (AddL src0 src1));
8168 
8169   ins_cost(110);
8170   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8171   opcode(0x8D); /* 0x8D /r */
8172   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8173   ins_pipe(ialu_reg_reg);
8174 %}
8175 
// Pointer add: pointer register += long register offset ("addq", 0x03 with
// the wide REX prefix).  Flags declared killed.
8176 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8177 %{
8178   match(Set dst (AddP dst src));
8179   effect(KILL cr);
8180 
8181   format %{ "addq    $dst, $src\t# ptr" %}
8182   opcode(0x03);
8183   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8184   ins_pipe(ialu_reg_reg);
8185 %}
8186 
// Pointer add: pointer register += immL32 immediate ("addq", 0x81 /0).
// Flags declared killed.
8187 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8188 %{
8189   match(Set dst (AddP dst src));
8190   effect(KILL cr);
8191 
8192   format %{ "addq    $dst, $src\t# ptr" %}
8193   opcode(0x81, 0x00); /* /0 id */
8194   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8195   ins_pipe( ialu_reg );
8196 %}
8197 
8198 // XXX addP mem ops ????
8199 
// Three-operand pointer add (dst = src0 + src1) implemented with leaq
// (0x8D).  Declares no flags operand or KILL effect, and dst need not equal
// src0.
8200 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8201 %{
8202   match(Set dst (AddP src0 src1));
8203 
8204   ins_cost(110);
8205   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8206   opcode(0x8D); /* 0x8D /r */
8207   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8208   ins_pipe(ialu_reg_reg);
8209 %}
8210 
// CheckCastPP on a pointer register: matched in place (dst is both input
// and output) and emits no machine code (size 0, empty encoding).
8211 instruct checkCastPP(rRegP dst)
8212 %{
8213   match(Set dst (CheckCastPP dst));
8214 
8215   size(0);
8216   format %{ "# checkcastPP of $dst" %}
8217   ins_encode(/* empty encoding */);
8218   ins_pipe(empty);
8219 %}
8220 
// CastPP on a pointer register: matched in place and emits no machine code
// (size 0, empty encoding).
8221 instruct castPP(rRegP dst)
8222 %{
8223   match(Set dst (CastPP dst));
8224 
8225   size(0);
8226   format %{ "# castPP of $dst" %}
8227   ins_encode(/* empty encoding */);
8228   ins_pipe(empty);
8229 %}
8230 
// CastII on an int register: matched in place and emits no machine code
// (size 0, empty encoding); explicitly given zero cost.
8231 instruct castII(rRegI dst)
8232 %{
8233   match(Set dst (CastII dst));
8234 
8235   size(0);
8236   format %{ "# castII of $dst" %}
8237   ins_encode(/* empty encoding */);
8238   ins_cost(0);
8239   ins_pipe(empty);
8240 %}
8241 
8242 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked: emitted as a plain 64-bit load ("movq $dst, $mem", opcode
// 0x8B with the wide REX prefix); no lock prefix appears in the encoding.
8243 instruct loadPLocked(rRegP dst, memory mem)
8244 %{
8245   match(Set dst (LoadPLocked mem));
8246 
8247   ins_cost(125); // XXX
8248   format %{ "movq    $dst, $mem\t# ptr locked" %}
8249   opcode(0x8B);
8250   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8251   ins_pipe(ialu_reg_mem); // XXX
8252 %}
8253 
8254 // LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked: emitted as a plain 64-bit load ("movq $dst, $mem", opcode
// 0x8B with the wide REX prefix); no lock prefix appears in the encoding.
8255 instruct loadLLocked(rRegL dst, memory mem)
8256 %{
8257   match(Set dst (LoadLLocked mem));
8258 
8259   ins_cost(125); // XXX
8260   format %{ "movq    $dst, $mem\t# long locked" %}
8261   opcode(0x8B);
8262   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8263   ins_pipe(ialu_reg_mem); // XXX
8264 %}
8265 
8266 // Conditional-store of the updated heap-top.
8267 // Used during allocation of the shared heap.
8268 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8269 
// StorePConditional: lock-prefixed 64-bit cmpxchg (0x0F 0xB1) of newval
// into heap_top_ptr.  oldval is pinned to rax (rax_RegP); the rule's result
// is the flags register cr.
8270 instruct storePConditional(memory heap_top_ptr,
8271                            rax_RegP oldval, rRegP newval,
8272                            rFlagsReg cr)
8273 %{
8274   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8275 
8276   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8277             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8278   opcode(0x0F, 0xB1);
8279   ins_encode(lock_prefix,
8280              REX_reg_mem_wide(newval, heap_top_ptr),
8281              OpcP, OpcS,
8282              reg_mem(newval, heap_top_ptr));
8283   ins_pipe(pipe_cmpxchg);
8284 %}
8285 
8286 // Conditional-store of an int value.
8287 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: lock-prefixed 32-bit cmpxchg (0x0F 0xB1) of newval
// into mem.  oldval is pinned to rax (rax_RegI) and declared killed; the
// rule's result is the flags register cr.
8288 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8289 %{
8290   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8291   effect(KILL oldval);
8292 
8293   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8294   opcode(0x0F, 0xB1);
8295   ins_encode(lock_prefix,
8296              REX_reg_mem(newval, mem),
8297              OpcP, OpcS,
8298              reg_mem(newval, mem));
8299   ins_pipe(pipe_cmpxchg);
8300 %}
8301 
8302 // Conditional-store of a long value.
8303 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: lock-prefixed 64-bit cmpxchg (0x0F 0xB1) of newval
// into mem.  oldval is pinned to rax (rax_RegL) and declared killed; the
// rule's result is the flags register cr.
8304 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8305 %{
8306   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8307   effect(KILL oldval);
8308 
8309   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8310   opcode(0x0F, 0xB1);
8311   ins_encode(lock_prefix,
8312              REX_reg_mem_wide(newval, mem),
8313              OpcP, OpcS,
8314              reg_mem(newval, mem));
8315   ins_pipe(pipe_cmpxchg);
8316 %}
8317 
8318 
8319 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Emits a lock-prefixed
// 64-bit cmpxchg (0x0F 0xB1) followed by sete + movzbl on res, which turn
// the compare outcome into an int in res.  oldval is pinned to rax; both it
// and the flags are declared killed.
8320 instruct compareAndSwapP(rRegI res,
8321                          memory mem_ptr,
8322                          rax_RegP oldval, rRegP newval,
8323                          rFlagsReg cr)
8324 %{
8325   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8326   effect(KILL cr, KILL oldval);
8327 
8328   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8329             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8330             "sete    $res\n\t"
8331             "movzbl  $res, $res" %}
8332   opcode(0x0F, 0xB1);
8333   ins_encode(lock_prefix,
8334              REX_reg_mem_wide(newval, mem_ptr),
8335              OpcP, OpcS,
8336              reg_mem(newval, mem_ptr),
8337              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8338              REX_reg_breg(res, res), // movzbl
8339              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8340   ins_pipe( pipe_cmpxchg );
8341 %}
8342 
// CompareAndSwapL producing an int result in res.  Emits a lock-prefixed
// 64-bit cmpxchg (0x0F 0xB1) followed by sete + movzbl on res, which turn
// the compare outcome into an int in res.  oldval is pinned to rax; both it
// and the flags are declared killed.
8343 instruct compareAndSwapL(rRegI res,
8344                          memory mem_ptr,
8345                          rax_RegL oldval, rRegL newval,
8346                          rFlagsReg cr)
8347 %{
8348   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8349   effect(KILL cr, KILL oldval);
8350 
8351   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8352             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8353             "sete    $res\n\t"
8354             "movzbl  $res, $res" %}
8355   opcode(0x0F, 0xB1);
8356   ins_encode(lock_prefix,
8357              REX_reg_mem_wide(newval, mem_ptr),
8358              OpcP, OpcS,
8359              reg_mem(newval, mem_ptr),
8360              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8361              REX_reg_breg(res, res), // movzbl
8362              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8363   ins_pipe( pipe_cmpxchg );
8364 %}
8365 
// CompareAndSwapI producing an int result in res.  Emits a lock-prefixed
// 32-bit cmpxchg (0x0F 0xB1, non-wide REX) followed by sete + movzbl on res.
// oldval is pinned to rax; both it and the flags are declared killed.
8366 instruct compareAndSwapI(rRegI res,
8367                          memory mem_ptr,
8368                          rax_RegI oldval, rRegI newval,
8369                          rFlagsReg cr)
8370 %{
8371   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8372   effect(KILL cr, KILL oldval);
8373 
8374   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8375             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8376             "sete    $res\n\t"
8377             "movzbl  $res, $res" %}
8378   opcode(0x0F, 0xB1);
8379   ins_encode(lock_prefix,
8380              REX_reg_mem(newval, mem_ptr),
8381              OpcP, OpcS,
8382              reg_mem(newval, mem_ptr),
8383              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8384              REX_reg_breg(res, res), // movzbl
8385              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8386   ins_pipe( pipe_cmpxchg );
8387 %}
8388 
8389 
// CompareAndSwapN (compressed pointer) producing an int result in res.
// Emits a lock-prefixed 32-bit cmpxchg (0x0F 0xB1, non-wide REX) followed by
// sete + movzbl on res.  oldval is pinned to rax (rax_RegN); both it and the
// flags are declared killed.
8390 instruct compareAndSwapN(rRegI res,
8391                           memory mem_ptr,
8392                           rax_RegN oldval, rRegN newval,
8393                           rFlagsReg cr) %{
8394   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8395   effect(KILL cr, KILL oldval);
8396 
8397   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8398             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8399             "sete    $res\n\t"
8400             "movzbl  $res, $res" %}
8401   opcode(0x0F, 0xB1);
8402   ins_encode(lock_prefix,
8403              REX_reg_mem(newval, mem_ptr),
8404              OpcP, OpcS,
8405              reg_mem(newval, mem_ptr),
8406              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8407              REX_reg_breg(res, res), // movzbl
8408              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8409   ins_pipe( pipe_cmpxchg );
8410 %}
8411 
8412 //----------Subtraction Instructions-------------------------------------------
8413 
8414 // Integer Subtraction Instructions
// Int subtract, register -= register ("subl $dst, $src", opcode 0x2B).
// Flags declared killed.
8415 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8416 %{
8417   match(Set dst (SubI dst src));
8418   effect(KILL cr);
8419 
8420   format %{ "subl    $dst, $src\t# int" %}
8421   opcode(0x2B);
8422   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8423   ins_pipe(ialu_reg_reg);
8424 %}
8425 
// Int subtract, register -= immediate ("subl $dst, $src", opcode 0x81 /5).
// Flags declared killed.
8426 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8427 %{
8428   match(Set dst (SubI dst src));
8429   effect(KILL cr);
8430 
8431   format %{ "subl    $dst, $src\t# int" %}
8432   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8433   ins_encode(OpcSErm(dst, src), Con8or32(src));
8434   ins_pipe(ialu_reg);
8435 %}
8436 
// Int subtract, register -= memory: folds the LoadI of the subtrahend into
// the subtract ("subl $dst, $src", opcode 0x2B).  Flags declared killed.
8437 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8438 %{
8439   match(Set dst (SubI dst (LoadI src)));
8440   effect(KILL cr);
8441 
8442   ins_cost(125);
8443   format %{ "subl    $dst, $src\t# int" %}
8444   opcode(0x2B);
8445   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8446   ins_pipe(ialu_reg_mem);
8447 %}
8448 
// Int read-modify-write subtract: matches a StoreI to dst of
// (LoadI dst) - src and emits a memory-destination subl (opcode 0x29).
8449 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8450 %{
8451   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8452   effect(KILL cr);
8453 
8454   ins_cost(150);
8455   format %{ "subl    $dst, $src\t# int" %}
8456   opcode(0x29); /* Opcode 29 /r */
  // Operand order is (src, dst): register in the reg field, memory in r/m.
8457   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8458   ins_pipe(ialu_mem_reg);
8459 %}
8460 
// Int read-modify-write subtract of an immediate: matches a StoreI to dst
// of (LoadI dst) - src, emitted as a memory-destination subl (0x81 /5).
8461 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8462 %{
8463   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8464   effect(KILL cr);
8465 
8466   ins_cost(125); // XXX
8467   format %{ "subl    $dst, $src\t# int" %}
8468   opcode(0x81); /* Opcode 81 /5 id */
  // The /5 opcode extension is passed explicitly to RM_opc_mem.
8469   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8470   ins_pipe(ialu_mem_imm);
8471 %}
8472 
// Long subtract, register -= register ("subq $dst, $src", opcode 0x2B with
// the wide REX prefix).  Flags declared killed.
8473 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8474 %{
8475   match(Set dst (SubL dst src));
8476   effect(KILL cr);
8477 
8478   format %{ "subq    $dst, $src\t# long" %}
8479   opcode(0x2B);
8480   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8481   ins_pipe(ialu_reg_reg);
8482 %}
8483 
// Long subtract, register -= immL32 immediate ("subq $dst, $src", opcode
// 0x81 /5 via the wide OpcSErm_wide encoding).  Flags declared killed.
//
// dst is a long value (the rule matches SubL and uses the wide encoding), so
// it is declared with the long register class rRegL like the other subL_*
// rules; it was previously mis-declared as the int class rRegI.
8484 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8485 %{
8486   match(Set dst (SubL dst src));
8487   effect(KILL cr);
8488 
8489   format %{ "subq    $dst, $src\t# long" %}
8490   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8491   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8492   ins_pipe(ialu_reg);
8493 %}
8494 
// Long subtract, register -= memory: folds the LoadL of the subtrahend into
// the subtract ("subq $dst, $src", opcode 0x2B).  Flags declared killed.
8495 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8496 %{
8497   match(Set dst (SubL dst (LoadL src)));
8498   effect(KILL cr);
8499 
8500   ins_cost(125);
8501   format %{ "subq    $dst, $src\t# long" %}
8502   opcode(0x2B);
8503   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8504   ins_pipe(ialu_reg_mem);
8505 %}
8506 
// Long read-modify-write subtract: matches a StoreL to dst of
// (LoadL dst) - src and emits a memory-destination subq (opcode 0x29).
8507 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8508 %{
8509   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8510   effect(KILL cr);
8511 
8512   ins_cost(150);
8513   format %{ "subq    $dst, $src\t# long" %}
8514   opcode(0x29); /* Opcode 29 /r */
  // Operand order is (src, dst): register in the reg field, memory in r/m.
8515   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8516   ins_pipe(ialu_mem_reg);
8517 %}
8518 
// Long read-modify-write subtract of an immL32 immediate: matches a StoreL
// to dst of (LoadL dst) - src, emitted as a memory-destination subq (0x81 /5).
8519 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8520 %{
8521   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8522   effect(KILL cr);
8523 
8524   ins_cost(125); // XXX
8525   format %{ "subq    $dst, $src\t# long" %}
8526   opcode(0x81); /* Opcode 81 /5 id */
8527   ins_encode(REX_mem_wide(dst),
8528              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8529   ins_pipe(ialu_mem_imm);
8530 %}
8531 
8532 // Subtract from a pointer
8533 // XXX hmpf???
// Pointer minus int: recognizes AddP of dst and a negated int (SubI zero
// src) and emits a single "subq $dst, $src" (0x2B, wide REX) instead.
// NOTE(review): src is an int register (rRegI) but the subtract is 64-bit
// wide -- confirm what the upper half of src is expected to hold.
8534 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8535 %{
8536   match(Set dst (AddP dst (SubI zero src)));
8537   effect(KILL cr);
8538 
8539   format %{ "subq    $dst, $src\t# ptr - int" %}
8540   opcode(0x2B);
8541   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8542   ins_pipe(ialu_reg_reg);
8543 %}
8544 
// Int negate of a register: matches (SubI zero dst) with the immI0 operand
// and emits "negl $dst" (F7 /3).  Flags declared killed.
8545 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8546 %{
8547   match(Set dst (SubI zero dst));
8548   effect(KILL cr);
8549 
8550   format %{ "negl    $dst\t# int" %}
8551   opcode(0xF7, 0x03);  // Opcode F7 /3
8552   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8553   ins_pipe(ialu_reg);
8554 %}
8555 
// Int negate in memory: matches a StoreI to dst of (SubI zero (LoadI dst))
// and emits "negl $dst" (F7 /3).  Flags declared killed.
8556 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8557 %{
8558   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8559   effect(KILL cr);
8560 
8561   format %{ "negl    $dst\t# int" %}
8562   opcode(0xF7, 0x03);  // Opcode F7 /3
  // "secondary" refers to the second opcode() value (0x03), used as the
  // /3 opcode extension.
8563   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  // NOTE(review): uses the register pipeline class (ialu_reg) although the
  // operand is memory -- confirm this is intended.
8564   ins_pipe(ialu_reg);
8565 %}
8566 
// Long negate of a register: matches (SubL zero dst) with the immL0 operand
// and emits "negq $dst" (F7 /3 with the wide REX prefix).  Flags killed.
8567 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8568 %{
8569   match(Set dst (SubL zero dst));
8570   effect(KILL cr);
8571 
8572   format %{ "negq    $dst\t# long" %}
8573   opcode(0xF7, 0x03);  // Opcode F7 /3
8574   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8575   ins_pipe(ialu_reg);
8576 %}
8577 
// Long negate in memory: matches a StoreL to dst of (SubL zero (LoadL dst))
// and emits "negq $dst" (F7 /3 with the wide REX prefix).  Flags killed.
8578 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8579 %{
8580   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8581   effect(KILL cr);
8582 
8583   format %{ "negq    $dst\t# long" %}
8584   opcode(0xF7, 0x03);  // Opcode F7 /3
  // "secondary" refers to the second opcode() value (0x03), used as the
  // /3 opcode extension.
8585   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  // NOTE(review): uses the register pipeline class (ialu_reg) although the
  // operand is memory -- confirm this is intended.
8586   ins_pipe(ialu_reg);
8587 %}
8588 
8589 
8590 //----------Multiplication/Division Instructions-------------------------------
8591 // Integer Multiplication Instructions
8592 // Multiply Register
8593 
// Int multiply, register *= register (two-operand "imull", 0x0F 0xAF).
// Flags declared killed.
8594 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8595 %{
8596   match(Set dst (MulI dst src));
8597   effect(KILL cr);
8598 
8599   ins_cost(300);
8600   format %{ "imull   $dst, $src\t# int" %}
8601   opcode(0x0F, 0xAF);
8602   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8603   ins_pipe(ialu_reg_reg_alu0);
8604 %}
8605 
// Int multiply by an immediate, three-operand form: dst = src * imm
// ("imull $dst, $src, $imm", opcode 0x69).  dst need not equal src.
8606 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8607 %{
8608   match(Set dst (MulI src imm));
8609   effect(KILL cr);
8610 
8611   ins_cost(300);
8612   format %{ "imull   $dst, $src, $imm\t# int" %}
8613   opcode(0x69); /* 69 /r id */
8614   ins_encode(REX_reg_reg(dst, src),
8615              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8616   ins_pipe(ialu_reg_reg_alu0);
8617 %}
8618 
// Int multiply, register *= memory: folds the LoadI of the second operand
// into the multiply ("imull", 0x0F 0xAF).  Flags declared killed.
8619 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8620 %{
8621   match(Set dst (MulI dst (LoadI src)));
8622   effect(KILL cr);
8623 
8624   ins_cost(350);
8625   format %{ "imull   $dst, $src\t# int" %}
8626   opcode(0x0F, 0xAF);
8627   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8628   ins_pipe(ialu_reg_mem_alu0);
8629 %}
8630 
// Int multiply of a loaded value by an immediate, three-operand form:
// dst = [src] * imm ("imull $dst, $src, $imm", opcode 0x69).
8631 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8632 %{
8633   match(Set dst (MulI (LoadI src) imm));
8634   effect(KILL cr);
8635 
8636   ins_cost(300);
8637   format %{ "imull   $dst, $src, $imm\t# int" %}
8638   opcode(0x69); /* 69 /r id */
8639   ins_encode(REX_reg_mem(dst, src),
8640              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8641   ins_pipe(ialu_reg_mem_alu0);
8642 %}
8643 
// Long multiply, register *= register (two-operand "imulq", 0x0F 0xAF with
// the wide REX prefix).  Flags declared killed.
8644 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8645 %{
8646   match(Set dst (MulL dst src));
8647   effect(KILL cr);
8648 
8649   ins_cost(300);
8650   format %{ "imulq   $dst, $src\t# long" %}
8651   opcode(0x0F, 0xAF);
8652   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8653   ins_pipe(ialu_reg_reg_alu0);
8654 %}
8655 
// Long multiply by an immL32 immediate, three-operand form: dst = src * imm
// ("imulq $dst, $src, $imm", opcode 0x69).  dst need not equal src.
8656 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8657 %{
8658   match(Set dst (MulL src imm));
8659   effect(KILL cr);
8660 
8661   ins_cost(300);
8662   format %{ "imulq   $dst, $src, $imm\t# long" %}
8663   opcode(0x69); /* 69 /r id */
8664   ins_encode(REX_reg_reg_wide(dst, src),
8665              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8666   ins_pipe(ialu_reg_reg_alu0);
8667 %}
8668 
8669 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8670 %{
       // Long multiply of a register by a long loaded from memory:
       // dst = dst * [src] (two-address imulq, opcode 0F AF).  Flags are killed.
       // Costed slightly higher (350) than the register forms (300).
8671   match(Set dst (MulL dst (LoadL src)));
8672   effect(KILL cr);
8673 
8674   ins_cost(350);
8675   format %{ "imulq   $dst, $src\t# long" %}
8676   opcode(0x0F, 0xAF);
8677   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8678   ins_pipe(ialu_reg_mem_alu0);
8679 %}
8680 
8681 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8682 %{
       // Long multiply of a memory operand by a constant of class immL32:
       // dst = [src] * imm, folding the LoadL into the three-operand imulq.
       // The flags register is declared killed.
8683   match(Set dst (MulL (LoadL src) imm));
8684   effect(KILL cr);
8685 
8686   ins_cost(300);
8687   format %{ "imulq   $dst, $src, $imm\t# long" %}
8688   opcode(0x69); /* 69 /r id */
8689   ins_encode(REX_reg_mem_wide(dst, src),
8690              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8691   ins_pipe(ialu_reg_mem_alu0);
8692 %}
8693 
8694 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8695 %{
       // High half of a signed long multiply (MulHiL src rax).
       // Uses the one-operand imulq (F7 /5), whose product is shown in the format
       // as RDX:RAX; hence dst is of class rdx_RegL, one input is of class
       // rax_RegL, and src comes from no_rax_RegL.  rax is both read and
       // clobbered (USE_KILL); the flags are killed as well.
8696   match(Set dst (MulHiL src rax));
8697   effect(USE_KILL rax, KILL cr);
8698 
8699   ins_cost(300);
8700   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8701   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8702   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8703   ins_pipe(ialu_reg_reg_alu0);
8704 %}
8705 
8706 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8707                    rFlagsReg cr)
8708 %{
       // Signed int division: rax = rax / div.  The dividend/quotient operand is
       // of class rax_RegI, rdx is declared killed, and the divisor comes from
       // no_rax_rdx_RegI.  Per the format string below, the emitted sequence
       // skips idivl when rax == 0x80000000 and div == -1 (rax is left untouched
       // and rdx is zeroed); every other input goes through cdql + idivl.
8709   match(Set rax (DivI rax div));
8710   effect(KILL rdx, KILL cr);
8711 
8712   ins_cost(30*100+10*100); // XXX
8713   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8714             "jne,s   normal\n\t"
8715             "xorl    rdx, rdx\n\t"
8716             "cmpl    $div, -1\n\t"
8717             "je,s    done\n"
8718     "normal: cdql\n\t"
8719             "idivl   $div\n"
8720     "done:"        %}
8721   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdql_enc is defined outside this chunk; presumably it
       // emits the guard sequence + cdql shown in the format -- confirm.
8722   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8723   ins_pipe(ialu_reg_reg_alu0);
8724 %}
8725 
8726 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8727                    rFlagsReg cr)
8728 %{
       // Signed long division: rax = rax / div.  The dividend/quotient operand is
       // of class rax_RegL, rdx is declared killed, and the divisor comes from
       // no_rax_rdx_RegL.  Per the format string below, the emitted sequence
       // skips idivq when rax == 0x8000000000000000 and div == -1 (rax is left
       // untouched and rdx is zeroed); otherwise it runs cdqq + idivq.
8729   match(Set rax (DivL rax div));
8730   effect(KILL rdx, KILL cr);
8731 
8732   ins_cost(30*100+10*100); // XXX
8733   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8734             "cmpq    rax, rdx\n\t"
8735             "jne,s   normal\n\t"
8736             "xorl    rdx, rdx\n\t"
8737             "cmpq    $div, -1\n\t"
8738             "je,s    done\n"
8739     "normal: cdqq\n\t"
8740             "idivq   $div\n"
8741     "done:"        %}
8742   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdqq_enc is defined outside this chunk; presumably it
       // emits the guard sequence + cdqq shown in the format -- confirm.
8743   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8744   ins_pipe(ialu_reg_reg_alu0);
8745 %}
8746 
8747 // Integer DIVMOD with Register, both quotient and mod results
     // Matches the combined DivModI node (no "Set": the rule has no single
     // result operand).  Same guarded cdql/idivl sequence as the plain int
     // divide, but only the flags are declared killed here; rax and rdx are
     // both listed as operands.
8748 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8749                              rFlagsReg cr)
8750 %{
8751   match(DivModI rax div);
8752   effect(KILL cr);
8753 
8754   ins_cost(30*100+10*100); // XXX
8755   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8756             "jne,s   normal\n\t"
8757             "xorl    rdx, rdx\n\t"
8758             "cmpl    $div, -1\n\t"
8759             "je,s    done\n"
8760     "normal: cdql\n\t"
8761             "idivl   $div\n"
8762     "done:"        %}
8763   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8764   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8765   ins_pipe(pipe_slow);
8766 %}
8767 
8768 // Long DIVMOD with Register, both quotient and mod results
     // Matches the combined DivModL node (no "Set": the rule has no single
     // result operand).  Same guarded cdqq/idivq sequence as the plain long
     // divide, but only the flags are declared killed here; rax and rdx are
     // both listed as operands.
8769 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8770                              rFlagsReg cr)
8771 %{
8772   match(DivModL rax div);
8773   effect(KILL cr);
8774 
8775   ins_cost(30*100+10*100); // XXX
8776   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8777             "cmpq    rax, rdx\n\t"
8778             "jne,s   normal\n\t"
8779             "xorl    rdx, rdx\n\t"
8780             "cmpq    $div, -1\n\t"
8781             "je,s    done\n"
8782     "normal: cdqq\n\t"
8783             "idivq   $div\n"
8784     "done:"        %}
8785   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8786   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8787   ins_pipe(pipe_slow);
8788 %}
8789 
8790 //----------- DivL-By-Constant-Expansions--------------------------------------
8791 // DivI cases are handled by the compiler
8792 
8793 // Magic constant, reciprocal of 10
     // 0x6666666666666667 == ceil(2^66 / 10).  This instruct has no match rule;
     // it only defines dst (effect DEF) and is instantiated from the divL_10
     // expand rule.  The format string now prints the full 16-digit constant
     // that is actually encoded (it previously dropped one '6').
8794 instruct loadConL_0x6666666666666667(rRegL dst)
8795 %{
8796   effect(DEF dst);
8797 
8798   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8799   ins_encode(load_immL(dst, 0x6666666666666667));
8800   ins_pipe(ialu_reg);
8801 %}
8802 
8803 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8804 %{
       // Expand-only helper (no match rule): one-operand imulq (F7 /5) of src by
       // rax.  dst (class rdx_RegL) is defined, src is only read, rax is read and
       // clobbered, and the flags are killed.  Instantiated by divL_10.
8805   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8806 
8807   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8808   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8809   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8810   ins_pipe(ialu_reg_reg_alu0);
8811 %}
8812 
8813 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8814 %{
       // Expand-only helper (no match rule): arithmetic right shift of a long
       // register by the fixed count 63 (0x3F), in place (USE_DEF dst).
       // Flags are killed.  Instantiated by divL_10.
8815   effect(USE_DEF dst, KILL cr);
8816 
8817   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8818   opcode(0xC1, 0x7); /* C1 /7 ib */
8819   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8820   ins_pipe(ialu_reg);
8821 %}
8822 
8823 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8824 %{
       // Expand-only helper (no match rule): arithmetic right shift of a long
       // register by the fixed count 2, in place (USE_DEF dst).
       // Flags are killed.  Instantiated by divL_10.
8825   effect(USE_DEF dst, KILL cr);
8826 
8827   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8828   opcode(0xC1, 0x7); /* C1 /7 ib */
8829   ins_encode(reg_opc_imm_wide(dst, 0x2));
8830   ins_pipe(ialu_reg);
8831 %}
8832 
8833 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8834 %{
       // Long division by a constant of class immL10, done without idivq:
       //   dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
       // i.e. multiply by the scaled reciprocal, shift, then subtract the sign
       // word of src.  The rule is pure expansion; each step is one of the
       // helper instructs named below.
       // NOTE(review): immL10 is defined outside this chunk; presumably the
       // long constant 10 -- confirm.
       // NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so the expansion
       // overwrites the src operand -- confirm this is intended for the input.
8835   match(Set dst (DivL src div));
8836 
8837   ins_cost((5+8)*100);
8838   expand %{
8839     rax_RegL rax;                     // Killed temp
8840     rFlagsReg cr;                     // Killed
8841     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8842     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8843     sarL_rReg_63(src, cr);            // sarq  src, 63
8844     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8845     subL_rReg(dst, src, cr);          // subq  rdx, src
8846   %}
8847 %}
8848 
8849 //-----------------------------------------------------------------------------
8850 
8851 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8852                    rFlagsReg cr)
8853 %{
       // Signed int remainder: rdx = rax % div.  Same guarded cdql/idivl
       // sequence as the int divide, but here the result operand is rdx and rax
       // is declared killed.  Per the format string, when rax == 0x80000000 and
       // div == -1 the idivl is skipped and rdx is left at zero.
8854   match(Set rdx (ModI rax div));
8855   effect(KILL rax, KILL cr);
8856 
8857   ins_cost(300); // XXX
8858   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8859             "jne,s   normal\n\t"
8860             "xorl    rdx, rdx\n\t"
8861             "cmpl    $div, -1\n\t"
8862             "je,s    done\n"
8863     "normal: cdql\n\t"
8864             "idivl   $div\n"
8865     "done:"        %}
8866   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8867   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8868   ins_pipe(ialu_reg_reg_alu0);
8869 %}
8870 
8871 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8872                    rFlagsReg cr)
8873 %{
       // Signed long remainder: rdx = rax % div.  Same guarded cdqq/idivq
       // sequence as the long divide, but here the result operand is rdx and rax
       // is declared killed.  Per the format string, when
       // rax == 0x8000000000000000 and div == -1 the idivq is skipped and rdx is
       // left at zero.
8874   match(Set rdx (ModL rax div));
8875   effect(KILL rax, KILL cr);
8876 
8877   ins_cost(300); // XXX
8878   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8879             "cmpq    rax, rdx\n\t"
8880             "jne,s   normal\n\t"
8881             "xorl    rdx, rdx\n\t"
8882             "cmpq    $div, -1\n\t"
8883             "je,s    done\n"
8884     "normal: cdqq\n\t"
8885             "idivq   $div\n"
8886     "done:"        %}
8887   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8888   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8889   ins_pipe(ialu_reg_reg_alu0);
8890 %}
8891 
8892 // Integer Shift Instructions
8893 // Shift Left by one
     // Int register form: dst = dst << shift, with shift of class immI1.
     // Uses the D1 /4 opcode, so the encoding carries no immediate operand.
     // Flags are killed.
8894 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8895 %{
8896   match(Set dst (LShiftI dst shift));
8897   effect(KILL cr);
8898 
8899   format %{ "sall    $dst, $shift" %}
8900   opcode(0xD1, 0x4); /* D1 /4 */
8901   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8902   ins_pipe(ialu_reg);
8903 %}
8904 
8905 // Shift Left by one
     // Int memory form: matches load / shift-left / store back to the same
     // memory operand and performs it as one read-modify-write sall (D1 /4).
     // Flags are killed.
8906 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8907 %{
8908   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8909   effect(KILL cr);
8910 
8911   format %{ "sall    $dst, $shift\t" %}
8912   opcode(0xD1, 0x4); /* D1 /4 */
8913   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8914   ins_pipe(ialu_mem_imm);
8915 %}
8916 
8917 // Shift Left by 8-bit immediate
     // Int register form: dst = dst << shift, with shift of class immI8,
     // encoded with the C1 /4 ib opcode.  Flags are killed.
8918 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8919 %{
8920   match(Set dst (LShiftI dst shift));
8921   effect(KILL cr);
8922 
8923   format %{ "sall    $dst, $shift" %}
8924   opcode(0xC1, 0x4); /* C1 /4 ib */
8925   ins_encode(reg_opc_imm(dst, shift));
8926   ins_pipe(ialu_reg);
8927 %}
8928 
8929 // Shift Left by 8-bit immediate
     // Int memory form: matches load / shift-left / store back to the same
     // memory operand and performs it as one read-modify-write sall (C1 /4 ib).
     // Flags are killed.
8930 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8931 %{
8932   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8933   effect(KILL cr);
8934 
8935   format %{ "sall    $dst, $shift" %}
8936   opcode(0xC1, 0x4); /* C1 /4 ib */
8937   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8938   ins_pipe(ialu_mem_imm);
8939 %}
8940 
8941 // Shift Left by variable
     // Int register form: dst = dst << shift, where the count operand is of
     // class rcx_RegI (the D3 /4 opcode takes its count from CL, so the count
     // is not part of the encoding).  Flags are killed.
8942 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8943 %{
8944   match(Set dst (LShiftI dst shift));
8945   effect(KILL cr);
8946 
8947   format %{ "sall    $dst, $shift" %}
8948   opcode(0xD3, 0x4); /* D3 /4 */
8949   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8950   ins_pipe(ialu_reg_reg);
8951 %}
8952 
8953 // Shift Left by variable
     // Int memory form: matches load / shift-left / store back to the same
     // memory operand, with the count operand of class rcx_RegI (D3 /4).
     // Flags are killed.
8954 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8955 %{
8956   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8957   effect(KILL cr);
8958 
8959   format %{ "sall    $dst, $shift" %}
8960   opcode(0xD3, 0x4); /* D3 /4 */
8961   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8962   ins_pipe(ialu_mem_reg);
8963 %}
8964 
8965 // Arithmetic shift right by one
     // Int register form: dst = dst >> shift (RShiftI), with shift of class
     // immI1.  Uses the D1 /7 opcode, so the encoding carries no immediate
     // operand.  Flags are killed.
8966 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8967 %{
8968   match(Set dst (RShiftI dst shift));
8969   effect(KILL cr);
8970 
8971   format %{ "sarl    $dst, $shift" %}
8972   opcode(0xD1, 0x7); /* D1 /7 */
8973   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8974   ins_pipe(ialu_reg);
8975 %}
8976 
8977 // Arithmetic shift right by one
     // Int memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand as one read-modify-write sarl (D1 /7).
     // Flags are killed.
8978 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8979 %{
8980   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8981   effect(KILL cr);
8982 
8983   format %{ "sarl    $dst, $shift" %}
8984   opcode(0xD1, 0x7); /* D1 /7 */
8985   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8986   ins_pipe(ialu_mem_imm);
8987 %}
8988 
8989 // Arithmetic Shift Right by 8-bit immediate
     // Int register form: dst = dst >> shift (RShiftI), with shift of class
     // immI8, encoded with the C1 /7 ib opcode.  Flags are killed.
8990 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8991 %{
8992   match(Set dst (RShiftI dst shift));
8993   effect(KILL cr);
8994 
8995   format %{ "sarl    $dst, $shift" %}
8996   opcode(0xC1, 0x7); /* C1 /7 ib */
8997   ins_encode(reg_opc_imm(dst, shift));
       // This instruction has no memory operand, so it uses the register pipe
       // class, the same one as the sibling register/immediate shifts
       // salI_rReg_imm and shrI_rReg_imm (it previously named ialu_mem_imm).
8998   ins_pipe(ialu_reg);
8999 %}
9000 
9001 // Arithmetic Shift Right by 8-bit immediate
     // Int memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand as one read-modify-write sarl (C1 /7 ib).
     // Flags are killed.
9002 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9003 %{
9004   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9005   effect(KILL cr);
9006 
9007   format %{ "sarl    $dst, $shift" %}
9008   opcode(0xC1, 0x7); /* C1 /7 ib */
9009   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9010   ins_pipe(ialu_mem_imm);
9011 %}
9012 
9013 // Arithmetic Shift Right by variable
     // Int register form: dst = dst >> shift (RShiftI), where the count operand
     // is of class rcx_RegI (D3 /7 takes its count from CL).  Flags are killed.
9014 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9015 %{
9016   match(Set dst (RShiftI dst shift));
9017   effect(KILL cr);
9018 
9019   format %{ "sarl    $dst, $shift" %}
9020   opcode(0xD3, 0x7); /* D3 /7 */
9021   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9022   ins_pipe(ialu_reg_reg);
9023 %}
9024 
9025 // Arithmetic Shift Right by variable
     // Int memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand, with the count operand of class rcx_RegI
     // (D3 /7).  Flags are killed.
9026 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9027 %{
9028   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9029   effect(KILL cr);
9030 
9031   format %{ "sarl    $dst, $shift" %}
9032   opcode(0xD3, 0x7); /* D3 /7 */
9033   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9034   ins_pipe(ialu_mem_reg);
9035 %}
9036 
9037 // Logical shift right by one
     // Int register form: dst = dst >>> shift (URShiftI), with shift of class
     // immI1.  Uses the D1 /5 opcode, so the encoding carries no immediate
     // operand.  Flags are killed.
9038 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9039 %{
9040   match(Set dst (URShiftI dst shift));
9041   effect(KILL cr);
9042 
9043   format %{ "shrl    $dst, $shift" %}
9044   opcode(0xD1, 0x5); /* D1 /5 */
9045   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9046   ins_pipe(ialu_reg);
9047 %}
9048 
9049 // Logical shift right by one
     // Int memory form: matches load / logical-shift-right / store back to the
     // same memory operand as one read-modify-write shrl (D1 /5).
     // Flags are killed.
9050 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9051 %{
9052   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9053   effect(KILL cr);
9054 
9055   format %{ "shrl    $dst, $shift" %}
9056   opcode(0xD1, 0x5); /* D1 /5 */
9057   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9058   ins_pipe(ialu_mem_imm);
9059 %}
9060 
9061 // Logical Shift Right by 8-bit immediate
     // Int register form: dst = dst >>> shift (URShiftI), with shift of class
     // immI8, encoded with the C1 /5 ib opcode.  Flags are killed.
9062 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9063 %{
9064   match(Set dst (URShiftI dst shift));
9065   effect(KILL cr);
9066 
9067   format %{ "shrl    $dst, $shift" %}
9068   opcode(0xC1, 0x5); /* C1 /5 ib */
9069   ins_encode(reg_opc_imm(dst, shift));
9070   ins_pipe(ialu_reg);
9071 %}
9072 
9073 // Logical Shift Right by 8-bit immediate
     // Int memory form: matches load / logical-shift-right / store back to the
     // same memory operand as one read-modify-write shrl (C1 /5 ib).
     // Flags are killed.
9074 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9075 %{
9076   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9077   effect(KILL cr);
9078 
9079   format %{ "shrl    $dst, $shift" %}
9080   opcode(0xC1, 0x5); /* C1 /5 ib */
9081   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9082   ins_pipe(ialu_mem_imm);
9083 %}
9084 
9085 // Logical Shift Right by variable
     // Int register form: dst = dst >>> shift (URShiftI), where the count
     // operand is of class rcx_RegI (D3 /5 takes its count from CL).
     // Flags are killed.
9086 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9087 %{
9088   match(Set dst (URShiftI dst shift));
9089   effect(KILL cr);
9090 
9091   format %{ "shrl    $dst, $shift" %}
9092   opcode(0xD3, 0x5); /* D3 /5 */
9093   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9094   ins_pipe(ialu_reg_reg);
9095 %}
9096 
9097 // Logical Shift Right by variable
     // Int memory form: matches load / logical-shift-right / store back to the
     // same memory operand, with the count operand of class rcx_RegI (D3 /5).
     // Flags are killed.
9098 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9099 %{
9100   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9101   effect(KILL cr);
9102 
9103   format %{ "shrl    $dst, $shift" %}
9104   opcode(0xD3, 0x5); /* D3 /5 */
9105   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9106   ins_pipe(ialu_mem_reg);
9107 %}
9108 
9109 // Long Shift Instructions
9110 // Shift Left by one
     // Long register form: dst = dst << shift (LShiftL), with an int count of
     // class immI1.  Uses the D1 /4 opcode with the wide REX helper, so the
     // encoding carries no immediate operand.  Flags are killed.
9111 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9112 %{
9113   match(Set dst (LShiftL dst shift));
9114   effect(KILL cr);
9115 
9116   format %{ "salq    $dst, $shift" %}
9117   opcode(0xD1, 0x4); /* D1 /4 */
9118   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9119   ins_pipe(ialu_reg);
9120 %}
9121 
9122 // Shift Left by one
     // Long memory form: matches load / shift-left / store back to the same
     // memory operand as one read-modify-write salq (D1 /4).  Flags are killed.
9123 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9124 %{
9125   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9126   effect(KILL cr);
9127 
9128   format %{ "salq    $dst, $shift" %}
9129   opcode(0xD1, 0x4); /* D1 /4 */
9130   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9131   ins_pipe(ialu_mem_imm);
9132 %}
9133 
9134 // Shift Left by 8-bit immediate
     // Long register form: dst = dst << shift (LShiftL), with an int count of
     // class immI8, encoded with the C1 /4 ib opcode.  Flags are killed.
9135 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9136 %{
9137   match(Set dst (LShiftL dst shift));
9138   effect(KILL cr);
9139 
9140   format %{ "salq    $dst, $shift" %}
9141   opcode(0xC1, 0x4); /* C1 /4 ib */
9142   ins_encode(reg_opc_imm_wide(dst, shift));
9143   ins_pipe(ialu_reg);
9144 %}
9145 
9146 // Shift Left by 8-bit immediate
     // Long memory form: matches load / shift-left / store back to the same
     // memory operand as one read-modify-write salq (C1 /4 ib).
     // Flags are killed.
9147 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9148 %{
9149   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9150   effect(KILL cr);
9151 
9152   format %{ "salq    $dst, $shift" %}
9153   opcode(0xC1, 0x4); /* C1 /4 ib */
9154   ins_encode(REX_mem_wide(dst), OpcP,
9155              RM_opc_mem(secondary, dst), Con8or32(shift));
9156   ins_pipe(ialu_mem_imm);
9157 %}
9158 
9159 // Shift Left by variable
     // Long register form: dst = dst << shift (LShiftL).  The count is an int
     // operand of class rcx_RegI (D3 /4 takes its count from CL).
     // Flags are killed.
9160 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9161 %{
9162   match(Set dst (LShiftL dst shift));
9163   effect(KILL cr);
9164 
9165   format %{ "salq    $dst, $shift" %}
9166   opcode(0xD3, 0x4); /* D3 /4 */
9167   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9168   ins_pipe(ialu_reg_reg);
9169 %}
9170 
9171 // Shift Left by variable
     // Long memory form: matches load / shift-left / store back to the same
     // memory operand, with the count operand of class rcx_RegI (D3 /4).
     // Flags are killed.
9172 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9173 %{
9174   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9175   effect(KILL cr);
9176 
9177   format %{ "salq    $dst, $shift" %}
9178   opcode(0xD3, 0x4); /* D3 /4 */
9179   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9180   ins_pipe(ialu_mem_reg);
9181 %}
9182 
9183 // Arithmetic shift right by one
     // Long register form: dst = dst >> shift (RShiftL), with an int count of
     // class immI1.  Uses the D1 /7 opcode, so the encoding carries no
     // immediate operand.  Flags are killed.
9184 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9185 %{
9186   match(Set dst (RShiftL dst shift));
9187   effect(KILL cr);
9188 
9189   format %{ "sarq    $dst, $shift" %}
9190   opcode(0xD1, 0x7); /* D1 /7 */
9191   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9192   ins_pipe(ialu_reg);
9193 %}
9194 
9195 // Arithmetic shift right by one
     // Long memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand as one read-modify-write sarq (D1 /7).
     // Flags are killed.
9196 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9197 %{
9198   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9199   effect(KILL cr);
9200 
9201   format %{ "sarq    $dst, $shift" %}
9202   opcode(0xD1, 0x7); /* D1 /7 */
9203   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9204   ins_pipe(ialu_mem_imm);
9205 %}
9206 
9207 // Arithmetic Shift Right by 8-bit immediate
     // Long register form: dst = dst >> shift (RShiftL), with an int count of
     // class immI8, encoded with the C1 /7 ib opcode.  Flags are killed.
9208 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9209 %{
9210   match(Set dst (RShiftL dst shift));
9211   effect(KILL cr);
9212 
9213   format %{ "sarq    $dst, $shift" %}
9214   opcode(0xC1, 0x7); /* C1 /7 ib */
9215   ins_encode(reg_opc_imm_wide(dst, shift));
       // This instruction has no memory operand, so it uses the register pipe
       // class, the same one as the sibling register/immediate shifts
       // salL_rReg_imm and shrL_rReg_imm (it previously named ialu_mem_imm).
9216   ins_pipe(ialu_reg);
9217 %}
9218 
9219 // Arithmetic Shift Right by 8-bit immediate
     // Long memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand as one read-modify-write sarq (C1 /7 ib).
     // Flags are killed.
9220 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9221 %{
9222   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9223   effect(KILL cr);
9224 
9225   format %{ "sarq    $dst, $shift" %}
9226   opcode(0xC1, 0x7); /* C1 /7 ib */
9227   ins_encode(REX_mem_wide(dst), OpcP,
9228              RM_opc_mem(secondary, dst), Con8or32(shift));
9229   ins_pipe(ialu_mem_imm);
9230 %}
9231 
9232 // Arithmetic Shift Right by variable
     // Long register form: dst = dst >> shift (RShiftL).  The count is an int
     // operand of class rcx_RegI (D3 /7 takes its count from CL).
     // Flags are killed.
9233 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9234 %{
9235   match(Set dst (RShiftL dst shift));
9236   effect(KILL cr);
9237 
9238   format %{ "sarq    $dst, $shift" %}
9239   opcode(0xD3, 0x7); /* D3 /7 */
9240   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9241   ins_pipe(ialu_reg_reg);
9242 %}
9243 
9244 // Arithmetic Shift Right by variable
     // Long memory form: matches load / arithmetic-shift-right / store back to
     // the same memory operand, with the count operand of class rcx_RegI
     // (D3 /7).  Flags are killed.
9245 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9246 %{
9247   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9248   effect(KILL cr);
9249 
9250   format %{ "sarq    $dst, $shift" %}
9251   opcode(0xD3, 0x7); /* D3 /7 */
9252   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9253   ins_pipe(ialu_mem_reg);
9254 %}
9255 
9256 // Logical shift right by one
     // Long register form: dst = dst >>> shift (URShiftL), with an int count of
     // class immI1.  Uses the D1 /5 opcode, so the encoding carries no
     // immediate operand.  Flags are killed.
9257 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9258 %{
9259   match(Set dst (URShiftL dst shift));
9260   effect(KILL cr);
9261 
9262   format %{ "shrq    $dst, $shift" %}
9263   opcode(0xD1, 0x5); /* D1 /5 */
9264   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9265   ins_pipe(ialu_reg);
9266 %}
9267 
9268 // Logical shift right by one
     // Long memory form: matches load / logical-shift-right / store back to the
     // same memory operand as one read-modify-write shrq (D1 /5).
     // Flags are killed.
9269 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9270 %{
9271   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9272   effect(KILL cr);
9273 
9274   format %{ "shrq    $dst, $shift" %}
9275   opcode(0xD1, 0x5); /* D1 /5 */
9276   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9277   ins_pipe(ialu_mem_imm);
9278 %}
9279 
9280 // Logical Shift Right by 8-bit immediate
     // Long register form: dst = dst >>> shift (URShiftL), with an int count of
     // class immI8, encoded with the C1 /5 ib opcode.  Flags are killed.
9281 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9282 %{
9283   match(Set dst (URShiftL dst shift));
9284   effect(KILL cr);
9285 
9286   format %{ "shrq    $dst, $shift" %}
9287   opcode(0xC1, 0x5); /* C1 /5 ib */
9288   ins_encode(reg_opc_imm_wide(dst, shift));
9289   ins_pipe(ialu_reg);
9290 %}
9291 
9292 
9293 // Logical Shift Right by 8-bit immediate
     // Long memory form: matches load / logical-shift-right / store back to the
     // same memory operand as one read-modify-write shrq (C1 /5 ib).
     // Flags are killed.
9294 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9295 %{
9296   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9297   effect(KILL cr);
9298 
9299   format %{ "shrq    $dst, $shift" %}
9300   opcode(0xC1, 0x5); /* C1 /5 ib */
9301   ins_encode(REX_mem_wide(dst), OpcP,
9302              RM_opc_mem(secondary, dst), Con8or32(shift));
9303   ins_pipe(ialu_mem_imm);
9304 %}
9305 
9306 // Logical Shift Right by variable
     // Long register form: dst = dst >>> shift (URShiftL).  The count is an int
     // operand of class rcx_RegI (D3 /5 takes its count from CL).
     // Flags are killed.
9307 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9308 %{
9309   match(Set dst (URShiftL dst shift));
9310   effect(KILL cr);
9311 
9312   format %{ "shrq    $dst, $shift" %}
9313   opcode(0xD3, 0x5); /* D3 /5 */
9314   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9315   ins_pipe(ialu_reg_reg);
9316 %}
9317 
9318 // Logical Shift Right by variable
     // Long memory form: matches load / logical-shift-right / store back to the
     // same memory operand, with the count operand of class rcx_RegI (D3 /5).
     // Flags are killed.
9319 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9320 %{
9321   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9322   effect(KILL cr);
9323 
9324   format %{ "shrq    $dst, $shift" %}
9325   opcode(0xD3, 0x5); /* D3 /5 */
9326   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9327   ins_pipe(ialu_mem_reg);
9328 %}
9329 
9330 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9331 // This idiom is used by the compiler for the i2b bytecode.
     // The two shifts are replaced by a single sign-extending byte move
     // (movsbl, 0F BE).  No flags effect is declared for this rule.
9332 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9333 %{
9334   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9335 
9336   format %{ "movsbl  $dst, $src\t# i2b" %}
9337   opcode(0x0F, 0xBE);
9338   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9339   ins_pipe(ialu_reg_reg);
9340 %}
9341 
9342 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9343 // This idiom is used by the compiler for the i2s bytecode.
     // The two shifts are replaced by a single sign-extending word move
     // (movswl, 0F BF).  No flags effect is declared for this rule.
9344 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9345 %{
9346   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9347 
9348   format %{ "movswl  $dst, $src\t# i2s" %}
9349   opcode(0x0F, 0xBF);
9350   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9351   ins_pipe(ialu_reg_reg);
9352 %}
9353 
9354 // ROL/ROR instructions
9355 
9356 // ROL expand
9357 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate an int register left by one
       // in place (D1 /0).  Flags are killed.  Instantiated by rolI_rReg_i1.
9358   effect(KILL cr, USE_DEF dst);
9359 
9360   format %{ "roll    $dst" %}
9361   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9362   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9363   ins_pipe(ialu_reg);
9364 %}
9365 
9366 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate an int register left by an
       // immI8 count in place (C1 /0 ib).  Flags are killed.
       // Instantiated by rolI_rReg_i8.
9367   effect(USE_DEF dst, USE shift, KILL cr);
9368 
9369   format %{ "roll    $dst, $shift" %}
9370   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9371   ins_encode( reg_opc_imm(dst, shift) );
9372   ins_pipe(ialu_reg);
9373 %}
9374 
9375 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9376 %{
       // Expand-only helper (no match rule): rotate an int register left by a
       // variable count (D3 /0).  The count operand is of class rcx_RegI and dst
       // comes from no_rcx_RegI, keeping the two apart.  Flags are killed.
       // Instantiated by rolI_rReg_Var_C0 and rolI_rReg_Var_C32.
9377   effect(USE_DEF dst, USE shift, KILL cr);
9378 
9379   format %{ "roll    $dst, $shift" %}
9380   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9381   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9382   ins_pipe(ialu_reg_reg);
9383 %}
9384 // end of ROL expand
9385 
9386 // Rotate Left by one
     // Recognizes (dst << lshift) | (dst >>> rshift) on ints, with lshift of
     // class immI1 and rshift of class immI_M1, and expands to rolI_rReg_imm1.
     // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably
     // the constants 1 and -1 (-1 acting as 31 once the int shift count is
     // masked) -- confirm.
9387 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9388 %{
9389   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9390 
9391   expand %{
9392     rolI_rReg_imm1(dst, cr);
9393   %}
9394 %}
9395 
9396 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) on ints and expands to
     // rolI_rReg_imm8 with the left-shift count.  The predicate reads the two
     // shift constants (second input of each child of the OrI) and accepts the
     // pattern only when their sum is a multiple of 32 (low five bits zero).
9397 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9398 %{
9399   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9400   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9401 
9402   expand %{
9403     rolI_rReg_imm8(dst, lshift, cr);
9404   %}
9405 %}
9406 
9407 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (zero - shift)) on ints, where zero
     // is of class immI0, and expands to rolI_rReg_CL.
     // NOTE(review): immI0 is defined outside this chunk; presumably the
     // constant 0 -- confirm.
9408 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9409 %{
9410   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9411 
9412   expand %{
9413     rolI_rReg_CL(dst, shift, cr);
9414   %}
9415 %}
9416 
9417 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (c32 - shift)) on ints, where c32 is
     // of class immI_32, and expands to rolI_rReg_CL.
     // NOTE(review): immI_32 is defined outside this chunk; presumably the
     // constant 32 -- confirm.
9418 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9419 %{
9420   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9421 
9422   expand %{
9423     rolI_rReg_CL(dst, shift, cr);
9424   %}
9425 %}
9426 
9427 // ROR expand
9428 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9429 %{
       // Expand-only helper (no match rule): rotate an int register right by one
       // in place (D1 /1).  Flags are killed.  Instantiated by rorI_rReg_i1.
9430   effect(USE_DEF dst, KILL cr);
9431 
9432   format %{ "rorl    $dst" %}
9433   opcode(0xD1, 0x1); /* D1 /1 */
9434   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9435   ins_pipe(ialu_reg);
9436 %}
9437 
9438 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9439 %{
       // Expand-only helper (no match rule): rotate an int register right by an
       // immI8 count in place (C1 /1 ib).  Flags are killed.
       // Instantiated by rorI_rReg_i8.
9440   effect(USE_DEF dst, USE shift, KILL cr);
9441 
9442   format %{ "rorl    $dst, $shift" %}
9443   opcode(0xC1, 0x1); /* C1 /1 ib */
9444   ins_encode(reg_opc_imm(dst, shift));
9445   ins_pipe(ialu_reg);
9446 %}
9447 
9448 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9449 %{
       // Expand-only helper (no match rule): rotate an int register right by a
       // variable count (D3 /1).  The count operand is of class rcx_RegI and dst
       // comes from no_rcx_RegI, keeping the two apart.  Flags are killed.
       // Instantiated by rorI_rReg_Var_C0 and rorI_rReg_Var_C32.
9450   effect(USE_DEF dst, USE shift, KILL cr);
9451 
9452   format %{ "rorl    $dst, $shift" %}
9453   opcode(0xD3, 0x1); /* D3 /1 */
9454   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9455   ins_pipe(ialu_reg_reg);
9456 %}
9457 // end of ROR expand
9458 
9459 // Rotate Right by one
     // Recognizes (dst >>> rshift) | (dst << lshift) on ints, with rshift of
     // class immI1 and lshift of class immI_M1, and expands to rorI_rReg_imm1.
     // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably
     // the constants 1 and -1 (-1 acting as 31 once the int shift count is
     // masked) -- confirm.
9460 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9461 %{
9462   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9463 
9464   expand %{
9465     rorI_rReg_imm1(dst, cr);
9466   %}
9467 %}
9468 
9469 // Rotate Right by 8-bit immediate
     // Recognizes (dst >>> rshift) | (dst << lshift) on ints and expands to
     // rorI_rReg_imm8 with the right-shift count.  The predicate reads the two
     // shift constants (second input of each child of the OrI) and accepts the
     // pattern only when their sum is a multiple of 32 (low five bits zero).
9470 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9471 %{
9472   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9473   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9474 
9475   expand %{
9476     rorI_rReg_imm8(dst, rshift, cr);
9477   %}
9478 %}
9479 
9480 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (zero - shift)) on ints, where zero
     // is of class immI0, and expands to rorI_rReg_CL.
     // NOTE(review): immI0 is defined outside this chunk; presumably the
     // constant 0 -- confirm.
9481 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9482 %{
9483   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9484 
9485   expand %{
9486     rorI_rReg_CL(dst, shift, cr);
9487   %}
9488 %}
9489 
9490 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (c32 - shift)) on ints, where c32 is
     // of class immI_32, and expands to rorI_rReg_CL.
     // NOTE(review): immI_32 is defined outside this chunk; presumably the
     // constant 32 -- confirm.
9491 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9492 %{
9493   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9494 
9495   expand %{
9496     rorI_rReg_CL(dst, shift, cr);
9497   %}
9498 %}
9499 
9500 // for long rotate
9501 // ROL expand
9502 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate a long register left by one
       // in place (D1 /0 with the wide REX helper).  Flags are killed.
       // Instantiated by rolL_rReg_i1.
9503   effect(USE_DEF dst, KILL cr);
9504 
9505   format %{ "rolq    $dst" %}
9506   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9507   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9508   ins_pipe(ialu_reg);
9509 %}
9510 
9511 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): rotate a long register left by an
       // immI8 count in place (C1 /0 ib).  Flags are killed.
       // Instantiated by rolL_rReg_i8.
9512   effect(USE_DEF dst, USE shift, KILL cr);
9513 
9514   format %{ "rolq    $dst, $shift" %}
9515   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9516   ins_encode( reg_opc_imm_wide(dst, shift) );
9517   ins_pipe(ialu_reg);
9518 %}
9519 
9520 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9521 %{
       // Expand-only helper (no match rule): rotate a long register left by a
       // variable count (D3 /0).  The count is an int operand of class rcx_RegI
       // and dst comes from no_rcx_RegL, keeping the two apart.
       // Flags are killed.
       // Instantiated by rolL_rReg_Var_C0 and rolL_rReg_Var_C64.
9522   effect(USE_DEF dst, USE shift, KILL cr);
9523 
9524   format %{ "rolq    $dst, $shift" %}
9525   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9526   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9527   ins_pipe(ialu_reg_reg);
9528 %}
9529 // end of ROL expand
9530 
9531 // Rotate Left by one
     // Recognizes (dst << lshift) | (dst >>> rshift) on longs, with lshift of
     // class immI1 and rshift of class immI_M1, and expands to rolL_rReg_imm1.
     // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably
     // the constants 1 and -1 (-1 acting as 63 once the long shift count is
     // masked) -- confirm.
9532 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9533 %{
9534   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9535 
9536   expand %{
9537     rolL_rReg_imm1(dst, cr);
9538   %}
9539 %}
9540 
9541 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) on longs and expands to
     // rolL_rReg_imm8 with the left-shift count.  The predicate reads the two
     // shift constants (second input of each child of the OrL) and accepts the
     // pattern only when their sum is a multiple of 64 (low six bits zero).
9542 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9543 %{
9544   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9545   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9546 
9547   expand %{
9548     rolL_rReg_imm8(dst, lshift, cr);
9549   %}
9550 %}
9551 
9552 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (zero - shift)) on longs, where the
     // count arithmetic is an int SubI and zero is of class immI0, and expands
     // to rolL_rReg_CL.
     // NOTE(review): immI0 is defined outside this chunk; presumably the
     // constant 0 -- confirm.
9553 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9554 %{
9555   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9556 
9557   expand %{
9558     rolL_rReg_CL(dst, shift, cr);
9559   %}
9560 %}
9561 
9562 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (c64 - shift)) on longs, where the
     // count arithmetic is an int SubI and c64 is of class immI_64, and expands
     // to rolL_rReg_CL.
     // NOTE(review): immI_64 is defined outside this chunk; presumably the
     // constant 64 -- confirm.
9563 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9564 %{
9565   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9566 
9567   expand %{
9568     rolL_rReg_CL(dst, shift, cr);
9569   %}
9570 %}
9571 
9572 // ROR expand
9573 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9574 %{
       // Expand-only helper (no match rule): rotate a long register right by one
       // in place (D1 /1 with the wide REX helper).  Flags are killed.
       // Instantiated by rorL_rReg_i1.
9575   effect(USE_DEF dst, KILL cr);
9576 
9577   format %{ "rorq    $dst" %}
9578   opcode(0xD1, 0x1); /* D1 /1 */
9579   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9580   ins_pipe(ialu_reg);
9581 %}
9582 
9583 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9584 %{
       // Expand-only helper (no match rule): rotate a long register right by an
       // immI8 count in place (C1 /1 ib).  Flags are killed.
       // Instantiated by rorL_rReg_i8.
9585   effect(USE_DEF dst, USE shift, KILL cr);
9586 
9587   format %{ "rorq    $dst, $shift" %}
9588   opcode(0xC1, 0x1); /* C1 /1 ib */
9589   ins_encode(reg_opc_imm_wide(dst, shift));
9590   ins_pipe(ialu_reg);
9591 %}
9592 
9593 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9594 %{
       // Expand-only helper (no match rule): rotate a long register right by a
       // variable count (D3 /1).  The count is an int operand of class rcx_RegI
       // and dst comes from no_rcx_RegL, keeping the two apart.
       // Flags are killed.
       // Instantiated by rorL_rReg_Var_C0 and rorL_rReg_Var_C64.
9595   effect(USE_DEF dst, USE shift, KILL cr);
9596 
9597   format %{ "rorq    $dst, $shift" %}
9598   opcode(0xD3, 0x1); /* D3 /1 */
9599   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9600   ins_pipe(ialu_reg_reg);
9601 %}
9602 // end of ROR expand
9603 
9604 // Rotate Right by one
     // Recognizes (dst >>> rshift) | (dst << lshift) on longs, with rshift of
     // class immI1 and lshift of class immI_M1, and expands to rorL_rReg_imm1.
     // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably
     // the constants 1 and -1 (-1 acting as 63 once the long shift count is
     // masked) -- confirm.
9605 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9606 %{
9607   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9608 
9609   expand %{
9610     rorL_rReg_imm1(dst, cr);
9611   %}
9612 %}
9613 
9614 // Rotate Right by 8-bit immediate
     // Recognizes (dst >>> rshift) | (dst << lshift) on longs and expands to
     // rorL_rReg_imm8 with the right-shift count.  The predicate reads the two
     // shift constants (second input of each child of the OrL) and accepts the
     // pattern only when their sum is a multiple of 64 (low six bits zero).
9615 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9616 %{
9617   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9618   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9619 
9620   expand %{
9621     rorL_rReg_imm8(dst, rshift, cr);
9622   %}
9623 %}
9624 
9625 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (zero - shift)) on longs, where the
     // count arithmetic is an int SubI and zero is of class immI0, and expands
     // to rorL_rReg_CL.
     // NOTE(review): immI0 is defined outside this chunk; presumably the
     // constant 0 -- confirm.
9626 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9627 %{
9628   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9629 
9630   expand %{
9631     rorL_rReg_CL(dst, shift, cr);
9632   %}
9633 %}
9634 
9635 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (c64 - shift)) on longs, where the
     // count arithmetic is an int SubI and c64 is of class immI_64, and expands
     // to rorL_rReg_CL.
     // NOTE(review): immI_64 is defined outside this chunk; presumably the
     // constant 64 -- confirm.
9636 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9637 %{
9638   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9639 
9640   expand %{
9641     rorL_rReg_CL(dst, shift, cr);
9642   %}
9643 %}
9644 
9645 // Logical Instructions
9646 
9647 // Integer Logical Instructions
9648 
9649 // And Instructions
9650 // And Register with Register
     // Int bitwise AND: dst = dst & src (two-address andl, opcode 23).
     // Flags are killed.
9651 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9652 %{
9653   match(Set dst (AndI dst src));
9654   effect(KILL cr);
9655 
9656   format %{ "andl    $dst, $src\t# int" %}
9657   opcode(0x23);
9658   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9659   ins_pipe(ialu_reg_reg);
9660 %}
9661 
9662 // And Register with Immediate 255
// Special case of AndI for an immI_255 mask: instead of an andl it is printed
// and encoded as a movzbl of dst onto itself (opcode bytes 0x0F 0xB6).
// Unlike the general AndI rules, no flags operand is declared here. The REX
// prefix comes from REX_reg_breg, the variant used when the source operand is
// read as a byte register.
9663 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9664 %{
9665   match(Set dst (AndI dst src));
9666
9667   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9668   opcode(0x0F, 0xB6);
9669   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9670   ins_pipe(ialu_reg);
9671 %}
9672 
9673 // And Register with Immediate 255 and promote to long
// Folds ConvI2L(AndI src mask) with an immI_255 mask into one movzbl from the
// int register src into the long register dst (opcode bytes 0x0F 0xB6).
// Two-operand form: src is not overwritten. No flags operand is declared.
9674 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9675 %{
9676   match(Set dst (ConvI2L (AndI src mask)));
9677
9678   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9679   opcode(0x0F, 0xB6);
9680   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9681   ins_pipe(ialu_reg);
9682 %}
9683 
9684 // And Register with Immediate 65535
// Special case of AndI for an immI_65535 mask: printed and encoded as a
// movzwl of dst onto itself (opcode bytes 0x0F 0xB7) rather than an andl.
// No flags operand is declared for this form.
9685 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9686 %{
9687   match(Set dst (AndI dst src));
9688
9689   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9690   opcode(0x0F, 0xB7);
9691   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9692   ins_pipe(ialu_reg);
9693 %}
9694 
9695 // And Register with Immediate 65535 and promote to long
// Folds ConvI2L(AndI src mask) with an immI_65535 mask into one movzwl from
// the int register src into the long register dst (opcode bytes 0x0F 0xB7).
// Two-operand form: src is not overwritten. No flags operand is declared.
9696 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9697 %{
9698   match(Set dst (ConvI2L (AndI src mask)));
9699
9700   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9701   opcode(0x0F, 0xB7);
9702   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9703   ins_pipe(ialu_reg);
9704 %}
9705 
9706 // And Register with Immediate
// General 32-bit dst = dst & constant for any immI operand; flags are killed.
// Primary opcode 0x81 with secondary 0x04 (the /4 extension noted in the
// opcode comment). OpcSErm and Con8or32 are enc_classes defined outside this
// chunk; by name they presumably choose between an 8-bit and a 32-bit
// immediate encoding -- confirm against their definitions.
9707 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9708 %{
9709   match(Set dst (AndI dst src));
9710   effect(KILL cr);
9711
9712   format %{ "andl    $dst, $src\t# int" %}
9713   opcode(0x81, 0x04); /* Opcode 81 /4 */
9714   ins_encode(OpcSErm(dst, src), Con8or32(src));
9715   ins_pipe(ialu_reg);
9716 %}
9717 
9718 // And Register with Memory
// 32-bit dst = dst & LoadI(src): the load is folded into the andl as a memory
// source operand. Carries an explicit ins_cost of 125; flags are killed.
9719 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9720 %{
9721   match(Set dst (AndI dst (LoadI src)));
9722   effect(KILL cr);
9723
9724   ins_cost(125);
9725   format %{ "andl    $dst, $src\t# int" %}
9726   opcode(0x23);
9727   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9728   ins_pipe(ialu_reg_mem);
9729 %}
9730 
9731 // And Memory with Register
// Read-modify-write form: matches a StoreI to dst of (LoadI dst) & src, so
// load, AND and store become a single andl with a memory destination.
// In the encoding the register operand is src and the memory operand is dst
// (note the swapped argument order to REX_reg_mem / reg_mem).
// ins_cost is 150; flags are killed.
9732 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9733 %{
9734   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9735   effect(KILL cr);
9736
9737   ins_cost(150);
9738   format %{ "andl    $dst, $src\t# int" %}
9739   opcode(0x21); /* Opcode 21 /r */
9740   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9741   ins_pipe(ialu_mem_reg);
9742 %}
9743 
9744 // And Memory with Immediate
// Read-modify-write form with a constant: matches a StoreI to dst of
// (LoadI dst) & src for any immI. Primary opcode 0x81, secondary 0x4 (the /4
// extension, passed to RM_opc_mem as "secondary"). REX_mem, OpcSE, RM_opc_mem
// and Con8or32 are enc_classes defined outside this chunk.
// ins_cost is 125; flags are killed.
9745 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9746 %{
9747   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9748   effect(KILL cr);
9749
9750   ins_cost(125);
9751   format %{ "andl    $dst, $src\t# int" %}
9752   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9753   ins_encode(REX_mem(dst), OpcSE(src),
9754              RM_opc_mem(secondary, dst), Con8or32(src));
9755   ins_pipe(ialu_mem_imm);
9756 %}
9757 
9758 // Or Instructions
9759 // Or Register with Register
// 32-bit dst = dst | src, both in int registers (opcode 0x0B).
// The flags register is declared killed.
9760 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9761 %{
9762   match(Set dst (OrI dst src));
9763   effect(KILL cr);
9764
9765   format %{ "orl     $dst, $src\t# int" %}
9766   opcode(0x0B);
9767   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9768   ins_pipe(ialu_reg_reg);
9769 %}
9770 
9771 // Or Register with Immediate
// 32-bit dst = dst | constant for any immI operand; flags are killed.
// Primary opcode 0x81 with secondary 0x01 (the /1 extension noted in the
// opcode comment); encoded through the same OpcSErm / Con8or32 pair as the
// other register-immediate logical rules.
9772 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9773 %{
9774   match(Set dst (OrI dst src));
9775   effect(KILL cr);
9776
9777   format %{ "orl     $dst, $src\t# int" %}
9778   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9779   ins_encode(OpcSErm(dst, src), Con8or32(src));
9780   ins_pipe(ialu_reg);
9781 %}
9782 
9783 // Or Register with Memory
// 32-bit dst = dst | LoadI(src): the load is folded into the orl as a memory
// source operand. ins_cost is 125; flags are killed.
9784 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9785 %{
9786   match(Set dst (OrI dst (LoadI src)));
9787   effect(KILL cr);
9788
9789   ins_cost(125);
9790   format %{ "orl     $dst, $src\t# int" %}
9791   opcode(0x0B);
9792   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9793   ins_pipe(ialu_reg_mem);
9794 %}
9795 
9796 // Or Memory with Register
// Read-modify-write form: matches a StoreI to dst of (LoadI dst) | src and
// emits one orl with a memory destination. The encoding passes src as the
// register operand and dst as the memory operand.
// ins_cost is 150; flags are killed.
9797 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9798 %{
9799   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9800   effect(KILL cr);
9801
9802   ins_cost(150);
9803   format %{ "orl     $dst, $src\t# int" %}
9804   opcode(0x09); /* Opcode 09 /r */
9805   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9806   ins_pipe(ialu_mem_reg);
9807 %}
9808 
9809 // Or Memory with Immediate
// Read-modify-write form with a constant: matches a StoreI to dst of
// (LoadI dst) | src for any immI. Primary opcode 0x81, secondary 0x1 (the /1
// extension, passed to RM_opc_mem as "secondary").
// ins_cost is 125; flags are killed.
9810 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9811 %{
9812   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9813   effect(KILL cr);
9814
9815   ins_cost(125);
9816   format %{ "orl     $dst, $src\t# int" %}
9817   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9818   ins_encode(REX_mem(dst), OpcSE(src),
9819              RM_opc_mem(secondary, dst), Con8or32(src));
9820   ins_pipe(ialu_mem_imm);
9821 %}
9822 
9823 // Xor Instructions
9824 // Xor Register with Register
// 32-bit dst = dst ^ src, both in int registers (opcode 0x33).
// The flags register is declared killed.
9825 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9826 %{
9827   match(Set dst (XorI dst src));
9828   effect(KILL cr);
9829
9830   format %{ "xorl    $dst, $src\t# int" %}
9831   opcode(0x33);
9832   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9833   ins_pipe(ialu_reg_reg);
9834 %}
9835 
9836 // Xor Register with Immediate -1
// Special case of XorI for an immI_M1 operand: emitted through the macro
// assembler as notl on dst instead of an xorl with an immediate.
// No flags operand is declared for this rule.
// NOTE(review): the format prints "not" while the encoding calls notl (the
// long counterpart prints "notq"); only the debug listing is affected.
9837 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9838   match(Set dst (XorI dst imm));
9839
9840   format %{ "not    $dst" %}
9841   ins_encode %{
9842      __ notl($dst$$Register);
9843   %}
9844   ins_pipe(ialu_reg);
9845 %}
9846 
9847 // Xor Register with Immediate
// 32-bit dst = dst ^ constant for any immI operand; flags are killed.
// Primary opcode 0x81 with secondary 0x06 (the /6 extension noted in the
// opcode comment); encoded through OpcSErm / Con8or32.
9848 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9849 %{
9850   match(Set dst (XorI dst src));
9851   effect(KILL cr);
9852
9853   format %{ "xorl    $dst, $src\t# int" %}
9854   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9855   ins_encode(OpcSErm(dst, src), Con8or32(src));
9856   ins_pipe(ialu_reg);
9857 %}
9858 
9859 // Xor Register with Memory
// 32-bit dst = dst ^ LoadI(src): the load is folded into the xorl as a memory
// source operand. ins_cost is 125; flags are killed.
9860 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9861 %{
9862   match(Set dst (XorI dst (LoadI src)));
9863   effect(KILL cr);
9864
9865   ins_cost(125);
9866   format %{ "xorl    $dst, $src\t# int" %}
9867   opcode(0x33);
9868   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9869   ins_pipe(ialu_reg_mem);
9870 %}
9871 
9872 // Xor Memory with Register
// Read-modify-write form: matches a StoreI to dst of (LoadI dst) ^ src and
// emits one xorl with a memory destination. The encoding passes src as the
// register operand and dst as the memory operand.
// ins_cost is 150; flags are killed.
9873 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9874 %{
9875   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9876   effect(KILL cr);
9877
9878   ins_cost(150);
9879   format %{ "xorl    $dst, $src\t# int" %}
9880   opcode(0x31); /* Opcode 31 /r */
9881   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9882   ins_pipe(ialu_mem_reg);
9883 %}
9884 
9885 // Xor Memory with Immediate
// Read-modify-write form with a constant: matches a StoreI to dst of
// (LoadI dst) ^ src for any immI. Primary opcode 0x81, secondary 0x6 (the /6
// extension, passed to RM_opc_mem as "secondary").
// ins_cost is 125; flags are killed.
9886 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9887 %{
9888   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9889   effect(KILL cr);
9890
9891   ins_cost(125);
9892   format %{ "xorl    $dst, $src\t# int" %}
9893   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9894   ins_encode(REX_mem(dst), OpcSE(src),
9895              RM_opc_mem(secondary, dst), Con8or32(src));
9896   ins_pipe(ialu_mem_imm);
9897 %}
9898 
9899 
9900 // Long Logical Instructions
9901 
9902 // And Instructions
9903 // And Register with Register
// 64-bit dst = dst & src, both in long registers. Same opcode (0x23) as the
// int rule; the difference is the REX_reg_reg_wide prefix enc_class.
// The flags register is declared killed.
9904 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9905 %{
9906   match(Set dst (AndL dst src));
9907   effect(KILL cr);
9908
9909   format %{ "andq    $dst, $src\t# long" %}
9910   opcode(0x23);
9911   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9912   ins_pipe(ialu_reg_reg);
9913 %}
9914 
9915 // And Register with Immediate 255
// Special case of AndL for an immL_255 mask: printed and encoded as a movzbq
// of dst onto itself (opcode bytes 0x0F 0xB6 behind the wide REX prefix)
// rather than an andq. No flags operand is declared for this form.
9916 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9917 %{
9918   match(Set dst (AndL dst src));
9919
9920   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9921   opcode(0x0F, 0xB6);
9922   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9923   ins_pipe(ialu_reg);
9924 %}
9925 
9926 // And Register with Immediate 65535
// Special case of AndL for an immL_65535 mask: printed and encoded as a
// movzwq of dst onto itself (opcode bytes 0x0F 0xB7 behind the wide REX
// prefix) rather than an andq. No flags operand is declared for this form.
9927 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9928 %{
9929   match(Set dst (AndL dst src));
9930
9931   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9932   opcode(0x0F, 0xB7);
9933   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9934   ins_pipe(ialu_reg);
9935 %}
9936 
9937 // And Register with Immediate
// 64-bit dst = dst & constant. The constant operand is typed immL32, not a
// general long immediate. Primary opcode 0x81, secondary 0x04 (/4), encoded
// through OpcSErm_wide / Con8or32. Flags are killed.
9938 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9939 %{
9940   match(Set dst (AndL dst src));
9941   effect(KILL cr);
9942
9943   format %{ "andq    $dst, $src\t# long" %}
9944   opcode(0x81, 0x04); /* Opcode 81 /4 */
9945   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9946   ins_pipe(ialu_reg);
9947 %}
9948 
9949 // And Register with Memory
// 64-bit dst = dst & LoadL(src): the load is folded into the andq as a memory
// source operand. ins_cost is 125; flags are killed.
9950 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9951 %{
9952   match(Set dst (AndL dst (LoadL src)));
9953   effect(KILL cr);
9954
9955   ins_cost(125);
9956   format %{ "andq    $dst, $src\t# long" %}
9957   opcode(0x23);
9958   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9959   ins_pipe(ialu_reg_mem);
9960 %}
9961 
9962 // And Memory with Register
// Read-modify-write form: matches a StoreL to dst of (LoadL dst) & src and
// emits one andq with a memory destination. The encoding passes src as the
// register operand and dst as the memory operand.
// ins_cost is 150; flags are killed.
9963 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9964 %{
9965   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9966   effect(KILL cr);
9967
9968   ins_cost(150);
9969   format %{ "andq    $dst, $src\t# long" %}
9970   opcode(0x21); /* Opcode 21 /r */
9971   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9972   ins_pipe(ialu_mem_reg);
9973 %}
9974 
9975 // And Memory with Immediate
// Read-modify-write form with a constant: matches a StoreL to dst of
// (LoadL dst) & src for an immL32 operand. Primary opcode 0x81, secondary
// 0x4 (/4); the 64-bit width comes from the REX_mem_wide prefix.
// ins_cost is 125; flags are killed.
9976 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9977 %{
9978   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9979   effect(KILL cr);
9980
9981   ins_cost(125);
9982   format %{ "andq    $dst, $src\t# long" %}
9983   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9984   ins_encode(REX_mem_wide(dst), OpcSE(src),
9985              RM_opc_mem(secondary, dst), Con8or32(src));
9986   ins_pipe(ialu_mem_imm);
9987 %}
9988 
9989 // Or Instructions
9990 // Or Register with Register
// 64-bit dst = dst | src, both in long registers (opcode 0x0B behind the
// wide REX prefix). The flags register is declared killed.
9991 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9992 %{
9993   match(Set dst (OrL dst src));
9994   effect(KILL cr);
9995
9996   format %{ "orq     $dst, $src\t# long" %}
9997   opcode(0x0B);
9998   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9999   ins_pipe(ialu_reg_reg);
10000 %}
10001 
10002 // Use any_RegP to match R15 (TLS register) without spilling.
// 64-bit dst = dst | CastP2X(src): ORs a pointer, reinterpreted as an
// integer, into a long register. The encoding is the same as orL_rReg; only
// the source operand class (any_RegP) and the match rule differ.
// The flags register is declared killed.
10003 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10004   match(Set dst (OrL dst (CastP2X src)));
10005   effect(KILL cr);
10006
10007   format %{ "orq     $dst, $src\t# long" %}
10008   opcode(0x0B);
10009   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10010   ins_pipe(ialu_reg_reg);
10011 %}
10012 
10013 
10014 // Or Register with Immediate
// 64-bit dst = dst | constant for an immL32 operand. Primary opcode 0x81,
// secondary 0x01 (/1), encoded through OpcSErm_wide / Con8or32.
// Flags are killed.
10015 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10016 %{
10017   match(Set dst (OrL dst src));
10018   effect(KILL cr);
10019
10020   format %{ "orq     $dst, $src\t# long" %}
10021   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10022   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10023   ins_pipe(ialu_reg);
10024 %}
10025 
10026 // Or Register with Memory
// 64-bit dst = dst | LoadL(src): the load is folded into the orq as a memory
// source operand. ins_cost is 125; flags are killed.
10027 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10028 %{
10029   match(Set dst (OrL dst (LoadL src)));
10030   effect(KILL cr);
10031
10032   ins_cost(125);
10033   format %{ "orq     $dst, $src\t# long" %}
10034   opcode(0x0B);
10035   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10036   ins_pipe(ialu_reg_mem);
10037 %}
10038 
10039 // Or Memory with Register
// Read-modify-write form: matches a StoreL to dst of (LoadL dst) | src and
// emits one orq with a memory destination. The encoding passes src as the
// register operand and dst as the memory operand.
// ins_cost is 150; flags are killed.
10040 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10041 %{
10042   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10043   effect(KILL cr);
10044
10045   ins_cost(150);
10046   format %{ "orq     $dst, $src\t# long" %}
10047   opcode(0x09); /* Opcode 09 /r */
10048   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10049   ins_pipe(ialu_mem_reg);
10050 %}
10051 
10052 // Or Memory with Immediate
// Read-modify-write form with a constant: matches a StoreL to dst of
// (LoadL dst) | src for an immL32 operand. Primary opcode 0x81, secondary
// 0x1 (/1); the 64-bit width comes from the REX_mem_wide prefix.
// ins_cost is 125; flags are killed.
10053 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10054 %{
10055   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10056   effect(KILL cr);
10057
10058   ins_cost(125);
10059   format %{ "orq     $dst, $src\t# long" %}
10060   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10061   ins_encode(REX_mem_wide(dst), OpcSE(src),
10062              RM_opc_mem(secondary, dst), Con8or32(src));
10063   ins_pipe(ialu_mem_imm);
10064 %}
10065 
10066 // Xor Instructions
10067 // Xor Register with Register
// 64-bit dst = dst ^ src, both in long registers (opcode 0x33 behind the
// wide REX prefix). The flags register is declared killed.
10068 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10069 %{
10070   match(Set dst (XorL dst src));
10071   effect(KILL cr);
10072
10073   format %{ "xorq    $dst, $src\t# long" %}
10074   opcode(0x33);
10075   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10076   ins_pipe(ialu_reg_reg);
10077 %}
10078 
10079 // Xor Register with Immediate -1
// Special case of XorL for an immL_M1 operand: emitted through the macro
// assembler as notq on dst instead of an xorq with an immediate.
// No flags operand is declared for this rule.
10080 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10081   match(Set dst (XorL dst imm));
10082
10083   format %{ "notq   $dst" %}
10084   ins_encode %{
10085      __ notq($dst$$Register);
10086   %}
10087   ins_pipe(ialu_reg);
10088 %}
10089 
10090 // Xor Register with Immediate
// 64-bit dst = dst ^ constant for an immL32 operand. Primary opcode 0x81,
// secondary 0x06 (/6), encoded through OpcSErm_wide / Con8or32.
// Flags are killed.
10091 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10092 %{
10093   match(Set dst (XorL dst src));
10094   effect(KILL cr);
10095
10096   format %{ "xorq    $dst, $src\t# long" %}
10097   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10098   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10099   ins_pipe(ialu_reg);
10100 %}
10101 
10102 // Xor Register with Memory
// 64-bit dst = dst ^ LoadL(src): the load is folded into the xorq as a memory
// source operand. ins_cost is 125; flags are killed.
10103 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10104 %{
10105   match(Set dst (XorL dst (LoadL src)));
10106   effect(KILL cr);
10107
10108   ins_cost(125);
10109   format %{ "xorq    $dst, $src\t# long" %}
10110   opcode(0x33);
10111   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10112   ins_pipe(ialu_reg_mem);
10113 %}
10114 
10115 // Xor Memory with Register
// Read-modify-write form: matches a StoreL to dst of (LoadL dst) ^ src and
// emits one xorq with a memory destination. The encoding passes src as the
// register operand and dst as the memory operand.
// ins_cost is 150; flags are killed.
10116 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10117 %{
10118   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10119   effect(KILL cr);
10120
10121   ins_cost(150);
10122   format %{ "xorq    $dst, $src\t# long" %}
10123   opcode(0x31); /* Opcode 31 /r */
10124   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10125   ins_pipe(ialu_mem_reg);
10126 %}
10127 
10128 // Xor Memory with Immediate
// Read-modify-write form with a constant: matches a StoreL to dst of
// (LoadL dst) ^ src for an immL32 operand. Primary opcode 0x81, secondary
// 0x6 (/6); the 64-bit width comes from the REX_mem_wide prefix.
// ins_cost is 125; flags are killed.
10129 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10130 %{
10131   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10132   effect(KILL cr);
10133
10134   ins_cost(125);
10135   format %{ "xorq    $dst, $src\t# long" %}
10136   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10137   ins_encode(REX_mem_wide(dst), OpcSE(src),
10138              RM_opc_mem(secondary, dst), Con8or32(src));
10139   ins_pipe(ialu_mem_imm);
10140 %}
10141 
10142 // Convert Int to Boolean
// Matches Conv2B on an int register. Three instructions are emitted, as the
// format lists: testl src,src sets the flags from src; setnz writes the
// "non-zero" condition into dst's low byte; movzbl zero-extends that byte so
// dst holds 0 or 1. The opcode bytes are given inline (0x85 for the test,
// 0x0F 0xB6 for the movzbl) rather than through an opcode() clause.
// Flags are killed.
10143 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10144 %{
10145   match(Set dst (Conv2B src));
10146   effect(KILL cr);
10147
10148   format %{ "testl   $src, $src\t# ci2b\n\t"
10149             "setnz   $dst\n\t"
10150             "movzbl  $dst, $dst" %}
10151   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10152              setNZ_reg(dst),
10153              REX_reg_breg(dst, dst), // movzbl
10154              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10155   ins_pipe(pipe_slow); // XXX
10156 %}
10157 
10158 // Convert Pointer to Boolean
// Matches Conv2B on a pointer register. Same three-instruction sequence as
// convI2B, except that the test is the 64-bit testq (REX_reg_reg_wide
// prefix) so the whole pointer is examined: testq src,src; setnz dst;
// movzbl dst,dst. dst ends up 0 or 1. Flags are killed.
10159 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10160 %{
10161   match(Set dst (Conv2B src));
10162   effect(KILL cr);
10163
10164   format %{ "testq   $src, $src\t# cp2b\n\t"
10165             "setnz   $dst\n\t"
10166             "movzbl  $dst, $dst" %}
10167   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10168              setNZ_reg(dst),
10169              REX_reg_breg(dst, dst), // movzbl
10170              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10171   ins_pipe(pipe_slow); // XXX
10172 %}
10173 
// Materialize CmpLTMask p q into dst. Per the format, four instructions are
// emitted: cmpl p,q; setlt dst (low byte = 1 when p < q); movzbl dst,dst
// (zero-extend to 0 or 1); negl dst (turn 1 into -1). dst therefore ends up
// 0 or all ones. ins_cost is 400; flags are killed.
10174 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10175 %{
10176   match(Set dst (CmpLTMask p q));
10177   effect(KILL cr);
10178
10179   ins_cost(400); // XXX
10180   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10181             "setlt   $dst\n\t"
10182             "movzbl  $dst, $dst\n\t"
10183             "negl    $dst" %}
10184   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10185              setLT_reg(dst),
10186              REX_reg_breg(dst, dst), // movzbl
10187              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10188              neg_reg(dst));
10189   ins_pipe(pipe_slow);
10190 %}
10191 
// CmpLTMask against zero, computed in place: a single arithmetic right shift
// of dst by 31 (immediate 0x1F, opcode C1 /7) spreads the sign bit over the
// whole register, giving all ones for a negative dst and 0 otherwise.
// Much cheaper than the general cmpLTMask rule (ins_cost 100 versus 400).
// Flags are killed.
10192 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10193 %{
10194   match(Set dst (CmpLTMask dst zero));
10195   effect(KILL cr);
10196
10197   ins_cost(100); // XXX
10198   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10199   opcode(0xC1, 0x7);  /* C1 /7 ib */
10200   ins_encode(reg_opc_imm(dst, 0x1F));
10201   ins_pipe(ialu_reg);
10202 %}
10203 
10204 
// Fused conditional add: matches p = ((CmpLTMask p q) & y) + (p - q), i.e.
// p - q, plus y when p < q. Per the format, the sequence is subl p,q (leaves
// the borrow in the carry flag); sbbl tmp,tmp (tmp = 0 or all ones from that
// borrow); andl tmp,y; addl p,tmp. tmp is a scratch register (TEMP) and the
// flags are killed. The bytes come from the enc_cmpLTP enc_class, defined
// outside this chunk. ins_cost is 400.
10205 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10206                          rRegI tmp,
10207                          rFlagsReg cr)
10208 %{
10209   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10210   effect(TEMP tmp, KILL cr);
10211
10212   ins_cost(400); // XXX
10213   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10214             "sbbl    $tmp, $tmp\n\t"
10215             "andl    $tmp, $y\n\t"
10216             "addl    $p, $tmp" %}
10217   ins_encode(enc_cmpLTP(p, q, y, tmp));
10218   ins_pipe(pipe_cmplt);
10219 %}
10220 
10221 /* If I enable this, I encourage spilling in the inner loop of compress.
10222 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10223 %{
10224   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10225   effect( TEMP tmp, KILL cr );
10226   ins_cost(400);
10227 
10228   format %{ "SUB    $p,$q\n\t"
10229             "SBB    RCX,RCX\n\t"
10230             "AND    RCX,$y\n\t"
10231             "ADD    $p,RCX" %}
10232   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10233 %}
10234 */
10235 
10236 //---------- FP Instructions------------------------------------------------
10237 
// Float compare of two registers producing flags in rFlagsRegU.
// The ucomiss bytes (0x0F 0x2E) are followed by the cmpfp_fixup enc_class
// (defined outside this chunk). The format documents what that fixup does:
// when the parity flag is set after the compare ("saw NaN"), the flags are
// pushed, the saved image at [rsp] is masked with 0xffffff2b, and the result
// is popped back. ins_cost is 145.
10238 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10239 %{
10240   match(Set cr (CmpF src1 src2));
10241
10242   ins_cost(145);
10243   format %{ "ucomiss $src1, $src2\n\t"
10244             "jnp,s   exit\n\t"
10245             "pushfq\t# saw NaN, set CF\n\t"
10246             "andq    [rsp], #0xffffff2b\n\t"
10247             "popfq\n"
10248     "exit:   nop\t# avoid branch to branch" %}
10249   opcode(0x0F, 0x2E);
10250   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10251              cmpfp_fixup);
10252   ins_pipe(pipe_slow);
10253 %}
10254 
// Same match rule as cmpF_cc_reg, but the result is an rFlagsRegUCF operand
// and only the bare ucomiss is emitted (through the macro assembler), with no
// NaN fixup sequence after it.
// NOTE(review): ins_cost is 145 here, whereas every other *_CF compare rule
// in this section (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_*CF) uses 100 --
// confirm whether the higher cost is intentional.
10255 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10256   match(Set cr (CmpF src1 src2));
10257
10258   ins_cost(145);
10259   format %{ "ucomiss $src1, $src2" %}
10260   ins_encode %{
10261     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10262   %}
10263   ins_pipe(pipe_slow);
10264 %}
10265 
// Float compare of a register against a float loaded from memory, producing
// flags in rFlagsRegU. The LoadF is folded into the ucomiss as a memory
// operand, and the cmpfp_fixup enc_class (defined outside this chunk) is
// appended. The format spells out the fixup: skip when parity is clear,
// otherwise push the flags, mask the saved image at [rsp] with 0xffffff2b and
// pop it back. ins_cost is 145.
10266 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10267 %{
10268   match(Set cr (CmpF src1 (LoadF src2)));
10269
10270   ins_cost(145);
10271   format %{ "ucomiss $src1, $src2\n\t"
10272             "jnp,s   exit\n\t"
10273             "pushfq\t# saw NaN, set CF\n\t"
10274             "andq    [rsp], #0xffffff2b\n\t"
10275             "popfq\n"
10276     "exit:   nop\t# avoid branch to branch" %}
10277   opcode(0x0F, 0x2E);
10278   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10279              cmpfp_fixup);
10280   ins_pipe(pipe_slow);
10281 %}
10282 
// Same match rule as cmpF_cc_mem, but the result is an rFlagsRegUCF operand
// and only the bare ucomiss with a memory operand is emitted (opcode bytes
// 0x0F 0x2E), with no NaN fixup. ins_cost is 100.
10283 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10284   match(Set cr (CmpF src1 (LoadF src2)));
10285
10286   ins_cost(100);
10287   format %{ "ucomiss $src1, $src2" %}
10288   opcode(0x0F, 0x2E);
10289   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10290   ins_pipe(pipe_slow);
10291 %}
10292 
// Float compare of a register against a float constant taken from the
// constant table, producing flags in rFlagsRegU. ins_cost is 145.
// ucomiss reports an unordered (NaN) operand through the parity flag. On that
// path the flags are pushed, the saved flags image is masked with 0xffffff2b
// (low byte 0x2B clears SF, ZF, AF and PF and keeps CF), and the result is
// popped back, so an unordered compare reads as "below". When parity is clear
// the fixup is skipped.
10293 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
10294   match(Set cr (CmpF src con));
10295
10296   ins_cost(145);
10297   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10298             "jnp,s   exit\n\t"
10299             "pushfq\t# saw NaN, set CF\n\t"
10300             "andq    [rsp], #0xffffff2b\n\t"
10301             "popfq\n"
10302     "exit:   nop\t# avoid branch to branch" %}
10303   ins_encode %{
10304     Label L_exit;
10305     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10306     __ jcc(Assembler::noParity, L_exit);
10307     __ pushf();
    // Mask the flags image that pushf just stored at [rsp], exactly as the
    // format above documents. The operand must be the memory slot and not
    // the rsp register: and-ing the register would corrupt the stack
    // pointer and leave the saved flags unchanged.
10308     __ andq(Address(rsp, 0), 0xffffff2b);
10309     __ popf();
10310     __ bind(L_exit);
10311     __ nop();
10312   %}
10313   ins_pipe(pipe_slow);
10314 %}
10315 
// Same match rule as cmpF_cc_imm, but the result is an rFlagsRegUCF operand
// and only the bare ucomiss against the constant-table address is emitted,
// with no NaN fixup. ins_cost is 100.
10316 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10317   match(Set cr (CmpF src con));
10318   ins_cost(100);
10319   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10320   ins_encode %{
10321     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10322   %}
10323   ins_pipe(pipe_slow);
10324 %}
10325 
// Double compare of two registers producing flags in rFlagsRegU.
// The ucomisd bytes are 0x66 0x0F 0x2E; note that the 0x66 byte (OpcP) is
// emitted before the REX prefix enc_class. The cmpfp_fixup enc_class
// (defined outside this chunk) is appended; the format documents it: skip
// when parity is clear, otherwise push the flags, mask the saved image at
// [rsp] with 0xffffff2b and pop it back. ins_cost is 145.
10326 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10327 %{
10328   match(Set cr (CmpD src1 src2));
10329
10330   ins_cost(145);
10331   format %{ "ucomisd $src1, $src2\n\t"
10332             "jnp,s   exit\n\t"
10333             "pushfq\t# saw NaN, set CF\n\t"
10334             "andq    [rsp], #0xffffff2b\n\t"
10335             "popfq\n"
10336     "exit:   nop\t# avoid branch to branch" %}
10337   opcode(0x66, 0x0F, 0x2E);
10338   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10339              cmpfp_fixup);
10340   ins_pipe(pipe_slow);
10341 %}
10342 
// Same match rule as cmpD_cc_reg, but the result is an rFlagsRegUCF operand
// and only the bare ucomisd is emitted (through the macro assembler), with no
// NaN fixup sequence after it. ins_cost is 100.
// The format text prints exactly the instruction that is emitted; a stray
// trailing word " test" was removed from it so the listing agrees with the
// other *_CF compare rules.
10343 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10344   match(Set cr (CmpD src1 src2));
10345
10346   ins_cost(100);
10347   format %{ "ucomisd $src1, $src2" %}
10348   ins_encode %{
10349     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10350   %}
10351   ins_pipe(pipe_slow);
10352 %}
10353 
// Double compare of a register against a double loaded from memory,
// producing flags in rFlagsRegU. The LoadD is folded into the ucomisd
// (0x66 0x0F 0x2E, with the 0x66 byte emitted before the REX prefix) and the
// cmpfp_fixup enc_class (defined outside this chunk) is appended. The format
// spells out the fixup: skip when parity is clear, otherwise push the flags,
// mask the saved image at [rsp] with 0xffffff2b and pop it back.
// ins_cost is 145.
10354 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10355 %{
10356   match(Set cr (CmpD src1 (LoadD src2)));
10357
10358   ins_cost(145);
10359   format %{ "ucomisd $src1, $src2\n\t"
10360             "jnp,s   exit\n\t"
10361             "pushfq\t# saw NaN, set CF\n\t"
10362             "andq    [rsp], #0xffffff2b\n\t"
10363             "popfq\n"
10364     "exit:   nop\t# avoid branch to branch" %}
10365   opcode(0x66, 0x0F, 0x2E);
10366   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10367              cmpfp_fixup);
10368   ins_pipe(pipe_slow);
10369 %}
10370 
// Same match rule as cmpD_cc_mem, but the result is an rFlagsRegUCF operand
// and only the bare ucomisd with a memory operand is emitted (0x66 0x0F 0x2E,
// with the 0x66 byte ahead of the REX prefix), with no NaN fixup.
// ins_cost is 100.
10371 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10372   match(Set cr (CmpD src1 (LoadD src2)));
10373
10374   ins_cost(100);
10375   format %{ "ucomisd $src1, $src2" %}
10376   opcode(0x66, 0x0F, 0x2E);
10377   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10378   ins_pipe(pipe_slow);
10379 %}
10380 
// Double compare of a register against a double constant taken from the
// constant table, producing flags in rFlagsRegU. ins_cost is 145.
// ucomisd reports an unordered (NaN) operand through the parity flag. On that
// path the flags are pushed, the saved flags image is masked with 0xffffff2b
// (low byte 0x2B clears SF, ZF, AF and PF and keeps CF), and the result is
// popped back, so an unordered compare reads as "below". When parity is clear
// the fixup is skipped.
10381 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10382   match(Set cr (CmpD src con));
10383
10384   ins_cost(145);
10385   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10386             "jnp,s   exit\n\t"
10387             "pushfq\t# saw NaN, set CF\n\t"
10388             "andq    [rsp], #0xffffff2b\n\t"
10389             "popfq\n"
10390     "exit:   nop\t# avoid branch to branch" %}
10391   ins_encode %{
10392     Label L_exit;
10393     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10394     __ jcc(Assembler::noParity, L_exit);
10395     __ pushf();
    // Mask the flags image that pushf just stored at [rsp], exactly as the
    // format above documents. The operand must be the memory slot and not
    // the rsp register: and-ing the register would corrupt the stack
    // pointer and leave the saved flags unchanged.
10396     __ andq(Address(rsp, 0), 0xffffff2b);
10397     __ popf();
10398     __ bind(L_exit);
10399     __ nop();
10400   %}
10401   ins_pipe(pipe_slow);
10402 %}
10403 
// Same match rule as cmpD_cc_imm, but the result is an rFlagsRegUCF operand
// and only the bare ucomisd against the constant-table address is emitted,
// with no NaN fixup. ins_cost is 100.
10404 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10405   match(Set cr (CmpD src con));
10406   ins_cost(100);
10407   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10408   ins_encode %{
10409     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10410   %}
10411   ins_pipe(pipe_slow);
10412 %}
10413 
10414 // Compare into -1,0,1
// Three-way float compare (CmpF3) of two registers into an int register.
// Per the format: after ucomiss, dst is preloaded with -1; the code jumps to
// "done" on parity (unordered) or on below, so both cases yield -1.
// Otherwise setne/movzbl leave 0 for equal and 1 for greater.
// The ucomiss bytes are emitted here; the rest comes from the cmpfp3
// enc_class, defined outside this chunk. ins_cost is 275; flags are killed.
10415 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10416 %{
10417   match(Set dst (CmpF3 src1 src2));
10418   effect(KILL cr);
10419
10420   ins_cost(275);
10421   format %{ "ucomiss $src1, $src2\n\t"
10422             "movl    $dst, #-1\n\t"
10423             "jp,s    done\n\t"
10424             "jb,s    done\n\t"
10425             "setne   $dst\n\t"
10426             "movzbl  $dst, $dst\n"
10427     "done:" %}
10428
10429   opcode(0x0F, 0x2E);
10430   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10431              cmpfp3(dst));
10432   ins_pipe(pipe_slow);
10433 %}
10434 
10435 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory; the LoadF is folded into the ucomiss. Result logic as shown in the
// format: -1 for unordered or below, 0 for equal, 1 for greater. The tail
// after the ucomiss comes from the cmpfp3 enc_class, defined outside this
// chunk. ins_cost is 275; flags are killed.
10436 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10437 %{
10438   match(Set dst (CmpF3 src1 (LoadF src2)));
10439   effect(KILL cr);
10440
10441   ins_cost(275);
10442   format %{ "ucomiss $src1, $src2\n\t"
10443             "movl    $dst, #-1\n\t"
10444             "jp,s    done\n\t"
10445             "jb,s    done\n\t"
10446             "setne   $dst\n\t"
10447             "movzbl  $dst, $dst\n"
10448     "done:" %}
10449
10450   opcode(0x0F, 0x2E);
10451   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10452              cmpfp3(dst));
10453   ins_pipe(pipe_slow);
10454 %}
10455 
10456 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a float constant from
// the constant table. Here the whole sequence is written out with the macro
// assembler and mirrors the format line by line: dst is preloaded with -1
// and kept on parity (unordered) or below; otherwise setb(notEqual) plus
// movzbl leave 0 for equal and 1 for greater.
// ins_cost is 275; flags are killed.
10457 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10458   match(Set dst (CmpF3 src con));
10459   effect(KILL cr);
10460
10461   ins_cost(275);
10462   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10463             "movl    $dst, #-1\n\t"
10464             "jp,s    done\n\t"
10465             "jb,s    done\n\t"
10466             "setne   $dst\n\t"
10467             "movzbl  $dst, $dst\n"
10468     "done:" %}
10469   ins_encode %{
10470     Label L_done;
10471     Register Rdst = $dst$$Register;
10472     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10473     __ movl(Rdst, -1);
10474     __ jcc(Assembler::parity, L_done);
10475     __ jcc(Assembler::below, L_done);
10476     __ setb(Assembler::notEqual, Rdst);
10477     __ movzbl(Rdst, Rdst);
10478     __ bind(L_done);
10479   %}
10480   ins_pipe(pipe_slow);
10481 %}
10482 
10483 // Compare into -1,0,1
// Three-way double compare (CmpD3) of two registers into an int register.
// Per the format: after ucomisd, dst is preloaded with -1 and kept on parity
// (unordered) or below; otherwise setne/movzbl leave 0 for equal and 1 for
// greater. The ucomisd bytes (0x66 0x0F 0x2E, with the 0x66 byte ahead of the
// REX prefix) are emitted here; the rest comes from the cmpfp3 enc_class,
// defined outside this chunk. ins_cost is 275; flags are killed.
10484 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10485 %{
10486   match(Set dst (CmpD3 src1 src2));
10487   effect(KILL cr);
10488
10489   ins_cost(275);
10490   format %{ "ucomisd $src1, $src2\n\t"
10491             "movl    $dst, #-1\n\t"
10492             "jp,s    done\n\t"
10493             "jb,s    done\n\t"
10494             "setne   $dst\n\t"
10495             "movzbl  $dst, $dst\n"
10496     "done:" %}
10497
10498   opcode(0x66, 0x0F, 0x2E);
10499   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10500              cmpfp3(dst));
10501   ins_pipe(pipe_slow);
10502 %}
10503 
10504 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a double loaded from
// memory; the LoadD is folded into the ucomisd. Result logic as shown in the
// format: -1 for unordered or below, 0 for equal, 1 for greater. The tail
// after the ucomisd comes from the cmpfp3 enc_class, defined outside this
// chunk. ins_cost is 275; flags are killed.
10505 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10506 %{
10507   match(Set dst (CmpD3 src1 (LoadD src2)));
10508   effect(KILL cr);
10509
10510   ins_cost(275);
10511   format %{ "ucomisd $src1, $src2\n\t"
10512             "movl    $dst, #-1\n\t"
10513             "jp,s    done\n\t"
10514             "jb,s    done\n\t"
10515             "setne   $dst\n\t"
10516             "movzbl  $dst, $dst\n"
10517     "done:" %}
10518
10519   opcode(0x66, 0x0F, 0x2E);
10520   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10521              cmpfp3(dst));
10522   ins_pipe(pipe_slow);
10523 %}
10524 
10525 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a double constant
// from the constant table. The whole sequence is written out with the macro
// assembler and mirrors the format line by line: dst is preloaded with -1
// and kept on parity (unordered) or below; otherwise setb(notEqual) plus
// movzbl leave 0 for equal and 1 for greater.
// ins_cost is 275; flags are killed.
10526 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10527   match(Set dst (CmpD3 src con));
10528   effect(KILL cr);
10529
10530   ins_cost(275);
10531   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10532             "movl    $dst, #-1\n\t"
10533             "jp,s    done\n\t"
10534             "jb,s    done\n\t"
10535             "setne   $dst\n\t"
10536             "movzbl  $dst, $dst\n"
10537     "done:" %}
10538   ins_encode %{
10539     Register Rdst = $dst$$Register;
10540     Label L_done;
10541     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10542     __ movl(Rdst, -1);
10543     __ jcc(Assembler::parity, L_done);
10544     __ jcc(Assembler::below, L_done);
10545     __ setb(Assembler::notEqual, Rdst);
10546     __ movzbl(Rdst, Rdst);
10547     __ bind(L_done);
10548   %}
10549   ins_pipe(pipe_slow);
10550 %}
10551 
// Scalar float add, register with register: dst = dst + src, printed as
// addss. Opcode bytes 0xF3 0x0F 0x58, with the 0xF3 byte (OpcP) emitted
// before the REX prefix. No flags operand is declared. ins_cost is 150.
10552 instruct addF_reg(regF dst, regF src)
10553 %{
10554   match(Set dst (AddF dst src));
10555
10556   format %{ "addss   $dst, $src" %}
10557   ins_cost(150); // XXX
10558   opcode(0xF3, 0x0F, 0x58);
10559   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10560   ins_pipe(pipe_slow);
10561 %}
10562 
// Scalar float add with a memory operand: dst = dst + LoadF(src); the load is
// folded into the addss. Opcode bytes 0xF3 0x0F 0x58, with the 0xF3 byte
// emitted before the REX prefix. ins_cost is 150.
10563 instruct addF_mem(regF dst, memory src)
10564 %{
10565   match(Set dst (AddF dst (LoadF src)));
10566
10567   format %{ "addss   $dst, $src" %}
10568   ins_cost(150); // XXX
10569   opcode(0xF3, 0x0F, 0x58);
10570   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10571   ins_pipe(pipe_slow);
10572 %}
10573 
// Scalar float add of a constant: dst = dst + con, where the constant is
// read from the constant table via $constantaddress. Emitted through the
// macro assembler's addss. ins_cost is 150.
10574 instruct addF_imm(regF dst, immF con) %{
10575   match(Set dst (AddF dst con));
10576   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10577   ins_cost(150); // XXX
10578   ins_encode %{
10579     __ addss($dst$$XMMRegister, $constantaddress($con));
10580   %}
10581   ins_pipe(pipe_slow);
10582 %}
10583 
// Scalar double add, register with register: dst = dst + src, printed as
// addsd. Opcode bytes 0xF2 0x0F 0x58, with the 0xF2 byte (OpcP) emitted
// before the REX prefix. No flags operand is declared. ins_cost is 150.
10584 instruct addD_reg(regD dst, regD src)
10585 %{
10586   match(Set dst (AddD dst src));
10587
10588   format %{ "addsd   $dst, $src" %}
10589   ins_cost(150); // XXX
10590   opcode(0xF2, 0x0F, 0x58);
10591   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10592   ins_pipe(pipe_slow);
10593 %}
10594 
// Scalar double add with a memory operand: dst = dst + LoadD(src); the load
// is folded into the addsd. Opcode bytes 0xF2 0x0F 0x58, with the 0xF2 byte
// emitted before the REX prefix. ins_cost is 150.
10595 instruct addD_mem(regD dst, memory src)
10596 %{
10597   match(Set dst (AddD dst (LoadD src)));
10598
10599   format %{ "addsd   $dst, $src" %}
10600   ins_cost(150); // XXX
10601   opcode(0xF2, 0x0F, 0x58);
10602   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10603   ins_pipe(pipe_slow);
10604 %}
10605 
// Scalar double add of a constant: dst = dst + con, where the constant is
// read from the constant table via $constantaddress. Emitted through the
// macro assembler's addsd. ins_cost is 150.
10606 instruct addD_imm(regD dst, immD con) %{
10607   match(Set dst (AddD dst con));
10608   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10609   ins_cost(150); // XXX
10610   ins_encode %{
10611     __ addsd($dst$$XMMRegister, $constantaddress($con));
10612   %}
10613   ins_pipe(pipe_slow);
10614 %}
10615 
// Scalar float subtract, register with register: dst = dst - src, printed as
// subss. Opcode bytes 0xF3 0x0F 0x5C, with the 0xF3 byte (OpcP) emitted
// before the REX prefix. No flags operand is declared. ins_cost is 150.
10616 instruct subF_reg(regF dst, regF src)
10617 %{
10618   match(Set dst (SubF dst src));
10619
10620   format %{ "subss   $dst, $src" %}
10621   ins_cost(150); // XXX
10622   opcode(0xF3, 0x0F, 0x5C);
10623   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10624   ins_pipe(pipe_slow);
10625 %}
10626 
// Scalar float subtract with a memory operand: dst = dst - LoadF(src); the
// load is folded into the subss. Opcode bytes 0xF3 0x0F 0x5C, with the 0xF3
// byte emitted before the REX prefix. ins_cost is 150.
10627 instruct subF_mem(regF dst, memory src)
10628 %{
10629   match(Set dst (SubF dst (LoadF src)));
10630
10631   format %{ "subss   $dst, $src" %}
10632   ins_cost(150); // XXX
10633   opcode(0xF3, 0x0F, 0x5C);
10634   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10635   ins_pipe(pipe_slow);
10636 %}
10637 
// Scalar float subtract of a constant: dst = dst - con, where the constant is
// read from the constant table via $constantaddress. Emitted through the
// macro assembler's subss. ins_cost is 150.
10638 instruct subF_imm(regF dst, immF con) %{
10639   match(Set dst (SubF dst con));
10640   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10641   ins_cost(150); // XXX
10642   ins_encode %{
10643     __ subss($dst$$XMMRegister, $constantaddress($con));
10644   %}
10645   ins_pipe(pipe_slow);
10646 %}
10647 
// Double-precision subtract, register-register form. Matches (SubD dst src)
// with dst as both the minuend and the result. opcode() supplies F2 0F 5C
// (printed as "subsd"); the REX encoding class sits after the first byte.
10648 instruct subD_reg(regD dst, regD src)
10649 %{
10650   match(Set dst (SubD dst src));
10651 
10652   format %{ "subsd   $dst, $src" %}
10653   ins_cost(150); // XXX
10654   opcode(0xF2, 0x0F, 0x5C);
10655   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10656   ins_pipe(pipe_slow);
10657 %}
10658 
// Double-precision subtract with the subtrahend taken from memory. Matches
// (SubD dst (LoadD src)), folding the LoadD into the matched tree. Uses the
// same opcode bytes (F2 0F 5C) with REX_reg_mem / reg_mem for the operand.
10659 instruct subD_mem(regD dst, memory src)
10660 %{
10661   match(Set dst (SubD dst (LoadD src)));
10662 
10663   format %{ "subsd   $dst, $src" %}
10664   ins_cost(150); // XXX
10665   opcode(0xF2, 0x0F, 0x5C);
10666   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10667   ins_pipe(pipe_slow);
10668 %}
10669 
// Double-precision subtract of a constant. Matches (SubD dst con) for an immD
// operand and calls the assembler's subsd with $constantaddress($con), which
// the format string describes as a load from the constant table.
10670 instruct subD_imm(regD dst, immD con) %{
10671   match(Set dst (SubD dst con));
10672   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10673   ins_cost(150); // XXX
10674   ins_encode %{
10675     __ subsd($dst$$XMMRegister, $constantaddress($con));
10676   %}
10677   ins_pipe(pipe_slow);
10678 %}
10679 
// Single-precision multiply, register-register form. Matches (MulF dst src)
// with dst as both an input and the result. opcode() supplies F3 0F 59
// (printed as "mulss"); the REX encoding class sits after the first byte.
10680 instruct mulF_reg(regF dst, regF src)
10681 %{
10682   match(Set dst (MulF dst src));
10683 
10684   format %{ "mulss   $dst, $src" %}
10685   ins_cost(150); // XXX
10686   opcode(0xF3, 0x0F, 0x59);
10687   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10688   ins_pipe(pipe_slow);
10689 %}
10690 
// Single-precision multiply with one factor taken from memory. Matches
// (MulF dst (LoadF src)), folding the LoadF into the matched tree. Uses the
// same opcode bytes (F3 0F 59) with REX_reg_mem / reg_mem for the operand.
10691 instruct mulF_mem(regF dst, memory src)
10692 %{
10693   match(Set dst (MulF dst (LoadF src)));
10694 
10695   format %{ "mulss   $dst, $src" %}
10696   ins_cost(150); // XXX
10697   opcode(0xF3, 0x0F, 0x59);
10698   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10699   ins_pipe(pipe_slow);
10700 %}
10701 
// Single-precision multiply by a constant. Matches (MulF dst con) for an immF
// operand and calls the assembler's mulss with $constantaddress($con), which
// the format string describes as a load from the constant table.
10702 instruct mulF_imm(regF dst, immF con) %{
10703   match(Set dst (MulF dst con));
10704   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10705   ins_cost(150); // XXX
10706   ins_encode %{
10707     __ mulss($dst$$XMMRegister, $constantaddress($con));
10708   %}
10709   ins_pipe(pipe_slow);
10710 %}
10711 
// Double-precision multiply, register-register form. Matches (MulD dst src)
// with dst as both an input and the result. opcode() supplies F2 0F 59
// (printed as "mulsd"); the REX encoding class sits after the first byte.
10712 instruct mulD_reg(regD dst, regD src)
10713 %{
10714   match(Set dst (MulD dst src));
10715 
10716   format %{ "mulsd   $dst, $src" %}
10717   ins_cost(150); // XXX
10718   opcode(0xF2, 0x0F, 0x59);
10719   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10720   ins_pipe(pipe_slow);
10721 %}
10722 
// Double-precision multiply with one factor taken from memory. Matches
// (MulD dst (LoadD src)), folding the LoadD into the matched tree. Uses the
// same opcode bytes (F2 0F 59) with REX_reg_mem / reg_mem for the operand.
10723 instruct mulD_mem(regD dst, memory src)
10724 %{
10725   match(Set dst (MulD dst (LoadD src)));
10726 
10727   format %{ "mulsd   $dst, $src" %}
10728   ins_cost(150); // XXX
10729   opcode(0xF2, 0x0F, 0x59);
10730   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10731   ins_pipe(pipe_slow);
10732 %}
10733 
// Double-precision multiply by a constant. Matches (MulD dst con) for an immD
// operand and calls the assembler's mulsd with $constantaddress($con), which
// the format string describes as a load from the constant table.
10734 instruct mulD_imm(regD dst, immD con) %{
10735   match(Set dst (MulD dst con));
10736   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10737   ins_cost(150); // XXX
10738   ins_encode %{
10739     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10740   %}
10741   ins_pipe(pipe_slow);
10742 %}
10743 
// Single-precision divide, register-register form. Matches (DivF dst src)
// with dst as both the dividend and the result. opcode() supplies F3 0F 5E
// (printed as "divss"); the REX encoding class sits after the first byte.
10744 instruct divF_reg(regF dst, regF src)
10745 %{
10746   match(Set dst (DivF dst src));
10747 
10748   format %{ "divss   $dst, $src" %}
10749   ins_cost(150); // XXX
10750   opcode(0xF3, 0x0F, 0x5E);
10751   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10752   ins_pipe(pipe_slow);
10753 %}
10754 
// Single-precision divide with the divisor taken from memory. Matches
// (DivF dst (LoadF src)), folding the LoadF into the matched tree. Uses the
// same opcode bytes (F3 0F 5E) with REX_reg_mem / reg_mem for the operand.
10755 instruct divF_mem(regF dst, memory src)
10756 %{
10757   match(Set dst (DivF dst (LoadF src)));
10758 
10759   format %{ "divss   $dst, $src" %}
10760   ins_cost(150); // XXX
10761   opcode(0xF3, 0x0F, 0x5E);
10762   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10763   ins_pipe(pipe_slow);
10764 %}
10765 
// Single-precision divide by a constant. Matches (DivF dst con) for an immF
// operand and calls the assembler's divss with $constantaddress($con), which
// the format string describes as a load from the constant table.
10766 instruct divF_imm(regF dst, immF con) %{
10767   match(Set dst (DivF dst con));
10768   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10769   ins_cost(150); // XXX
10770   ins_encode %{
10771     __ divss($dst$$XMMRegister, $constantaddress($con));
10772   %}
10773   ins_pipe(pipe_slow);
10774 %}
10775 
// Double-precision divide, register-register form. Matches (DivD dst src)
// with dst as both the dividend and the result. opcode() supplies F2 0F 5E
// (printed as "divsd"); the REX encoding class sits after the first byte.
10776 instruct divD_reg(regD dst, regD src)
10777 %{
10778   match(Set dst (DivD dst src));
10779 
10780   format %{ "divsd   $dst, $src" %}
10781   ins_cost(150); // XXX
10782   opcode(0xF2, 0x0F, 0x5E);
10783   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10784   ins_pipe(pipe_slow);
10785 %}
10786 
// Double-precision divide with the divisor taken from memory. Matches
// (DivD dst (LoadD src)), folding the LoadD into the matched tree. Uses the
// same opcode bytes (F2 0F 5E) with REX_reg_mem / reg_mem for the operand.
10787 instruct divD_mem(regD dst, memory src)
10788 %{
10789   match(Set dst (DivD dst (LoadD src)));
10790 
10791   format %{ "divsd   $dst, $src" %}
10792   ins_cost(150); // XXX
10793   opcode(0xF2, 0x0F, 0x5E);
10794   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10795   ins_pipe(pipe_slow);
10796 %}
10797 
// Double-precision divide by a constant. Matches (DivD dst con) for an immD
// operand and calls the assembler's divsd with $constantaddress($con), which
// the format string describes as a load from the constant table.
10798 instruct divD_imm(regD dst, immD con) %{
10799   match(Set dst (DivD dst con));
10800   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10801   ins_cost(150); // XXX
10802   ins_encode %{
10803     __ divsd($dst$$XMMRegister, $constantaddress($con));
10804   %}
10805   ins_pipe(pipe_slow);
10806 %}
10807 
// Single-precision square root, register form. There is no float sqrt node in
// the match; instead the rule recognises the three-node tree
// ConvD2F(SqrtD(ConvF2D src)) (widen, sqrt, narrow) and covers it with one
// instruction: opcode bytes F3 0F 51, printed as "sqrtss". Unlike the
// arithmetic rules above, dst is a pure result and is not an input.
10808 instruct sqrtF_reg(regF dst, regF src)
10809 %{
10810   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10811 
10812   format %{ "sqrtss  $dst, $src" %}
10813   ins_cost(150); // XXX
10814   opcode(0xF3, 0x0F, 0x51);
10815   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10816   ins_pipe(pipe_slow);
10817 %}
10818 
// Single-precision square root of a value in memory. Matches the tree
// ConvD2F(SqrtD(ConvF2D(LoadF src))), so the load and both conversions are
// covered by one "sqrtss" (opcode bytes F3 0F 51) with a memory operand.
10819 instruct sqrtF_mem(regF dst, memory src)
10820 %{
10821   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10822 
10823   format %{ "sqrtss  $dst, $src" %}
10824   ins_cost(150); // XXX
10825   opcode(0xF3, 0x0F, 0x51);
10826   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10827   ins_pipe(pipe_slow);
10828 %}
10829 
// Single-precision square root of a constant. Matches
// ConvD2F(SqrtD(ConvF2D con)) for an immF operand and calls the assembler's
// sqrtss with $constantaddress($con), which the format string describes as a
// load from the constant table.
10830 instruct sqrtF_imm(regF dst, immF con) %{
10831   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10832   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10833   ins_cost(150); // XXX
10834   ins_encode %{
10835     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10836   %}
10837   ins_pipe(pipe_slow);
10838 %}
10839 
// Double-precision square root, register form. Matches (SqrtD src) into a
// separate dst register. opcode() supplies F2 0F 51 (printed as "sqrtsd");
// the REX encoding class sits after the first byte.
10840 instruct sqrtD_reg(regD dst, regD src)
10841 %{
10842   match(Set dst (SqrtD src));
10843 
10844   format %{ "sqrtsd  $dst, $src" %}
10845   ins_cost(150); // XXX
10846   opcode(0xF2, 0x0F, 0x51);
10847   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10848   ins_pipe(pipe_slow);
10849 %}
10850 
// Double-precision square root of a value in memory. Matches
// (SqrtD (LoadD src)), folding the LoadD into the matched tree. Uses opcode
// bytes F2 0F 51 with REX_reg_mem / reg_mem for the memory operand.
10851 instruct sqrtD_mem(regD dst, memory src)
10852 %{
10853   match(Set dst (SqrtD (LoadD src)));
10854 
10855   format %{ "sqrtsd  $dst, $src" %}
10856   ins_cost(150); // XXX
10857   opcode(0xF2, 0x0F, 0x51);
10858   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10859   ins_pipe(pipe_slow);
10860 %}
10861 
// Double-precision square root of a constant. Matches (SqrtD con) for an immD
// operand and calls the assembler's sqrtsd with $constantaddress($con), which
// the format string describes as a load from the constant table.
10862 instruct sqrtD_imm(regD dst, immD con) %{
10863   match(Set dst (SqrtD con));
10864   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10865   ins_cost(150); // XXX
10866   ins_encode %{
10867     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10868   %}
10869   ins_pipe(pipe_slow);
10870 %}
10871 
// Single-precision absolute value, in place. Matches (AbsF dst) with dst as
// both input and result. Per the format string this is an andps against the
// mask 0x7fffffff (clears the sign bit); the actual bytes come from the
// absF_encoding encoding class defined elsewhere in this file.
10872 instruct absF_reg(regF dst)
10873 %{
10874   match(Set dst (AbsF dst));
10875 
10876   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10877   ins_encode(absF_encoding(dst));
10878   ins_pipe(pipe_slow);
10879 %}
10880 
// Double-precision absolute value, in place. Matches (AbsD dst) with dst as
// both input and result. Per the format string this is an andpd against the
// mask 0x7fffffffffffffff (clears the sign bit); the actual bytes come from
// the absD_encoding encoding class defined elsewhere in this file.
10881 instruct absD_reg(regD dst)
10882 %{
10883   match(Set dst (AbsD dst));
10884 
10885   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10886             "# abs double by sign masking" %}
10887   ins_encode(absD_encoding(dst));
10888   ins_pipe(pipe_slow);
10889 %}
10890 
// Single-precision negate, in place. Matches (NegF dst) with dst as both
// input and result. Per the format string this is an xorps against the mask
// 0x80000000 (flips the sign bit); the actual bytes come from the
// negF_encoding encoding class defined elsewhere in this file.
10891 instruct negF_reg(regF dst)
10892 %{
10893   match(Set dst (NegF dst));
10894 
10895   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10896   ins_encode(negF_encoding(dst));
10897   ins_pipe(pipe_slow);
10898 %}
10899 
// Double-precision negate, in place. Matches (NegD dst) with dst as both
// input and result. Per the format string this is an xorpd against the mask
// 0x8000000000000000 (flips the sign bit); the actual bytes come from the
// negD_encoding encoding class defined elsewhere in this file.
10900 instruct negD_reg(regD dst)
10901 %{
10902   match(Set dst (NegD dst));
10903 
10904   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10905             "# neg double by sign flipping" %}
10906   ins_encode(negD_encoding(dst));
10907   ins_pipe(pipe_slow);
10908 %}
10909 
10910 // -----------Trig and Transcendental Instructions-----------------------------
// Double-precision cosine, in place. Matches (CosD dst) with dst (an XMM
// regD) as both input and result. The encoding is Push_SrcXD(dst), the two
// opcode() bytes D9 FF (the x87 fcos encoding), then Push_ResultXD(dst).
// NOTE(review): the Push_* names suggest the value is moved onto the x87
// stack and the result moved back to the XMM register - confirm against the
// enc_class definitions elsewhere in this file.
10911 instruct cosD_reg(regD dst) %{
10912   match(Set dst (CosD dst));
10913 
10914   format %{ "dcos   $dst\n\t" %}
10915   opcode(0xD9, 0xFF);
10916   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10917   ins_pipe( pipe_slow );
10918 %}
10919 
// Double-precision sine, in place. Matches (SinD dst) with dst (an XMM regD)
// as both input and result. The encoding is Push_SrcXD(dst), the two opcode()
// bytes D9 FE (the x87 fsin encoding), then Push_ResultXD(dst).
// NOTE(review): the Push_* names suggest the value is moved onto the x87
// stack and the result moved back to the XMM register - confirm against the
// enc_class definitions elsewhere in this file.
10920 instruct sinD_reg(regD dst) %{
10921   match(Set dst (SinD dst));
10922 
10923   format %{ "dsin   $dst\n\t" %}
10924   opcode(0xD9, 0xFE);
10925   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10926   ins_pipe( pipe_slow );
10927 %}
10928 
// Double-precision tangent, in place. Matches (TanD dst) with dst as both
// input and result. Between Push_SrcXD and Push_ResultXD the encoding emits
// two literal x87 instructions: fptan (D9 F2) followed by "fstp st" (DD D8).
// fptan leaves an extra 1.0 on top of the x87 stack after the result, which
// the fstp discards so that the tangent is on top for Push_ResultXD.
10929 instruct tanD_reg(regD dst) %{
10930   match(Set dst (TanD dst));
10931 
10932   format %{ "dtan   $dst\n\t" %}
10933   ins_encode( Push_SrcXD(dst),
10934               Opcode(0xD9), Opcode(0xF2),   //fptan
10935               Opcode(0xDD), Opcode(0xD8),   //fstp st
10936               Push_ResultXD(dst) );
10937   ins_pipe( pipe_slow );
10938 %}
10939 
// Base-10 logarithm of a double, in place (dst is both input and result).
// The encoding order matters: fldlg2 is emitted BEFORE Push_SrcXD so that the
// constant sits below x on the x87 stack, which is the operand order fyl2x
// expects. The format string only shows the two x87 instructions, not the
// Push_SrcXD / Push_ResultXD steps that bracket them.
10940 instruct log10D_reg(regD dst) %{
10941   // The source and result Double operands in XMM registers
10942   match(Set dst (Log10D dst));
10943   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10944   // fyl2x        ; compute log_10(2) * log_2(x)
10945   format %{ "fldlg2\t\t\t#Log10\n\t"
10946             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10947          %}
10948    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10949               Push_SrcXD(dst),
10950               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10951               Push_ResultXD(dst));
10952 
10953   ins_pipe( pipe_slow );
10954 %}
10955 
// Natural logarithm of a double, in place (dst is both input and result).
// The encoding order matters: fldln2 is emitted BEFORE Push_SrcXD so that the
// constant sits below x on the x87 stack, which is the operand order fyl2x
// expects. The format string only shows the two x87 instructions, not the
// Push_SrcXD / Push_ResultXD steps that bracket them.
10956 instruct logD_reg(regD dst) %{
10957   // The source and result Double operands in XMM registers
10958   match(Set dst (LogD dst));
10959   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10960   // fyl2x        ; compute log_e(2) * log_2(x)
10961   format %{ "fldln2\t\t\t#Log_e\n\t"
10962             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10963          %}
10964   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10965               Push_SrcXD(dst),
10966               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10967               Push_ResultXD(dst));
10968   ins_pipe( pipe_slow );
10969 %}
10970 
10971 
10972 
10973 //----------Arithmetic Conversion Instructions---------------------------------
10974 
// RoundFloat is a no-op here: the rule matches (RoundFloat dst) in place, has
// an empty ins_encode() so no bytes are emitted, costs 0, and uses the
// "empty" pipeline class. It has no format string.
10975 instruct roundFloat_nop(regF dst)
10976 %{
10977   match(Set dst (RoundFloat dst));
10978 
10979   ins_cost(0);
10980   ins_encode();
10981   ins_pipe(empty);
10982 %}
10983 
// RoundDouble is a no-op here: the rule matches (RoundDouble dst) in place,
// has an empty ins_encode() so no bytes are emitted, costs 0, and uses the
// "empty" pipeline class. It has no format string.
10984 instruct roundDouble_nop(regD dst)
10985 %{
10986   match(Set dst (RoundDouble dst));
10987 
10988   ins_cost(0);
10989   ins_encode();
10990   ins_pipe(empty);
10991 %}
10992 
// Float-to-double conversion, register form. Matches (ConvF2D src) from a
// regF into a regD. opcode() supplies F3 0F 5A (printed as "cvtss2sd").
// No ins_cost is given, so the ADL default cost applies.
10993 instruct convF2D_reg_reg(regD dst, regF src)
10994 %{
10995   match(Set dst (ConvF2D src));
10996 
10997   format %{ "cvtss2sd $dst, $src" %}
10998   opcode(0xF3, 0x0F, 0x5A);
10999   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11000   ins_pipe(pipe_slow); // XXX
11001 %}
11002 
// Float-to-double conversion of a value in memory. Matches
// (ConvF2D (LoadF src)), folding the LoadF into the matched tree. Uses opcode
// bytes F3 0F 5A (cvtss2sd) with REX_reg_mem / reg_mem for the operand.
11003 instruct convF2D_reg_mem(regD dst, memory src)
11004 %{
11005   match(Set dst (ConvF2D (LoadF src)));
11006 
11007   format %{ "cvtss2sd $dst, $src" %}
11008   opcode(0xF3, 0x0F, 0x5A);
11009   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11010   ins_pipe(pipe_slow); // XXX
11011 %}
11012 
// Double-to-float conversion, register form. Matches (ConvD2F src) from a
// regD into a regF. opcode() supplies F2 0F 5A (printed as "cvtsd2ss").
// No ins_cost is given, so the ADL default cost applies.
11013 instruct convD2F_reg_reg(regF dst, regD src)
11014 %{
11015   match(Set dst (ConvD2F src));
11016 
11017   format %{ "cvtsd2ss $dst, $src" %}
11018   opcode(0xF2, 0x0F, 0x5A);
11019   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11020   ins_pipe(pipe_slow); // XXX
11021 %}
11022 
// Double-to-float conversion of a value in memory. Matches
// (ConvD2F (LoadD src)), folding the LoadD into the matched tree. Uses opcode
// bytes F2 0F 5A (cvtsd2ss) with REX_reg_mem / reg_mem for the operand.
11023 instruct convD2F_reg_mem(regF dst, memory src)
11024 %{
11025   match(Set dst (ConvD2F (LoadD src)));
11026 
11027   format %{ "cvtsd2ss $dst, $src" %}
11028   opcode(0xF2, 0x0F, 0x5A);
11029   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11030   ins_pipe(pipe_slow); // XXX
11031 %}
11032 
11033 // XXX do mem variants
// Float-to-int conversion. Matches (ConvF2I src). The encoding first emits
// the truncating convert (opcode bytes F3 0F 2C, "cvttss2sil"), then the
// f2i_fixup encoding class. As laid out in the format string, the fixup
// compares the result with 0x80000000 and, only when it is equal, spills src
// to the stack, calls the f2i_fixup stub and pops the corrected result into
// dst. The flags register is declared KILLed (the sequence contains a cmpl).
11034 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11035 %{
11036   match(Set dst (ConvF2I src));
11037   effect(KILL cr);
11038 
11039   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11040             "cmpl    $dst, #0x80000000\n\t"
11041             "jne,s   done\n\t"
11042             "subq    rsp, #8\n\t"
11043             "movss   [rsp], $src\n\t"
11044             "call    f2i_fixup\n\t"
11045             "popq    $dst\n"
11046     "done:   "%}
11047   opcode(0xF3, 0x0F, 0x2C);
11048   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11049              f2i_fixup(dst, src));
11050   ins_pipe(pipe_slow);
11051 %}
11052 
// Float-to-long conversion. Matches (ConvF2L src). Same shape as the
// float-to-int rule but with the _wide REX encoding class for the 64-bit
// destination ("cvttss2siq", opcode bytes F3 0F 2C) followed by f2l_fixup.
// As laid out in the format string, the fixup compares the result with
// 0x8000000000000000 and, only when it is equal, spills src to the stack,
// calls the f2l_fixup stub and pops the corrected result into dst.
// The flags register is declared KILLed (the sequence contains a cmpq).
11053 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11054 %{
11055   match(Set dst (ConvF2L src));
11056   effect(KILL cr);
11057 
11058   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11059             "cmpq    $dst, [0x8000000000000000]\n\t"
11060             "jne,s   done\n\t"
11061             "subq    rsp, #8\n\t"
11062             "movss   [rsp], $src\n\t"
11063             "call    f2l_fixup\n\t"
11064             "popq    $dst\n"
11065     "done:   "%}
11066   opcode(0xF3, 0x0F, 0x2C);
11067   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11068              f2l_fixup(dst, src));
11069   ins_pipe(pipe_slow);
11070 %}
11071 
// Double-to-int conversion. Matches (ConvD2I src). The encoding first emits
// the truncating convert (opcode bytes F2 0F 2C, "cvttsd2sil"), then the
// d2i_fixup encoding class. As laid out in the format string, the fixup
// compares the result with 0x80000000 and, only when it is equal, spills src
// to the stack, calls the d2i_fixup stub and pops the corrected result into
// dst. The flags register is declared KILLed (the sequence contains a cmpl).
11072 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11073 %{
11074   match(Set dst (ConvD2I src));
11075   effect(KILL cr);
11076 
11077   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11078             "cmpl    $dst, #0x80000000\n\t"
11079             "jne,s   done\n\t"
11080             "subq    rsp, #8\n\t"
11081             "movsd   [rsp], $src\n\t"
11082             "call    d2i_fixup\n\t"
11083             "popq    $dst\n"
11084     "done:   "%}
11085   opcode(0xF2, 0x0F, 0x2C);
11086   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11087              d2i_fixup(dst, src));
11088   ins_pipe(pipe_slow);
11089 %}
11090 
// Double-to-long conversion. Matches (ConvD2L src). Same shape as the
// double-to-int rule but with the _wide REX encoding class for the 64-bit
// destination ("cvttsd2siq", opcode bytes F2 0F 2C) followed by d2l_fixup.
// As laid out in the format string, the fixup compares the result with
// 0x8000000000000000 and, only when it is equal, spills src to the stack,
// calls the d2l_fixup stub and pops the corrected result into dst.
// The flags register is declared KILLed (the sequence contains a cmpq).
11091 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11092 %{
11093   match(Set dst (ConvD2L src));
11094   effect(KILL cr);
11095 
11096   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11097             "cmpq    $dst, [0x8000000000000000]\n\t"
11098             "jne,s   done\n\t"
11099             "subq    rsp, #8\n\t"
11100             "movsd   [rsp], $src\n\t"
11101             "call    d2l_fixup\n\t"
11102             "popq    $dst\n"
11103     "done:   "%}
11104   opcode(0xF2, 0x0F, 0x2C);
11105   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11106              d2l_fixup(dst, src));
11107   ins_pipe(pipe_slow);
11108 %}
11109 
// Int-to-float conversion from a general register. Matches (ConvI2F src) and
// is selected only when UseXmmI2F is false; convXI2F_reg covers the same node
// when that flag is set. Opcode bytes F3 0F 2A, printed as "cvtsi2ssl".
11110 instruct convI2F_reg_reg(regF dst, rRegI src)
11111 %{
11112   predicate(!UseXmmI2F);
11113   match(Set dst (ConvI2F src));
11114 
11115   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11116   opcode(0xF3, 0x0F, 0x2A);
11117   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11118   ins_pipe(pipe_slow); // XXX
11119 %}
11120 
// Int-to-float conversion of a value in memory. Matches
// (ConvI2F (LoadI src)), folding the LoadI into the matched tree. Opcode
// bytes F3 0F 2A ("cvtsi2ssl") with REX_reg_mem / reg_mem for the operand.
// Unlike the register form, this rule carries no UseXmmI2F predicate.
11121 instruct convI2F_reg_mem(regF dst, memory src)
11122 %{
11123   match(Set dst (ConvI2F (LoadI src)));
11124 
11125   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11126   opcode(0xF3, 0x0F, 0x2A);
11127   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11128   ins_pipe(pipe_slow); // XXX
11129 %}
11130 
// Int-to-double conversion from a general register. Matches (ConvI2D src)
// and is selected only when UseXmmI2D is false; convXI2D_reg covers the same
// node when that flag is set. Opcode bytes F2 0F 2A, printed as "cvtsi2sdl".
11131 instruct convI2D_reg_reg(regD dst, rRegI src)
11132 %{
11133   predicate(!UseXmmI2D);
11134   match(Set dst (ConvI2D src));
11135 
11136   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11137   opcode(0xF2, 0x0F, 0x2A);
11138   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11139   ins_pipe(pipe_slow); // XXX
11140 %}
11141 
// Int-to-double conversion of a value in memory. Matches
// (ConvI2D (LoadI src)), folding the LoadI into the matched tree. Opcode
// bytes F2 0F 2A ("cvtsi2sdl") with REX_reg_mem / reg_mem for the operand.
// Unlike the register form, this rule carries no UseXmmI2D predicate.
11142 instruct convI2D_reg_mem(regD dst, memory src)
11143 %{
11144   match(Set dst (ConvI2D (LoadI src)));
11145 
11146   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11147   opcode(0xF2, 0x0F, 0x2A);
11148   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11149   ins_pipe(pipe_slow); // XXX
11150 %}
11151 
// Alternative int-to-float conversion, selected when UseXmmI2F is set (the
// complement of convI2F_reg_reg's predicate). Two assembler calls: movdl
// copies the int from the general register into dst, then cvtdq2ps converts
// it in place within the XMM register.
11152 instruct convXI2F_reg(regF dst, rRegI src)
11153 %{
11154   predicate(UseXmmI2F);
11155   match(Set dst (ConvI2F src));
11156 
11157   format %{ "movdl $dst, $src\n\t"
11158             "cvtdq2psl $dst, $dst\t# i2f" %}
11159   ins_encode %{
11160     __ movdl($dst$$XMMRegister, $src$$Register);
11161     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11162   %}
11163   ins_pipe(pipe_slow); // XXX
11164 %}
11165 
// Alternative int-to-double conversion, selected when UseXmmI2D is set (the
// complement of convI2D_reg_reg's predicate). Two assembler calls: movdl
// copies the int from the general register into dst, then cvtdq2pd converts
// it in place within the XMM register.
11166 instruct convXI2D_reg(regD dst, rRegI src)
11167 %{
11168   predicate(UseXmmI2D);
11169   match(Set dst (ConvI2D src));
11170 
11171   format %{ "movdl $dst, $src\n\t"
11172             "cvtdq2pdl $dst, $dst\t# i2d" %}
11173   ins_encode %{
11174     __ movdl($dst$$XMMRegister, $src$$Register);
11175     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11176   %}
11177   ins_pipe(pipe_slow); // XXX
11178 %}
11179 
// Long-to-float conversion from a general register. Matches (ConvL2F src).
// Same opcode bytes as the int form (F3 0F 2A) but with the _wide REX
// encoding class for the 64-bit source; printed as "cvtsi2ssq".
11180 instruct convL2F_reg_reg(regF dst, rRegL src)
11181 %{
11182   match(Set dst (ConvL2F src));
11183 
11184   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11185   opcode(0xF3, 0x0F, 0x2A);
11186   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11187   ins_pipe(pipe_slow); // XXX
11188 %}
11189 
// Long-to-float conversion of a value in memory. Matches
// (ConvL2F (LoadL src)), folding the LoadL into the matched tree. Opcode
// bytes F3 0F 2A with the _wide REX memory encoding class ("cvtsi2ssq").
11190 instruct convL2F_reg_mem(regF dst, memory src)
11191 %{
11192   match(Set dst (ConvL2F (LoadL src)));
11193 
11194   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11195   opcode(0xF3, 0x0F, 0x2A);
11196   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11197   ins_pipe(pipe_slow); // XXX
11198 %}
11199 
// Long-to-double conversion from a general register. Matches (ConvL2D src).
// Same opcode bytes as the int form (F2 0F 2A) but with the _wide REX
// encoding class for the 64-bit source; printed as "cvtsi2sdq".
11200 instruct convL2D_reg_reg(regD dst, rRegL src)
11201 %{
11202   match(Set dst (ConvL2D src));
11203 
11204   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11205   opcode(0xF2, 0x0F, 0x2A);
11206   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11207   ins_pipe(pipe_slow); // XXX
11208 %}
11209 
// Long-to-double conversion of a value in memory. Matches
// (ConvL2D (LoadL src)), folding the LoadL into the matched tree. Opcode
// bytes F2 0F 2A with the _wide REX memory encoding class ("cvtsi2sdq").
11210 instruct convL2D_reg_mem(regD dst, memory src)
11211 %{
11212   match(Set dst (ConvL2D (LoadL src)));
11213 
11214   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11215   opcode(0xF2, 0x0F, 0x2A);
11216   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11217   ins_pipe(pipe_slow); // XXX
11218 %}
11219 
// Sign-extending int-to-long conversion. Matches (ConvI2L src) and emits a
// single movslq from the int register into the long register. Cost 125.
11220 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11221 %{
11222   match(Set dst (ConvI2L src));
11223 
11224   ins_cost(125);
11225   format %{ "movslq  $dst, $src\t# i2l" %}
11226   ins_encode %{
11227     __ movslq($dst$$Register, $src$$Register);
11228   %}
11229   ins_pipe(ialu_reg_reg);
11230 %}
11231 
11232 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11233 // %{
11234 //   match(Set dst (ConvI2L src));
11235 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11236 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11237 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11238 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11239 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11240 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11241 
11242 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11243 //   ins_encode(enc_copy(dst, src));
11244 // //   opcode(0x63); // needs REX.W
11245 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11246 //   ins_pipe(ialu_reg_reg);
11247 // %}
11248 
11249 // Zero-extend convert int to long
// Matches the two-node tree (AndL (ConvI2L src) mask) where mask is an
// immL_32bits operand, i.e. an int widened to long and then masked to its low
// 32 bits, and replaces it with a single 32-bit "movl" (per the format
// string) via the enc_copy encoding class.
// NOTE(review): this rule uses enc_copy whereas zerox_long_reg_reg and
// convL2I_reg_reg use enc_copy_always; the naming suggests enc_copy may omit
// the move in some cases (e.g. dst == src) - confirm against the enc_class.
11250 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11251 %{
11252   match(Set dst (AndL (ConvI2L src) mask));
11253 
11254   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11255   ins_encode(enc_copy(dst, src));
11256   ins_pipe(ialu_reg_reg);
11257 %}
11258 
11259 // Zero-extend convert int to long
// Memory form of the zero-extending int-to-long rule. Matches
// (AndL (ConvI2L (LoadI src)) mask) with an immL_32bits mask and covers the
// load, the widening and the mask with one 32-bit load: opcode byte 0x8B
// ("movl"), using the non-wide REX_reg_mem encoding class.
11260 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11261 %{
11262   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11263 
11264   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11265   opcode(0x8B);
11266   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11267   ins_pipe(ialu_reg_mem);
11268 %}
11269 
// Masks a long to its low 32 bits. Matches (AndL src mask) with an
// immL_32bits mask and, per the format string, implements it as a 32-bit
// "movl" instead of an and. Uses the enc_copy_always encoding class.
11270 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11271 %{
11272   match(Set dst (AndL src mask));
11273 
11274   format %{ "movl    $dst, $src\t# zero-extend long" %}
11275   ins_encode(enc_copy_always(dst, src));
11276   ins_pipe(ialu_reg_reg);
11277 %}
11278 
// Long-to-int narrowing. Matches (ConvL2I src) and, per the format string,
// implements it as a 32-bit "movl" from the long register into the int
// register. Uses the enc_copy_always encoding class.
11279 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11280 %{
11281   match(Set dst (ConvL2I src));
11282 
11283   format %{ "movl    $dst, $src\t# l2i" %}
11284   ins_encode(enc_copy_always(dst, src));
11285   ins_pipe(ialu_reg_reg);
11286 %}
11287 
11288 
// MoveF2I where the float source lives in a stack slot and the result goes to
// an int register: a plain 32-bit load, opcode byte 0x8B ("movl"). Cost 125.
// The effect clause spells out that dst is defined and src is only read.
11289 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11290   match(Set dst (MoveF2I src));
11291   effect(DEF dst, USE src);
11292 
11293   ins_cost(125);
11294   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11295   opcode(0x8B);
11296   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11297   ins_pipe(ialu_reg_mem);
11298 %}
11299 
// MoveI2F where the int source lives in a stack slot and the result goes to a
// float (XMM) register: a scalar-single load, opcode bytes F3 0F 10
// ("movss"). Cost 125. dst is defined and src is only read.
11300 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11301   match(Set dst (MoveI2F src));
11302   effect(DEF dst, USE src);
11303 
11304   ins_cost(125);
11305   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11306   opcode(0xF3, 0x0F, 0x10);
11307   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11308   ins_pipe(pipe_slow);
11309 %}
11310 
// MoveD2L where the double source lives in a stack slot and the result goes
// to a long register: a 64-bit load, opcode byte 0x8B with the _wide REX
// encoding class ("movq"). Cost 125. dst is defined and src is only read.
11311 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11312   match(Set dst (MoveD2L src));
11313   effect(DEF dst, USE src);
11314 
11315   ins_cost(125);
11316   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11317   opcode(0x8B);
11318   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11319   ins_pipe(ialu_reg_mem);
11320 %}
11321 
// MoveL2D from a long stack slot into a double (XMM) register, variant used
// when UseXmmLoadAndClearUpper is false. Loads with opcode bytes 66 0F 12
// ("movlpd"); MoveL2D_stack_reg is the complementary variant for when the
// flag is set. Note that the format tag reads "MoveL2D_stack_reg" for both.
11322 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11323   predicate(!UseXmmLoadAndClearUpper);
11324   match(Set dst (MoveL2D src));
11325   effect(DEF dst, USE src);
11326 
11327   ins_cost(125);
11328   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11329   opcode(0x66, 0x0F, 0x12);
11330   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11331   ins_pipe(pipe_slow);
11332 %}
11333 
// MoveL2D from a long stack slot into a double (XMM) register, variant used
// when UseXmmLoadAndClearUpper is true. Loads with opcode bytes F2 0F 10
// ("movsd"); MoveL2D_stack_reg_partial is the complementary variant for when
// the flag is clear.
11334 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11335   predicate(UseXmmLoadAndClearUpper);
11336   match(Set dst (MoveL2D src));
11337   effect(DEF dst, USE src);
11338 
11339   ins_cost(125);
11340   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11341   opcode(0xF2, 0x0F, 0x10);
11342   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11343   ins_pipe(pipe_slow);
11344 %}
11345 
11346 
// MoveF2I where the float source is in an XMM register and the result is an
// int stack slot: a scalar-single store, opcode bytes F3 0F 11 ("movss").
// Because this is a store, the encoding classes receive (src, dst): the
// register operand comes first and the stack slot is the memory operand.
11347 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11348   match(Set dst (MoveF2I src));
11349   effect(DEF dst, USE src);
11350 
11351   ins_cost(95); // XXX
11352   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11353   opcode(0xF3, 0x0F, 0x11);
11354   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11355   ins_pipe(pipe_slow);
11356 %}
11357 
// MoveI2F where the int source is in a general register and the result is a
// float stack slot: a 32-bit store, opcode byte 0x89 ("movl"). Cost 100.
// Because this is a store, the encoding classes receive (src, dst): the
// register operand comes first and the stack slot is the memory operand.
11358 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11359   match(Set dst (MoveI2F src));
11360   effect(DEF dst, USE src);
11361 
11362   ins_cost(100);
11363   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11364   opcode(0x89);
11365   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11366   ins_pipe( ialu_mem_reg );
11367 %}
11368 
// MoveD2L where the double source is in an XMM register and the result is a
// long stack slot: a scalar-double store, opcode bytes F2 0F 11 ("movsd").
// Because this is a store, the encoding classes receive (src, dst): the
// register operand comes first and the stack slot is the memory operand.
// The format tag now names this rule (it previously read "MoveL2D_reg_stack",
// which is the name of the opposite-direction rule below).
11369 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11370   match(Set dst (MoveD2L src));
11371   effect(DEF dst, USE src);
11372 
11373   ins_cost(95); // XXX
11374   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11375   opcode(0xF2, 0x0F, 0x11);
11376   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11377   ins_pipe(pipe_slow);
11378 %}
11379 
// MoveL2D where the long source is in a general register and the result is a
// double stack slot: a 64-bit store, opcode byte 0x89 with the _wide REX
// encoding class ("movq"). Cost 100. As a store, the encoding classes receive
// (src, dst): register first, stack slot as the memory operand.
11380 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11381   match(Set dst (MoveL2D src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(100);
11385   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11386   opcode(0x89);
11387   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11388   ins_pipe(ialu_mem_reg);
11389 %}
11390 
// MoveF2I directly between registers: XMM float register to int general
// register via the assembler's movdl. Cost 85, which is lower than the
// stack-based MoveF2I rules (95 and 125).
11391 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11392   match(Set dst (MoveF2I src));
11393   effect(DEF dst, USE src);
11394   ins_cost(85);
11395   format %{ "movd    $dst,$src\t# MoveF2I" %}
11396   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11397   ins_pipe( pipe_slow );
11398 %}
11399 
// MoveD2L directly between registers: XMM double register to long general
// register via the assembler's movdq (the format string prints it as
// "movd"). Cost 85, lower than the stack-based MoveD2L rules (95 and 125).
11400 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11401   match(Set dst (MoveD2L src));
11402   effect(DEF dst, USE src);
11403   ins_cost(85);
11404   format %{ "movd    $dst,$src\t# MoveD2L" %}
11405   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11406   ins_pipe( pipe_slow );
11407 %}
11408 
11409 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F directly between registers: int general register to XMM float
// register via the assembler's movdl. Given cost 300, far above the
// stack-based MoveI2F rules (100 and 125), so the matcher prefers those.
11410 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11411   match(Set dst (MoveI2F src));
11412   effect(DEF dst, USE src);
11413   ins_cost(300);
11414   format %{ "movd    $dst,$src\t# MoveI2F" %}
11415   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
// MoveL2D directly between registers: long general register to XMM double
// register via the assembler's movdq (the format string prints it as
// "movd"). Given cost 300, far above the stack-based MoveL2D rules (100 and
// 125), so the matcher prefers those.
11419 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11420   match(Set dst (MoveL2D src));
11421   effect(DEF dst, USE src);
11422   ins_cost(300);
11423   format %{ "movd    $dst,$src\t# MoveL2D" %}
11424   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 // Replicate scalar to packed byte (1 byte) values in xmm
// Source already in an XMM register. Matches (Replicate8B src); the whole
// sequence is produced by the pshufd_8x8 encoding class, which the format
// string shows as MOVDQA, PUNPCKLBW, then PSHUFLW with immediate 0x00.
11429 instruct Repl8B_reg(regD dst, regD src) %{
11430   match(Set dst (Replicate8B src));
11431   format %{ "MOVDQA  $dst,$src\n\t"
11432             "PUNPCKLBW $dst,$dst\n\t"
11433             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11434   ins_encode( pshufd_8x8(dst, src));
11435   ins_pipe( pipe_slow );
11436 %}
11437 
11438 // Replicate scalar to packed byte (1 byte) values in xmm
// Source in a general int register. Matches (Replicate8B src); mov_i2x first
// copies src into dst (the MOVD in the format string), then pshufd_8x8 is
// applied with dst as both source and destination.
11439 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11440   match(Set dst (Replicate8B src));
11441   format %{ "MOVD    $dst,$src\n\t"
11442             "PUNPCKLBW $dst,$dst\n\t"
11443             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11444   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Matches (Replicate8B zero) for the immI0 operand. Replicating zero needs no
// shuffle: the register is cleared by xor-ing it with itself (PXOR dst,dst).
11449 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11450   match(Set dst (Replicate8B zero));
11451   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11452   ins_encode( pxor(dst, dst));
11453   ins_pipe( fpu_reg_reg );
11454 %}
11455 
11456 // Replicate scalar to packed short (2 byte) values in xmm
// Source already in an XMM register. Matches (Replicate4S src); encoded by
// the pshufd_4x16 encoding class, shown in the format string as a single
// PSHUFLW with immediate 0x00.
11457 instruct Repl4S_reg(regD dst, regD src) %{
11458   match(Set dst (Replicate4S src));
11459   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11460   ins_encode( pshufd_4x16(dst, src));
11461   ins_pipe( fpu_reg_reg );
11462 %}
11463 
11464 // Replicate scalar to packed short (2 byte) values in xmm
// Source in a general int register. Matches (Replicate4S src); mov_i2x first
// copies src into dst (the MOVD in the format string), then pshufd_4x16 is
// applied with dst as both source and destination.
11465 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11466   match(Set dst (Replicate4S src));
11467   format %{ "MOVD    $dst,$src\n\t"
11468             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11469   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11470   ins_pipe( fpu_reg_reg );
11471 %}
11472 
11473 // Replicate scalar zero to packed short (2 byte) values in xmm
// Matches (Replicate4S zero) for the immI0 operand. Replicating zero needs no
// shuffle: the register is cleared by xor-ing it with itself (PXOR dst,dst).
11474 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11475   match(Set dst (Replicate4S zero));
11476   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11477   ins_encode( pxor(dst, dst));
11478   ins_pipe( fpu_reg_reg );
11479 %}
11480 
11481 // Replicate scalar to packed char (2 byte) values in xmm
// Source already in an XMM register. Matches (Replicate4C src) and uses the
// same pshufd_4x16 encoding class as the Replicate4S rule (PSHUFLW 0x00).
11482 instruct Repl4C_reg(regD dst, regD src) %{
11483   match(Set dst (Replicate4C src));
11484   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11485   ins_encode( pshufd_4x16(dst, src));
11486   ins_pipe( fpu_reg_reg );
11487 %}
11488 
11489 // Replicate scalar to packed char (2 byte) values in xmm
// Source in a general int register. Matches (Replicate4C src); mov_i2x first
// copies src into dst (the MOVD in the format string), then pshufd_4x16 is
// applied with dst as both source and destination.
11490 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11491   match(Set dst (Replicate4C src));
11492   format %{ "MOVD    $dst,$src\n\t"
11493             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11494   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11495   ins_pipe( fpu_reg_reg );
11496 %}
11497 
11498 // Replicate scalar zero to packed char (2 byte) values in xmm
// Matches (Replicate4C zero) for the immI0 operand. Replicating zero needs no
// shuffle: the register is cleared by xor-ing it with itself (PXOR dst,dst).
11499 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11500   match(Set dst (Replicate4C zero));
11501   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11502   ins_encode( pxor(dst, dst));
11503   ins_pipe( fpu_reg_reg );
11504 %}
11505 
11506 // Replicate scalar to packed integer (4 byte) values in xmm
// Source already in an XMM register. Matches (Replicate2I src) and encodes a
// PSHUFD with immediate 0x00, i.e. every destination dword selects source
// dword 0.
11507 instruct Repl2I_reg(regD dst, regD src) %{
11508   match(Set dst (Replicate2I src));
11509   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11510   ins_encode( pshufd(dst, src, 0x00));
11511   ins_pipe( fpu_reg_reg );
11512 %}
11513 
11514 // Replicate scalar to packed integer (4 byte) values in xmm
// Source in a general int register. Matches (Replicate2I src); mov_i2x first
// copies src into dst (the MOVD in the format string), then PSHUFD with
// immediate 0x00 is applied with dst as both source and destination.
11515 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11516   match(Set dst (Replicate2I src));
11517   format %{ "MOVD   $dst,$src\n\t"
11518             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11519   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11520   ins_pipe( fpu_reg_reg );
11521 %}
11522 
11523 // Replicate scalar zero to packed integer (4 byte) values in xmm
// Matches (Replicate2I zero) for the immI0 operand. Replicating zero needs no
// shuffle: the register is cleared by xor-ing it with itself (PXOR dst,dst).
11524 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11525   match(Set dst (Replicate2I zero));
11526   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11527   ins_encode( pxor(dst, dst));
11528   ins_pipe( fpu_reg_reg );
11529 %}
11530 
11531 // Replicate scalar to packed single precision floating point values in xmm
// Source typed as regD. Matches (Replicate2F src) and encodes a PSHUFD with
// immediate 0xe0 (binary 11 10 00 00): destination dwords 0 and 1 both select
// source dword 0, while dwords 2 and 3 select source dwords 2 and 3.
11532 instruct Repl2F_reg(regD dst, regD src) %{
11533   match(Set dst (Replicate2F src));
11534   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11535   ins_encode( pshufd(dst, src, 0xe0));
11536   ins_pipe( fpu_reg_reg );
11537 %}
11538 
11539 // Replicate scalar to packed single precision floating point values in xmm
// Same as Repl2F_reg except that the source operand is typed regF. Matches
// (Replicate2F src) and encodes a PSHUFD with immediate 0xe0 (binary
// 11 10 00 00): destination dwords 0 and 1 both select source dword 0, while
// dwords 2 and 3 select source dwords 2 and 3.
11540 instruct Repl2F_regF(regD dst, regF src) %{
11541   match(Set dst (Replicate2F src));
11542   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11543   ins_encode( pshufd(dst, src, 0xe0));
11544   ins_pipe( fpu_reg_reg );
11545 %}
11546 
11547 // Replicate scalar zero to packed single precision floating point values in xmm
// Matches (Replicate2F zero) for the immF0 operand. Replicating zero needs no
// shuffle: the register is cleared by xor-ing it with itself (PXOR dst,dst).
11548 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11549   match(Set dst (Replicate2F zero));
11550   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11551   ins_encode( pxor(dst, dst));
11552   ins_pipe( fpu_reg_reg );
11553 %}
11554 
11555 
11556 // =======================================================================
11557 // fast clearing of an array
// Matches (ClearArray cnt base) and clears memory with "rep stosq".
// The operands are pinned to the registers the string instruction uses:
// cnt in rcx (count), base in rdi (destination), zero in rax (stored value).
// cnt and base are USE_KILL because the instruction consumes and modifies
// them; rax and the flags are KILLed by the leading "xorl rax, rax".
// The result operand is a Universe "dummy": the rule has no value result.
11558 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11559                   rFlagsReg cr)
11560 %{
11561   match(Set dummy (ClearArray cnt base));
11562   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11563 
11564   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11565             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11566   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11567              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11568   ins_pipe(pipe_slow);
11569 %}
11570 
// String comparison intrinsic. Matches
// (StrComp (Binary str1 cnt1) (Binary str2 cnt2)) and delegates entirely to
// the macro assembler's string_compare routine.
// Operands are pinned to fixed registers: str1 rdi, cnt1 rcx, str2 rsi,
// cnt2 rdx, result rax. All four inputs are USE_KILL (read, then clobbered),
// tmp1 is an XMM scratch register (TEMP), and the flags are KILLed.
// Note the call passes both string pointers first, then both counts.
11571 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11572                         rax_RegI result, regD tmp1, rFlagsReg cr)
11573 %{
11574   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11575   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11576 
11577   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11578   ins_encode %{
11579     __ string_compare($str1$$Register, $str2$$Register,
11580                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11581                       $tmp1$$XMMRegister);
11582   %}
11583   ins_pipe( pipe_slow );
11584 %}
11585 
11586 // Substring search (StrIndexOf) for a substring whose length is a compile-time constant.
11587 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11588                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11589 %{
11590   predicate(UseSSE42Intrinsics);
11591   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11592   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11593   // cnt2 (rax) is not an input here: the length comes from int_cnt2, so rax is only declared killed.
11594   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11595   ins_encode %{
11596     const int substr_len = (int)$int_cnt2$$constant;
11597     if (substr_len < 8) {
11598       // Small strings are loaded through stack if they cross page boundary.
11599       __ string_indexof($str1$$Register, $str2$$Register,
11600                         $cnt1$$Register, $cnt2$$Register,
11601                         substr_len, $result$$Register,
11602                         $vec$$XMMRegister, $tmp$$Register);
11603     } else {
11604       // Constant substrings of 8 or more elements take the C8 variant,
11605       // which does not need to load them through the stack.
11606       __ string_indexofC8($str1$$Register, $str2$$Register,
11607                           $cnt1$$Register, $cnt2$$Register,
11608                           substr_len, $result$$Register,
11609                           $vec$$XMMRegister, $tmp$$Register);
11610     }
11611   %}
11612   ins_pipe( pipe_slow );
11613 %}
11614 // Substring search (StrIndexOf) when the substring length is a run-time value (cnt2, pinned to rax).
11615 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11616                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11617 %{
11618   predicate(UseSSE42Intrinsics);
11619   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11620   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11621   // Every fixed input register is consumed; rcx and the flags are killed and vec is an XMM temp.
11622   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11623   ins_encode %{
11624     __ string_indexof($str1$$Register, $str2$$Register,
11625                       $cnt1$$Register, $cnt2$$Register,
11626                       (-1), $result$$Register, // -1 sits where string_indexof_con passes its constant length
11627                       $vec$$XMMRegister, $tmp$$Register);
11628   %}
11629   ins_pipe( pipe_slow );
11630 %}
11631 
11632 // Fast string equals (StrEquals): delegates to char_arrays_equals; the result is produced in rax.
11633 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11634                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11635 %{
11636   match(Set result (StrEquals (Binary str1 str2) cnt));
11637   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11638   // NOTE(review): the leading 'false' is 'true' in array_equals - presumably selects string vs. array mode; confirm.
11639   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11640   ins_encode %{
11641     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11642                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11643                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11644   %}
11645   ins_pipe( pipe_slow );
11646 %}
11647 
11648 // Fast array equals (AryEq): delegates to char_arrays_equals with a leading 'true'; the result is produced in rax.
11649 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11650                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11651 %{
11652   match(Set result (AryEq ary1 ary2));
11653   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11654   //ins_cost(300);
11655   // tmp3 (rcx) fills the argument slot where string_equals passes its count; here it is only a killed scratch register.
11656   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11657   ins_encode %{
11658     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11659                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11660                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11661   %}
11662   ins_pipe( pipe_slow );
11663 %}
11664 
11665 //----------Control Flow Instructions------------------------------------------
11666 // Signed compare Instructions
11667 // Signed 32-bit compare of two registers (CmpI); also instantiated by the minI_rReg / maxI_rReg expand rules.
11668 // XXX more variants!!
11669 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11670 %{
11671   match(Set cr (CmpI op1 op2));
11672   effect(DEF cr, USE op1, USE op2);
11673   // NOTE(review): the sibling compare rules carry no explicit effect list - presumably needed here for the expand users; confirm.
11674   format %{ "cmpl    $op1, $op2" %}
11675   opcode(0x3B);  /* Opcode 3B /r */
11676   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11677   ins_pipe(ialu_cr_reg_reg);
11678 %}
11679 // Signed 32-bit compare of a register with an int constant (CmpI).
11680 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11681 %{
11682   match(Set cr (CmpI op1 op2));
11683   // NOTE(review): Con8or32 suggests the immediate is emitted as 8 or 32 bits by value; the encoding classes are outside this view.
11684   format %{ "cmpl    $op1, $op2" %}
11685   opcode(0x81, 0x07); /* Opcode 81 /7 */
11686   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11687   ins_pipe(ialu_cr_reg_imm);
11688 %}
11689 // Signed 32-bit compare of a register with an int in memory (CmpI with the LoadI folded into the instruction).
11690 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11691 %{
11692   match(Set cr (CmpI op1 (LoadI op2)));
11693   // Same opcode as compI_rReg; the second operand is a memory reference instead of a register.
11694   ins_cost(500); // XXX
11695   format %{ "cmpl    $op1, $op2" %}
11696   opcode(0x3B); /* Opcode 3B /r */
11697   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11698   ins_pipe(ialu_cr_reg_mem);
11699 %}
11700 // CmpI of a register against the constant zero, emitted as "testl src, src".
11701 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11702 %{
11703   match(Set cr (CmpI src zero));
11704   // The 'zero' operand only constrains the match; the encoding uses src for both operands.
11705   format %{ "testl   $src, $src" %}
11706   opcode(0x85);
11707   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11708   ins_pipe(ialu_cr_reg_imm);
11709 %}
11710 // (src & con) compared with zero, emitted as a single testl with an immediate; the AndI result needs no register.
11711 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11712 %{
11713   match(Set cr (CmpI (AndI src con) zero));
11714   // Con32: the mask is always passed through the 32-bit constant encoding class.
11715   format %{ "testl   $src, $con" %}
11716   opcode(0xF7, 0x00);
11717   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11718   ins_pipe(ialu_cr_reg_imm);
11719 %}
11720 // (src & int-in-memory) compared with zero, emitted as "testl src, mem" with the LoadI folded in.
11721 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11722 %{
11723   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11724   // Same opcode (0x85) as testI_reg, with a memory second operand.
11725   format %{ "testl   $src, $mem" %}
11726   opcode(0x85);
11727   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11728   ins_pipe(ialu_cr_reg_mem);
11729 %}
11730 
11731 // Unsigned compare Instructions; really, same as signed except they
11732 // produce an rFlagsRegU instead of rFlagsReg.
11733 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11734 %{
11735   match(Set cr (CmpU op1 op2));
11736   // Same opcode and encoding classes as compI_rReg; only the ideal node (CmpU) and flags class differ.
11737   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11738   opcode(0x3B); /* Opcode 3B /r */
11739   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11740   ins_pipe(ialu_cr_reg_reg);
11741 %}
11742 // Unsigned 32-bit compare of a register with an int constant (CmpU).
11743 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11744 %{
11745   match(Set cr (CmpU op1 op2));
11746   // Same opcode and encoding classes as compI_rReg_imm; the result lands in rFlagsRegU.
11747   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11748   opcode(0x81,0x07); /* Opcode 81 /7 */
11749   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11750   ins_pipe(ialu_cr_reg_imm);
11751 %}
11752 // Unsigned 32-bit compare of a register with an int in memory (CmpU with the LoadI folded in).
11753 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11754 %{
11755   match(Set cr (CmpU op1 (LoadI op2)));
11756   // Same opcode and encoding classes as compI_rReg_mem; the result lands in rFlagsRegU.
11757   ins_cost(500); // XXX
11758   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11759   opcode(0x3B); /* Opcode 3B /r */
11760   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11761   ins_pipe(ialu_cr_reg_mem);
11762 %}
11763 
11764 // // // Cisc-spilled version of cmpU_rReg
11765 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11766 // //%{
11767 // //  match(Set cr (CmpU (LoadI op1) op2));
11768 // //
11769 // //  format %{ "CMPu   $op1,$op2" %}
11770 // //  ins_cost(500);
11771 // //  opcode(0x39);  /* Opcode 39 /r */
11772 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11773 // //%}
11774 // CmpU of a register against the constant zero, emitted as "testl src, src" with an unsigned flags result.
11775 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11776 %{
11777   match(Set cr (CmpU src zero));
11778   // Same encoding as testI_reg; the 'zero' operand only constrains the match.
11779   format %{ "testl  $src, $src\t# unsigned" %}
11780   opcode(0x85);
11781   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11782   ins_pipe(ialu_cr_reg_imm);
11783 %}
11784 // Pointer compare of two registers (CmpP): a 64-bit cmpq producing unsigned flags (rFlagsRegU).
11785 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11786 %{
11787   match(Set cr (CmpP op1 op2));
11788   // Uses the "_wide" REX encoding class, unlike the 32-bit compI/compU rules.
11789   format %{ "cmpq    $op1, $op2\t# ptr" %}
11790   opcode(0x3B); /* Opcode 3B /r */
11791   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11792   ins_pipe(ialu_cr_reg_reg);
11793 %}
11794 // Pointer compare of a register with a pointer in memory (CmpP with the LoadP folded in).
11795 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11796 %{
11797   match(Set cr (CmpP op1 (LoadP op2)));
11798   // compP_mem_rReg matches the same tree under a raw-pointer predicate and without this cost.
11799   ins_cost(500); // XXX
11800   format %{ "cmpq    $op1, $op2\t# ptr" %}
11801   opcode(0x3B); /* Opcode 3B /r */
11802   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11803   ins_pipe(ialu_cr_reg_mem);
11804 %}
11805 
11806 // // // Cisc-spilled version of cmpP_rReg
11807 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11808 // //%{
11809 // //  match(Set cr (CmpP (LoadP op1) op2));
11810 // //
11811 // //  format %{ "CMPu   $op1,$op2" %}
11812 // //  ins_cost(500);
11813 // //  opcode(0x39);  /* Opcode 39 /r */
11814 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11815 // //%}
11816 
11817 // XXX this is generalized by compP_rReg_mem???
11818 // Compare raw pointer (used in out-of-heap check).
11819 // Only works because non-oop pointers must be raw pointers
11820 // and raw pointers have no anti-dependencies.
11821 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11822 %{
11823   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr()); // presumably in(2) is the LoadP and its in(2) the address - confirm
11824   match(Set cr (CmpP op1 (LoadP op2)));
11825   // Same match tree, opcode and encoding as compP_rReg_mem, but no ins_cost is given here.
11826   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11827   opcode(0x3B); /* Opcode 3B /r */
11828   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11829   ins_pipe(ialu_cr_reg_mem);
11830 %}
11831 
11832 // This will generate a signed flags result. This should be OK since
11833 // any compare to a zero should be eq/neq.
11834 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11835 %{
11836   match(Set cr (CmpP src zero));
11837   // Null check of a pointer register: 64-bit "testq src, src"; the 'zero' operand is never encoded.
11838   format %{ "testq   $src, $src\t# ptr" %}
11839   opcode(0x85);
11840   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11841   ins_pipe(ialu_cr_reg_imm);
11842 %}
11843 
11844 // This will generate a signed flags result. This should be OK since
11845 // any compare to a zero should be eq/neq.
11846 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11847 %{
11848   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); // exact complement of testP_mem_reg0's predicate
11849   match(Set cr (CmpP (LoadP op) zero));
11850   // Only a 32-bit immediate is emitted; the 64-bit mask in the format presumably reflects sign extension under REX.W - confirm.
11851   ins_cost(500); // XXX
11852   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11853   opcode(0xF7); /* Opcode F7 /0 */
11854   ins_encode(REX_mem_wide(op),
11855              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11856   ins_pipe(ialu_cr_reg_imm);
11857 %}
11858 // Null check of a pointer in memory when compressed oops are on with a NULL narrow-oop base.
11859 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11860 %{
11861   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11862   match(Set cr (CmpP (LoadP mem) zero));
11863   // Per the format, R12 (heap base) is zero under this predicate, so it stands in for the zero operand.
11864   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11865   ins_encode %{
11866     __ cmpq(r12, $mem$$Address);
11867   %}
11868   ins_pipe(ialu_cr_reg_mem);
11869 %}
11870 // Compare two compressed (narrow) oops held in registers (CmpN): a 32-bit cmpl producing unsigned flags.
11871 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11872 %{
11873   match(Set cr (CmpN op1 op2));
11874   // Encoded through the macro assembler rather than through opcode/encoding classes.
11875   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11876   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11877   ins_pipe(ialu_cr_reg_reg);
11878 %}
11879 // Compare a narrow oop register with a narrow oop in memory (CmpN with the LoadN folded in).
11880 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11881 %{
11882   match(Set cr (CmpN src (LoadN mem)));
11883   // Operand order in the emitted cmpl (src, mem) follows the match rule.
11884   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11885   ins_encode %{
11886     __ cmpl($src$$Register, $mem$$Address);
11887   %}
11888   ins_pipe(ialu_cr_reg_mem);
11889 %}
11890 // Compare a narrow oop register with a narrow-oop constant (CmpN) via the macro assembler's cmp_narrow_oop.
11891 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11892   match(Set cr (CmpN op1 op2));
11893   // The constant is handed over as a jobject; how it is encoded is up to cmp_narrow_oop (not visible here).
11894   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11895   ins_encode %{
11896     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11897   %}
11898   ins_pipe(ialu_cr_reg_imm);
11899 %}
11900 // Compare a narrow oop in memory with a narrow-oop constant (CmpN with the LoadN folded in).
11901 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11902 %{
11903   match(Set cr (CmpN src (LoadN mem)));
11904   // NOTE(review): match has src first, the emitted compare has mem first; harmless only if consumers test eq/ne - confirm.
11905   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11906   ins_encode %{
11907     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11908   %}
11909   ins_pipe(ialu_cr_reg_mem);
11910 %}
11911 // Null check of a narrow oop in a register (CmpN against zero), emitted as "testl src, src".
11912 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11913   match(Set cr (CmpN src zero));
11914   // Produces rFlagsReg (signed class) although other CmpN rules produce rFlagsRegU; the 'zero' operand is never encoded.
11915   format %{ "testl   $src, $src\t# compressed ptr" %}
11916   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11917   ins_pipe(ialu_cr_reg_imm);
11918 %}
11919 // Null check of a narrow oop in memory (CmpN of a LoadN against zero) when the narrow-oop base is non-NULL.
11920 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11921 %{
11922   predicate(Universe::narrow_oop_base() != NULL);
11923   match(Set cr (CmpN (LoadN mem) zero));
11924   // Must be TEST, not CMP: "cmpl mem, -1" would set ZF when mem == 0xffffffff rather than when mem == 0.
11925   ins_cost(500); // XXX
11926   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11927   opcode(0xF7); /* Opcode F7 /0 */
11928   ins_encode(REX_mem(mem), // 32-bit operand size: same bytes as testP_mem minus the REX.W prefix
11929              OpcP, RM_opc_mem(0x00, mem), Con_d32(0xFFFFFFFF));
11930   ins_pipe(ialu_cr_reg_mem);
11931 %}
11932 // Null check of a narrow oop in memory when the narrow-oop base is NULL (complement of testN_mem's predicate).
11933 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11934 %{
11935   predicate(Universe::narrow_oop_base() == NULL);
11936   match(Set cr (CmpN (LoadN mem) zero));
11937   // Per the format, R12 (heap base) is zero under this predicate, so it stands in for the zero operand.
11938   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11939   ins_encode %{
11940     __ cmpl(r12, $mem$$Address);
11941   %}
11942   ins_pipe(ialu_cr_reg_mem);
11943 %}
11944 
11945 // Yanked all unsigned pointer compare operations.
11946 // Pointer compares are done with CmpP which is already unsigned.
11947 
11948 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11949 %{
11950   match(Set cr (CmpL op1 op2));
11951   // Signed 64-bit compare of two long registers: compI_rReg's opcode with the "_wide" REX encoding class.
11952   format %{ "cmpq    $op1, $op2" %}
11953   opcode(0x3B);  /* Opcode 3B /r */
11954   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11955   ins_pipe(ialu_cr_reg_reg);
11956 %}
11957 // Signed 64-bit compare of a long register with a constant restricted to the immL32 operand class.
11958 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11959 %{
11960   match(Set cr (CmpL op1 op2));
11961   // Wide variant of compI_rReg_imm: OpcSErm_wide instead of OpcSErm, same Con8or32 immediate class.
11962   format %{ "cmpq    $op1, $op2" %}
11963   opcode(0x81, 0x07); /* Opcode 81 /7 */
11964   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11965   ins_pipe(ialu_cr_reg_imm);
11966 %}
11967 // Signed 64-bit compare of a long register with a long in memory (CmpL with the LoadL folded in).
11968 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11969 %{
11970   match(Set cr (CmpL op1 (LoadL op2)));
11971   // NOTE(review): unlike compI_rReg_mem and compP_rReg_mem, no ins_cost(500) is given here.
11972   format %{ "cmpq    $op1, $op2" %}
11973   opcode(0x3B); /* Opcode 3B /r */
11974   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11975   ins_pipe(ialu_cr_reg_mem);
11976 %}
11977 // CmpL of a long register against the constant zero, emitted as "testq src, src".
11978 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11979 %{
11980   match(Set cr (CmpL src zero));
11981   // The 'zero' operand only constrains the match; the encoding uses src for both operands.
11982   format %{ "testq   $src, $src" %}
11983   opcode(0x85);
11984   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11985   ins_pipe(ialu_cr_reg_imm);
11986 %}
11987 // (src & con) compared with zero for longs, emitted as a single testq with an immediate.
11988 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11989 %{
11990   match(Set cr (CmpL (AndL src con) zero));
11991   // The mask is limited to the immL32 operand class and passed through the 32-bit Con32 encoding class.
11992   format %{ "testq   $src, $con\t# long" %}
11993   opcode(0xF7, 0x00);
11994   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11995   ins_pipe(ialu_cr_reg_imm);
11996 %}
11997 // (src & long-in-memory) compared with zero, emitted as "testq src, mem" with the LoadL folded in.
11998 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11999 %{
12000   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12001   // Wide counterpart of testI_reg_mem: same opcode (0x85) with REX_reg_mem_wide.
12002   format %{ "testq   $src, $mem" %}
12003   opcode(0x85);
12004   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12005   ins_pipe(ialu_cr_reg_mem);
12006 %}
12007 
12008 // Manifest a CmpL result in an integer register.  Very painful.
12009 // This is the test to avoid.
12010 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12011 %{
12012   match(Set dst (CmpL3 src1 src2));
12013   effect(KILL flags);
12014   // Per the format: dst starts as -1 and stays so when src1 < src2; otherwise setne/movzbl leave 0 (equal) or 1.
12015   ins_cost(275); // XXX
12016   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12017             "movl    $dst, -1\n\t"
12018             "jl,s    done\n\t"
12019             "setne   $dst\n\t"
12020             "movzbl  $dst, $dst\n\t"
12021     "done:" %}
12022   ins_encode(cmpl3_flag(src1, src2, dst)); // the whole sequence lives in one encoding class (defined outside this view)
12023   ins_pipe(pipe_slow);
12024 %}
12025 
12026 //----------Max and Min--------------------------------------------------------
12027 // Min Instructions
12028 
12029 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12030 %{
12031   effect(USE_DEF dst, USE src, USE cr);
12032   // No match rule: only instantiated by minI_rReg's expand. Conditionally moves src into dst on "greater" (see format).
12033   format %{ "cmovlgt $dst, $src\t# min" %}
12034   opcode(0x0F, 0x4F);
12035   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12036   ins_pipe(pipe_cmov_reg);
12037 %}
12038 
12039 // MinI of two int registers, in place in dst: expanded into a compare followed by a conditional move.
12040 instruct minI_rReg(rRegI dst, rRegI src)
12041 %{
12042   match(Set dst (MinI dst src));
12043   // No encoding of its own; the two rules named in the expand block supply the instructions.
12044   ins_cost(200);
12045   expand %{
12046     rFlagsReg cr;
12047     compI_rReg(cr, dst, src);
12048     cmovI_reg_g(dst, src, cr); // dst > src  =>  dst = src
12049   %}
12050 %}
12051 // Conditional-move helper for maxI_rReg's expand; it has no match rule of its own.
12052 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12053 %{
12054   effect(USE_DEF dst, USE src, USE cr);
12055   // Conditionally moves src into dst on "less" (see format); otherwise dst keeps its value.
12056   format %{ "cmovllt $dst, $src\t# max" %}
12057   opcode(0x0F, 0x4C);
12058   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12059   ins_pipe(pipe_cmov_reg);
12060 %}
12061 
12062 // MaxI of two int registers, in place in dst: expanded into a compare followed by a conditional move.
12063 instruct maxI_rReg(rRegI dst, rRegI src)
12064 %{
12065   match(Set dst (MaxI dst src));
12066   // No encoding of its own; the two rules named in the expand block supply the instructions.
12067   ins_cost(200);
12068   expand %{
12069     rFlagsReg cr;
12070     compI_rReg(cr, dst, src);
12071     cmovI_reg_l(dst, src, cr); // dst < src  =>  dst = src
12072   %}
12073 %}
12074 
12075 // ============================================================================
12076 // Branch Instructions
12077 
12078 // Jump Direct - Label defines a relative address from JMP+1
12079 instruct jmpDir(label labl)
12080 %{
12081   match(Goto);
12082   effect(USE labl);
12083   // Long form of the unconditional jump; jmpDir_short is the 2-byte replacement.
12084   ins_cost(300);
12085   format %{ "jmp     $labl" %}
12086   size(5); // one opcode byte (0xE9) plus, presumably, a 32-bit displacement emitted by Lbl
12087   opcode(0xE9);
12088   ins_encode(OpcP, Lbl(labl));
12089   ins_pipe(pipe_jmp);
12090   ins_pc_relative(1);
12091 %}
12092 
12093 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12094 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12095 %{
12096   match(If cop cr);
12097   effect(USE labl);
12098   // Long form for conditions on signed flags (cmpOp / rFlagsReg); jmpCon_short is the 2-byte replacement.
12099   ins_cost(300);
12100   format %{ "j$cop     $labl" %}
12101   size(6); // presumably two opcode bytes plus a 32-bit displacement, as jmpConUCF2 emits explicitly
12102   opcode(0x0F, 0x80);
12103   ins_encode(Jcc(cop, labl));
12104   ins_pipe(pipe_jcc);
12105   ins_pc_relative(1);
12106 %}
12107 
12108 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12109 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12110 %{
12111   match(CountedLoopEnd cop cr);
12112   effect(USE labl);
12113   // Same encoding as jmpCon; this rule matches the CountedLoopEnd node instead of If.
12114   ins_cost(300);
12115   format %{ "j$cop     $labl\t# loop end" %}
12116   size(6);
12117   opcode(0x0F, 0x80);
12118   ins_encode(Jcc(cop, labl));
12119   ins_pipe(pipe_jcc);
12120   ins_pc_relative(1);
12121 %}
12122 
12123 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12124 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12125   match(CountedLoopEnd cop cmp);
12126   effect(USE labl);
12127   // Unsigned-flags variant of jmpLoopEnd (cmpOpU / rFlagsRegU); same opcode and Jcc encoding class.
12128   ins_cost(300);
12129   format %{ "j$cop,u   $labl\t# loop end" %}
12130   size(6);
12131   opcode(0x0F, 0x80);
12132   ins_encode(Jcc(cop, labl));
12133   ins_pipe(pipe_jcc);
12134   ins_pc_relative(1);
12135 %}
12136 // Counted-loop back branch for cmpOpUCF conditions on rFlagsRegUCF flags.
12137 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12138   match(CountedLoopEnd cop cmp);
12139   effect(USE labl);
12140   // Same encoding as jmpLoopEndU, but with a lower cost (200 vs. 300).
12141   ins_cost(200);
12142   format %{ "j$cop,u   $labl\t# loop end" %}
12143   size(6);
12144   opcode(0x0F, 0x80);
12145   ins_encode(Jcc(cop, labl));
12146   ins_pipe(pipe_jcc);
12147   ins_pc_relative(1);
12148 %}
12149 
12150 // Jump Direct Conditional - using unsigned comparison
12151 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12152   match(If cop cmp);
12153   effect(USE labl);
12154   // Unsigned-flags variant of jmpCon (cmpOpU / rFlagsRegU); same opcode and Jcc encoding class.
12155   ins_cost(300);
12156   format %{ "j$cop,u  $labl" %}
12157   size(6);
12158   opcode(0x0F, 0x80);
12159   ins_encode(Jcc(cop, labl));
12160   ins_pipe(pipe_jcc);
12161   ins_pc_relative(1);
12162 %}
12163 // If-branch for cmpOpUCF conditions on rFlagsRegUCF flags.
12164 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12165   match(If cop cmp);
12166   effect(USE labl);
12167   // Same encoding as jmpConU, but with a lower cost (200 vs. 300).
12168   ins_cost(200);
12169   format %{ "j$cop,u  $labl" %}
12170   size(6);
12171   opcode(0x0F, 0x80);
12172   ins_encode(Jcc(cop, labl));
12173   ins_pipe(pipe_jcc);
12174   ins_pc_relative(1);
12175 %}
12176 // If-branch for cmpOpUCF2 conditions (only equal / notEqual are handled): a parity jump followed by the conditional jump.
12177 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12178   match(If cop cmp);
12179   effect(USE labl);
12180   // For notEqual the parity jump also targets $labl; for equal it hops over the second jump to "done".
12181   ins_cost(200);
12182   format %{ $$template
12183     if ($cop$$cmpcode == Assembler::notEqual) {
12184       $$emit$$"jp,u   $labl\n\t"
12185       $$emit$$"j$cop,u   $labl"
12186     } else {
12187       $$emit$$"jp,u   done\n\t"
12188       $$emit$$"j$cop,u   $labl\n\t"
12189       $$emit$$"done:"
12190     }
12191   %}
12192   size(12); // twice the size(6) of the single long conditional jumps
12193   opcode(0x0F, 0x80);
12194   ins_encode %{
12195     Label* l = $labl$$label;
12196     $$$emit8$primary;
12197     emit_cc(cbuf, $secondary, Assembler::parity);
12198     int parity_disp = -1;
12199     if ($cop$$cmpcode == Assembler::notEqual) {
12200        // the two jumps are 6 bytes apart, so their displacements to the label differ by 6 too;
12201        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0; // +4: the displacement bytes not yet emitted
12202     } else if ($cop$$cmpcode == Assembler::equal) {
12203        parity_disp = 6; // skip the second jump
12204     } else {
12205        ShouldNotReachHere();
12206     }
12207     emit_d32(cbuf, parity_disp);
12208     $$$emit8$primary;
12209     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12210     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0; // a null label yields displacement 0
12211     emit_d32(cbuf, disp);
12212   %}
12213   ins_pipe(pipe_jcc);
12214   ins_pc_relative(1);
12215 %}
12216 
12217 // ============================================================================
12218 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12219 // superklass array for an instance of the superklass.  Set a hidden
12220 // internal cache on a hit (cache is checked with exposed code in
12221 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12222 // encoding ALSO sets flags.
12223 
12224 instruct partialSubtypeCheck(rdi_RegP result,
12225                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12226                              rFlagsReg cr)
12227 %{
12228   match(Set result (PartialSubtypeCheck sub super));
12229   effect(KILL rcx, KILL cr);
12230   // Value-producing form: per the format, rdi is zeroed on a hit and left non-zero on a miss; rcx and flags are killed.
12231   ins_cost(1100);  // slightly larger than the next version
12232   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12233             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12234             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12235             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12236             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12237             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12238             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12239     "miss:\t" %}
12240   // The instruction bytes come from enc_PartialSubtypeCheck (outside this view); the opcode value tells it to also XOR rdi.
12241   opcode(0x1); // Force a XOR of RDI
12242   ins_encode(enc_PartialSubtypeCheck());
12243   ins_pipe(pipe_slow);
12244 %}
12245 // Flags-only form of partialSubtypeCheck: matches the check compared against NULL, so rdi need not be zeroed on a hit.
12246 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12247                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12248                                      immP0 zero,
12249                                      rdi_RegP result)
12250 %{
12251   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12252   effect(KILL rcx, KILL result);
12253   // Only the flags are produced; rdi still serves as the scan pointer (see format), hence KILL result.
12254   ins_cost(1000);
12255   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12256             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12257             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12258             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12259             "jne,s   miss\t\t# Missed: flags nz\n\t"
12260             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12261     "miss:\t" %}
12262   // Same encoding class as partialSubtypeCheck; opcode 0 tells it to leave rdi alone.
12263   opcode(0x0); // No need to XOR RDI
12264   ins_encode(enc_PartialSubtypeCheck());
12265   ins_pipe(pipe_slow);
12266 %}
12267 
12268 // ============================================================================
12269 // Branch Instructions -- short offset versions
12270 //
12271 // These instructions are used to replace jumps of a long offset (the default
12272 // match) with jumps of a shorter offset.  These instructions are all tagged
12273 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12274 // match rules in general matching.  Instead, the ADLC generates a conversion
12275 // method in the MachNode which can be used to do in-place replacement of the
12276 // long variant with the shorter variant.  The compiler will determine if a
12277 // branch can be taken by the is_short_branch_offset() predicate in the machine
12278 // specific code section of the file.
12279 
12280 // Jump Direct - Label defines a relative address from JMP+1
12281 instruct jmpDir_short(label labl) %{
12282   match(Goto);
12283   effect(USE labl);
12284   // Short replacement for jmpDir: 2 bytes instead of 5, using opcode 0xEB and the LblShort encoding class.
12285   ins_cost(300);
12286   format %{ "jmp,s   $labl" %}
12287   size(2);
12288   opcode(0xEB);
12289   ins_encode(OpcP, LblShort(labl));
12290   ins_pipe(pipe_jmp);
12291   ins_pc_relative(1);
12292   ins_short_branch(1); // keeps this rule out of general matching (see the section comment above)
12293 %}
12294 
12295 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12296 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12297   match(If cop cr);
12298   effect(USE labl);
12299   // Short replacement for jmpCon: 2 bytes instead of 6, using base opcode 0x70 and the JccShort encoding class.
12300   ins_cost(300);
12301   format %{ "j$cop,s   $labl" %}
12302   size(2);
12303   opcode(0x70);
12304   ins_encode(JccShort(cop, labl));
12305   ins_pipe(pipe_jcc);
12306   ins_pc_relative(1);
12307   ins_short_branch(1);
12308 %}
12309 
12310 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12311 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12312   match(CountedLoopEnd cop cr);
12313   effect(USE labl);
12314   // Short replacement for jmpLoopEnd: 2 bytes instead of 6.
12315   ins_cost(300);
12316   format %{ "j$cop,s   $labl\t# loop end" %}
12317   size(2);
12318   opcode(0x70);
12319   ins_encode(JccShort(cop, labl));
12320   ins_pipe(pipe_jcc);
12321   ins_pc_relative(1);
12322   ins_short_branch(1);
12323 %}
12324 
12325 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12326 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12327   match(CountedLoopEnd cop cmp);
12328   effect(USE labl);
12329   // Short replacement for jmpLoopEndU (unsigned flags): 2 bytes instead of 6.
12330   ins_cost(300);
12331   format %{ "j$cop,us  $labl\t# loop end" %}
12332   size(2);
12333   opcode(0x70);
12334   ins_encode(JccShort(cop, labl));
12335   ins_pipe(pipe_jcc);
12336   ins_pc_relative(1);
12337   ins_short_branch(1);
12338 %}
12339 // Short replacement for jmpLoopEndUCF: 2 bytes instead of 6.
12340 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12341   match(CountedLoopEnd cop cmp);
12342   effect(USE labl);
12343   // NOTE(review): the long form uses ins_cost(200); short forms are kept out of general matching, so this presumably does not matter.
12344   ins_cost(300);
12345   format %{ "j$cop,us  $labl\t# loop end" %}
12346   size(2);
12347   opcode(0x70);
12348   ins_encode(JccShort(cop, labl));
12349   ins_pipe(pipe_jcc);
12350   ins_pc_relative(1);
12351   ins_short_branch(1);
12352 %}
12353 
12354 // Jump Direct Conditional - using unsigned comparison
12355 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12356   match(If cop cmp);
12357   effect(USE labl);
12358   // Short replacement for jmpConU: 2 bytes instead of 6.
12359   ins_cost(300);
12360   format %{ "j$cop,us  $labl" %}
12361   size(2);
12362   opcode(0x70);
12363   ins_encode(JccShort(cop, labl));
12364   ins_pipe(pipe_jcc);
12365   ins_pc_relative(1);
12366   ins_short_branch(1);
12367 %}
12368 // Short replacement for jmpConUCF: 2 bytes instead of 6.
12369 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12370   match(If cop cmp);
12371   effect(USE labl);
12372   // NOTE(review): the long form uses ins_cost(200); short forms are kept out of general matching, so this presumably does not matter.
12373   ins_cost(300);
12374   format %{ "j$cop,us  $labl" %}
12375   size(2);
12376   opcode(0x70);
12377   ins_encode(JccShort(cop, labl));
12378   ins_pipe(pipe_jcc);
12379   ins_pc_relative(1);
12380   ins_short_branch(1);
12381 %}
12382 // Short replacement for jmpConUCF2: a short parity jump followed by a short conditional jump (4 bytes instead of 12).
12383 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12384   match(If cop cmp);
12385   effect(USE labl);
12386   // As in the long form, only equal / notEqual are handled; any other condition hits ShouldNotReachHere().
12387   ins_cost(300);
12388   format %{ $$template
12389     if ($cop$$cmpcode == Assembler::notEqual) {
12390       $$emit$$"jp,u,s   $labl\n\t"
12391       $$emit$$"j$cop,u,s   $labl"
12392     } else {
12393       $$emit$$"jp,u,s   done\n\t"
12394       $$emit$$"j$cop,u,s  $labl\n\t"
12395       $$emit$$"done:"
12396     }
12397   %}
12398   size(4);
12399   opcode(0x70);
12400   ins_encode %{
12401     Label* l = $labl$$label;
12402     emit_cc(cbuf, $primary, Assembler::parity);
12403     int parity_disp = -1;
12404     if ($cop$$cmpcode == Assembler::notEqual) {
12405       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; // +1: the displacement byte not yet emitted
12406     } else if ($cop$$cmpcode == Assembler::equal) {
12407       parity_disp = 2; // skip the second (2-byte) jump
12408     } else {
12409       ShouldNotReachHere();
12410     }
12411     emit_d8(cbuf, parity_disp);
12412     emit_cc(cbuf, $primary, $cop$$cmpcode);
12413     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12414     emit_d8(cbuf, disp);
12415     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); // checked only after the bytes were emitted
12416     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12417   %}
12418   ins_pipe(pipe_jcc);
12419   ins_pc_relative(1);
12420   ins_short_branch(1);
12421 %}
12422 
12423 // ============================================================================
12424 // inlined locking and unlocking
12425 // Inlined monitor enter (FastLock): the rule's only result is the flags register.
12426 instruct cmpFastLock(rFlagsReg cr,
12427                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12428 %{
12429   match(Set cr (FastLock object box));
12430   effect(TEMP tmp, TEMP scr);
12431   // tmp is pinned to rax and scr is a free pointer temp; the code itself comes from the Fast_Lock encoding class (not visible here).
12432   ins_cost(300);
12433   format %{ "fastlock $object,$box,$tmp,$scr" %}
12434   ins_encode(Fast_Lock(object, box, tmp, scr));
12435   ins_pipe(pipe_slow);
12436   ins_pc_relative(1);
12437 %}
12438 // Inlined monitor exit (FastUnlock): the rule's only result is the flags register.
12439 instruct cmpFastUnlock(rFlagsReg cr,
12440                        rRegP object, rax_RegP box, rRegP tmp)
12441 %{
12442   match(Set cr (FastUnlock object box));
12443   effect(TEMP tmp);
12444   // Here box (not a temp) is the operand pinned to rax; the code itself comes from the Fast_Unlock encoding class.
12445   ins_cost(300);
12446   format %{ "fastunlock $object, $box, $tmp" %}
12447   ins_encode(Fast_Unlock(object, box, tmp));
12448   ins_pipe(pipe_slow);
12449   ins_pc_relative(1);
12450 %}
12451 
12452 
12453 // ============================================================================
12454 // Safepoint Instructions
12455 instruct safePoint_poll(rFlagsReg cr)
12456 %{
12457   predicate(!Assembler::is_polling_page_far()); // safePoint_poll_far covers the opposite case
12458   match(SafePoint);
12459   effect(KILL cr);
12460   // Safepoint poll when the polling page is not "far": a testl against the page itself; only the flags are declared killed.
12461   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12462             "# Safepoint: poll for GC" %}
12463   ins_cost(125);
12464   ins_encode %{
12465     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); // address literal tagged with poll_type relocation
12466     __ testl(rax, addr);
12467   %}
12468   ins_pipe(ialu_reg_mem);
12469 %}
12470 // Safepoint poll when the polling page is "far": the page address arrives in a register operand (poll).
12471 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12472 %{
12473   predicate(Assembler::is_polling_page_far());
12474   match(SafePoint poll);
12475   effect(KILL cr, USE poll);
12476   // Only the flags are declared killed; poll is read but not modified.
12477   format %{ "testl  rax, [$poll]\t"
12478             "# Safepoint: poll for GC" %}
12479   ins_cost(125);
12480   ins_encode %{
12481     __ relocate(relocInfo::poll_type); // relocation is recorded explicitly, just before the testl is emitted
12482     __ testl(rax, Address($poll$$Register, 0));
12483   %}
12484   ins_pipe(ialu_reg_mem);
12485 %}
12486 
12487 // ============================================================================
12488 // Procedure Call/Return Instructions
12489 // Call Java Static Instruction
12490 // Note: If this code changes, the corresponding ret_addr_offset() and
12491 //       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth)
%{
  // Selected for CallStaticJava nodes that are NOT method-handle invokes;
  // the method-handle case is matched by CallStaticJavaHandle.
  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
  match(CallStaticJava);
  effect(USE meth);

  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_Static_Call(meth), call_epilog);

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}
12505 
12506 // Call Java Static Instruction (method handle version)
12507 // Note: If this code changes, the corresponding ret_addr_offset() and
12508 //       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save)
%{
  // Selected for CallStaticJava nodes that ARE method-handle invokes.
  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
  match(CallStaticJava);
  effect(USE meth);

  // RBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP,
  // which bracket the call in the encoding below.
  format %{ "call,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(preserve_SP,
             Java_Static_Call(meth),
             restore_SP,
             call_epilog);

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}
12527 
12528 // Call Java Dynamic Instruction
12529 // Note: If this code changes, the corresponding ret_addr_offset() and
12530 //       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  // Matches every CallDynamicJava node (no predicate).
  match(CallDynamicJava);
  effect(USE meth);

  // The format documents a load of Universe::non_oop_word() into rax
  // ahead of the call; both are produced by the Java_Dynamic_Call encoding.
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_Dynamic_Call(meth), call_epilog);

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}
12545 
12546 // Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  // Matches the ideal CallRuntime node; encoded through Java_To_Runtime.
  // Unlike the Java call instructions, no call_epilog and no ins_alignment
  // are specified here.
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
12559 
12560 // Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  // Matches the ideal CallLeaf node; shares the Java_To_Runtime encoding
  // with CallRuntimeDirect and CallLeafNoFPDirect.
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
12573 
// Call runtime without safepoint (CallLeafNoFP variant)
instruct CallLeafNoFPDirect(method meth) %{
  // Matches the ideal CallLeafNoFP node; the encoding is the same
  // Java_To_Runtime used by CallLeafDirect, only the matched node and the
  // format text differ.
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));

  ins_cost(300);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
12587 
12588 // Return Instruction
12589 // Remove the return address & jump to it.
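// NOTE(review): this comment used to say that a nop is always emitted after
// the ret to make sure there is room for safepoint patching.  The encoding
// below lists only the primary opcode 0xC3, so that statement looks stale;
// confirm before relying on it.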
instruct Ret() %{
  // Matches the ideal Return node; takes no operands.
  match(Return);

  // The encoding consists solely of the primary opcode declared here.
  opcode(0xC3);
  ins_encode(OpcP);
  format %{ "ret" %}

  ins_pipe(pipe_jmp);
%}
12601 
12602 // Tail Call; Jump from runtime stub to Java code.
12603 // Also known as an 'interprocedural jump'.
12604 // Target of jump will eventually return to caller.
12605 // TailJump below removes the return address.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop) %{
  // Matches the ideal TailCall node.  method_oop is an rbx_RegP operand
  // that only appears in the match rule; the encoding is an indirect jump
  // (FF /4) through jump_target with its REX prefix.
  match(TailCall jump_target method_oop);

  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
  format %{ "jmp     $jump_target\t# rbx holds method oop" %}

  ins_cost(300);
  ins_pipe(pipe_jmp);
%}
12616 
12617 // Tail Jump; remove the return address; jump to target.
12618 // TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  // Matches the ideal TailJump node.  ex_oop is an rax_RegP operand that
  // only appears in the match rule.
  match(TailJump jump_target ex_oop);

  // Two-part encoding: a literal 0x5a byte (the "popq rdx" shown in the
  // format) followed by the FF /4 indirect jump through jump_target.
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(Opcode(0x5a), // popq rdx
             REX_reg(jump_target), OpcP, reg_opc(jump_target));
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}

  ins_cost(300);
  ins_pipe(pipe_jmp);
%}
12631 
12632 // Create exception oop: created by stack-crawling runtime code.
12633 // Created exception is now available to this handler, and is setup
12634 // just prior to jumping to this handler.  No code emitted.
instruct CreateException(rax_RegP ex_oop) %{
  // Declares ex_oop (an rax_RegP operand) as the result of the ideal
  // CreateEx node.
  match(Set ex_oop (CreateEx));

  // Zero-size instruction with an empty encoding list: nothing is emitted.
  size(0);
  ins_encode();
  format %{ "# exception oop is in rax; no code emitted" %}

  ins_pipe(empty);
%}
12645 
12646 // Rethrow exception:
12647 // The exception oop will come in the first argument position.
12648 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  // Matches the ideal Rethrow node; takes no operands.  The code itself
  // comes from the enc_rethrow encoding class, shown in the format as a
  // jump to the rethrow stub.
  match(Rethrow);

  ins_encode(enc_rethrow);
  format %{ "jmp     rethrow_stub" %}

  ins_pipe(pipe_jmp);
%}
12658 
12659 
12660 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
12663 //
12664 // peepmatch ( root_instr_name [preceding_instruction]* );
12665 //
// peepconstraint (
//  instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
12669 // // instruction numbers are zero-based using left to right order in peepmatch
12670 //
12671 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12672 // // provide an instruction_number.operand_name for each operand that appears
12673 // // in the replacement instruction's match rule
12674 //
12675 // ---------VM FLAGS---------------------------------------------------------
12676 //
12677 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12678 //
12679 // Each peephole rule is given an identifying number starting with zero and
12680 // increasing by one in the order seen by the parser.  An individual peephole
12681 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12682 // on the command-line.
12683 //
12684 // ---------CURRENT LIMITATIONS----------------------------------------------
12685 //
12686 // Only match adjacent instructions in same basic block
12687 // Only equality constraints
12688 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12689 // Only one replacement instruction
12690 //
12691 // ---------EXAMPLE----------------------------------------------------------
12692 //
12693 // // pertinent parts of existing instructions in architecture description
12694 // instruct movI(rRegI dst, rRegI src)
12695 // %{
12696 //   match(Set dst (CopyI src));
12697 // %}
12698 //
12699 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12700 // %{
12701 //   match(Set dst (AddI dst src));
12702 //   effect(KILL cr);
12703 // %}
12704 //
12705 // // Change (inc mov) to lea
12706 // peephole %{
//   // increment preceded by register-register move
12708 //   peepmatch ( incI_rReg movI );
12709 //   // require that the destination register of the increment
12710 //   // match the destination register of the move
12711 //   peepconstraint ( 0.dst == 1.dst );
12712 //   // construct a replacement instruction that sets
12713 //   // the destination to ( move's source register + one )
12714 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12715 // %}
12716 //
12717 
12718 // Implementation no longer uses movX instructions since
12719 // machine-independent system no longer uses CopyX nodes.
12720 //
12721 // peephole
12722 // %{
12723 //   peepmatch (incI_rReg movI);
12724 //   peepconstraint (0.dst == 1.dst);
12725 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12726 // %}
12727 
12728 // peephole
12729 // %{
12730 //   peepmatch (decI_rReg movI);
12731 //   peepconstraint (0.dst == 1.dst);
12732 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12733 // %}
12734 
12735 // peephole
12736 // %{
12737 //   peepmatch (addI_rReg_imm movI);
12738 //   peepconstraint (0.dst == 1.dst);
12739 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12740 // %}
12741 
12742 // peephole
12743 // %{
12744 //   peepmatch (incL_rReg movL);
12745 //   peepconstraint (0.dst == 1.dst);
12746 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12747 // %}
12748 
12749 // peephole
12750 // %{
12751 //   peepmatch (decL_rReg movL);
12752 //   peepconstraint (0.dst == 1.dst);
12753 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12754 // %}
12755 
12756 // peephole
12757 // %{
12758 //   peepmatch (addL_rReg_imm movL);
12759 //   peepconstraint (0.dst == 1.dst);
12760 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12761 // %}
12762 
12763 // peephole
12764 // %{
12765 //   peepmatch (addP_rReg_imm movP);
12766 //   peepconstraint (0.dst == 1.dst);
12767 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12768 // %}
12769 
12770 // // Change load of spilled value to only a spill
12771 // instruct storeI(memory mem, rRegI src)
12772 // %{
12773 //   match(Set mem (StoreI mem src));
12774 // %}
12775 //
12776 // instruct loadI(rRegI dst, memory mem)
12777 // %{
12778 //   match(Set dst (LoadI mem));
12779 // %}
12780 //
12781 
peephole %{
  // Instruction 0 is the root (loadI); instruction 1 (storeI) is the
  // instruction that precedes it.
  peepmatch ( loadI storeI );
  // Fire only when the load's destination is the register the store wrote
  // and both instructions use the same memory operand.
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  // Replace the pair with a single storeI built from the store's operands.
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
12788 
peephole %{
  // Instruction 0 is the root (loadL); instruction 1 (storeL) is the
  // instruction that precedes it.
  peepmatch ( loadL storeL );
  // Fire only when the load's destination is the register the store wrote
  // and both instructions use the same memory operand.
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  // Replace the pair with a single storeL built from the store's operands.
  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
%}
12795 
12796 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.