1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP (R12 and R15 are not listed either)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
// Class for all pointer registers except RBP and RSP
// (R12 and R15 are not listed either)
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers except RSP (R12 and R15 are not listed either)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers except RSP (R12 and R15 are not listed either)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short names for the Assembler operand formats that are passed as the
// "format" argument of the relocating emit helpers below.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ foo(...)" expands to "_masm.foo(...)", so a MacroAssembler named
// _masm must be in scope wherever this shorthand is used.
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
 613 #ifndef PRODUCT
 614 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 615 {
 616   st->print("INT3");
 617 }
 618 #endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
 650 // EMIT_D16()
 651 void emit_d16(CodeBuffer &cbuf, int d16) {
 652   cbuf.insts()->emit_int16(d16);
 653 }
 654 
 655 // EMIT_D32()
 656 void emit_d32(CodeBuffer &cbuf, int d32) {
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // EMIT_D64()
 661 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 662   cbuf.insts()->emit_int64(d64);
 663 }
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739 
 740   // There is no index & no scale, use form without SIB byte
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   // Register-to-register move: optional REX prefix, opcode 0x8B, then a
 803   // ModRM byte (mode 0x3) carrying both encodings reduced below 8.
 804   if (dstenc == srcenc) {
 805     return; // copying a register onto itself emits nothing
 806   }
 807
 808   const bool dst_high = (dstenc >= 8);
 809   const bool src_high = (srcenc >= 8);
 810
 811   // The prefix depends on which of the two encodings is 8 or above.
 812   if (dst_high && src_high) {
 813     emit_opcode(cbuf, Assembler::REX_RB);
 814   } else if (dst_high) {
 815     emit_opcode(cbuf, Assembler::REX_R);
 816   } else if (src_high) {
 817     emit_opcode(cbuf, Assembler::REX_B);
 818   }
 819   emit_opcode(cbuf, 0x8B);
 820   emit_rm(cbuf, 0x3, dst_high ? dstenc - 8 : dstenc, src_high ? srcenc - 8 : srcenc);
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   if( dst_encoding == src_encoding ) {
 825     return; // same register on both sides: empty encoding
 826   }
 827
 828   // Distinct registers: let the macro assembler emit movdqa dst, src
 829   MacroAssembler _masm(&cbuf);
 830   __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 831 }
 832 
 833 // This could be in MacroAssembler but it's fairly C2 specific
     //
     // Emits a flag fix-up sequence through _masm: when the parity flag is set,
     // the flags are pushed, the saved copy on top of the stack is ANDed with
     // 0xffffff2b, and the result is popped back into the flags register.
     // When parity is clear the whole fix-up is branched over.
 834 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 835   Label exit;
       // Short (8-bit displacement) branch past the fix-up if parity is not set.
 836   __ jccb(Assembler::noParity, exit);
 837   __ pushf();
       // 0x2b == 0b00101011: bits 2, 4, 6 and 7 of the saved flags are cleared
       // (PF, AF, ZF and SF in the x86 flags layout); bits 0, 1, 3, 5 and
       // bits 8-31 pass through the mask unchanged.
 838   __ andq(Address(rsp, 0), 0xffffff2b);
 839   __ popf();
 840   __ bind(exit);
 841   __ nop(); // (target for branch to avoid branch to branch)
 842 }
 843 
 844 
 845 //=============================================================================
     // Matcher flag, defined as true in this file.
     // NOTE(review): presumably tells the matcher that constant table entries
     // are addressed absolutely on this platform - confirm against its users.
 846 const bool Matcher::constant_table_absolute_addressing = true;
     // The constant base node's output register mask is bound to the empty mask.
 847 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 848 
 849 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
       // Deliberately emits nothing: neither cbuf nor ra_ is touched.
 850   // Empty encoding
 851 }
 852 
 853 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
       // Zero bytes, consistent with the empty body of MachConstantBaseNode::emit.
 854   return 0;
 855 }
 856 
 857 #ifndef PRODUCT
     // Compiled only when PRODUCT is not defined. Prints a single placeholder
     // line to st noting that this node has an empty encoding; ra_ is unused.
 858 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 859   st->print("# MachConstantBaseNode (empty encoding)");
 860 }
 861 #endif
 862 
 863 
 864 //=============================================================================
 865 #ifndef PRODUCT
 866 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 867 {
 868   // Textual rendering of the method prolog. It walks through the same
 869   // decisions as MachPrologNode::emit, printing a line for each step.
 870   Compile* compile = ra_->C;
 871
 872   // Frame size in bytes; the return address and the saved RBP are pushed
 873   // separately and therefore excluded from the explicit adjustment.
 874   int framesize = compile->frame_slots() << LogBytesPerInt;
 875   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 876   framesize -= 2*wordSize;
 877
 878   // Callers bang the stack on behalf of C2R adapters; see bugs 4446381,
 879   // 4468289, 4497237.
 880   const bool bangs_stack = compile->need_stack_bang(framesize);
 881   if (bangs_stack) {
 882     st->print_cr("# stack bang");
 883     st->print("\t");
 884   }
 885   st->print_cr("pushq   rbp");
 886   st->print("\t");
 887   const bool pushes_cookie = VerifyStackAtCalls;
 888   if (pushes_cookie) {
 889     // Majik cookie to verify stack depth; it takes one more word
 890     st->print_cr("pushq   0xffffffffbadb100d"
 891                   "\t# Majik cookie for stack depth check");
 892     st->print("\t");
 893     framesize -= wordSize;
 894   }
 895
 896   if (framesize != 0) {
 897     st->print("subq    rsp, #%d\t# Create frame", framesize);
 898     // The nop is listed only for the short form with no bang and no cookie
 899     if (framesize < 0x80 && !bangs_stack && !pushes_cookie) {
 900       st->print("\n\tnop\t# nop for patch_verified_entry");
 901     }
 902   }
 903 }
 904 #endif
 905 
 906 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 907 {
       // Emits the method prolog into cbuf, in this order: optional stack
       // overflow check, push of RBP, optional stack-depth cookie push, the
       // stack pointer adjustment that creates the frame, and (ASSERT builds
       // with VerifyStackAtCalls) a stack alignment check.
 908   Compile* C = ra_->C;
 909 
 910   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 911   // NativeJump::patch_verified_entry will be able to patch out the entry
 912   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 913   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 914   // 6 bytes. So if we don't do the fldcw or the push then we must
 915   // use the 6 byte frame allocation even if we have no frame. :-(
 916   // If method sets FPU control word do it now
       // NOTE(review): no fldcw is emitted anywhere in this function, and the
       // byte counts above do not include the REX.W prefix emitted below; the
       // comment above looks historical - confirm before relying on it.
 917 
 918   int framesize = C->frame_slots() << LogBytesPerInt;
 919   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 920   // Remove wordSize for return adr already pushed
 921   // and another for the RBP we are going to save
 922   framesize -= 2*wordSize;
       // Stays true only if neither the stack bang nor the cookie push is emitted.
 923   bool need_nop = true;
 924 
 925   // Calls to C2R adapters often do not accept exceptional returns.
 926   // We require that their callers must bang for them.  But be
 927   // careful, because some VM calls (such as call site linkage) can
 928   // use several kilobytes of stack.  But the stack safety zone should
 929   // account for that.  See bugs 4446381, 4468289, 4497237.
 930   if (C->need_stack_bang(framesize)) {
 931     MacroAssembler masm(&cbuf);
 932     masm.generate_stack_overflow_check(framesize);
 933     need_nop = false;
 934   }
 935 
 936   // We always push rbp so that on return to interpreter rbp will be
 937   // restored correctly and we can correct the stack.
       // Single opcode byte: 0x50 with the RBP encoding in the low bits.
 938   emit_opcode(cbuf, 0x50 | RBP_enc);
 939 
 940   if (VerifyStackAtCalls) {
 941     // Majik cookie to verify stack depth
 942     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 943     emit_d32(cbuf, 0xbadb100d);
         // The cookie occupies one word of the frame, so the explicit
         // adjustment below shrinks by wordSize.
 944     framesize -= wordSize; // Remove 2 for cookie
 945     need_nop = false;
 946   }
 947 
 948   if (framesize) {
         // REX.W prefix, then opcode 0x83 (8-bit immediate) or 0x81 (32-bit
         // immediate), a ModRM byte with mode 0x3, reg field 0x05 and RSP as
         // the r/m operand, and finally the immediate itself.
 949     emit_opcode(cbuf, Assembler::REX_W);
 950     if (framesize < 0x80) {
 951       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 952       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 953       emit_d8(cbuf, framesize);
           // Short form only: one extra nop byte when nothing else was emitted
           // besides the push of RBP. NOTE(review): presumably this pads the
           // entry for NativeJump::patch_verified_entry, per the WARNING at
           // the top of this function - confirm.
 954       if (need_nop) {
 955         emit_opcode(cbuf, 0x90); // nop
 956       }
 957     } else {
 958       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 959       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 960       emit_d32(cbuf, framesize);
 961     }
 962   }
 963 
       // Record the current instruction offset as the point where the frame
       // is complete.
 964   C->set_frame_complete(cbuf.insts_size());
 965 
 966 #ifdef ASSERT
       // Debug-only alignment check. rax is saved and restored around it, and
       // rsp is sampled while rax is still pushed, so the expected low bits
       // are StackAlignmentInBytes - wordSize. The pop sits between cmpptr
       // and jcc, so this relies on pop leaving the condition flags untouched.
 967   if (VerifyStackAtCalls) {
 968     Label L;
 969     MacroAssembler masm(&cbuf);
 970     masm.push(rax);
 971     masm.mov(rax, rsp);
 972     masm.andptr(rax, StackAlignmentInBytes-1);
 973     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 974     masm.pop(rax);
 975     masm.jcc(Assembler::equal, L);
 976     masm.stop("Stack is not properly aligned!");
 977     masm.bind(L);
 978   }
 979 #endif
 980 }
 981 
 982 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 983 {
       // No closed-form size: the prolog varies with the stack bang, the
       // cookie push and the frame size, so this defers to MachNode::size.
 984   return MachNode::size(ra_); // too many variables; just compute it
 985                               // the hard way
 986 }
 987 
 988 int MachPrologNode::reloc() const
 989 {
       // NOTE(review): the value returned is 0 although the trailing comment
       // calls it "a large enough number"; presumably this is a bound on the
       // relocation entries the prolog needs - confirm against the callers.
 990   return 0; // a large enough number
 991 }
 992 
 993 //=============================================================================
 994 #ifndef PRODUCT
 995 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 996 {
 997   // Textual rendering of the method epilog, step by step.
 998   Compile* compile = ra_->C;
 999   int framesize = compile->frame_slots() << LogBytesPerInt;
1000   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1001   // The return address and the saved RBP are not part of the adjustment
1002   framesize -= 2*wordSize;
1003   if (framesize != 0) {
1004     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
1005     st->print("\t");
1006   }
1007   st->print_cr("popq   rbp");
1008   // The safepoint poll is listed only for polling method compilations
1009   if (!do_polling() || !compile->is_method_compilation()) {
1010     return;
1011   }
1012   st->print("\t");
1013   if (Assembler::is_polling_page_far()) {
1014     st->print_cr("movq   rscratch1, #polling_page_address\n\t"
1015                  "testl  rax, [rscratch1]\t"
1016                  "# Safepoint: poll for GC");
1017   } else {
1018     st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
1019                  "# Safepoint: poll for GC");
1020   }
1021 }
1022 #endif
1023 
1024 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1025 {
1026   // Emits the method epilog: frame teardown, pop of RBP, optional poll.
1027   Compile* compile = ra_->C;
1028   int framesize = compile->frame_slots() << LogBytesPerInt;
1029   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1030   // The return address and the saved RBP are not part of the adjustment
1031   framesize -= 2*wordSize;
1032
1033   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1034   if (framesize != 0) {
1035     // addq rsp, #framesize, with an 8-bit immediate when it fits
1036     const bool fits_imm8 = (framesize < 0x80);
1037     emit_opcode(cbuf, Assembler::REX_W);
1038     emit_opcode(cbuf, fits_imm8 ? 0x83 : 0x81);
1039     emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1040     if (fits_imm8) {
1041       emit_d8(cbuf, framesize);
1042     } else {
1043       emit_d32(cbuf, framesize);
1044     }
1045   }
1046
1047   // popq rbp
1048   emit_opcode(cbuf, 0x58 | RBP_enc);
1049   // The safepoint poll is emitted only for polling method compilations
1050   if (!do_polling() || !compile->is_method_compilation()) {
1051     return;
1052   }
1053
1054   MacroAssembler _masm(&cbuf);
1055   AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1056   if (Assembler::is_polling_page_far()) {
1057     __ lea(rscratch1, polling_page);
1058     __ relocate(relocInfo::poll_return_type);
1059     __ testl(rax, Address(rscratch1, 0));
1060   } else {
1061     __ testl(rax, polling_page);
1062   }
1063 }
1063 
1064 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1065 {
       // No closed-form size: the epilog varies with the frame size and the
       // polling choices, so this defers to MachNode::size.
1066   return MachNode::size(ra_); // too many variables; just compute it
1067                               // the hard way
1068 }
1069 
1070 int MachEpilogNode::reloc() const
1071 {
       // MachEpilogNode::emit attaches poll_return_type relocation info on its
       // polling path. NOTE(review): 2 is presumably an upper bound on the
       // relocation entries needed there - confirm against the callers.
1072   return 2; // a large enough number
1073 }
1074 
1075 const Pipeline* MachEpilogNode::pipeline() const
1076 {
       // No epilog-specific pipeline description: returns whatever
       // MachNode::pipeline_class() provides.
1077   return MachNode::pipeline_class();
1078 }
1079 
1080 int MachEpilogNode::safepoint_offset() const
1081 {
       // Always reports offset 0.
       // NOTE(review): how callers interpret this offset is not visible here.
1082   return 0;
1083 }
1084 
1085 //=============================================================================
1086 
     // Location classes produced by rc_class() and used by the spill-copy code
     // to pick a move sequence:
     //   rc_bad   - the OptoReg name is not valid
     //   rc_int   - a VMReg for which is_Register() holds
     //   rc_float - a VMReg for which is_XMMRegister() holds
     //   rc_stack - a stack location
1087 enum RC {
1088   rc_bad,
1089   rc_int,
1090   rc_float,
1091   rc_stack
1092 };
1093 
1094 static enum RC rc_class(OptoReg::Name reg)
1095 {
1096   // Map an allocator register name onto its location class.
1097   if (!OptoReg::is_valid(reg)) { return rc_bad; }
1098   if (OptoReg::is_stack(reg)) { return rc_stack; }
1099
1100   VMReg r = OptoReg::as_VMReg(reg);
1101   if (!r->is_Register()) {
1102     assert(r->is_XMMRegister(), "must be");
1103     return rc_float;
1104   }
1105   return rc_int;
1106 }
1107 
1108 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1109                                        PhaseRegAlloc* ra_,
1110                                        bool do_size,
1111                                        outputStream* st) const
1112 {
1113 
1114   // Get registers to move
1115   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1116   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1117   OptoReg::Name dst_second = ra_->get_reg_second(this);
1118   OptoReg::Name dst_first = ra_->get_reg_first(this);
1119 
1120   enum RC src_second_rc = rc_class(src_second);
1121   enum RC src_first_rc = rc_class(src_first);
1122   enum RC dst_second_rc = rc_class(dst_second);
1123   enum RC dst_first_rc = rc_class(dst_first);
1124 
1125   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1126          "must move at least 1 register" );
1127 
1128   if (src_first == dst_first && src_second == dst_second) {
1129     // Self copy, no move
1130     return 0;
1131   } else if (src_first_rc == rc_stack) {
1132     // mem ->
1133     if (dst_first_rc == rc_stack) {
1134       // mem -> mem
1135       assert(src_second != dst_first, "overlap");
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int src_offset = ra_->reg2offset(src_first);
1140         int dst_offset = ra_->reg2offset(dst_first);
1141         if (cbuf) {
1142           emit_opcode(*cbuf, 0xFF);
1143           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1144 
1145           emit_opcode(*cbuf, 0x8F);
1146           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1147 
1148 #ifndef PRODUCT
1149         } else if (!do_size) {
1150           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1151                      "popq    [rsp + #%d]",
1152                      src_offset,
1153                      dst_offset);
1154 #endif
1155         }
1156         return
1157           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1158           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1159       } else {
1160         // 32-bit
1161         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1162         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1163         // No pushl/popl, so:
1164         int src_offset = ra_->reg2offset(src_first);
1165         int dst_offset = ra_->reg2offset(dst_first);
1166         if (cbuf) {
1167           emit_opcode(*cbuf, Assembler::REX_W);
1168           emit_opcode(*cbuf, 0x89);
1169           emit_opcode(*cbuf, 0x44);
1170           emit_opcode(*cbuf, 0x24);
1171           emit_opcode(*cbuf, 0xF8);
1172 
1173           emit_opcode(*cbuf, 0x8B);
1174           encode_RegMem(*cbuf,
1175                         RAX_enc,
1176                         RSP_enc, 0x4, 0, src_offset,
1177                         false);
1178 
1179           emit_opcode(*cbuf, 0x89);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, dst_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, Assembler::REX_W);
1186           emit_opcode(*cbuf, 0x8B);
1187           emit_opcode(*cbuf, 0x44);
1188           emit_opcode(*cbuf, 0x24);
1189           emit_opcode(*cbuf, 0xF8);
1190 
1191 #ifndef PRODUCT
1192         } else if (!do_size) {
1193           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1194                      "movl    rax, [rsp + #%d]\n\t"
1195                      "movl    [rsp + #%d], rax\n\t"
1196                      "movq    rax, [rsp - #8]",
1197                      src_offset,
1198                      dst_offset);
1199 #endif
1200         }
1201         return
1202           5 + // movq
1203           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1204           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1205           5; // movq
1206       }
1207     } else if (dst_first_rc == rc_int) {
1208       // mem -> gpr
1209       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1210           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1211         // 64-bit
1212         int offset = ra_->reg2offset(src_first);
1213         if (cbuf) {
1214           if (Matcher::_regEncode[dst_first] < 8) {
1215             emit_opcode(*cbuf, Assembler::REX_W);
1216           } else {
1217             emit_opcode(*cbuf, Assembler::REX_WR);
1218           }
1219           emit_opcode(*cbuf, 0x8B);
1220           encode_RegMem(*cbuf,
1221                         Matcher::_regEncode[dst_first],
1222                         RSP_enc, 0x4, 0, offset,
1223                         false);
1224 #ifndef PRODUCT
1225         } else if (!do_size) {
1226           st->print("movq    %s, [rsp + #%d]\t# spill",
1227                      Matcher::regName[dst_first],
1228                      offset);
1229 #endif
1230         }
1231         return
1232           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1233       } else {
1234         // 32-bit
1235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1237         int offset = ra_->reg2offset(src_first);
1238         if (cbuf) {
1239           if (Matcher::_regEncode[dst_first] >= 8) {
1240             emit_opcode(*cbuf, Assembler::REX_R);
1241           }
1242           emit_opcode(*cbuf, 0x8B);
1243           encode_RegMem(*cbuf,
1244                         Matcher::_regEncode[dst_first],
1245                         RSP_enc, 0x4, 0, offset,
1246                         false);
1247 #ifndef PRODUCT
1248         } else if (!do_size) {
1249           st->print("movl    %s, [rsp + #%d]\t# spill",
1250                      Matcher::regName[dst_first],
1251                      offset);
1252 #endif
1253         }
1254         return
1255           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1256           ((Matcher::_regEncode[dst_first] < 8)
1257            ? 3
1258            : 4); // REX
1259       }
1260     } else if (dst_first_rc == rc_float) {
1261       // mem-> xmm
1262       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1263           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1264         // 64-bit
1265         int offset = ra_->reg2offset(src_first);
1266         if (cbuf) {
1267           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1268           if (Matcher::_regEncode[dst_first] >= 8) {
1269             emit_opcode(*cbuf, Assembler::REX_R);
1270           }
1271           emit_opcode(*cbuf, 0x0F);
1272           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1273           encode_RegMem(*cbuf,
1274                         Matcher::_regEncode[dst_first],
1275                         RSP_enc, 0x4, 0, offset,
1276                         false);
1277 #ifndef PRODUCT
1278         } else if (!do_size) {
1279           st->print("%s  %s, [rsp + #%d]\t# spill",
1280                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1281                      Matcher::regName[dst_first],
1282                      offset);
1283 #endif
1284         }
1285         return
1286           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1287           ((Matcher::_regEncode[dst_first] < 8)
1288            ? 5
1289            : 6); // REX
1290       } else {
1291         // 32-bit
1292         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1293         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1294         int offset = ra_->reg2offset(src_first);
1295         if (cbuf) {
1296           emit_opcode(*cbuf, 0xF3);
1297           if (Matcher::_regEncode[dst_first] >= 8) {
1298             emit_opcode(*cbuf, Assembler::REX_R);
1299           }
1300           emit_opcode(*cbuf, 0x0F);
1301           emit_opcode(*cbuf, 0x10);
1302           encode_RegMem(*cbuf,
1303                         Matcher::_regEncode[dst_first],
1304                         RSP_enc, 0x4, 0, offset,
1305                         false);
1306 #ifndef PRODUCT
1307         } else if (!do_size) {
1308           st->print("movss   %s, [rsp + #%d]\t# spill",
1309                      Matcher::regName[dst_first],
1310                      offset);
1311 #endif
1312         }
1313         return
1314           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1315           ((Matcher::_regEncode[dst_first] < 8)
1316            ? 5
1317            : 6); // REX
1318       }
1319     }
1320   } else if (src_first_rc == rc_int) {
1321     // gpr ->
1322     if (dst_first_rc == rc_stack) {
1323       // gpr -> mem
1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1326         // 64-bit
1327         int offset = ra_->reg2offset(dst_first);
1328         if (cbuf) {
1329           if (Matcher::_regEncode[src_first] < 8) {
1330             emit_opcode(*cbuf, Assembler::REX_W);
1331           } else {
1332             emit_opcode(*cbuf, Assembler::REX_WR);
1333           }
1334           emit_opcode(*cbuf, 0x89);
1335           encode_RegMem(*cbuf,
1336                         Matcher::_regEncode[src_first],
1337                         RSP_enc, 0x4, 0, offset,
1338                         false);
1339 #ifndef PRODUCT
1340         } else if (!do_size) {
1341           st->print("movq    [rsp + #%d], %s\t# spill",
1342                      offset,
1343                      Matcher::regName[src_first]);
1344 #endif
1345         }
1346         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1347       } else {
1348         // 32-bit
1349         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1350         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1351         int offset = ra_->reg2offset(dst_first);
1352         if (cbuf) {
1353           if (Matcher::_regEncode[src_first] >= 8) {
1354             emit_opcode(*cbuf, Assembler::REX_R);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movl    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return
1369           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1370           ((Matcher::_regEncode[src_first] < 8)
1371            ? 3
1372            : 4); // REX
1373       }
1374     } else if (dst_first_rc == rc_int) {
1375       // gpr -> gpr
1376       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1377           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1378         // 64-bit
1379         if (cbuf) {
1380           if (Matcher::_regEncode[dst_first] < 8) {
1381             if (Matcher::_regEncode[src_first] < 8) {
1382               emit_opcode(*cbuf, Assembler::REX_W);
1383             } else {
1384               emit_opcode(*cbuf, Assembler::REX_WB);
1385             }
1386           } else {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_WR);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WRB);
1391             }
1392           }
1393           emit_opcode(*cbuf, 0x8B);
1394           emit_rm(*cbuf, 0x3,
1395                   Matcher::_regEncode[dst_first] & 7,
1396                   Matcher::_regEncode[src_first] & 7);
1397 #ifndef PRODUCT
1398         } else if (!do_size) {
1399           st->print("movq    %s, %s\t# spill",
1400                      Matcher::regName[dst_first],
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404         return 3; // REX
1405       } else {
1406         // 32-bit
1407         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1408         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1409         if (cbuf) {
1410           if (Matcher::_regEncode[dst_first] < 8) {
1411             if (Matcher::_regEncode[src_first] >= 8) {
1412               emit_opcode(*cbuf, Assembler::REX_B);
1413             }
1414           } else {
1415             if (Matcher::_regEncode[src_first] < 8) {
1416               emit_opcode(*cbuf, Assembler::REX_R);
1417             } else {
1418               emit_opcode(*cbuf, Assembler::REX_RB);
1419             }
1420           }
1421           emit_opcode(*cbuf, 0x8B);
1422           emit_rm(*cbuf, 0x3,
1423                   Matcher::_regEncode[dst_first] & 7,
1424                   Matcher::_regEncode[src_first] & 7);
1425 #ifndef PRODUCT
1426         } else if (!do_size) {
1427           st->print("movl    %s, %s\t# spill",
1428                      Matcher::regName[dst_first],
1429                      Matcher::regName[src_first]);
1430 #endif
1431         }
1432         return
1433           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1434           ? 2
1435           : 3; // REX
1436       }
1437     } else if (dst_first_rc == rc_float) {
1438       // gpr -> xmm
1439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1441         // 64-bit
1442         if (cbuf) {
1443           emit_opcode(*cbuf, 0x66);
1444           if (Matcher::_regEncode[dst_first] < 8) {
1445             if (Matcher::_regEncode[src_first] < 8) {
1446               emit_opcode(*cbuf, Assembler::REX_W);
1447             } else {
1448               emit_opcode(*cbuf, Assembler::REX_WB);
1449             }
1450           } else {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_WR);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WRB);
1455             }
1456           }
1457           emit_opcode(*cbuf, 0x0F);
1458           emit_opcode(*cbuf, 0x6E);
1459           emit_rm(*cbuf, 0x3,
1460                   Matcher::_regEncode[dst_first] & 7,
1461                   Matcher::_regEncode[src_first] & 7);
1462 #ifndef PRODUCT
1463         } else if (!do_size) {
1464           st->print("movdq   %s, %s\t# spill",
1465                      Matcher::regName[dst_first],
1466                      Matcher::regName[src_first]);
1467 #endif
1468         }
1469         return 5; // REX
1470       } else {
1471         // 32-bit
1472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1474         if (cbuf) {
1475           emit_opcode(*cbuf, 0x66);
1476           if (Matcher::_regEncode[dst_first] < 8) {
1477             if (Matcher::_regEncode[src_first] >= 8) {
1478               emit_opcode(*cbuf, Assembler::REX_B);
1479             }
1480           } else {
1481             if (Matcher::_regEncode[src_first] < 8) {
1482               emit_opcode(*cbuf, Assembler::REX_R);
1483             } else {
1484               emit_opcode(*cbuf, Assembler::REX_RB);
1485             }
1486           }
1487           emit_opcode(*cbuf, 0x0F);
1488           emit_opcode(*cbuf, 0x6E);
1489           emit_rm(*cbuf, 0x3,
1490                   Matcher::_regEncode[dst_first] & 7,
1491                   Matcher::_regEncode[src_first] & 7);
1492 #ifndef PRODUCT
1493         } else if (!do_size) {
1494           st->print("movdl   %s, %s\t# spill",
1495                      Matcher::regName[dst_first],
1496                      Matcher::regName[src_first]);
1497 #endif
1498         }
1499         return
1500           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1501           ? 4
1502           : 5; // REX
1503       }
1504     }
1505   } else if (src_first_rc == rc_float) {
1506     // xmm ->
1507     if (dst_first_rc == rc_stack) {
1508       // xmm -> mem
1509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1511         // 64-bit
1512         int offset = ra_->reg2offset(dst_first);
1513         if (cbuf) {
1514           emit_opcode(*cbuf, 0xF2);
1515           if (Matcher::_regEncode[src_first] >= 8) {
1516               emit_opcode(*cbuf, Assembler::REX_R);
1517           }
1518           emit_opcode(*cbuf, 0x0F);
1519           emit_opcode(*cbuf, 0x11);
1520           encode_RegMem(*cbuf,
1521                         Matcher::_regEncode[src_first],
1522                         RSP_enc, 0x4, 0, offset,
1523                         false);
1524 #ifndef PRODUCT
1525         } else if (!do_size) {
1526           st->print("movsd   [rsp + #%d], %s\t# spill",
1527                      offset,
1528                      Matcher::regName[src_first]);
1529 #endif
1530         }
1531         return
1532           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1533           ((Matcher::_regEncode[src_first] < 8)
1534            ? 5
1535            : 6); // REX
1536       } else {
1537         // 32-bit
1538         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1539         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1540         int offset = ra_->reg2offset(dst_first);
1541         if (cbuf) {
1542           emit_opcode(*cbuf, 0xF3);
1543           if (Matcher::_regEncode[src_first] >= 8) {
1544               emit_opcode(*cbuf, Assembler::REX_R);
1545           }
1546           emit_opcode(*cbuf, 0x0F);
1547           emit_opcode(*cbuf, 0x11);
1548           encode_RegMem(*cbuf,
1549                         Matcher::_regEncode[src_first],
1550                         RSP_enc, 0x4, 0, offset,
1551                         false);
1552 #ifndef PRODUCT
1553         } else if (!do_size) {
1554           st->print("movss   [rsp + #%d], %s\t# spill",
1555                      offset,
1556                      Matcher::regName[src_first]);
1557 #endif
1558         }
1559         return
1560           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1561           ((Matcher::_regEncode[src_first] < 8)
1562            ? 5
1563            : 6); // REX
1564       }
1565     } else if (dst_first_rc == rc_int) {
1566       // xmm -> gpr
1567       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1568           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1569         // 64-bit
1570         if (cbuf) {
1571           emit_opcode(*cbuf, 0x66);
1572           if (Matcher::_regEncode[dst_first] < 8) {
1573             if (Matcher::_regEncode[src_first] < 8) {
1574               emit_opcode(*cbuf, Assembler::REX_W);
1575             } else {
1576               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1577             }
1578           } else {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WRB);
1583             }
1584           }
1585           emit_opcode(*cbuf, 0x0F);
1586           emit_opcode(*cbuf, 0x7E);
1587           emit_rm(*cbuf, 0x3,
1588                   Matcher::_regEncode[src_first] & 7,
1589                   Matcher::_regEncode[dst_first] & 7);
1590 #ifndef PRODUCT
1591         } else if (!do_size) {
1592           st->print("movdq   %s, %s\t# spill",
1593                      Matcher::regName[dst_first],
1594                      Matcher::regName[src_first]);
1595 #endif
1596         }
1597         return 5; // REX
1598       } else {
1599         // 32-bit
1600         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1601         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1602         if (cbuf) {
1603           emit_opcode(*cbuf, 0x66);
1604           if (Matcher::_regEncode[dst_first] < 8) {
1605             if (Matcher::_regEncode[src_first] >= 8) {
1606               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1607             }
1608           } else {
1609             if (Matcher::_regEncode[src_first] < 8) {
1610               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1611             } else {
1612               emit_opcode(*cbuf, Assembler::REX_RB);
1613             }
1614           }
1615           emit_opcode(*cbuf, 0x0F);
1616           emit_opcode(*cbuf, 0x7E);
1617           emit_rm(*cbuf, 0x3,
1618                   Matcher::_regEncode[src_first] & 7,
1619                   Matcher::_regEncode[dst_first] & 7);
1620 #ifndef PRODUCT
1621         } else if (!do_size) {
1622           st->print("movdl   %s, %s\t# spill",
1623                      Matcher::regName[dst_first],
1624                      Matcher::regName[src_first]);
1625 #endif
1626         }
1627         return
1628           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1629           ? 4
1630           : 5; // REX
1631       }
1632     } else if (dst_first_rc == rc_float) {
1633       // xmm -> xmm
1634       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1635           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1636         // 64-bit
1637         if (cbuf) {
1638           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1639           if (Matcher::_regEncode[dst_first] < 8) {
1640             if (Matcher::_regEncode[src_first] >= 8) {
1641               emit_opcode(*cbuf, Assembler::REX_B);
1642             }
1643           } else {
1644             if (Matcher::_regEncode[src_first] < 8) {
1645               emit_opcode(*cbuf, Assembler::REX_R);
1646             } else {
1647               emit_opcode(*cbuf, Assembler::REX_RB);
1648             }
1649           }
1650           emit_opcode(*cbuf, 0x0F);
1651           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1652           emit_rm(*cbuf, 0x3,
1653                   Matcher::_regEncode[dst_first] & 7,
1654                   Matcher::_regEncode[src_first] & 7);
1655 #ifndef PRODUCT
1656         } else if (!do_size) {
1657           st->print("%s  %s, %s\t# spill",
1658                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1659                      Matcher::regName[dst_first],
1660                      Matcher::regName[src_first]);
1661 #endif
1662         }
1663         return
1664           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1665           ? 4
1666           : 5; // REX
1667       } else {
1668         // 32-bit
1669         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1670         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1671         if (cbuf) {
1672           if (!UseXmmRegToRegMoveAll)
1673             emit_opcode(*cbuf, 0xF3);
1674           if (Matcher::_regEncode[dst_first] < 8) {
1675             if (Matcher::_regEncode[src_first] >= 8) {
1676               emit_opcode(*cbuf, Assembler::REX_B);
1677             }
1678           } else {
1679             if (Matcher::_regEncode[src_first] < 8) {
1680               emit_opcode(*cbuf, Assembler::REX_R);
1681             } else {
1682               emit_opcode(*cbuf, Assembler::REX_RB);
1683             }
1684           }
1685           emit_opcode(*cbuf, 0x0F);
1686           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1687           emit_rm(*cbuf, 0x3,
1688                   Matcher::_regEncode[dst_first] & 7,
1689                   Matcher::_regEncode[src_first] & 7);
1690 #ifndef PRODUCT
1691         } else if (!do_size) {
1692           st->print("%s  %s, %s\t# spill",
1693                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1694                      Matcher::regName[dst_first],
1695                      Matcher::regName[src_first]);
1696 #endif
1697         }
1698         return
1699           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1700           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1701           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1702       }
1703     }
1704   }
1705 
1706   assert(0," foo ");
1707   Unimplemented();
1708 
1709   return 0;
1710 }
1711 
1712 #ifndef PRODUCT
1713 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const // non-PRODUCT builds only: describe this spill copy on st
1714 {
1715   implementation(NULL, ra_, false, st); // cbuf == NULL and do_size == false select the st->print branches of implementation()
1716 }
1717 #endif
1718 
1719 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const // write the spill copy's instruction bytes into cbuf
1720 {
1721   implementation(&cbuf, ra_, false, NULL); // a non-NULL cbuf selects the emit branches; the returned size is ignored here
1722 }
1723 
1724 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const // byte length of the spill copy, as reported by implementation()
1725 {
1726   return implementation(NULL, ra_, true, NULL); // do_size == true with no cbuf and no stream: nothing is emitted or printed
1727 }
1728 
1729 //=============================================================================
1730 #ifndef PRODUCT
1731 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const // non-PRODUCT builds only; the register allocator argument is unused
1732 {
1733   st->print("nop \t# %d bytes pad for loops and calls", _count); // _count is the same value size() returns
1734 }
1735 #endif
1736 
1737 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const // append the padding to cbuf through a MacroAssembler
1738 {
1739   MacroAssembler _masm(&cbuf);
1740   __ nop(_count); // NOTE(review): `__` is presumably shorthand for `_masm.`; its definition is outside this chunk
1741 }
1742 
1743 uint MachNopNode::size(PhaseRegAlloc*) const // the node's size in bytes is exactly its pad count
1744 {
1745   return _count;
1746 }
1747 
1748 
1749 //=============================================================================
1750 #ifndef PRODUCT
1751 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const // non-PRODUCT builds only: print the leaq that emit() encodes
1752 {
1753   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); // reg2offset() of the first element of input 0's register mask
1754   int reg = ra_->get_reg_first(this); // register number; used below as an index into Matcher::regName
1755   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1756             Matcher::regName[reg], offset);
1757 }
1758 #endif
1759 
1760 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1761 {
1762   // Encodes  leaq reg, [rsp + disp]  as: REX prefix, opcode 0x8D, ModRM, SIB,
1763   // displacement.  Only the ModRM mod field and the displacement width differ
1764   // between the short (disp8) and the long (disp32) form.
1765   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1766   const int enc = ra_->get_encode(this);
1767   const bool wide_disp = (disp >= 0x80); // too large for the 8-bit form
1768   emit_opcode(cbuf, enc >= 8 ? Assembler::REX_WR : Assembler::REX_W);
1769   emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1770   emit_rm(cbuf, wide_disp ? 0x2 : 0x1, enc & 7, 0x04);
1771   emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1772   if (wide_disp) {
1773     emit_d32(cbuf, disp);
1774   } else {
1775     emit_d8(cbuf, disp);
1776   }
1777 }
1778 
1779 uint BoxLockNode::size(PhaseRegAlloc *ra_) const // must agree with the number of bytes BoxLockNode::emit() produces
1780 {
1781   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1782   return (offset < 0x80) ? 5 : 8; // REX + opcode + ModRM + SIB + disp8 (5 bytes), or the same with disp32 (8 bytes)
1783 }
1784 
1785 //=============================================================================
1786 
1787 // Emit the call stub used when compiled Java code calls into the interpreter.
1788 void emit_java_to_interp(CodeBuffer& cbuf)
1789 {
1790   // Stub is fixed up when the corresponding call is converted from
1791   // calling compiled code to calling interpreted code.
1792   // movq rbx, 0
1793   // jmp -5 # to self
1794 
1795   address mark = cbuf.insts_mark();  // get mark within main instrs section
1796 
1797   // Note that the code buffer's insts_mark is always relative to insts.
1798   // That's why we must use the macroassembler to generate a stub.
1799   MacroAssembler _masm(&cbuf);
1800 
1801   address base =
1802   __ start_a_stub(Compile::MAX_stubs_size);
1803   if (base == NULL)  return;  // CodeBuffer::expand failed; no stub is emitted
1804   // static stub relocation stores the instruction address of the call
1805   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1806   // static stub relocation also tags the methodOop in the code-stream.
1807   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1808   // This is recognized as unresolved by relocs/nativeinst/ic code
1809   __ jump(RuntimeAddress(__ pc()));  // target is the jump's own address, i.e. the "jmp ... # to self" sketched above
1810 
1811   // Update current stubs pointer and restore insts_end.
1812   __ end_a_stub();
1813 }
1814 
1815 // Size in bytes of the compiled-Java-to-interpreter call stub.
1816 uint size_java_to_interp()
1817 {
1818   return 15;  // movq (1+1+8); jmp (1+4)
1819 }
1820 
1821 // Number of relocation entries for the compiled-Java-to-interpreter call stub.
1822 uint reloc_java_to_interp()
1823 {
1824   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1825 }
1826 
1827 //=============================================================================
1828 #ifndef PRODUCT
1829 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const // non-PRODUCT builds only: print the inline cache check as text
1830 {
1831   if (UseCompressedOops) { // compressed oops: the klass is first loaded into rscratch1, then compared
1832     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1833     if (Universe::narrow_oop_shift() != 0) {
1834       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1835     }
1836     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1837   } else { // otherwise rax is compared directly against the klass field in memory
1838     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1839                  "# Inline cache check");
1840   }
1841   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1842   st->print_cr("\tnop\t# nops to align entry point"); // one line is printed regardless of how many nops emit() adds
1843 }
1844 #endif
1845 
1846 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const // emit the inline cache check followed by alignment padding
1847 {
1848   MacroAssembler masm(&cbuf);
1849   uint insts_size = cbuf.insts_size(); // size before this node emits anything; used below to measure the emitted length
1850   if (UseCompressedOops) {
1851     masm.load_klass(rscratch1, j_rarg0);
1852     masm.cmpptr(rax, rscratch1);
1853   } else {
1854     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1855   }
1856 
1857   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub())); // on mismatch go to the IC miss stub
1858 
1859   /* WARNING these NOPs are critical so that verified entry point is properly
1860      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1861   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3); // 1..4: bytes from the emitted length up to the next multiple of 4
1862   if (OptoBreakpoint) {
1863     // Leave space for int3
1864     nops_cnt -= 1;
1865   }
1866   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1867   if (nops_cnt > 0)
1868     masm.nop(nops_cnt);
1869 }
1870 
1871 uint MachUEPNode::size(PhaseRegAlloc* ra_) const // no closed-form size: defer to the generic MachNode computation
1872 {
1873   return MachNode::size(ra_); // too many variables; just compute it
1874                               // the hard way
1875 }
1876 
1877 
1878 //=============================================================================
1879 uint size_exception_handler() // space reserved for the stub written by emit_exception_handler()
1880 {
1881   // NativeCall instruction size is the same as NativeJump.
1882   // Note that this value is also credited (in output.cpp) to
1883   // the size of the code section.
1884   return NativeJump::instruction_size;
1885 }
1886 
1887 // Emit the exception handler stub: a single jump to the OptoRuntime exception blob.
1888 int emit_exception_handler(CodeBuffer& cbuf) // returns the handler's start offset, or 0 if the stub could not be started
1889 {
1890 
1891   // Note that the code buffer's insts_mark is always relative to insts.
1892   // That's why we must use the macroassembler to generate a handler.
1893   MacroAssembler _masm(&cbuf);
1894   address base =
1895   __ start_a_stub(size_exception_handler());
1896   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1897   int offset = __ offset(); // captured before anything is emitted, so it marks the start of the handler
1898   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1899   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); // emitted code must fit the reserved size
1900   __ end_a_stub();
1901   return offset;
1902 }
1903 
1904 uint size_deopt_handler() // space reserved for the stub written by emit_deopt_handler()
1905 {
1906   // three 5 byte instructions
1907   return 15;
1908 }
1909 
1910 // Emit the deoptimization handler stub: push the handler's own address, then jump to the deopt blob.
1911 int emit_deopt_handler(CodeBuffer& cbuf) // returns the handler's start offset, or 0 if the stub could not be started
1912 {
1913 
1914   // Note that the code buffer's insts_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_deopt_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   address the_pc = (address) __ pc(); // NOTE(review): never read again in this function; only the comments below refer to it
1922   Label next;
1923   // push a "the_pc" on the stack without destroying any registers
1924   // as they all may be live.
1925 
1926   // push address of "next"
1927   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1928   __ bind(next);
1929   // adjust it so it matches "the_pc"
1930   __ subptr(Address(rsp, 0), __ offset() - offset); // subtract the bytes emitted so far from the pushed address of "next"
1931   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1932   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); // emitted code must fit the reserved size
1933   __ end_a_stub();
1934   return offset;
1935 }
1936 
1937 
1938 const bool Matcher::match_rule_supported(int opcode) { // true exactly when has_match_rule(opcode) is true
1939   if (!has_match_rule(opcode))
1940     return false;
1941 
1942   return true;  // Per default match rules are supported.
1943 }
1944 
1945 int Matcher::regnum_to_fpu_offset(int regnum) // converts a register number into an offset by subtracting 32
1946 {
1947   return regnum - 32; // The FP registers are in the second chunk
1948 }
1949 
1950 // This query is UltraSparc specific; returning true just means a fast l2f conversion is available.
1951 const bool Matcher::convL2FSupported(void) {
1952   return true;
1953 }
1954 
1955 // Vector width in bytes (a fixed 8 in this description).
1956 const uint Matcher::vector_width_in_bytes(void) {
1957   return 8;
1958 }
1959 
1960 // Ideal register type used for vectors (always Op_RegD in this description).
1961 const uint Matcher::vector_ideal_reg(void) {
1962   return Op_RegD;
1963 }
1964 
1965 // Is this branch offset short enough that a short branch can be used?
1966 //
1967 // NOTE: If the platform does not provide any short branch variants, then
1968 //       this method should return false for offset 0.
1969 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1970   // The caller's offset is measured from the address of the branch, but an
1971   // x86 branch displacement is measured from the next instruction.
1972   const int disp = offset - br_size;
1973 
1974   // The short form of jmpConUCF2 contains multiple branches, so its usable
1975   // range is slightly narrower than the plain signed-byte range.
1976   const bool multi_branch = (rule == jmpConUCF2_rule);
1977   const int lowest  = multi_branch ? -126 : -128;
1978   const int highest = multi_branch ?  125 :  127;
1979   return lowest <= disp && disp <= highest;
1980 }
1981 
1982 const bool Matcher::isSimpleConstant64(jlong value) { // NOTE: value is not inspected; the answer is unconditionally true
1983   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1984   //return value == (int) value;  // Cf. storeImmL and immL32.
1985 
1986   // Probably always true, even if a temp register is required.
1987   return true;
1988 }
1989 
1990 // The count passed to rep stosq for the ClearArray node is in words, not bytes.
1991 const bool Matcher::init_array_count_is_in_bytes = false;
1992 
1993 // Threshold size for ClearArray: 8 longs, expressed in bytes.
1994 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1995 
1996 // Should the Matcher clone shifts on addressing modes, expecting them
1997 // to be subsumed into complex addressing expressions or compute them
1998 // into registers?  True for Intel but false for most RISCs
1999 const bool Matcher::clone_shift_expressions = true;
2000 
2001 // Do we need to mask the count passed to shift instructions or does
2002 // the CPU only look at the lower 5/6 bits anyway?
2003 const bool Matcher::need_masked_shift_count = false;
2004 
2005 bool Matcher::narrow_oop_use_complex_address() { // only meaningful with compressed oops (asserted below)
2006   assert(UseCompressedOops, "only for compressed oops code");
2007   return (LogMinObjAlignmentInBytes <= 3); // NOTE(review): presumably 3 because x86 address scaling stops at a shift of 3 (times 8) -- confirm
2008 }
2009 
2010 // Is it better to copy float constants, or load them directly from
2011 // memory?  Intel can load a float constant from a direct address,
2012 // requiring no extra registers.  Most RISCs will have to materialize
2013 // an address into a register first, so they would do better to copy
2014 // the constant from stack.
2015 const bool Matcher::rematerialize_float_constants = true; // XXX
2016 
2017 // If the CPU can load and store misaligned doubles directly then no
2018 // fixup is needed.  Else we split the double into 2 integer pieces
2019 // and move it piece-by-piece.  Only happens when passing doubles into
2020 // C code as the Java calling convention forces doubles to be aligned.
2021 const bool Matcher::misaligned_doubles_ok = true;
2022 
2023 // No-op on amd64: the body is empty and both arguments are ignored.
2024 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2025 
2026 // Advertise here whether the CPU requires explicit rounding operations to
2027 // implement the UseStrictFP mode.
2028 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2029 
2030 // Are floats converted to double when stored to stack during deoptimization?
2031 // On x64 they are stored without conversion, so normal access can be used.
2032 bool Matcher::float_in_double() { return false; }
2033 
2034 // Do ints take an entire long register (true) or just half of one (false)?
2035 const bool Matcher::int_in_long = true;
2036 
2037 // Return whether or not this register is ever used as an argument.
2038 // This function is used on startup to build the trampoline stubs in
2039 // generateOptoStub.  Registers not mentioned will be killed by the VM
2040 // call in the trampoline, and arguments in those registers will not be
2041 // available to the callee.
2042 bool Matcher::can_be_java_arg(int reg)
2043 {
2044   return // each register is listed with its _H companion; both halves count as argument registers
2045     reg ==  RDI_num || reg ==  RDI_H_num ||
2046     reg ==  RSI_num || reg ==  RSI_H_num ||
2047     reg ==  RDX_num || reg ==  RDX_H_num ||
2048     reg ==  RCX_num || reg ==  RCX_H_num ||
2049     reg ==   R8_num || reg ==   R8_H_num ||
2050     reg ==   R9_num || reg ==   R9_H_num ||
2051     reg ==  R12_num || reg ==  R12_H_num ||
2052     reg == XMM0_num || reg == XMM0_H_num ||
2053     reg == XMM1_num || reg == XMM1_H_num ||
2054     reg == XMM2_num || reg == XMM2_H_num ||
2055     reg == XMM3_num || reg == XMM3_H_num ||
2056     reg == XMM4_num || reg == XMM4_H_num ||
2057     reg == XMM5_num || reg == XMM5_H_num ||
2058     reg == XMM6_num || reg == XMM6_H_num ||
2059     reg == XMM7_num || reg == XMM7_H_num;
2060 }
2061 
2062 bool Matcher::is_spillable_arg(int reg) // on this platform the spillable set is exactly the Java argument set
2063 {
2064   return can_be_java_arg(reg);
2065 }
2066 
2067 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { // divisor is not inspected; the answer is always false
2068   // In 64-bit mode, code that uses a multiply when the
2069   // divisor is constant is faster than the hardware
2070   // DIV instruction (it uses MulHiL).
2071   return false;
2072 }
2073 
2074 // Register mask for the DIVI projection of divmodI (INT_RAX_REG_mask).
2075 RegMask Matcher::divI_proj_mask() {
2076   return INT_RAX_REG_mask;
2077 }
2078 
2079 // Register mask for the MODI projection of divmodI (INT_RDX_REG_mask).
2080 RegMask Matcher::modI_proj_mask() {
2081   return INT_RDX_REG_mask;
2082 }
2083 
2084 // Register mask for the DIVL projection of divmodL (LONG_RAX_REG_mask).
2085 RegMask Matcher::divL_proj_mask() {
2086   return LONG_RAX_REG_mask;
2087 }
2088 
2089 // Register mask for the MODL projection of divmodL (LONG_RDX_REG_mask).
2090 RegMask Matcher::modL_proj_mask() {
2091   return LONG_RDX_REG_mask;
2092 }
2093 
2094 const RegMask Matcher::method_handle_invoke_SP_save_mask() { // register mask used to save SP around method handle invokes
2095   return PTR_RBP_REG_mask; // RBP; the preserve_SP / restore_SP enc_classes move rsp through rbp_mh_SP_save
2096 }
2097 
2098 static Address build_address(int b, int i, int s, int d) {
2099   // Assemble an Address from raw base / index / scale / displacement values.
2100   // An index that decodes to rsp is replaced by "no index, no scale";
2101   // any other index is used together with the caller's scale.
2102   Register raw_index = as_Register(i);
2103   const bool drop_index = (raw_index == rsp);
2104   Register index = drop_index ? noreg : raw_index;
2105   Address::ScaleFactor scale = drop_index ? Address::no_scale : (Address::ScaleFactor)s;
2106   return Address(as_Register(b), index, scale, d);
2107 }
2108 
2109 %}
2110 
2111 //----------ENCODING BLOCK-----------------------------------------------------
2112 // This block specifies the encoding classes used by the compiler to
2113 // output byte streams.  Encoding classes are parameterized macros
2114 // used by Machine Instruction Nodes in order to generate the bit
2115 // encoding of the instruction.  Operands specify their base encoding
2116 // interface with the interface keyword.  Four interfaces are
2117 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2118 // COND_INTER.  REG_INTER causes an operand to generate a function
2119 // which returns its register number when queried.  CONST_INTER causes
2120 // an operand to generate a function which returns the value of the
2121 // constant when queried.  MEMORY_INTER causes an operand to generate
2122 // four functions which return the Base Register, the Index Register,
2123 // the Scale Value, and the Offset Value of the operand when queried.
2124 // COND_INTER causes an operand to generate six functions which return
2125 // the encoding code (ie - encoding bits for the instruction)
2126 // associated with each basic boolean condition for a conditional
2127 // instruction.
2128 //
2129 // Instructions specify two basic values for encoding.  Again, a
2130 // function is available to check if the constant displacement is an
2131 // oop. They use the ins_encode keyword to specify their encoding
2132 // classes (which must be a sequence of enc_class names, and their
2133 // parameters, specified in the encoding block), and they use the
2134 // opcode keyword to specify, in order, their primary, secondary, and
2135 // tertiary opcode.  Only the opcode sections which a particular
2136 // instruction needs for encoding need to be specified.
2137 encode %{
2138   // Build emit functions for each basic byte or larger field in the
2139   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2140   // from C++ code in the enc_class source block.  Emit functions will
2141   // live in the main source block for now.  In future, we can
2142   // generalize this by adding a syntax that specifies the sizes of
2143   // fields in an order, so that the adlc can build the emit functions
2144   // automagically
2145 
2146   // Emit the primary opcode byte of the instruction that uses this enc_class.
2147   enc_class OpcP
2148   %{
2149     emit_opcode(cbuf, $primary);
2150   %}
2151 
2152   // Emit the secondary opcode byte of the instruction that uses this enc_class.
2153   enc_class OpcS
2154   %{
2155     emit_opcode(cbuf, $secondary);
2156   %}
2157 
2158   // Emit the tertiary opcode byte of the instruction that uses this enc_class.
2159   enc_class OpcT
2160   %{
2161     emit_opcode(cbuf, $tertiary);
2162   %}
2163 
2164   // Emit an opcode byte given directly as the constant operand d8.
2165   enc_class Opcode(immI d8)
2166   %{
2167     emit_opcode(cbuf, $d8$$constant);
2168   %}
2169 
2170   // Emit the 0x66 size prefix byte.
2171   enc_class SizePrefix
2172   %{
2173     emit_opcode(cbuf, 0x66);
2174   %}
2175 
2176   enc_class reg(rRegI reg)
2177   %{
2178     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7); // one emit_rm byte: fields 0x3, 0, and the low 3 bits of the register encoding
2179   %}
2180 
2181   enc_class reg_reg(rRegI dst, rRegI src)
2182   %{
2183     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // one emit_rm byte: field 0x3, then the low 3 bits of dst and of src; no prefix is emitted here
2184   %}
2185 
2186   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2187   %{
2188     emit_opcode(cbuf, $opcode$$constant); // opcode byte taken from the constant operand
2189     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // followed by the same register-register byte that reg_reg emits
2190   %}
2191 
2192   enc_class cmpfp_fixup() %{
2193       MacroAssembler _masm(&cbuf); // wrap cbuf so the helper below can emit through a MacroAssembler
2194       emit_cmpfp_fixup(_masm); // all of the work is delegated to emit_cmpfp_fixup(), defined outside this chunk
2195   %}
2196 
2197   enc_class cmpfp3(rRegI dst)
2198   %{
2199     int dstenc = $dst$$reg; // full register encoding; values of 8 and above need a REX prefix
2200 
2201     // movl $dst, -1
2202     if (dstenc >= 8) {
2203       emit_opcode(cbuf, Assembler::REX_B);
2204     }
2205     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2206     emit_d32(cbuf, -1);
2207 
2208     // jp,s done
2209     emit_opcode(cbuf, 0x7A);
2210     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A); // skips jb (2 bytes) + setne (3 or 4) + movzbl (3 or 4)
2211 
2212     // jb,s done
2213     emit_opcode(cbuf, 0x72);
2214     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne (3 or 4 bytes) + movzbl (3 or 4)
2215 
2216     // setne $dst
2217     if (dstenc >= 4) { // encodings 4 and above get a REX prefix here, which is the extra byte counted above
2218       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2219     }
2220     emit_opcode(cbuf, 0x0F);
2221     emit_opcode(cbuf, 0x95);
2222     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2223 
2224     // movzbl $dst, $dst
2225     if (dstenc >= 4) { // same prefix rule as for setne, with REX_RB because dst appears in both register fields
2226       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2227     }
2228     emit_opcode(cbuf, 0x0F);
2229     emit_opcode(cbuf, 0xB6);
2230     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2231   %}
2232 
2233   enc_class cdql_enc(no_rax_rdx_RegI div)
2234   %{
2235     // Full implementation of Java idiv and irem; checks for
2236     // special case as described in JVM spec., p.243 & p.271.
2237     //
2238     //         normal case                           special case
2239     //
2240     // input : rax: dividend                         min_int
2241     //         reg: divisor                          -1
2242     //
2243     // output: rax: quotient  (= rax idiv reg)       min_int
2244     //         rdx: remainder (= rax irem reg)       0
2245     //
2246     //  Code sequence:
2247     //
2248     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2249     //    5:   75 07/08                jne    e <normal>
2250     //    7:   33 d2                   xor    %edx,%edx
2251     //  [div >= 8 -> offset + 1]
2252     //  [REX_B]
2253     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2254     //    c:   74 03/04                je     11 <done>
2255     // 000000000000000e <normal>:
2256     //    e:   99                      cltd
2257     //  [div >= 8 -> offset + 1]
2258     //  [REX_B]
2259     //    f:   f7 f9                   idiv   $div
2260     // 0000000000000011 <done>:
2261 
2262     // cmp    $0x80000000,%eax
2263     emit_opcode(cbuf, 0x3d);
2264     emit_d8(cbuf, 0x00);
2265     emit_d8(cbuf, 0x00);
2266     emit_d8(cbuf, 0x00);
2267     emit_d8(cbuf, 0x80);
2268 
2269     // jne    e <normal>
2270     emit_opcode(cbuf, 0x75);
2271     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08); // one byte further when the compare below carries a REX_B prefix
2272 
2273     // xor    %edx,%edx
2274     emit_opcode(cbuf, 0x33);
2275     emit_d8(cbuf, 0xD2);
2276 
2277     // cmp    $0xffffffffffffffff,$div
2278     if ($div$$reg >= 8) {
2279       emit_opcode(cbuf, Assembler::REX_B);
2280     }
2281     emit_opcode(cbuf, 0x83);
2282     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2283     emit_d8(cbuf, 0xFF);
2284 
2285     // je     11 <done>
2286     emit_opcode(cbuf, 0x74);
2287     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04); // skips cltd plus the idiv that the user of this rule emits
2288 
2289     // <normal>
2290     // cltd
2291     emit_opcode(cbuf, 0x99);
2292 
2293     // idivl (note: must be emitted by the user of this rule)
2294     // <done>
2295   %}
2296 
2297   enc_class cdqq_enc(no_rax_rdx_RegL div)
2298   %{
2299     // Full implementation of Java ldiv and lrem; checks for
2300     // special case as described in JVM spec., p.243 & p.271.
2301     //
2302     //         normal case                           special case
2303     //
2304     // input : rax: dividend                         min_long
2305     //         reg: divisor                          -1
2306     //
2307     // output: rax: quotient  (= rax idiv reg)       min_long
2308     //         rdx: remainder (= rax irem reg)       0
2309     //
2310     //  Code sequence:
2311     //
2312     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2313     //    7:   00 00 80
2314     //    a:   48 39 d0                cmp    %rdx,%rax
2315     //    d:   75 08                   jne    17 <normal>
2316     //    f:   33 d2                   xor    %edx,%edx
2317     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2318     //   15:   74 05                   je     1c <done>
2319     // 0000000000000017 <normal>:
2320     //   17:   48 99                   cqto
2321     //   19:   48 f7 f9                idiv   $div
2322     // 000000000000001c <done>:
2323 
2324     // mov    $0x8000000000000000,%rdx
2325     emit_opcode(cbuf, Assembler::REX_W);
2326     emit_opcode(cbuf, 0xBA);
2327     emit_d8(cbuf, 0x00);
2328     emit_d8(cbuf, 0x00);
2329     emit_d8(cbuf, 0x00);
2330     emit_d8(cbuf, 0x00);
2331     emit_d8(cbuf, 0x00);
2332     emit_d8(cbuf, 0x00);
2333     emit_d8(cbuf, 0x00);
2334     emit_d8(cbuf, 0x80);
2335 
2336     // cmp    %rdx,%rax
2337     emit_opcode(cbuf, Assembler::REX_W);
2338     emit_opcode(cbuf, 0x39);
2339     emit_d8(cbuf, 0xD0);
2340 
2341     // jne    17 <normal>
2342     emit_opcode(cbuf, 0x75);
2343     emit_d8(cbuf, 0x08); // fixed: every instruction skipped here always carries a REX prefix
2344 
2345     // xor    %edx,%edx
2346     emit_opcode(cbuf, 0x33);
2347     emit_d8(cbuf, 0xD2);
2348 
2349     // cmp    $0xffffffffffffffff,$div
2350     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2351     emit_opcode(cbuf, 0x83);
2352     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2353     emit_d8(cbuf, 0xFF);
2354 
2355     // je     1c <done>
2356     emit_opcode(cbuf, 0x74);
2357     emit_d8(cbuf, 0x05); // skips cqto (2 bytes) plus the 3-byte idiv that the user of this rule emits
2358 
2359     // <normal>
2360     // cqto
2361     emit_opcode(cbuf, Assembler::REX_W);
2362     emit_opcode(cbuf, 0x99);
2363 
2364     // idivq (note: must be emitted by the user of this rule)
2365     // <done>
2366   %}
2367 
2368   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2369   enc_class OpcSE(immI imm)
2370   %{
2371     // Emit primary opcode and set sign-extend bit
2372     // Check for 8-bit immediate, and set sign extend bit in opcode
2373     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) { // immediate fits in a signed byte
2374       emit_opcode(cbuf, $primary | 0x02);
2375     } else {
2376       // 32-bit immediate
2377       emit_opcode(cbuf, $primary);
2378     }
2379   %}
2380 
2381   enc_class OpcSErm(rRegI dst, immI imm)
2382   %{
2383     // OpcSEr/m: optional REX_B prefix, sign-extend-aware opcode, then the r/m byte
2384     int dstenc = $dst$$reg;
2385     if (dstenc >= 8) {
2386       emit_opcode(cbuf, Assembler::REX_B);
2387       dstenc -= 8; // keep only the low 3 bits for the r/m byte; the prefix carries the rest
2388     }
2389     // Emit primary opcode and set sign-extend bit
2390     // Check for 8-bit immediate, and set sign extend bit in opcode
2391     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2392       emit_opcode(cbuf, $primary | 0x02);
2393     } else {
2394       // 32-bit immediate
2395       emit_opcode(cbuf, $primary);
2396     }
2397     // Emit r/m byte with secondary opcode, after primary opcode.
2398     emit_rm(cbuf, 0x3, $secondary, dstenc); // the immediate itself is not emitted by this enc_class
2399   %}
2400 
2401   enc_class OpcSErm_wide(rRegL dst, immI imm)
2402   %{
2403     // OpcSEr/m, 64-bit form: a REX_W or REX_WB prefix is always emitted
2404     int dstenc = $dst$$reg;
2405     if (dstenc < 8) {
2406       emit_opcode(cbuf, Assembler::REX_W);
2407     } else {
2408       emit_opcode(cbuf, Assembler::REX_WB);
2409       dstenc -= 8; // keep only the low 3 bits for the r/m byte; the prefix carries the rest
2410     }
2411     // Emit primary opcode and set sign-extend bit
2412     // Check for 8-bit immediate, and set sign extend bit in opcode
2413     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2414       emit_opcode(cbuf, $primary | 0x02);
2415     } else {
2416       // 32-bit immediate
2417       emit_opcode(cbuf, $primary);
2418     }
2419     // Emit r/m byte with secondary opcode, after primary opcode.
2420     emit_rm(cbuf, 0x3, $secondary, dstenc); // the immediate itself is not emitted by this enc_class
2421   %}
2422 
2423   enc_class Con8or32(immI imm)
2424   %{
2425     // Emit the immediate as 8 bits when it fits in a signed byte, else as 32 bits
2426     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2427       $$$emit8$imm$$constant;
2428     } else {
2429       // 32-bit immediate
2430       $$$emit32$imm$$constant;
2431     }
2432   %}
2433 
2434   enc_class opc2_reg(rRegI dst)
2435   %{
2436     // BSWAP: secondary opcode and destination register passed together to emit_cc
2437     emit_cc(cbuf, $secondary, $dst$$reg);
2438   %}
2439 
2440   enc_class opc3_reg(rRegI dst)
2441   %{
2442     // BSWAP: tertiary opcode and destination register passed together to emit_cc
2443     emit_cc(cbuf, $tertiary, $dst$$reg);
2444   %}
2445 
2446   enc_class reg_opc(rRegI div)
2447   %{
2448     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2449     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7); // secondary opcode in the middle field, low 3 bits of the register last
2450   %}
2451 
2452   enc_class enc_cmov(cmpOp cop)
2453   %{
2454     // CMOV: primary opcode byte, then secondary opcode combined with the condition code
2455     $$$emit8$primary;
2456     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2457   %}
2458 
2459   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2460   %{
2461     // Invert sense of branch from sense of cmov
2462     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2463     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2464                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2465                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2466     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2467     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3); // the extra prefix byte is why the displacement above is one larger in this mode
2468     if ($dst$$reg < 8) {
2469       if ($src$$reg >= 8) {
2470         emit_opcode(cbuf, Assembler::REX_B);
2471       }
2472     } else {
2473       if ($src$$reg < 8) {
2474         emit_opcode(cbuf, Assembler::REX_R);
2475       } else {
2476         emit_opcode(cbuf, Assembler::REX_RB);
2477       }
2478     }
2479     emit_opcode(cbuf, 0x0F);
2480     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2481     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // last byte of the move that the branch above jumps over
2482   %}
2483 
2484   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2485   %{
2486     // Invert sense of branch from sense of cmov
2487     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2488     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2489 
2490     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2491     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); // a prefix byte is emitted in both modes, so the size does not depend on the flag
2492     if ($dst$$reg < 8) {
2493       if ($src$$reg >= 8) {
2494         emit_opcode(cbuf, Assembler::REX_B);
2495       }
2496     } else {
2497       if ($src$$reg < 8) {
2498         emit_opcode(cbuf, Assembler::REX_R);
2499       } else {
2500         emit_opcode(cbuf, Assembler::REX_RB);
2501       }
2502     }
2503     emit_opcode(cbuf, 0x0F);
2504     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2505     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // last byte of the move that the branch above jumps over
2506   %}
2507 
2508   enc_class enc_PartialSubtypeCheck()
2509   %{
2510     Register Rrdi = as_Register(RDI_enc); // result register
2511     Register Rrax = as_Register(RAX_enc); // super class
2512     Register Rrcx = as_Register(RCX_enc); // killed
2513     Register Rrsi = as_Register(RSI_enc); // sub class
2514     Label miss;
2515     const bool set_cond_codes = true;
2516 
2517     MacroAssembler _masm(&cbuf);
2518     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2519                                      NULL, &miss,
2520                                      /*set_cond_codes:*/ true);
2521     if ($primary) {
2522       __ xorptr(Rrdi, Rrdi);
2523     }
2524     __ bind(miss);
2525   %}
2526 
2527   enc_class Java_To_Interpreter(method meth)
2528   %{
2529     // CALL Java_To_Interpreter
2530     // This is the instruction starting address for relocation info.
2531     cbuf.set_insts_mark();
2532     $$$emit8$primary;
2533     // CALL directly to the runtime
2534     emit_d32_reloc(cbuf,
2535                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2536                    runtime_call_Relocation::spec(),
2537                    RELOC_DISP32);
2538   %}
2539 
2540   enc_class preserve_SP %{
2541     debug_only(int off0 = cbuf.insts_size());
2542     MacroAssembler _masm(&cbuf);
2543     // RBP is preserved across all calls, even compiled calls.
2544     // Use it to preserve RSP in places where the callee might change the SP.
2545     __ movptr(rbp_mh_SP_save, rsp);
2546     debug_only(int off1 = cbuf.insts_size());
2547     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2548   %}
2549 
  // Counterpart of preserve_SP: copies rbp_mh_SP_save back into RSP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    // Reload the stack pointer from the register preserve_SP stored it in.
    __ movptr(rsp, rbp_mh_SP_save);
  %}
2554 
2555   enc_class Java_Static_Call(method meth)
2556   %{
2557     // JAVA STATIC CALL
2558     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2559     // determine who we intended to call.
2560     cbuf.set_insts_mark();
2561     $$$emit8$primary;
2562 
2563     if (!_method) {
2564       emit_d32_reloc(cbuf,
2565                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2566                      runtime_call_Relocation::spec(),
2567                      RELOC_DISP32);
2568     } else if (_optimized_virtual) {
2569       emit_d32_reloc(cbuf,
2570                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2571                      opt_virtual_call_Relocation::spec(),
2572                      RELOC_DISP32);
2573     } else {
2574       emit_d32_reloc(cbuf,
2575                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2576                      static_call_Relocation::spec(),
2577                      RELOC_DISP32);
2578     }
2579     if (_method) {
2580       // Emit stub for static call
2581       emit_java_to_interp(cbuf);
2582     }
2583   %}
2584 
2585   enc_class Java_Dynamic_Call(method meth)
2586   %{
2587     // JAVA DYNAMIC CALL
2588     // !!!!!
2589     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2590     // emit_call_dynamic_prologue( cbuf );
2591     cbuf.set_insts_mark();
2592 
2593     // movq rax, -1
2594     emit_opcode(cbuf, Assembler::REX_W);
2595     emit_opcode(cbuf, 0xB8 | RAX_enc);
2596     emit_d64_reloc(cbuf,
2597                    (int64_t) Universe::non_oop_word(),
2598                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2599     address virtual_call_oop_addr = cbuf.insts_mark();
2600     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2601     // who we intended to call.
2602     cbuf.set_insts_mark();
2603     $$$emit8$primary;
2604     emit_d32_reloc(cbuf,
2605                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2606                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2607                    RELOC_DISP32);
2608   %}
2609 
2610   enc_class Java_Compiled_Call(method meth)
2611   %{
2612     // JAVA COMPILED CALL
2613     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2614 
2615     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2616     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2617 
2618     // callq *disp(%rax)
2619     cbuf.set_insts_mark();
2620     $$$emit8$primary;
2621     if (disp < 0x80) {
2622       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2623       emit_d8(cbuf, disp); // Displacement
2624     } else {
2625       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2626       emit_d32(cbuf, disp); // Displacement
2627     }
2628   %}
2629 
2630   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2631   %{
2632     // SAL, SAR, SHR
2633     int dstenc = $dst$$reg;
2634     if (dstenc >= 8) {
2635       emit_opcode(cbuf, Assembler::REX_B);
2636       dstenc -= 8;
2637     }
2638     $$$emit8$primary;
2639     emit_rm(cbuf, 0x3, $secondary, dstenc);
2640     $$$emit8$shift$$constant;
2641   %}
2642 
2643   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2644   %{
2645     // SAL, SAR, SHR
2646     int dstenc = $dst$$reg;
2647     if (dstenc < 8) {
2648       emit_opcode(cbuf, Assembler::REX_W);
2649     } else {
2650       emit_opcode(cbuf, Assembler::REX_WB);
2651       dstenc -= 8;
2652     }
2653     $$$emit8$primary;
2654     emit_rm(cbuf, 0x3, $secondary, dstenc);
2655     $$$emit8$shift$$constant;
2656   %}
2657 
2658   enc_class load_immI(rRegI dst, immI src)
2659   %{
2660     int dstenc = $dst$$reg;
2661     if (dstenc >= 8) {
2662       emit_opcode(cbuf, Assembler::REX_B);
2663       dstenc -= 8;
2664     }
2665     emit_opcode(cbuf, 0xB8 | dstenc);
2666     $$$emit32$src$$constant;
2667   %}
2668 
2669   enc_class load_immL(rRegL dst, immL src)
2670   %{
2671     int dstenc = $dst$$reg;
2672     if (dstenc < 8) {
2673       emit_opcode(cbuf, Assembler::REX_W);
2674     } else {
2675       emit_opcode(cbuf, Assembler::REX_WB);
2676       dstenc -= 8;
2677     }
2678     emit_opcode(cbuf, 0xB8 | dstenc);
2679     emit_d64(cbuf, $src$$constant);
2680   %}
2681 
2682   enc_class load_immUL32(rRegL dst, immUL32 src)
2683   %{
2684     // same as load_immI, but this time we care about zeroes in the high word
2685     int dstenc = $dst$$reg;
2686     if (dstenc >= 8) {
2687       emit_opcode(cbuf, Assembler::REX_B);
2688       dstenc -= 8;
2689     }
2690     emit_opcode(cbuf, 0xB8 | dstenc);
2691     $$$emit32$src$$constant;
2692   %}
2693 
2694   enc_class load_immL32(rRegL dst, immL32 src)
2695   %{
2696     int dstenc = $dst$$reg;
2697     if (dstenc < 8) {
2698       emit_opcode(cbuf, Assembler::REX_W);
2699     } else {
2700       emit_opcode(cbuf, Assembler::REX_WB);
2701       dstenc -= 8;
2702     }
2703     emit_opcode(cbuf, 0xC7);
2704     emit_rm(cbuf, 0x03, 0x00, dstenc);
2705     $$$emit32$src$$constant;
2706   %}
2707 
2708   enc_class load_immP31(rRegP dst, immP32 src)
2709   %{
2710     // same as load_immI, but this time we care about zeroes in the high word
2711     int dstenc = $dst$$reg;
2712     if (dstenc >= 8) {
2713       emit_opcode(cbuf, Assembler::REX_B);
2714       dstenc -= 8;
2715     }
2716     emit_opcode(cbuf, 0xB8 | dstenc);
2717     $$$emit32$src$$constant;
2718   %}
2719 
2720   enc_class load_immP(rRegP dst, immP src)
2721   %{
2722     int dstenc = $dst$$reg;
2723     if (dstenc < 8) {
2724       emit_opcode(cbuf, Assembler::REX_W);
2725     } else {
2726       emit_opcode(cbuf, Assembler::REX_WB);
2727       dstenc -= 8;
2728     }
2729     emit_opcode(cbuf, 0xB8 | dstenc);
2730     // This next line should be generated from ADLC
2731     if ($src->constant_is_oop()) {
2732       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2733     } else {
2734       emit_d64(cbuf, $src$$constant);
2735     }
2736   %}
2737 
2738   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2739   enc_class enc_copy(rRegI dst, rRegI src)
2740   %{
2741     encode_copy(cbuf, $dst$$reg, $src$$reg);
2742   %}
2743 
2744   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2745   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2746     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2747   %}
2748 
2749   enc_class enc_copy_always(rRegI dst, rRegI src)
2750   %{
2751     int srcenc = $src$$reg;
2752     int dstenc = $dst$$reg;
2753 
2754     if (dstenc < 8) {
2755       if (srcenc >= 8) {
2756         emit_opcode(cbuf, Assembler::REX_B);
2757         srcenc -= 8;
2758       }
2759     } else {
2760       if (srcenc < 8) {
2761         emit_opcode(cbuf, Assembler::REX_R);
2762       } else {
2763         emit_opcode(cbuf, Assembler::REX_RB);
2764         srcenc -= 8;
2765       }
2766       dstenc -= 8;
2767     }
2768 
2769     emit_opcode(cbuf, 0x8B);
2770     emit_rm(cbuf, 0x3, dstenc, srcenc);
2771   %}
2772 
2773   enc_class enc_copy_wide(rRegL dst, rRegL src)
2774   %{
2775     int srcenc = $src$$reg;
2776     int dstenc = $dst$$reg;
2777 
2778     if (dstenc != srcenc) {
2779       if (dstenc < 8) {
2780         if (srcenc < 8) {
2781           emit_opcode(cbuf, Assembler::REX_W);
2782         } else {
2783           emit_opcode(cbuf, Assembler::REX_WB);
2784           srcenc -= 8;
2785         }
2786       } else {
2787         if (srcenc < 8) {
2788           emit_opcode(cbuf, Assembler::REX_WR);
2789         } else {
2790           emit_opcode(cbuf, Assembler::REX_WRB);
2791           srcenc -= 8;
2792         }
2793         dstenc -= 8;
2794       }
2795       emit_opcode(cbuf, 0x8B);
2796       emit_rm(cbuf, 0x3, dstenc, srcenc);
2797     }
2798   %}
2799 
  // Emits the constant of "src" as a 32-bit immediate via the ADLC emit32
  // built-in.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2805 
2806   enc_class Con64(immL src)
2807   %{
2808     // Output immediate
2809     emit_d64($src$$constant);
2810   %}
2811 
2812   enc_class Con32F_as_bits(immF src)
2813   %{
2814     // Output Float immediate bits
2815     jfloat jf = $src$$constant;
2816     jint jf_as_bits = jint_cast(jf);
2817     emit_d32(cbuf, jf_as_bits);
2818   %}
2819 
  // Emits the constant of "src" as a 16-bit immediate via the ADLC emit16
  // built-in.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2825 
2826   // How is this different from Con32??? XXX
2827   enc_class Con_d32(immI src)
2828   %{
2829     emit_d32(cbuf,$src$$constant);
2830   %}
2831 
2832   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2833     // Output immediate memory reference
2834     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2835     emit_d32(cbuf, 0x00);
2836   %}
2837 
2838   enc_class lock_prefix()
2839   %{
2840     if (os::is_MP()) {
2841       emit_opcode(cbuf, 0xF0); // lock
2842     }
2843   %}
2844 
2845   enc_class REX_mem(memory mem)
2846   %{
2847     if ($mem$$base >= 8) {
2848       if ($mem$$index < 8) {
2849         emit_opcode(cbuf, Assembler::REX_B);
2850       } else {
2851         emit_opcode(cbuf, Assembler::REX_XB);
2852       }
2853     } else {
2854       if ($mem$$index >= 8) {
2855         emit_opcode(cbuf, Assembler::REX_X);
2856       }
2857     }
2858   %}
2859 
2860   enc_class REX_mem_wide(memory mem)
2861   %{
2862     if ($mem$$base >= 8) {
2863       if ($mem$$index < 8) {
2864         emit_opcode(cbuf, Assembler::REX_WB);
2865       } else {
2866         emit_opcode(cbuf, Assembler::REX_WXB);
2867       }
2868     } else {
2869       if ($mem$$index < 8) {
2870         emit_opcode(cbuf, Assembler::REX_W);
2871       } else {
2872         emit_opcode(cbuf, Assembler::REX_WX);
2873       }
2874     }
2875   %}
2876 
2877   // for byte regs
2878   enc_class REX_breg(rRegI reg)
2879   %{
2880     if ($reg$$reg >= 4) {
2881       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2882     }
2883   %}
2884 
2885   // for byte regs
2886   enc_class REX_reg_breg(rRegI dst, rRegI src)
2887   %{
2888     if ($dst$$reg < 8) {
2889       if ($src$$reg >= 4) {
2890         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2891       }
2892     } else {
2893       if ($src$$reg < 8) {
2894         emit_opcode(cbuf, Assembler::REX_R);
2895       } else {
2896         emit_opcode(cbuf, Assembler::REX_RB);
2897       }
2898     }
2899   %}
2900 
2901   // for byte regs
2902   enc_class REX_breg_mem(rRegI reg, memory mem)
2903   %{
2904     if ($reg$$reg < 8) {
2905       if ($mem$$base < 8) {
2906         if ($mem$$index >= 8) {
2907           emit_opcode(cbuf, Assembler::REX_X);
2908         } else if ($reg$$reg >= 4) {
2909           emit_opcode(cbuf, Assembler::REX);
2910         }
2911       } else {
2912         if ($mem$$index < 8) {
2913           emit_opcode(cbuf, Assembler::REX_B);
2914         } else {
2915           emit_opcode(cbuf, Assembler::REX_XB);
2916         }
2917       }
2918     } else {
2919       if ($mem$$base < 8) {
2920         if ($mem$$index < 8) {
2921           emit_opcode(cbuf, Assembler::REX_R);
2922         } else {
2923           emit_opcode(cbuf, Assembler::REX_RX);
2924         }
2925       } else {
2926         if ($mem$$index < 8) {
2927           emit_opcode(cbuf, Assembler::REX_RB);
2928         } else {
2929           emit_opcode(cbuf, Assembler::REX_RXB);
2930         }
2931       }
2932     }
2933   %}
2934 
2935   enc_class REX_reg(rRegI reg)
2936   %{
2937     if ($reg$$reg >= 8) {
2938       emit_opcode(cbuf, Assembler::REX_B);
2939     }
2940   %}
2941 
2942   enc_class REX_reg_wide(rRegI reg)
2943   %{
2944     if ($reg$$reg < 8) {
2945       emit_opcode(cbuf, Assembler::REX_W);
2946     } else {
2947       emit_opcode(cbuf, Assembler::REX_WB);
2948     }
2949   %}
2950 
2951   enc_class REX_reg_reg(rRegI dst, rRegI src)
2952   %{
2953     if ($dst$$reg < 8) {
2954       if ($src$$reg >= 8) {
2955         emit_opcode(cbuf, Assembler::REX_B);
2956       }
2957     } else {
2958       if ($src$$reg < 8) {
2959         emit_opcode(cbuf, Assembler::REX_R);
2960       } else {
2961         emit_opcode(cbuf, Assembler::REX_RB);
2962       }
2963     }
2964   %}
2965 
2966   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2967   %{
2968     if ($dst$$reg < 8) {
2969       if ($src$$reg < 8) {
2970         emit_opcode(cbuf, Assembler::REX_W);
2971       } else {
2972         emit_opcode(cbuf, Assembler::REX_WB);
2973       }
2974     } else {
2975       if ($src$$reg < 8) {
2976         emit_opcode(cbuf, Assembler::REX_WR);
2977       } else {
2978         emit_opcode(cbuf, Assembler::REX_WRB);
2979       }
2980     }
2981   %}
2982 
2983   enc_class REX_reg_mem(rRegI reg, memory mem)
2984   %{
2985     if ($reg$$reg < 8) {
2986       if ($mem$$base < 8) {
2987         if ($mem$$index >= 8) {
2988           emit_opcode(cbuf, Assembler::REX_X);
2989         }
2990       } else {
2991         if ($mem$$index < 8) {
2992           emit_opcode(cbuf, Assembler::REX_B);
2993         } else {
2994           emit_opcode(cbuf, Assembler::REX_XB);
2995         }
2996       }
2997     } else {
2998       if ($mem$$base < 8) {
2999         if ($mem$$index < 8) {
3000           emit_opcode(cbuf, Assembler::REX_R);
3001         } else {
3002           emit_opcode(cbuf, Assembler::REX_RX);
3003         }
3004       } else {
3005         if ($mem$$index < 8) {
3006           emit_opcode(cbuf, Assembler::REX_RB);
3007         } else {
3008           emit_opcode(cbuf, Assembler::REX_RXB);
3009         }
3010       }
3011     }
3012   %}
3013 
3014   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3015   %{
3016     if ($reg$$reg < 8) {
3017       if ($mem$$base < 8) {
3018         if ($mem$$index < 8) {
3019           emit_opcode(cbuf, Assembler::REX_W);
3020         } else {
3021           emit_opcode(cbuf, Assembler::REX_WX);
3022         }
3023       } else {
3024         if ($mem$$index < 8) {
3025           emit_opcode(cbuf, Assembler::REX_WB);
3026         } else {
3027           emit_opcode(cbuf, Assembler::REX_WXB);
3028         }
3029       }
3030     } else {
3031       if ($mem$$base < 8) {
3032         if ($mem$$index < 8) {
3033           emit_opcode(cbuf, Assembler::REX_WR);
3034         } else {
3035           emit_opcode(cbuf, Assembler::REX_WRX);
3036         }
3037       } else {
3038         if ($mem$$index < 8) {
3039           emit_opcode(cbuf, Assembler::REX_WRB);
3040         } else {
3041           emit_opcode(cbuf, Assembler::REX_WRXB);
3042         }
3043       }
3044     }
3045   %}
3046 
3047   enc_class reg_mem(rRegI ereg, memory mem)
3048   %{
3049     // High registers handle in encode_RegMem
3050     int reg = $ereg$$reg;
3051     int base = $mem$$base;
3052     int index = $mem$$index;
3053     int scale = $mem$$scale;
3054     int disp = $mem$$disp;
3055     bool disp_is_oop = $mem->disp_is_oop();
3056 
3057     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3058   %}
3059 
3060   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3061   %{
3062     int rm_byte_opcode = $rm_opcode$$constant;
3063 
3064     // High registers handle in encode_RegMem
3065     int base = $mem$$base;
3066     int index = $mem$$index;
3067     int scale = $mem$$scale;
3068     int displace = $mem$$disp;
3069 
3070     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3071                                             // working with static
3072                                             // globals
3073     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3074                   disp_is_oop);
3075   %}
3076 
3077   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3078   %{
3079     int reg_encoding = $dst$$reg;
3080     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3081     int index        = 0x04;            // 0x04 indicates no index
3082     int scale        = 0x00;            // 0x00 indicates no scale
3083     int displace     = $src1$$constant; // 0x00 indicates no displacement
3084     bool disp_is_oop = false;
3085     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3086                   disp_is_oop);
3087   %}
3088 
3089   enc_class neg_reg(rRegI dst)
3090   %{
3091     int dstenc = $dst$$reg;
3092     if (dstenc >= 8) {
3093       emit_opcode(cbuf, Assembler::REX_B);
3094       dstenc -= 8;
3095     }
3096     // NEG $dst
3097     emit_opcode(cbuf, 0xF7);
3098     emit_rm(cbuf, 0x3, 0x03, dstenc);
3099   %}
3100 
3101   enc_class neg_reg_wide(rRegI dst)
3102   %{
3103     int dstenc = $dst$$reg;
3104     if (dstenc < 8) {
3105       emit_opcode(cbuf, Assembler::REX_W);
3106     } else {
3107       emit_opcode(cbuf, Assembler::REX_WB);
3108       dstenc -= 8;
3109     }
3110     // NEG $dst
3111     emit_opcode(cbuf, 0xF7);
3112     emit_rm(cbuf, 0x3, 0x03, dstenc);
3113   %}
3114 
3115   enc_class setLT_reg(rRegI dst)
3116   %{
3117     int dstenc = $dst$$reg;
3118     if (dstenc >= 8) {
3119       emit_opcode(cbuf, Assembler::REX_B);
3120       dstenc -= 8;
3121     } else if (dstenc >= 4) {
3122       emit_opcode(cbuf, Assembler::REX);
3123     }
3124     // SETLT $dst
3125     emit_opcode(cbuf, 0x0F);
3126     emit_opcode(cbuf, 0x9C);
3127     emit_rm(cbuf, 0x3, 0x0, dstenc);
3128   %}
3129 
3130   enc_class setNZ_reg(rRegI dst)
3131   %{
3132     int dstenc = $dst$$reg;
3133     if (dstenc >= 8) {
3134       emit_opcode(cbuf, Assembler::REX_B);
3135       dstenc -= 8;
3136     } else if (dstenc >= 4) {
3137       emit_opcode(cbuf, Assembler::REX);
3138     }
3139     // SETNZ $dst
3140     emit_opcode(cbuf, 0x0F);
3141     emit_opcode(cbuf, 0x95);
3142     emit_rm(cbuf, 0x3, 0x0, dstenc);
3143   %}
3144 
3145 
3146   // Compare the lonogs and set -1, 0, or 1 into dst
3147   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3148   %{
3149     int src1enc = $src1$$reg;
3150     int src2enc = $src2$$reg;
3151     int dstenc = $dst$$reg;
3152 
3153     // cmpq $src1, $src2
3154     if (src1enc < 8) {
3155       if (src2enc < 8) {
3156         emit_opcode(cbuf, Assembler::REX_W);
3157       } else {
3158         emit_opcode(cbuf, Assembler::REX_WB);
3159       }
3160     } else {
3161       if (src2enc < 8) {
3162         emit_opcode(cbuf, Assembler::REX_WR);
3163       } else {
3164         emit_opcode(cbuf, Assembler::REX_WRB);
3165       }
3166     }
3167     emit_opcode(cbuf, 0x3B);
3168     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3169 
3170     // movl $dst, -1
3171     if (dstenc >= 8) {
3172       emit_opcode(cbuf, Assembler::REX_B);
3173     }
3174     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3175     emit_d32(cbuf, -1);
3176 
3177     // jl,s done
3178     emit_opcode(cbuf, 0x7C);
3179     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3180 
3181     // setne $dst
3182     if (dstenc >= 4) {
3183       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3184     }
3185     emit_opcode(cbuf, 0x0F);
3186     emit_opcode(cbuf, 0x95);
3187     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3188 
3189     // movzbl $dst, $dst
3190     if (dstenc >= 4) {
3191       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3192     }
3193     emit_opcode(cbuf, 0x0F);
3194     emit_opcode(cbuf, 0xB6);
3195     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3196   %}
3197 
3198   enc_class Push_ResultXD(regD dst) %{
3199     int dstenc = $dst$$reg;
3200 
3201     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3202 
3203     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3204     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3205     if (dstenc >= 8) {
3206       emit_opcode(cbuf, Assembler::REX_R);
3207     }
3208     emit_opcode  (cbuf, 0x0F );
3209     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3210     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3211 
3212     // add rsp,8
3213     emit_opcode(cbuf, Assembler::REX_W);
3214     emit_opcode(cbuf,0x83);
3215     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3216     emit_d8(cbuf,0x08);
3217   %}
3218 
3219   enc_class Push_SrcXD(regD src) %{
3220     int srcenc = $src$$reg;
3221 
3222     // subq rsp,#8
3223     emit_opcode(cbuf, Assembler::REX_W);
3224     emit_opcode(cbuf, 0x83);
3225     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3226     emit_d8(cbuf, 0x8);
3227 
3228     // movsd [rsp],src
3229     emit_opcode(cbuf, 0xF2);
3230     if (srcenc >= 8) {
3231       emit_opcode(cbuf, Assembler::REX_R);
3232     }
3233     emit_opcode(cbuf, 0x0F);
3234     emit_opcode(cbuf, 0x11);
3235     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3236 
3237     // fldd [rsp]
3238     emit_opcode(cbuf, 0x66);
3239     emit_opcode(cbuf, 0xDD);
3240     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3241   %}
3242 
3243 
3244   enc_class movq_ld(regD dst, memory mem) %{
3245     MacroAssembler _masm(&cbuf);
3246     __ movq($dst$$XMMRegister, $mem$$Address);
3247   %}
3248 
3249   enc_class movq_st(memory mem, regD src) %{
3250     MacroAssembler _masm(&cbuf);
3251     __ movq($mem$$Address, $src$$XMMRegister);
3252   %}
3253 
3254   enc_class pshufd_8x8(regF dst, regF src) %{
3255     MacroAssembler _masm(&cbuf);
3256 
3257     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3258     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3259     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3260   %}
3261 
3262   enc_class pshufd_4x16(regF dst, regF src) %{
3263     MacroAssembler _masm(&cbuf);
3264 
3265     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3266   %}
3267 
3268   enc_class pshufd(regD dst, regD src, int mode) %{
3269     MacroAssembler _masm(&cbuf);
3270 
3271     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3272   %}
3273 
3274   enc_class pxor(regD dst, regD src) %{
3275     MacroAssembler _masm(&cbuf);
3276 
3277     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3278   %}
3279 
3280   enc_class mov_i2x(regD dst, rRegI src) %{
3281     MacroAssembler _masm(&cbuf);
3282 
3283     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3284   %}
3285 
3286   // obj: object to lock
3287   // box: box address (header location) -- killed
3288   // tmp: rax -- killed
3289   // scr: rbx -- killed
3290   //
3291   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3292   // from i486.ad.  See that file for comments.
3293   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3294   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3295 
3296 
  // Emits the inline fast path for monitor enter.
  //
  // Operands: obj = object being locked, box = address of the on-stack lock
  // box, tmp = RAX (used implicitly by cmpxchg), scr = scratch register.
  //
  // One of three code shapes is generated, selected by the EmitSync flag:
  //   EmitSync & 1 : store unused_mark into the box and compare rsp against
  //                  NULL_WORD; no locking attempt is made inline.
  //   EmitSync & 2 : simple stack-lock attempt via cmpxchg on the mark word,
  //                  with a recursive-lock check on failure.
  //   otherwise    : as above, plus a separate path for inflated monitors
  //                  that tries to CAS r15_thread into the monitor's owner
  //                  field.
  // When _counters is non-NULL, lock statistics counters are also updated.
  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
  %{
    Register objReg = as_Register((int)$obj$$reg);
    Register boxReg = as_Register((int)$box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);
    MacroAssembler masm(&cbuf);

    // Verify uniqueness of register assignments -- necessary but not sufficient
    assert (objReg != boxReg && objReg != tmpReg &&
            objReg != scrReg && tmpReg != scrReg, "invariant") ;

    // Count every entry into this fast path when statistics are collected.
    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
        // NOTE(review): presumably this compare exists only to leave the
        // flags in the "not equal" state for the caller -- confirm.
        masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }
        // QQQ was movl...
        // tmpReg = mark word | 1, saved into the box as the displaced header.
        masm.movptr(tmpReg, 0x1);
        masm.orptr(tmpReg, Address(objReg, 0));
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (os::is_MP()) {
          masm.lock();
        }
        // Try to install the box address as the object's mark word.
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);

        // Recursive locking
        // tmpReg holds the mark word observed by the failed cmpxchg; the
        // masked difference from rsp is stored into the box.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);

        masm.bind(DONE_LABEL);
        masm.nop(); // avoid branch to branch
    } else {
        Label DONE_LABEL, IsInflated, Egress;

        // Load the mark word and branch on bit 0x02.
        masm.movptr(tmpReg, Address(objReg, 0)) ;
        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
        masm.jcc   (Assembler::notZero, IsInflated) ;

        // it's stack-locked, biased or neutral
        // TODO: optimize markword triage order to reduce the number of
        // conditional branches in the most common cases.
        // Beware -- there's a subtle invariant that fetch of the markword
        // at [FETCH], below, will never observe a biased encoding (*101b).
        // If this invariant is not held we'll suffer exclusion (safety) failure.

        if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
        }

        // was q will it destroy high?
        // tmpReg = mark word | 1, saved into the box as the displaced header.
        masm.orl   (tmpReg, 1) ;
        masm.movptr(Address(boxReg, 0), tmpReg) ;
        if (os::is_MP()) { masm.lock(); }
        // Try to install the box address as the object's mark word.
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jcc   (Assembler::equal, DONE_LABEL);

        // Recursive locking
        // tmpReg holds the mark word observed by the failed cmpxchg; the
        // masked difference from rsp is stored into the box.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jmp   (DONE_LABEL) ;

        masm.bind  (IsInflated) ;
        // It's inflated

        // TODO: someday avoid the ST-before-CAS penalty by
        // relocating (deferring) the following ST.
        // We should also think about trying a CAS without having
        // fetched _owner.  If the CAS is successful we may
        // avoid an RTO->RTS upgrade on the $line.
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;

        // boxReg now carries the mark word value loaded above; the monitor
        // fields are addressed from it with a -2 adjustment.
        // NOTE(review): the -2 presumably cancels the tag bits set in an
        // inflated mark word -- confirm.
        masm.mov    (boxReg, tmpReg) ;
        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
        masm.testptr(tmpReg, tmpReg) ;
        masm.jcc    (Assembler::notZero, DONE_LABEL) ;

        // It's inflated and appears unlocked
        // tmpReg is zero here, so the cmpxchg succeeds only if the owner
        // field is still zero.
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
        // Intentional fall-through into DONE_LABEL ...

        masm.bind  (DONE_LABEL) ;
        masm.nop   () ;                 // avoid jmp to jmp
    }
  %}
3405 
3406   // obj: object to unlock
3407   // box: box address (displaced header location), killed
3408   // RBX: killed tmp; cannot be obj nor box
3409   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3410   %{
3411 
3412     Register objReg = as_Register($obj$$reg);
3413     Register boxReg = as_Register($box$$reg);
3414     Register tmpReg = as_Register($tmp$$reg);
3415     MacroAssembler masm(&cbuf);
3416 
3417     if (EmitSync & 4) {
3418        masm.cmpptr(rsp, 0) ;
3419     } else
3420     if (EmitSync & 8) {
3421        Label DONE_LABEL;
3422        if (UseBiasedLocking) {
3423          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3424        }
3425 
3426        // Check whether the displaced header is 0
3427        //(=> recursive unlock)
3428        masm.movptr(tmpReg, Address(boxReg, 0));
3429        masm.testptr(tmpReg, tmpReg);
3430        masm.jcc(Assembler::zero, DONE_LABEL);
3431 
3432        // If not recursive lock, reset the header to displaced header
3433        if (os::is_MP()) {
3434          masm.lock();
3435        }
3436        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3437        masm.bind(DONE_LABEL);
3438        masm.nop(); // avoid branch to branch
3439     } else {
3440        Label DONE_LABEL, Stacked, CheckSucc ;
3441 
3442        if (UseBiasedLocking && !UseOptoBiasInlining) {
3443          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3444        }
3445 
3446        masm.movptr(tmpReg, Address(objReg, 0)) ;
3447        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
3448        masm.jcc   (Assembler::zero, DONE_LABEL) ;
3449        masm.testl (tmpReg, 0x02) ;
3450        masm.jcc   (Assembler::zero, Stacked) ;
3451 
3452        // It's inflated
3453        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3454        masm.xorptr(boxReg, r15_thread) ;
3455        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3456        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3457        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3458        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3459        masm.jcc   (Assembler::notZero, CheckSucc) ;
3460        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3461        masm.jmp   (DONE_LABEL) ;
3462 
3463        if ((EmitSync & 65536) == 0) {
3464          Label LSuccess, LGoSlowPath ;
3465          masm.bind  (CheckSucc) ;
3466          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3467          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3468 
3469          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3470          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3471          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3472          // are all faster when the write buffer is populated.
3473          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3474          if (os::is_MP()) {
3475             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3476          }
3477          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3478          masm.jcc   (Assembler::notZero, LSuccess) ;
3479 
3480          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3481          if (os::is_MP()) { masm.lock(); }
3482          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3483          masm.jcc   (Assembler::notEqual, LSuccess) ;
3484          // Intentional fall-through into slow-path
3485 
3486          masm.bind  (LGoSlowPath) ;
3487          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3488          masm.jmp   (DONE_LABEL) ;
3489 
3490          masm.bind  (LSuccess) ;
3491          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3492          masm.jmp   (DONE_LABEL) ;
3493        }
3494 
3495        masm.bind  (Stacked) ;
3496        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3497        if (os::is_MP()) { masm.lock(); }
3498        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3499 
3500        if (EmitSync & 65536) {
3501           masm.bind (CheckSucc) ;
3502        }
3503        masm.bind(DONE_LABEL);
3504        if (EmitSync & 32768) {
3505           masm.nop();                      // avoid branch to branch
3506        }
3507     }
3508   %}
3509 
3510 
3511   enc_class enc_rethrow()
3512   %{
         // Emits a jump to OptoRuntime::rethrow_stub(): opcode 0xE9 followed by
         // a 32-bit displacement that carries a runtime-call relocation.
3513     cbuf.set_insts_mark();
3514     emit_opcode(cbuf, 0xE9); // jmp entry
         // Displacement = stub address - cbuf.insts_end() - 4, i.e. it is taken
         // relative to the byte that follows the 4-byte operand emitted here.
3515     emit_d32_reloc(cbuf,
3516                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3517                    runtime_call_Relocation::spec(),
3518                    RELOC_DISP32);
3519   %}
3520 
3521   enc_class absF_encoding(regF dst)
3522   %{
         // Emits "0F 54 /r" (ANDPS in the x86 opcode map) with dst as the
         // register operand and the constant located at
         // StubRoutines::x86::float_sign_mask() as the memory operand.
3523     int dstenc = $dst$$reg;
3524     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3525 
3526     cbuf.set_insts_mark();
3527     if (dstenc >= 8) {
           // Register encodings 8 and up need a REX.R prefix; only the low
           // three bits are then placed in the ModRM reg field.
3528       emit_opcode(cbuf, Assembler::REX_R);
3529       dstenc -= 8;
3530     }
3531     // XXX reg_mem doesn't support RIP-relative addressing yet
3532     emit_opcode(cbuf, 0x0F);
3533     emit_opcode(cbuf, 0x54);
         // ModRM mod=00, rm=101 is the disp32-only form; the relocated 32-bit
         // field for the sign-mask address follows it.
3534     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3535     emit_d32_reloc(cbuf, signmask_address);
3536   %}
3537 
3538   enc_class absD_encoding(regD dst)
3539   %{
         // Emits "66 0F 54 /r" (ANDPD in the x86 opcode map) with dst as the
         // register operand and the constant located at
         // StubRoutines::x86::double_sign_mask() as the memory operand.
3540     int dstenc = $dst$$reg;
3541     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3542 
3543     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted ahead of the optional REX prefix.
3544     emit_opcode(cbuf, 0x66);
3545     if (dstenc >= 8) {
           // Register encodings 8 and up need a REX.R prefix; only the low
           // three bits are then placed in the ModRM reg field.
3546       emit_opcode(cbuf, Assembler::REX_R);
3547       dstenc -= 8;
3548     }
3549     // XXX reg_mem doesn't support RIP-relative addressing yet
3550     emit_opcode(cbuf, 0x0F);
3551     emit_opcode(cbuf, 0x54);
         // ModRM mod=00, rm=101 is the disp32-only form; the relocated 32-bit
         // field for the sign-mask address follows it.
3552     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3553     emit_d32_reloc(cbuf, signmask_address);
3554   %}
3555 
3556   enc_class negF_encoding(regF dst)
3557   %{
         // Emits "0F 57 /r" (XORPS in the x86 opcode map) with dst as the
         // register operand and the constant located at
         // StubRoutines::x86::float_sign_flip() as the memory operand.
3558     int dstenc = $dst$$reg;
3559     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3560 
3561     cbuf.set_insts_mark();
3562     if (dstenc >= 8) {
           // Register encodings 8 and up need a REX.R prefix; only the low
           // three bits are then placed in the ModRM reg field.
3563       emit_opcode(cbuf, Assembler::REX_R);
3564       dstenc -= 8;
3565     }
3566     // XXX reg_mem doesn't support RIP-relative addressing yet
3567     emit_opcode(cbuf, 0x0F);
3568     emit_opcode(cbuf, 0x57);
         // ModRM mod=00, rm=101 is the disp32-only form; the relocated 32-bit
         // field for the sign-flip address follows it.
3569     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3570     emit_d32_reloc(cbuf, signflip_address);
3571   %}
3572 
3573   enc_class negD_encoding(regD dst)
3574   %{
         // Emits "66 0F 57 /r" (XORPD in the x86 opcode map) with dst as the
         // register operand and the constant located at
         // StubRoutines::x86::double_sign_flip() as the memory operand.
3575     int dstenc = $dst$$reg;
3576     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3577 
3578     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted ahead of the optional REX prefix.
3579     emit_opcode(cbuf, 0x66);
3580     if (dstenc >= 8) {
           // Register encodings 8 and up need a REX.R prefix; only the low
           // three bits are then placed in the ModRM reg field.
3581       emit_opcode(cbuf, Assembler::REX_R);
3582       dstenc -= 8;
3583     }
3584     // XXX reg_mem doesn't support RIP-relative addressing yet
3585     emit_opcode(cbuf, 0x0F);
3586     emit_opcode(cbuf, 0x57);
         // ModRM mod=00, rm=101 is the disp32-only form; the relocated 32-bit
         // field for the sign-flip address follows it.
3587     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3588     emit_d32_reloc(cbuf, signflip_address);
3589   %}
3590 
3591   enc_class f2i_fixup(rRegI dst, regF src)
3592   %{
         // Emits the following sequence (the instruction that produced dst is
         // emitted elsewhere):
         //     cmpl  dst, 0x80000000
         //     jne,s done
         //     subq  rsp, 8
         //     movss [rsp], src
         //     call  StubRoutines::x86::f2i_fixup()
         //     popq  dst
         //   done:
         // NOTE(review): 0x80000000 is presumably the value the preceding
         // float-to-int conversion yields for NaN / out-of-range inputs --
         // confirm at the instruct that uses this encoding.
3593     int dstenc = $dst$$reg;
3594     int srcenc = $src$$reg;
3595 
3596     // cmpl $dst, #0x80000000
         // dst sits in the ModRM rm field, so encodings >= 8 need REX.B.
3597     if (dstenc >= 8) {
3598       emit_opcode(cbuf, Assembler::REX_B);
3599     }
3600     emit_opcode(cbuf, 0x81);
3601     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3602     emit_d32(cbuf, 0x80000000);
3603 
3604     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // between here and "done": 4 (subq) + 5 (movss) + 5 (call) + 1 (popq)
         // = 0xF, plus one byte for each REX prefix emitted below (REX_R when
         // srcenc >= 8, REX_B when dstenc >= 8).  These constants must be kept
         // in sync with the sequence that follows.
3605     emit_opcode(cbuf, 0x75);
3606     if (srcenc < 8 && dstenc < 8) {
3607       emit_d8(cbuf, 0xF);
3608     } else if (srcenc >= 8 && dstenc >= 8) {
3609       emit_d8(cbuf, 0x11);
3610     } else {
3611       emit_d8(cbuf, 0x10);
3612     }
3613 
3614     // subq rsp, #8
3615     emit_opcode(cbuf, Assembler::REX_W);
3616     emit_opcode(cbuf, 0x83);
3617     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3618     emit_d8(cbuf, 8);
3619 
3620     // movss [rsp], $src
         // src sits in the ModRM reg field, so encodings >= 8 need REX.R; the
         // REX byte goes after the 0xF3 prefix.
3621     emit_opcode(cbuf, 0xF3);
3622     if (srcenc >= 8) {
3623       emit_opcode(cbuf, Assembler::REX_R);
3624     }
3625     emit_opcode(cbuf, 0x0F);
3626     emit_opcode(cbuf, 0x11);
3627     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3628 
3629     // call f2i_fixup
         // Displacement is relative to the end of the 4-byte operand, hence
         // the "- 4" after insts_end().
3630     cbuf.set_insts_mark();
3631     emit_opcode(cbuf, 0xE8);
3632     emit_d32_reloc(cbuf,
3633                    (int)
3634                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3635                    runtime_call_Relocation::spec(),
3636                    RELOC_DISP32);
3637 
3638     // popq $dst
         // Pops the 8 bytes reserved by the subq above into dst.
3639     if (dstenc >= 8) {
3640       emit_opcode(cbuf, Assembler::REX_B);
3641     }
3642     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3643 
3644     // done:
3645   %}
3646 
3647   enc_class f2l_fixup(rRegL dst, regF src)
3648   %{
         // Emits the following sequence (the instruction that produced dst is
         // emitted elsewhere):
         //     cmpq  dst, [constant at StubRoutines::x86::double_sign_flip()]
         //     jne,s done
         //     subq  rsp, 8
         //     movss [rsp], src
         //     call  StubRoutines::x86::f2l_fixup()
         //     popq  dst
         //   done:
         // NOTE(review): the compared constant is presumably the value the
         // preceding float-to-long conversion yields for NaN / out-of-range
         // inputs -- confirm at the instruct that uses this encoding.
3649     int dstenc = $dst$$reg;
3650     int srcenc = $src$$reg;
3651     address const_address = (address) StubRoutines::x86::double_sign_flip();
3652 
3653     // cmpq $dst, [0x8000000000000000]
         // dst sits in the ModRM reg field here, so encodings >= 8 need REX.R
         // in addition to REX.W (REX_WR).
3654     cbuf.set_insts_mark();
3655     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3656     emit_opcode(cbuf, 0x39);
3657     // XXX reg_mem doesn't support RIP-relative addressing yet
3658     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3659     emit_d32_reloc(cbuf, const_address);
3660 
3661 
3662     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // between here and "done": 4 (subq) + 5 (movss) + 5 (call) + 1 (popq)
         // = 0xF, plus one byte for each REX prefix emitted below (REX_R when
         // srcenc >= 8, REX_B when dstenc >= 8).  These constants must be kept
         // in sync with the sequence that follows.
3663     emit_opcode(cbuf, 0x75);
3664     if (srcenc < 8 && dstenc < 8) {
3665       emit_d8(cbuf, 0xF);
3666     } else if (srcenc >= 8 && dstenc >= 8) {
3667       emit_d8(cbuf, 0x11);
3668     } else {
3669       emit_d8(cbuf, 0x10);
3670     }
3671 
3672     // subq rsp, #8
3673     emit_opcode(cbuf, Assembler::REX_W);
3674     emit_opcode(cbuf, 0x83);
3675     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3676     emit_d8(cbuf, 8);
3677 
3678     // movss [rsp], $src
         // src sits in the ModRM reg field, so encodings >= 8 need REX.R; the
         // REX byte goes after the 0xF3 prefix.
3679     emit_opcode(cbuf, 0xF3);
3680     if (srcenc >= 8) {
3681       emit_opcode(cbuf, Assembler::REX_R);
3682     }
3683     emit_opcode(cbuf, 0x0F);
3684     emit_opcode(cbuf, 0x11);
3685     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3686 
3687     // call f2l_fixup
         // Displacement is relative to the end of the 4-byte operand, hence
         // the "- 4" after insts_end().
3688     cbuf.set_insts_mark();
3689     emit_opcode(cbuf, 0xE8);
3690     emit_d32_reloc(cbuf,
3691                    (int)
3692                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3693                    runtime_call_Relocation::spec(),
3694                    RELOC_DISP32);
3695 
3696     // popq $dst
         // Pops the 8 bytes reserved by the subq above into dst.
3697     if (dstenc >= 8) {
3698       emit_opcode(cbuf, Assembler::REX_B);
3699     }
3700     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3701 
3702     // done:
3703   %}
3704 
3705   enc_class d2i_fixup(rRegI dst, regD src)
3706   %{
         // Emits the following sequence (the instruction that produced dst is
         // emitted elsewhere):
         //     cmpl  dst, 0x80000000
         //     jne,s done
         //     subq  rsp, 8
         //     movsd [rsp], src
         //     call  StubRoutines::x86::d2i_fixup()
         //     popq  dst
         //   done:
         // NOTE(review): 0x80000000 is presumably the value the preceding
         // double-to-int conversion yields for NaN / out-of-range inputs --
         // confirm at the instruct that uses this encoding.
3707     int dstenc = $dst$$reg;
3708     int srcenc = $src$$reg;
3709 
3710     // cmpl $dst, #0x80000000
         // dst sits in the ModRM rm field, so encodings >= 8 need REX.B.
3711     if (dstenc >= 8) {
3712       emit_opcode(cbuf, Assembler::REX_B);
3713     }
3714     emit_opcode(cbuf, 0x81);
3715     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3716     emit_d32(cbuf, 0x80000000);
3717 
3718     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // between here and "done": 4 (subq) + 5 (movsd) + 5 (call) + 1 (popq)
         // = 0xF, plus one byte for each REX prefix emitted below (REX_R when
         // srcenc >= 8, REX_B when dstenc >= 8).  These constants must be kept
         // in sync with the sequence that follows.
3719     emit_opcode(cbuf, 0x75);
3720     if (srcenc < 8 && dstenc < 8) {
3721       emit_d8(cbuf, 0xF);
3722     } else if (srcenc >= 8 && dstenc >= 8) {
3723       emit_d8(cbuf, 0x11);
3724     } else {
3725       emit_d8(cbuf, 0x10);
3726     }
3727 
3728     // subq rsp, #8
3729     emit_opcode(cbuf, Assembler::REX_W);
3730     emit_opcode(cbuf, 0x83);
3731     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3732     emit_d8(cbuf, 8);
3733 
3734     // movsd [rsp], $src
         // src sits in the ModRM reg field, so encodings >= 8 need REX.R; the
         // REX byte goes after the 0xF2 prefix.
3735     emit_opcode(cbuf, 0xF2);
3736     if (srcenc >= 8) {
3737       emit_opcode(cbuf, Assembler::REX_R);
3738     }
3739     emit_opcode(cbuf, 0x0F);
3740     emit_opcode(cbuf, 0x11);
3741     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3742 
3743     // call d2i_fixup
         // Displacement is relative to the end of the 4-byte operand, hence
         // the "- 4" after insts_end().
3744     cbuf.set_insts_mark();
3745     emit_opcode(cbuf, 0xE8);
3746     emit_d32_reloc(cbuf,
3747                    (int)
3748                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3749                    runtime_call_Relocation::spec(),
3750                    RELOC_DISP32);
3751 
3752     // popq $dst
         // Pops the 8 bytes reserved by the subq above into dst.
3753     if (dstenc >= 8) {
3754       emit_opcode(cbuf, Assembler::REX_B);
3755     }
3756     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3757 
3758     // done:
3759   %}
3760 
3761   enc_class d2l_fixup(rRegL dst, regD src)
3762   %{
         // Emits the following sequence (the instruction that produced dst is
         // emitted elsewhere):
         //     cmpq  dst, [constant at StubRoutines::x86::double_sign_flip()]
         //     jne,s done
         //     subq  rsp, 8
         //     movsd [rsp], src
         //     call  StubRoutines::x86::d2l_fixup()
         //     popq  dst
         //   done:
         // NOTE(review): the compared constant is presumably the value the
         // preceding double-to-long conversion yields for NaN / out-of-range
         // inputs -- confirm at the instruct that uses this encoding.
3763     int dstenc = $dst$$reg;
3764     int srcenc = $src$$reg;
3765     address const_address = (address) StubRoutines::x86::double_sign_flip();
3766 
3767     // cmpq $dst, [0x8000000000000000]
         // dst sits in the ModRM reg field here, so encodings >= 8 need REX.R
         // in addition to REX.W (REX_WR).
3768     cbuf.set_insts_mark();
3769     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3770     emit_opcode(cbuf, 0x39);
3771     // XXX reg_mem doesn't support RIP-relative addressing yet
3772     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3773     emit_d32_reloc(cbuf, const_address);
3774 
3775 
3776     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // between here and "done": 4 (subq) + 5 (movsd) + 5 (call) + 1 (popq)
         // = 0xF, plus one byte for each REX prefix emitted below (REX_R when
         // srcenc >= 8, REX_B when dstenc >= 8).  These constants must be kept
         // in sync with the sequence that follows.
3777     emit_opcode(cbuf, 0x75);
3778     if (srcenc < 8 && dstenc < 8) {
3779       emit_d8(cbuf, 0xF);
3780     } else if (srcenc >= 8 && dstenc >= 8) {
3781       emit_d8(cbuf, 0x11);
3782     } else {
3783       emit_d8(cbuf, 0x10);
3784     }
3785 
3786     // subq rsp, #8
3787     emit_opcode(cbuf, Assembler::REX_W);
3788     emit_opcode(cbuf, 0x83);
3789     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3790     emit_d8(cbuf, 8);
3791 
3792     // movsd [rsp], $src
         // src sits in the ModRM reg field, so encodings >= 8 need REX.R; the
         // REX byte goes after the 0xF2 prefix.
3793     emit_opcode(cbuf, 0xF2);
3794     if (srcenc >= 8) {
3795       emit_opcode(cbuf, Assembler::REX_R);
3796     }
3797     emit_opcode(cbuf, 0x0F);
3798     emit_opcode(cbuf, 0x11);
3799     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3800 
3801     // call d2l_fixup
         // Displacement is relative to the end of the 4-byte operand, hence
         // the "- 4" after insts_end().
3802     cbuf.set_insts_mark();
3803     emit_opcode(cbuf, 0xE8);
3804     emit_d32_reloc(cbuf,
3805                    (int)
3806                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3807                    runtime_call_Relocation::spec(),
3808                    RELOC_DISP32);
3809 
3810     // popq $dst
         // Pops the 8 bytes reserved by the subq above into dst.
3811     if (dstenc >= 8) {
3812       emit_opcode(cbuf, Assembler::REX_B);
3813     }
3814     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3815 
3816     // done:
3817   %}
3818 %}
3819 
3820 
3821 
3822 //----------FRAME--------------------------------------------------------------
3823 // Definition of frame structure and management information.
3824 //
3825 //  S T A C K   L A Y O U T    Allocators stack-slot number
3826 //                             |   (to get allocators register number
3827 //  G  Owned by    |        |  v    add OptoReg::stack0())
3828 //  r   CALLER     |        |
3829 //  o     |        +--------+      pad to even-align allocators stack-slot
3830 //  w     V        |  pad0  |        numbers; owned by CALLER
3831 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3832 //  h     ^        |   in   |  5
3833 //        |        |  args  |  4   Holes in incoming args owned by SELF
3834 //  |     |        |        |  3
3835 //  |     |        +--------+
3836 //  V     |        | old out|      Empty on Intel, window on Sparc
3837 //        |    old |preserve|      Must be even aligned.
3838 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3839 //        |        |   in   |  3   area for Intel ret address
3840 //     Owned by    |preserve|      Empty on Sparc.
3841 //       SELF      +--------+
3842 //        |        |  pad2  |  2   pad to align old SP
3843 //        |        +--------+  1
3844 //        |        | locks  |  0
3845 //        |        +--------+----> OptoReg::stack0(), even aligned
3846 //        |        |  pad1  | 11   pad to align new SP
3847 //        |        +--------+
3848 //        |        |        | 10
3849 //        |        | spills |  9   spills
3850 //        V        |        |  8   (pad0 slot for callee)
3851 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3852 //        ^        |  out   |  7
3853 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3854 //     Owned by    +--------+
3855 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3856 //        |    new |preserve|      Must be even-aligned.
3857 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3858 //        |        |        |
3859 //
3860 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3861 //         known from SELF's arguments and the Java calling convention.
3862 //         Region 6-7 is determined per call site.
3863 // Note 2: If the calling convention leaves holes in the incoming argument
3864 //         area, those holes are owned by SELF.  Holes in the outgoing area
3865 //         are owned by the CALLEE.  Holes should not be necessary in the
3866 //         incoming area, as the Java calling convention is completely under
3867 //         the control of the AD file.  Doubles can be sorted and packed to
3868 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3869 //         varargs C calling conventions.
3870 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3871 //         even aligned with pad0 as needed.
3872 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3873 //         region 6-11 is even aligned; it may be padded out more so that
3874 //         the region from SP to FP meets the minimum stack alignment.
3875 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3876 //         alignment.  Region 11, pad1, may be dynamically extended so that
3877 //         SP meets the minimum alignment.
3878 
3879 frame
3880 %{
3881   // What direction does stack grow in (assumed to be same for C & Java)
3882   stack_direction(TOWARDS_LOW);
3883 
3884   // The following registers define part of the calling convention
3885   // between compiled code and the interpreter.
3886   inline_cache_reg(RAX);                // Inline Cache Register
3887   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3888                                         // calling interpreter
3889 
3890   // Optional: name the operand used by cisc-spilling to access
3891   // [stack_pointer + offset]
3892   cisc_spilling_operand_name(indOffset32);
3893 
3894   // Number of stack slots consumed by locking an object
3895   sync_stack_slots(2);
3896 
3897   // Compiled code's Frame Pointer
3898   frame_pointer(RSP);
3899 
3900   // Interpreter stores its frame pointer in a register which is
3901   // stored to the stack by I2CAdaptors.
3902   // I2CAdaptors convert from interpreted java to compiled java.
3903   interpreter_frame_pointer(RBP);
3904 
3905   // Stack alignment requirement
3906   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3907 
3908   // Number of stack slots between incoming argument block and the start of
3909   // a new frame.  The PROLOG must add this many slots to the stack.  The
3910   // EPILOG must remove this many slots.  amd64 needs two slots for
3911   // return address.
       // NOTE(review): the value passed below is 4 (plus 2 more when
       // VerifyStackAtCalls is set), not 2, so the "two slots" remark above
       // looks stale -- confirm what the remaining slots hold.
3912   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3913 
3914   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3915   // for calls to C.  Supports the var-args backing area for register parms.
3916   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3917 
3918   // The after-PROLOG location of the return address.  Location of
3919   // return address specifies a type (REG or STACK) and a number
3920   // representing the register number (i.e. - use a register name) or
3921   // stack slot.
3922   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3923   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2, plus 2 for VerifyStackAtCalls, plus the fixed
       // slots of the current compilation) is rounded up to a multiple of
       // WordsPerLong * 2 before the leading "- 2" is applied.
3924   return_addr(STACK - 2 +
3925               round_to(2 + 2 * VerifyStackAtCalls +
3926                        Compile::current()->fixed_slots(),
3927                        WordsPerLong * 2));
3928 
3929   // Body of function which returns an integer array locating
3930   // arguments either in registers or in stack slots.  Passed an array
3931   // of ideal registers called "sig" and a "length" count.  Stack-slot
3932   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3933   // arguments for a CALLEE.  Incoming stack arguments are
3934   // automatically biased by the preserve_stack_slots field above.
3935 
3936   calling_convention
3937   %{
3938     // No difference between ingoing/outgoing just pass false
3939     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3940   %}
3941 
3942   c_calling_convention
3943   %{
3944     // This is obviously always outgoing
3945     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3946   %}
3947 
3948   // Location of compiled Java return values.  Same as C for now.
3949   return_value
3950   %{
3951     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3952            "only return normal values");
3953 
         // lo[] and hi[] are indexed by ideal register opcode and supply the
         // two halves of the returned OptoRegPair; OptoReg::Bad in hi[] marks
         // a value with no second half.  The two leading 0 entries fill the
         // indices below the one labelled Op_RegN.
         // NOTE(review): the assert above rejects opcodes below Op_RegI, so
         // whether the Op_RegN entries are reachable depends on the opcode
         // ordering -- confirm.
3954     static const int lo[Op_RegL + 1] = {
3955       0,
3956       0,
3957       RAX_num,  // Op_RegN
3958       RAX_num,  // Op_RegI
3959       RAX_num,  // Op_RegP
3960       XMM0_num, // Op_RegF
3961       XMM0_num, // Op_RegD
3962       RAX_num   // Op_RegL
3963     };
3964     static const int hi[Op_RegL + 1] = {
3965       0,
3966       0,
3967       OptoReg::Bad, // Op_RegN
3968       OptoReg::Bad, // Op_RegI
3969       RAX_H_num,    // Op_RegP
3970       OptoReg::Bad, // Op_RegF
3971       XMM0_H_num,   // Op_RegD
3972       RAX_H_num     // Op_RegL
3973     };
         // Guards against a new ideal register type being added without a
         // matching table entry.
3974     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
3975     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
3976   %}
3977 %}
3978 
3979 //----------ATTRIBUTES---------------------------------------------------------
3980 //----------Operand Attributes-------------------------------------------------
3981 op_attrib op_cost(0);        // Required cost attribute
     // Operand definitions later in this file give their own op_cost(N) where
     // they need a value other than the 0 declared here.
3982 
3983 //----------Instruction Attributes---------------------------------------------
     // Each ins_attrib line declares an instruction attribute together with
     // the value written in parentheses.
3984 ins_attrib ins_cost(100);       // Required cost attribute
3985 ins_attrib ins_size(8);         // Required size attribute (in bits)
3986 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3987                                 // a non-matching short branch variant
3988                                 // of some long branch?
3989 ins_attrib ins_alignment(1);    // Required alignment attribute (must
3990                                 // be a power of 2) specifies the
3991                                 // alignment that some part of the
3992                                 // instruction (not necessarily the
3993                                 // start) requires.  If > 1, a
3994                                 // compute_padding() function must be
3995                                 // provided for the instruction
3996 
3997 //----------OPERANDS-----------------------------------------------------------
3998 // Operand definitions must precede instruction definitions for correct parsing
3999 // in the ADLC because operands constitute user defined types which are used in
4000 // instruction definitions.
4001 
4002 //----------Simple Operands----------------------------------------------------
4003 // Immediate Operands
4004 // Integer Immediate
     // Matches every ConI node, whatever its value (no predicate).
4005 operand immI()
4006 %{
4007   match(ConI);
4008 
4009   op_cost(10);
4010   format %{ %}
4011   interface(CONST_INTER);
4012 %}
4013 
4014 // Constant for test vs zero
     // Matches a ConI only when its value is 0.
4015 operand immI0()
4016 %{
4017   predicate(n->get_int() == 0);
4018   match(ConI);
4019 
4020   op_cost(0);
4021   format %{ %}
4022   interface(CONST_INTER);
4023 %}
4024 
4025 // Constant for increment
     // Matches a ConI only when its value is 1.
4026 operand immI1()
4027 %{
4028   predicate(n->get_int() == 1);
4029   match(ConI);
4030 
4031   op_cost(0);
4032   format %{ %}
4033   interface(CONST_INTER);
4034 %}
4035 
4036 // Constant for decrement
     // Matches a ConI only when its value is -1.
4037 operand immI_M1()
4038 %{
4039   predicate(n->get_int() == -1);
4040   match(ConI);
4041 
4042   op_cost(0);
4043   format %{ %}
4044   interface(CONST_INTER);
4045 %}
4046 
4047 // Valid scale values for addressing modes
     // Matches a ConI in the range 0..3 inclusive.  No op_cost is given here.
4048 operand immI2()
4049 %{
4050   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4051   match(ConI);
4052 
4053   format %{ %}
4054   interface(CONST_INTER);
4055 %}
4056 
4057 operand immI8()
4058 %{
       // Matches a ConI whose value fits in a signed 8-bit immediate
       // (-0x80 <= value < 0x80).
4059   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4060   match(ConI);
4061 
4062   op_cost(5);
4063   format %{ %}
4064   interface(CONST_INTER);
4065 %}
4066 
4067 operand immI16()
4068 %{
       // Matches a ConI whose value fits in a signed 16-bit immediate
       // (-32768..32767 inclusive).
4069   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4070   match(ConI);
4071 
4072   op_cost(10);
4073   format %{ %}
4074   interface(CONST_INTER);
4075 %}
4076 
4077 // Constant for long shifts
     // Matches a ConI only when its value is 32.
4078 operand immI_32()
4079 %{
4080   predicate( n->get_int() == 32 );
4081   match(ConI);
4082 
4083   op_cost(0);
4084   format %{ %}
4085   interface(CONST_INTER);
4086 %}
4087 
4088 // Constant for long shifts
     // Matches a ConI only when its value is 64.
4089 operand immI_64()
4090 %{
4091   predicate( n->get_int() == 64 );
4092   match(ConI);
4093 
4094   op_cost(0);
4095   format %{ %}
4096   interface(CONST_INTER);
4097 %}
4098 
4099 // Pointer Immediate
     // Matches every ConP node, whatever its value (no predicate).
4100 operand immP()
4101 %{
4102   match(ConP);
4103 
4104   op_cost(10);
4105   format %{ %}
4106   interface(CONST_INTER);
4107 %}
4108 
4109 // NULL Pointer Immediate
     // Matches a ConP only when get_ptr() returns 0.
4110 operand immP0()
4111 %{
4112   predicate(n->get_ptr() == 0);
4113   match(ConP);
4114 
4115   op_cost(5);
4116   format %{ %}
4117   interface(CONST_INTER);
4118 %}
4119 
4120 operand immP_poll() %{
       // Matches a non-NULL ConP whose value equals the address returned by
       // os::get_polling_page().  No op_cost is given here.
4121   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
4122   match(ConP);
4123 
4124   // formats are generated automatically for constants and base registers
4125   format %{ %}
4126   interface(CONST_INTER);
4127 %}
4128 
4129 // Compressed (narrow) Pointer Immediate
     // Matches every ConN node, whatever its value (no predicate).
4130 operand immN() %{
4131   match(ConN);
4132 
4133   op_cost(10);
4134   format %{ %}
4135   interface(CONST_INTER);
4136 %}
4137 
4138 // NULL Compressed (narrow) Pointer Immediate
     // Matches a ConN only when get_narrowcon() returns 0.
4139 operand immN0() %{
4140   predicate(n->get_narrowcon() == 0);
4141   match(ConN);
4142 
4143   op_cost(5);
4144   format %{ %}
4145   interface(CONST_INTER);
4146 %}
4147 
4148 operand immP31()
4149 %{
       // Matches a ConP that is not an oop pointer and whose value has no
       // bits set at or above bit 31 (get_ptr() >> 31 == 0).
4150   predicate(!n->as_Type()->type()->isa_oopptr()
4151             && (n->get_ptr() >> 31) == 0);
4152   match(ConP);
4153 
4154   op_cost(5);
4155   format %{ %}
4156   interface(CONST_INTER);
4157 %}
4158 
4159 
4160 // Long Immediate
     // Matches every ConL node, whatever its value (no predicate).
4161 operand immL()
4162 %{
4163   match(ConL);
4164 
4165   op_cost(20);
4166   format %{ %}
4167   interface(CONST_INTER);
4168 %}
4169 
4170 // Long Immediate 8-bit
     // Matches a ConL whose value fits in a signed 8-bit immediate
     // (-0x80 <= value < 0x80).
4171 operand immL8()
4172 %{
4173   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4174   match(ConL);
4175 
4176   op_cost(5);
4177   format %{ %}
4178   interface(CONST_INTER);
4179 %}
4180 
4181 // Long Immediate 32-bit unsigned
     // Matches a ConL whose value is unchanged by a round trip through
     // unsigned int, i.e. 0..0xFFFFFFFF with a 32-bit unsigned int.
4182 operand immUL32()
4183 %{
4184   predicate(n->get_long() == (unsigned int) (n->get_long()));
4185   match(ConL);
4186 
4187   op_cost(10);
4188   format %{ %}
4189   interface(CONST_INTER);
4190 %}
4191 
4192 // Long Immediate 32-bit signed
     // Matches a ConL whose value is unchanged by a round trip through int,
     // i.e. it fits in a signed 32-bit immediate.
4193 operand immL32()
4194 %{
4195   predicate(n->get_long() == (int) (n->get_long()));
4196   match(ConL);
4197 
4198   op_cost(15);
4199   format %{ %}
4200   interface(CONST_INTER);
4201 %}
4202 
4203 // Long Immediate zero
     // Matches a ConL only when its value is 0.
4204 operand immL0()
4205 %{
4206   predicate(n->get_long() == 0L);
4207   match(ConL);
4208 
4209   op_cost(10);
4210   format %{ %}
4211   interface(CONST_INTER);
4212 %}
4213 
4214 // Constant for increment
     // Matches a ConL only when its value is 1.  No op_cost is given here.
4215 operand immL1()
4216 %{
4217   predicate(n->get_long() == 1);
4218   match(ConL);
4219 
4220   format %{ %}
4221   interface(CONST_INTER);
4222 %}
4223 
4224 // Constant for decrement
     // Matches a ConL only when its value is -1.  No op_cost is given here.
4225 operand immL_M1()
4226 %{
4227   predicate(n->get_long() == -1);
4228   match(ConL);
4229 
4230   format %{ %}
4231   interface(CONST_INTER);
4232 %}
4233 
4234 // Long Immediate: the value 10
     // Matches a ConL only when its value is 10.  No op_cost is given here.
4235 operand immL10()
4236 %{
4237   predicate(n->get_long() == 10);
4238   match(ConL);
4239 
4240   format %{ %}
4241   interface(CONST_INTER);
4242 %}
4243 
4244 // Long immediate from 0 to 127.
4245 // Used for a shorter form of long mul by 10.
     // Matches a ConL with 0 <= value < 0x80.
4246 operand immL_127()
4247 %{
4248   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4249   match(ConL);
4250 
4251   op_cost(10);
4252   format %{ %}
4253   interface(CONST_INTER);
4254 %}
4255 
4256 // Long Immediate: low 32-bit mask
     // Matches a ConL only when its value is exactly 0xFFFFFFFF.
4257 operand immL_32bits()
4258 %{
4259   predicate(n->get_long() == 0xFFFFFFFFL);
4260   match(ConL);
4261   op_cost(20);
4262 
4263   format %{ %}
4264   interface(CONST_INTER);
4265 %}
4266 
4267 // Float Immediate zero
     // Matches a ConF only when jint_cast() of its value is 0.
     // NOTE(review): jint_cast presumably reinterprets the float's bits, in
     // which case only +0.0f (not -0.0f) matches -- confirm.
4268 operand immF0()
4269 %{
4270   predicate(jint_cast(n->getf()) == 0);
4271   match(ConF);
4272 
4273   op_cost(5);
4274   format %{ %}
4275   interface(CONST_INTER);
4276 %}
4277 
4278 // Float Immediate
     // Matches every ConF node, whatever its value (no predicate).
4279 operand immF()
4280 %{
4281   match(ConF);
4282 
4283   op_cost(15);
4284   format %{ %}
4285   interface(CONST_INTER);
4286 %}
4287 
4288 // Double Immediate zero
     // Matches a ConD only when jlong_cast() of its value is 0.
     // NOTE(review): jlong_cast presumably reinterprets the double's bits, in
     // which case only +0.0 (not -0.0) matches -- confirm.
4289 operand immD0()
4290 %{
4291   predicate(jlong_cast(n->getd()) == 0);
4292   match(ConD);
4293 
4294   op_cost(5);
4295   format %{ %}
4296   interface(CONST_INTER);
4297 %}
4298 
4299 // Double Immediate
     // Matches every ConD node, whatever its value (no predicate).
4300 operand immD()
4301 %{
4302   match(ConD);
4303 
4304   op_cost(15);
4305   format %{ %}
4306   interface(CONST_INTER);
4307 %}
4308 
4309 // Immediates for special shifts (sign extend)
4310 
4311 // Constant 16 (one of the special shift amounts introduced above)
     // Matches a ConI only when its value is 16.  No op_cost is given here.
4312 operand immI_16()
4313 %{
4314   predicate(n->get_int() == 16);
4315   match(ConI);
4316 
4317   format %{ %}
4318   interface(CONST_INTER);
4319 %}
4320 
4321 operand immI_24()
4322 %{
       // Matches a ConI only when its value is 24.  No op_cost is given here.
4323   predicate(n->get_int() == 24);
4324   match(ConI);
4325 
4326   format %{ %}
4327   interface(CONST_INTER);
4328 %}
4329 
4330 // Constant for byte-wide masking
     // Matches a ConI only when its value is 255.  No op_cost is given here.
4331 operand immI_255()
4332 %{
4333   predicate(n->get_int() == 255);
4334   match(ConI);
4335 
4336   format %{ %}
4337   interface(CONST_INTER);
4338 %}
4339 
4340 // Constant for short-wide masking
     // Matches a ConI only when its value is 65535.  No op_cost is given here.
4341 operand immI_65535()
4342 %{
4343   predicate(n->get_int() == 65535);
4344   match(ConI);
4345 
4346   format %{ %}
4347   interface(CONST_INTER);
4348 %}
4349 
4350 // Constant for byte-wide masking
     // Matches a ConL only when its value is 255.  No op_cost is given here.
4351 operand immL_255()
4352 %{
4353   predicate(n->get_long() == 255);
4354   match(ConL);
4355 
4356   format %{ %}
4357   interface(CONST_INTER);
4358 %}
4359 
4360 // Constant for short-wide masking
     // Matches a ConL only when its value is 65535.  No op_cost is given here.
4361 operand immL_65535()
4362 %{
4363   predicate(n->get_long() == 65535);
4364   match(ConL);
4365 
4366   format %{ %}
4367   interface(CONST_INTER);
4368 %}
4369 
4370 // Register Operands
4371 // Integer Register
     // Allocated from register class int_reg.  Besides a plain RegI, the
     // fixed-register integer operands listed below are also match rules of
     // this operand.
4372 operand rRegI()
4373 %{
4374   constraint(ALLOC_IN_RC(int_reg));
4375   match(RegI);
4376 
4377   match(rax_RegI);
4378   match(rbx_RegI);
4379   match(rcx_RegI);
4380   match(rdx_RegI);
4381   match(rdi_RegI);
4382 
4383   format %{ %}
4384   interface(REG_INTER);
4385 %}
4386 
4387 // Special Registers
     // Integer operand allocated from register class int_rax_reg; its format
     // string is "RAX".  It also lists rRegI as a match rule.
4388 operand rax_RegI()
4389 %{
4390   constraint(ALLOC_IN_RC(int_rax_reg));
4391   match(RegI);
4392   match(rRegI);
4393 
4394   format %{ "RAX" %}
4395   interface(REG_INTER);
4396 %}
4397 
4398 // Special Registers
     // Integer operand allocated from register class int_rbx_reg; its format
     // string is "RBX".  It also lists rRegI as a match rule.
4399 operand rbx_RegI()
4400 %{
4401   constraint(ALLOC_IN_RC(int_rbx_reg));
4402   match(RegI);
4403   match(rRegI);
4404 
4405   format %{ "RBX" %}
4406   interface(REG_INTER);
4407 %}
4408 
4409 operand rcx_RegI()
4410 %{
       // Integer operand allocated from register class int_rcx_reg; its
       // format string is "RCX".  It also lists rRegI as a match rule.
4411   constraint(ALLOC_IN_RC(int_rcx_reg));
4412   match(RegI);
4413   match(rRegI);
4414 
4415   format %{ "RCX" %}
4416   interface(REG_INTER);
4417 %}
4418 
4419 operand rdx_RegI()
4420 %{
       // Integer operand allocated from register class int_rdx_reg; its
       // format string is "RDX".  It also lists rRegI as a match rule.
4421   constraint(ALLOC_IN_RC(int_rdx_reg));
4422   match(RegI);
4423   match(rRegI);
4424 
4425   format %{ "RDX" %}
4426   interface(REG_INTER);
4427 %}
4428 
4429 operand rdi_RegI()
4430 %{
       // Integer operand allocated from register class int_rdi_reg; its
       // format string is "RDI".  It also lists rRegI as a match rule.
4431   constraint(ALLOC_IN_RC(int_rdi_reg));
4432   match(RegI);
4433   match(rRegI);
4434 
4435   format %{ "RDI" %}
4436   interface(REG_INTER);
4437 %}
4438 
4439 operand no_rcx_RegI()
4440 %{
       // Integer operand allocated from register class int_no_rcx_reg.  Its
       // match rules list every fixed-register integer operand defined above
       // except rcx_RegI.
4441   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4442   match(RegI);
4443   match(rax_RegI);
4444   match(rbx_RegI);
4445   match(rdx_RegI);
4446   match(rdi_RegI);
4447 
4448   format %{ %}
4449   interface(REG_INTER);
4450 %}
4451 
4452 operand no_rax_rdx_RegI()
4453 %{
       // Integer operand allocated from register class int_no_rax_rdx_reg.
       // Its match rules list the fixed-register integer operands defined
       // above except rax_RegI and rdx_RegI.
4454   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4455   match(RegI);
4456   match(rbx_RegI);
4457   match(rcx_RegI);
4458   match(rdi_RegI);
4459 
4460   format %{ %}
4461   interface(REG_INTER);
4462 %}
4463 
4464 // Pointer Register
     // Allocated from register class any_reg.  Its match rules list RegP,
     // the fixed-register pointer operands below, and rRegP.
4465 operand any_RegP()
4466 %{
4467   constraint(ALLOC_IN_RC(any_reg));
4468   match(RegP);
4469   match(rax_RegP);
4470   match(rbx_RegP);
4471   match(rdi_RegP);
4472   match(rsi_RegP);
4473   match(rbp_RegP);
4474   match(r15_RegP);
4475   match(rRegP);
4476 
4477   format %{ %}
4478   interface(REG_INTER);
4479 %}
4480 
4481 operand rRegP()
4482 %{
       // Pointer operand allocated from register class ptr_reg.  Its match
       // rules list RegP and the fixed-register pointer operands below.
4483   constraint(ALLOC_IN_RC(ptr_reg));
4484   match(RegP);
4485   match(rax_RegP);
4486   match(rbx_RegP);
4487   match(rdi_RegP);
4488   match(rsi_RegP);
4489   match(rbp_RegP);
4490   match(r15_RegP);  // See Q&A below about r15_RegP.
4491 
4492   format %{ %}
4493   interface(REG_INTER);
4494 %}
4495 
4496 operand rRegN() %{
       // RegN operand allocated from register class int_reg, the same class
       // rRegI uses.
4497   constraint(ALLOC_IN_RC(int_reg));
4498   match(RegN);
4499 
4500   format %{ %}
4501   interface(REG_INTER);
4502 %}
4503 
4504 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4505 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4506 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4507 // The output of an instruction is controlled by the allocator, which respects
4508 // register class masks, not match rules.  Unless an instruction mentions
4509 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4510 // by the allocator as an input.
4511 
// Pointer register operand constrained to register class ptr_no_rax_reg
// (presumably the pointer registers without RAX -- confirm against the class definition).
4512 operand no_rax_RegP()
4513 %{
4514   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4515   match(RegP);      // ideal pointer register
4516   match(rbx_RegP);  // other pointer register operands also accepted as inputs
4517   match(rsi_RegP);
4518   match(rdi_RegP);
4519 
4520   format %{ %}
4521   interface(REG_INTER);
4522 %}
4523 
// Pointer register operand constrained to register class ptr_no_rbp_reg
// (presumably the pointer registers without RBP -- confirm against the class definition).
4524 operand no_rbp_RegP()
4525 %{
4526   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4527   match(RegP);      // ideal pointer register
4528   match(rbx_RegP);  // other pointer register operands also accepted as inputs
4529   match(rsi_RegP);
4530   match(rdi_RegP);
4531 
4532   format %{ %}
4533   interface(REG_INTER);
4534 %}
4535 
// Pointer register operand constrained to register class ptr_no_rax_rbx_reg
// (presumably the pointer registers without RAX and RBX -- confirm).
4536 operand no_rax_rbx_RegP()
4537 %{
4538   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4539   match(RegP);      // ideal pointer register
4540   match(rsi_RegP);  // other pointer register operands also accepted as inputs
4541   match(rdi_RegP);
4542 
4543   format %{ %}
4544   interface(REG_INTER);
4545 %}
4546 
4547 // Special Registers
4548 // Return a pointer value
// Pointer operand constrained to register class ptr_rax_reg; accepts the
// ideal RegP and the general rRegP operand as inputs.
4549 operand rax_RegP()
4550 %{
4551   constraint(ALLOC_IN_RC(ptr_rax_reg));
4552   match(RegP);
4553   match(rRegP);
4554 
4555   format %{ %}
4556   interface(REG_INTER);
4557 %}
4558 
4559 // Special Registers
4560 // Return a compressed pointer value
// RegN operand constrained to the int register class int_rax_reg; accepts
// the ideal RegN and the general rRegN operand as inputs.
4561 operand rax_RegN()
4562 %{
4563   constraint(ALLOC_IN_RC(int_rax_reg));
4564   match(RegN);
4565   match(rRegN);
4566 
4567   format %{ %}
4568   interface(REG_INTER);
4569 %}
4570 
4571 // Used in AtomicAdd
// Pointer operand constrained to register class ptr_rbx_reg; accepts the
// ideal RegP and the general rRegP operand as inputs.
4572 operand rbx_RegP()
4573 %{
4574   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4575   match(RegP);
4576   match(rRegP);
4577 
4578   format %{ %}
4579   interface(REG_INTER);
4580 %}
4581 
// Pointer operand constrained to register class ptr_rsi_reg; accepts the
// ideal RegP and the general rRegP operand as inputs.
4582 operand rsi_RegP()
4583 %{
4584   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4585   match(RegP);
4586   match(rRegP);
4587 
4588   format %{ %}
4589   interface(REG_INTER);
4590 %}
4591 
4592 // Used in rep stosq
// Pointer operand constrained to register class ptr_rdi_reg; accepts the
// ideal RegP and the general rRegP operand as inputs.
4593 operand rdi_RegP()
4594 %{
4595   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4596   match(RegP);
4597   match(rRegP);
4598 
4599   format %{ %}
4600   interface(REG_INTER);
4601 %}
4602 
// Pointer operand constrained to register class ptr_rbp_reg; accepts the
// ideal RegP and the general rRegP operand as inputs.
4603 operand rbp_RegP()
4604 %{
4605   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4606   match(RegP);
4607   match(rRegP);
4608 
4609   format %{ %}
4610   interface(REG_INTER);
4611 %}
4612 
// Pointer operand constrained to register class ptr_r15_reg (r15 is described
// as the read-only TLS register in the Q&A comment on rRegP in this file).
// Accepts the ideal RegP and the general rRegP operand as inputs.
4613 operand r15_RegP()
4614 %{
4615   constraint(ALLOC_IN_RC(ptr_r15_reg));
4616   match(RegP);
4617   match(rRegP);
4618 
4619   format %{ %}
4620   interface(REG_INTER);
4621 %}
4622 
// General long register operand constrained to register class long_reg.
// Accepts the ideal RegL plus the rax_RegL and rdx_RegL operands as inputs.
4623 operand rRegL()
4624 %{
4625   constraint(ALLOC_IN_RC(long_reg));
4626   match(RegL);
4627   match(rax_RegL);
4628   match(rdx_RegL);
4629 
4630   format %{ %}
4631   interface(REG_INTER);
4632 %}
4633 
4634 // Special Registers
// Long register operand constrained to register class long_no_rax_rdx_reg
// (presumably the long registers without RAX and RDX -- confirm against the
// class definition).  Accepts the ideal RegL and rRegL as inputs.
4635 operand no_rax_rdx_RegL()
4636 %{
4637   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4638   match(RegL);
4639   match(rRegL);
4640 
4641   format %{ %}
4642   interface(REG_INTER);
4643 %}
4644 
// Long register operand; accepts the ideal RegL, rRegL and rdx_RegL as inputs.
// NOTE(review): the constraint below names long_no_rax_rdx_reg -- the same
// class used by no_rax_rdx_RegL -- although this operand is named "no_rax"
// and also matches rdx_RegL.  Confirm whether a class that only excludes RAX
// was intended before changing anything.
4645 operand no_rax_RegL()
4646 %{
4647   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4648   match(RegL);
4649   match(rRegL);
4650   match(rdx_RegL);
4651 
4652   format %{ %}
4653   interface(REG_INTER);
4654 %}
4655 
// Long register operand constrained to register class long_no_rcx_reg
// (presumably the long registers without RCX -- confirm against the class
// definition).  Accepts the ideal RegL and rRegL as inputs.
4656 operand no_rcx_RegL()
4657 %{
4658   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4659   match(RegL);
4660   match(rRegL);
4661 
4662   format %{ %}
4663   interface(REG_INTER);
4664 %}
4665 
// Long operand constrained to register class long_rax_reg; accepts the ideal
// RegL and rRegL as inputs.  Unlike its siblings it carries a fixed format
// string, "RAX".
4666 operand rax_RegL()
4667 %{
4668   constraint(ALLOC_IN_RC(long_rax_reg));
4669   match(RegL);
4670   match(rRegL);
4671 
4672   format %{ "RAX" %}
4673   interface(REG_INTER);
4674 %}
4675 
// Long operand constrained to register class long_rcx_reg; accepts the ideal
// RegL and rRegL as inputs.
4676 operand rcx_RegL()
4677 %{
4678   constraint(ALLOC_IN_RC(long_rcx_reg));
4679   match(RegL);
4680   match(rRegL);
4681 
4682   format %{ %}
4683   interface(REG_INTER);
4684 %}
4685 
// Long operand constrained to register class long_rdx_reg; accepts the ideal
// RegL and rRegL as inputs.
4686 operand rdx_RegL()
4687 %{
4688   constraint(ALLOC_IN_RC(long_rdx_reg));
4689   match(RegL);
4690   match(rRegL);
4691 
4692   format %{ %}
4693   interface(REG_INTER);
4694 %}
4695 
4696 // Flags register, used as output of compare instructions
// Matches the ideal RegFlags, is allocated in register class int_flags and
// is printed as "RFLAGS".
4697 operand rFlagsReg()
4698 %{
4699   constraint(ALLOC_IN_RC(int_flags));
4700   match(RegFlags);
4701 
4702   format %{ "RFLAGS" %}
4703   interface(REG_INTER);
4704 %}
4705 
4706 // Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and ideal type (RegFlags) as rFlagsReg;
// it is a distinct operand type printed as "RFLAGS_U".
4707 operand rFlagsRegU()
4708 %{
4709   constraint(ALLOC_IN_RC(int_flags));
4710   match(RegFlags);
4711 
4712   format %{ "RFLAGS_U" %}
4713   interface(REG_INTER);
4714 %}
4715 
// Flags operand in register class int_flags, printed as "RFLAGS_U_CF".
// Its predicate is the constant false, so the match(RegFlags) rule can never
// be selected on its own; presumably the operand is only produced explicitly
// by instructions that name it -- confirm against its users.
4716 operand rFlagsRegUCF() %{
4717   constraint(ALLOC_IN_RC(int_flags));
4718   match(RegFlags);
4719   predicate(false);
4720 
4721   format %{ "RFLAGS_U_CF" %}
4722   interface(REG_INTER);
4723 %}
4724 
4725 // Float register operands
// Matches the ideal RegF; allocated in register class float_reg.
4726 operand regF()
4727 %{
4728   constraint(ALLOC_IN_RC(float_reg));
4729   match(RegF);
4730 
4731   format %{ %}
4732   interface(REG_INTER);
4733 %}
4734 
4735 // Double register operands
// Matches the ideal RegD; allocated in register class double_reg.
4736 operand regD()
4737 %{
4738   constraint(ALLOC_IN_RC(double_reg));
4739   match(RegD);
4740 
4741   format %{ %}
4742   interface(REG_INTER);
4743 %}
4744 
4745 
4746 //----------Memory Operands----------------------------------------------------
4747 // Direct Memory Operand
4748 // operand direct(immP addr)
4749 // %{
4750 //   match(addr);
4751 
4752 //   format %{ "[$addr]" %}
4753 //   interface(MEMORY_INTER) %{
4754 //     base(0xFFFFFFFF);
4755 //     index(0x4);
4756 //     scale(0x0);
4757 //     disp($addr);
4758 //   %}
4759 // %}
4760 
4761 // Indirect Memory Operand
// Address is just the pointer register: [reg].
4762 operand indirect(any_RegP reg)
4763 %{
4764   constraint(ALLOC_IN_RC(ptr_reg));
4765   match(reg);
4766 
4767   format %{ "[$reg]" %}
4768   interface(MEMORY_INTER) %{
4769     base($reg);
4770     index(0x4);  // no index (same value the stackSlot operands label "No Index")
4771     scale(0x0);  // no scale
4772     disp(0x0);   // no displacement
4773   %}
4774 %}
4775 
4776 // Indirect Memory Plus Short Offset Operand
// Matches AddP of a pointer register and an immL8 constant: [reg + off].
4777 operand indOffset8(any_RegP reg, immL8 off)
4778 %{
4779   constraint(ALLOC_IN_RC(ptr_reg));
4780   match(AddP reg off);
4781 
4782   format %{ "[$reg + $off (8-bit)]" %}
4783   interface(MEMORY_INTER) %{
4784     base($reg);
4785     index(0x4);  // no index
4786     scale(0x0);  // no scale
4787     disp($off);
4788   %}
4789 %}
4790 
4791 // Indirect Memory Plus Long Offset Operand
// Matches AddP of a pointer register and an immL32 constant: [reg + off].
4792 operand indOffset32(any_RegP reg, immL32 off)
4793 %{
4794   constraint(ALLOC_IN_RC(ptr_reg));
4795   match(AddP reg off);
4796 
4797   format %{ "[$reg + $off (32-bit)]" %}
4798   interface(MEMORY_INTER) %{
4799     base($reg);
4800     index(0x4);  // no index
4801     scale(0x0);  // no scale
4802     disp($off);
4803   %}
4804 %}
4805 
4806 // Indirect Memory Plus Index Register Plus Offset Operand
// Matches AddP (AddP reg lreg) off: base register, unscaled long index
// register and an immL32 displacement.  Carries an operand cost of 10.
4807 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4808 %{
4809   constraint(ALLOC_IN_RC(ptr_reg));
4810   match(AddP (AddP reg lreg) off);
4811 
4812   op_cost(10);
4813   format %{"[$reg + $off + $lreg]" %}
4814   interface(MEMORY_INTER) %{
4815     base($reg);
4816     index($lreg);
4817     scale(0x0);  // index is not scaled
4818     disp($off);
4819   %}
4820 %}
4821 
4822 // Indirect Memory Plus Index Register Operand
// Matches AddP reg lreg: base register plus unscaled long index register,
// no displacement.  Carries an operand cost of 10.
4823 operand indIndex(any_RegP reg, rRegL lreg)
4824 %{
4825   constraint(ALLOC_IN_RC(ptr_reg));
4826   match(AddP reg lreg);
4827 
4828   op_cost(10);
4829   format %{"[$reg + $lreg]" %}
4830   interface(MEMORY_INTER) %{
4831     base($reg);
4832     index($lreg);
4833     scale(0x0);  // index is not scaled
4834     disp(0x0);   // no displacement
4835   %}
4836 %}
4837 
4838 // Indirect Memory Times Scale Plus Index Register
// Matches AddP reg (LShiftL lreg scale): base register plus a long index
// shifted left by the immI2 constant, no displacement.  Operand cost 10.
4839 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4840 %{
4841   constraint(ALLOC_IN_RC(ptr_reg));
4842   match(AddP reg (LShiftL lreg scale));
4843 
4844   op_cost(10);
4845   format %{"[$reg + $lreg << $scale]" %}
4846   interface(MEMORY_INTER) %{
4847     base($reg);
4848     index($lreg);
4849     scale($scale);
4850     disp(0x0);   // no displacement
4851   %}
4852 %}
4853 
4854 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Matches AddP (AddP reg (LShiftL lreg scale)) off: base register, scaled
// long index and an immL32 displacement.  Operand cost 10.
4855 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4856 %{
4857   constraint(ALLOC_IN_RC(ptr_reg));
4858   match(AddP (AddP reg (LShiftL lreg scale)) off);
4859 
4860   op_cost(10);
4861   format %{"[$reg + $off + $lreg << $scale]" %}
4862   interface(MEMORY_INTER) %{
4863     base($reg);
4864     index($lreg);
4865     scale($scale);
4866     disp($off);
4867   %}
4868 %}
4869 
4870 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Like indIndexScaleOffset, but the index is an int register (rRegI) that
// the ideal graph widens with ConvI2L before the shift.  Operand cost 10.
4871 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4872 %{
4873   constraint(ALLOC_IN_RC(ptr_reg));
     // Only applies when the long type of the node reached through
     // n->in(2)->in(3)->in(1) has a lower bound _lo >= 0 (presumably that node
     // is the ConvI2L of idx in the match rule below -- confirm).
4874   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4875   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4876 
4877   op_cost(10);
4878   format %{"[$reg + $off + $idx << $scale]" %}
4879   interface(MEMORY_INTER) %{
4880     base($reg);
4881     index($idx);
4882     scale($scale);
4883     disp($off);
4884   %}
4885 %}
4886 
4887 // Indirect Narrow Oop Plus Offset Operand
4888 // Note: x86 architecture doesn't support "scale * index + offset" without a base
4889 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
// Matches AddP (DecodeN reg) off and folds the decode into the address:
// base is fixed to encoding 0xc (R12), the narrow register is the index,
// scaled by 3 (i.e. << 3, as the format string shows).  Only enabled when
// UseCompressedOops is set and narrow_oop_shift() equals Address::times_8.
4890 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4891   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
4892   constraint(ALLOC_IN_RC(ptr_reg));
4893   match(AddP (DecodeN reg) off);
4894 
4895   op_cost(10);
4896   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4897   interface(MEMORY_INTER) %{
4898     base(0xc); // R12
4899     index($reg);
4900     scale(0x3);
4901     disp($off);
4902   %}
4903 %}
4904 
4905 // Indirect Narrow Oop Memory Operand
// Matches DecodeN reg and uses the narrow register directly as the base.
// Only enabled when Universe::narrow_oop_shift() == 0.
4906 operand indirectNarrow(rRegN reg)
4907 %{
4908   predicate(Universe::narrow_oop_shift() == 0);
4909   constraint(ALLOC_IN_RC(ptr_reg));
4910   match(DecodeN reg);
4911 
4912   format %{ "[$reg]" %}
4913   interface(MEMORY_INTER) %{
4914     base($reg);
4915     index(0x4);  // no index
4916     scale(0x0);  // no scale
4917     disp(0x0);   // no displacement
4918   %}
4919 %}
4920 
4921 // Indirect Memory Plus Short Offset Operand
// Narrow-oop variant: matches AddP (DecodeN reg) off with an immL8 offset
// and uses the narrow register directly as the base.  Only enabled when
// Universe::narrow_oop_shift() == 0.
4922 operand indOffset8Narrow(rRegN reg, immL8 off)
4923 %{
4924   predicate(Universe::narrow_oop_shift() == 0);
4925   constraint(ALLOC_IN_RC(ptr_reg));
4926   match(AddP (DecodeN reg) off);
4927 
4928   format %{ "[$reg + $off (8-bit)]" %}
4929   interface(MEMORY_INTER) %{
4930     base($reg);
4931     index(0x4);  // no index
4932     scale(0x0);  // no scale
4933     disp($off);
4934   %}
4935 %}
4936 
4937 // Indirect Memory Plus Long Offset Operand
// Narrow-oop variant: matches AddP (DecodeN reg) off with an immL32 offset
// and uses the narrow register directly as the base.  Only enabled when
// Universe::narrow_oop_shift() == 0.
4938 operand indOffset32Narrow(rRegN reg, immL32 off)
4939 %{
4940   predicate(Universe::narrow_oop_shift() == 0);
4941   constraint(ALLOC_IN_RC(ptr_reg));
4942   match(AddP (DecodeN reg) off);
4943 
4944   format %{ "[$reg + $off (32-bit)]" %}
4945   interface(MEMORY_INTER) %{
4946     base($reg);
4947     index(0x4);  // no index
4948     scale(0x0);  // no scale
4949     disp($off);
4950   %}
4951 %}
4952 
4953 // Indirect Memory Plus Index Register Plus Offset Operand
// Narrow-oop variant: matches AddP (AddP (DecodeN reg) lreg) off.  Only
// enabled when Universe::narrow_oop_shift() == 0.  Operand cost 10.
4954 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4955 %{
4956   predicate(Universe::narrow_oop_shift() == 0);
4957   constraint(ALLOC_IN_RC(ptr_reg));
4958   match(AddP (AddP (DecodeN reg) lreg) off);
4959 
4960   op_cost(10);
4961   format %{"[$reg + $off + $lreg]" %}
4962   interface(MEMORY_INTER) %{
4963     base($reg);
4964     index($lreg);
4965     scale(0x0);  // index is not scaled
4966     disp($off);
4967   %}
4968 %}
4969 
4970 // Indirect Memory Plus Index Register Operand
// Narrow-oop variant: matches AddP (DecodeN reg) lreg, no displacement.
// Only enabled when Universe::narrow_oop_shift() == 0.  Operand cost 10.
4971 operand indIndexNarrow(rRegN reg, rRegL lreg)
4972 %{
4973   predicate(Universe::narrow_oop_shift() == 0);
4974   constraint(ALLOC_IN_RC(ptr_reg));
4975   match(AddP (DecodeN reg) lreg);
4976 
4977   op_cost(10);
4978   format %{"[$reg + $lreg]" %}
4979   interface(MEMORY_INTER) %{
4980     base($reg);
4981     index($lreg);
4982     scale(0x0);  // index is not scaled
4983     disp(0x0);   // no displacement
4984   %}
4985 %}
4986 
4987 // Indirect Memory Times Scale Plus Index Register
// Narrow-oop variant: matches AddP (DecodeN reg) (LShiftL lreg scale), no
// displacement.  Only enabled when Universe::narrow_oop_shift() == 0.
// Operand cost 10.
4988 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4989 %{
4990   predicate(Universe::narrow_oop_shift() == 0);
4991   constraint(ALLOC_IN_RC(ptr_reg));
4992   match(AddP (DecodeN reg) (LShiftL lreg scale));
4993 
4994   op_cost(10);
4995   format %{"[$reg + $lreg << $scale]" %}
4996   interface(MEMORY_INTER) %{
4997     base($reg);
4998     index($lreg);
4999     scale($scale);
5000     disp(0x0);   // no displacement
5001   %}
5002 %}
5003 
5004 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Narrow-oop variant: matches AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off.
// Only enabled when Universe::narrow_oop_shift() == 0.  Operand cost 10.
5005 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5006 %{
5007   predicate(Universe::narrow_oop_shift() == 0);
5008   constraint(ALLOC_IN_RC(ptr_reg));
5009   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5010 
5011   op_cost(10);
5012   format %{"[$reg + $off + $lreg << $scale]" %}
5013   interface(MEMORY_INTER) %{
5014     base($reg);
5015     index($lreg);
5016     scale($scale);
5017     disp($off);
5018   %}
5019 %}
5020 
5021 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Narrow-oop variant with an int index register widened by ConvI2L.
// Operand cost 10.
5022 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5023 %{
5024   constraint(ALLOC_IN_RC(ptr_reg));
     // Requires narrow_oop_shift() == 0 and a lower bound _lo >= 0 on the long
     // type of the node reached through n->in(2)->in(3)->in(1) (presumably the
     // ConvI2L of idx in the match rule below -- confirm).
5025   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5026   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5027 
5028   op_cost(10);
5029   format %{"[$reg + $off + $idx << $scale]" %}
5030   interface(MEMORY_INTER) %{
5031     base($reg);
5032     index($idx);
5033     scale($scale);
5034     disp($off);
5035   %}
5036 %}
5037 
5038 
5039 //----------Special Memory Operands--------------------------------------------
5040 // Stack Slot Operand - This operand is used for loading and storing temporary
5041 //                      values on the stack where a match requires a value to
5042 //                      flow through memory.
// Stack slot operand for an sRegP value: addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
5043 operand stackSlotP(sRegP reg)
5044 %{
5045   constraint(ALLOC_IN_RC(stack_slots));
5046   // No match rule because this operand is only generated in matching
5047 
5048   format %{ "[$reg]" %}
5049   interface(MEMORY_INTER) %{
5050     base(0x4);   // RSP
5051     index(0x4);  // No Index
5052     scale(0x0);  // No Scale
5053     disp($reg);  // Stack Offset
5054   %}
5055 %}
5056 
// Stack slot operand for an sRegI value: addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
5057 operand stackSlotI(sRegI reg)
5058 %{
5059   constraint(ALLOC_IN_RC(stack_slots));
5060   // No match rule because this operand is only generated in matching
5061 
5062   format %{ "[$reg]" %}
5063   interface(MEMORY_INTER) %{
5064     base(0x4);   // RSP
5065     index(0x4);  // No Index
5066     scale(0x0);  // No Scale
5067     disp($reg);  // Stack Offset
5068   %}
5069 %}
5070 
// Stack slot operand for an sRegF value: addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
5071 operand stackSlotF(sRegF reg)
5072 %{
5073   constraint(ALLOC_IN_RC(stack_slots));
5074   // No match rule because this operand is only generated in matching
5075 
5076   format %{ "[$reg]" %}
5077   interface(MEMORY_INTER) %{
5078     base(0x4);   // RSP
5079     index(0x4);  // No Index
5080     scale(0x0);  // No Scale
5081     disp($reg);  // Stack Offset
5082   %}
5083 %}
5084 
// Stack slot operand for an sRegD value: addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
5085 operand stackSlotD(sRegD reg)
5086 %{
5087   constraint(ALLOC_IN_RC(stack_slots));
5088   // No match rule because this operand is only generated in matching
5089 
5090   format %{ "[$reg]" %}
5091   interface(MEMORY_INTER) %{
5092     base(0x4);   // RSP
5093     index(0x4);  // No Index
5094     scale(0x0);  // No Scale
5095     disp($reg);  // Stack Offset
5096   %}
5097 %}
// Stack slot operand for an sRegL value: addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
5098 operand stackSlotL(sRegL reg)
5099 %{
5100   constraint(ALLOC_IN_RC(stack_slots));
5101   // No match rule because this operand is only generated in matching
5102 
5103   format %{ "[$reg]" %}
5104   interface(MEMORY_INTER) %{
5105     base(0x4);   // RSP
5106     index(0x4);  // No Index
5107     scale(0x0);  // No Scale
5108     disp($reg);  // Stack Offset
5109   %}
5110 %}
5111 
5112 //----------Conditional Branch Operands----------------------------------------
5113 // Comparison Op  - This is the operation of the comparison, and is limited to
5114 //                  the following set of codes:
5115 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5116 //
5117 // Other attributes of the comparison, such as unsignedness, are specified
5118 // by the comparison instruction that sets a condition code flags register.
5119 // That result is represented by a flags operand whose subtype is appropriate
5120 // to the unsignedness (etc.) of the comparison.
5121 //
5122 // Later, the instruction which matches both the Comparison Op (a Bool) and
5123 // the flags (produced by the Cmp) specifies the coding of the comparison op
5124 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5125 
5126 // Comparison Code
// Matches an ideal Bool.  Each COND_INTER entry pairs a hex condition
// encoding with the mnemonic suffix used when printing; this table uses the
// l/ge/le/g suffixes (the signed forms, in contrast to cmpOpU's b/nb/be/nbe).
5127 operand cmpOp()
5128 %{
5129   match(Bool);
5130 
5131   format %{ "" %}
5132   interface(COND_INTER) %{
5133     equal(0x4, "e");
5134     not_equal(0x5, "ne");
5135     less(0xC, "l");
5136     greater_equal(0xD, "ge");
5137     less_equal(0xE, "le");
5138     greater(0xF, "g");
5139   %}
5140 %}
5141 
5142 // Comparison Code, unsigned compare.  Used by FP also, with
5143 // C2 (unordered) turned into GT or LT already.  The other bits
5144 // C0 and C3 are turned into Carry & Zero flags.
// Matches an ideal Bool.  Each COND_INTER entry pairs a hex condition
// encoding with the mnemonic suffix used when printing (b/nb/be/nbe here).
5145 operand cmpOpU()
5146 %{
5147   match(Bool);
5148 
5149   format %{ "" %}
5150   interface(COND_INTER) %{
5151     equal(0x4, "e");
5152     not_equal(0x5, "ne");
5153     less(0x2, "b");
5154     greater_equal(0x3, "nb");
5155     less_equal(0x6, "be");
5156     greater(0x7, "nbe");
5157   %}
5158 %}
5159 
5160 
5161 // Floating comparisons that don't require any fixup for the unordered case
// Matches an ideal Bool only when its test is lt, ge, le or gt.  The
// condition table is identical to cmpOpU's.
5162 operand cmpOpUCF() %{
5163   match(Bool);
5164   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5165             n->as_Bool()->_test._test == BoolTest::ge ||
5166             n->as_Bool()->_test._test == BoolTest::le ||
5167             n->as_Bool()->_test._test == BoolTest::gt);
5168   format %{ "" %}
5169   interface(COND_INTER) %{
5170     equal(0x4, "e");
5171     not_equal(0x5, "ne");
5172     less(0x2, "b");
5173     greater_equal(0x3, "nb");
5174     less_equal(0x6, "be");
5175     greater(0x7, "nbe");
5176   %}
5177 %}
5178 
5179 
5180 // Floating comparisons that can be fixed up with extra conditional jumps
// Matches an ideal Bool only when its test is ne or eq.  The condition
// table is identical to cmpOpU's.
5181 operand cmpOpUCF2() %{
5182   match(Bool);
5183   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5184             n->as_Bool()->_test._test == BoolTest::eq);
5185   format %{ "" %}
5186   interface(COND_INTER) %{
5187     equal(0x4, "e");
5188     not_equal(0x5, "ne");
5189     less(0x2, "b");
5190     greater_equal(0x3, "nb");
5191     less_equal(0x6, "be");
5192     greater(0x7, "nbe");
5193   %}
5194 %}
5195 
5196 
5197 //----------OPERAND CLASSES----------------------------------------------------
5198 // Operand Classes are groups of operands that are used to simplify
5199 // instruction definitions by not requiring the AD writer to specify separate
5200 // instructions for every form of operand when the instruction accepts
5201 // multiple operand types with the same basic encoding and format.  The classic
5202 // case of this is memory operands.
5203 
// Operand class "memory": every addressing-mode operand defined above --
// the eight any_RegP-based forms, the compressed-oop form and the eight
// *Narrow forms.  The stackSlot* operands are not members.
5204 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5205                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5206                indCompressedOopOffset,
5207                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5208                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5209                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5210 
5211 //----------PIPELINE-----------------------------------------------------------
5212 // Rules which define the behavior of the target architectures pipeline.
5213 pipeline %{
5214 
5215 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes: instruction sizing, bundle width, fetch
// geometry and the nop instruction list.
5216 attributes %{
5217   variable_size_instructions;        // Variable size instructions
5218   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5219   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5220   instruction_fetch_unit_size = 16;  // The processor fetches one line
5221   instruction_fetch_units = 1;       // of 16 bytes
5222 
5223   // List of nop instructions
5224   nops( MachNop );
5225 %}
5226 
5227 //----------RESOURCES----------------------------------------------------------
5228 // Resources are the functional units available to the machine
5229 
5230 // Generic P2/P3 pipeline
5231 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5232 // 3 instructions decoded per cycle.
5233 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5234 // 3 ALU op, only ALU0 handles mul instructions.
// DECODE, MEM and ALU are masks meaning "any one of" the listed units.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below -- confirm which is intended.
5235 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5236            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5237            BR, FPU,
5238            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5239 
5240 //----------PIPELINE DESCRIPTION-----------------------------------------------
5241 // Pipeline Description specifies the stages in the machine's pipeline
5242 
5243 // Generic P2/P3 pipeline
// Six stages, S0 through S5; the pipe classes below reference operands and
// resources by these stage names.
5244 pipe_desc(S0, S1, S2, S3, S4, S5);
5245 
5246 //----------PIPELINE CLASSES---------------------------------------------------
5247 // Pipeline Classes describe the stages in which input and output are
5248 // referenced by the hardware pipeline.
5249 
5250 // Naming convention: ialu or fpu
5251 // Then: _reg
5252 // Then: _reg if there is a 2nd register
5253 // Then: _long if it's a pair of instructions implementing a long
5254 // Then: _fat if it requires the big decoder
5255 //   Or: _mem if it requires the big decoder and a memory unit.
5256 
5257 // Integer ALU reg operation
// Single instruction: dst is read in S3 and written in S4.
5258 pipe_class ialu_reg(rRegI dst)
5259 %{
5260     single_instruction;
5261     dst    : S4(write);
5262     dst    : S3(read);
5263     DECODE : S0;        // any decoder
5264     ALU    : S3;        // any alu
5265 %}
5266 
5267 // Long ALU reg operation
// Counted as 2 instructions: dst is read in S3 and written in S4.
// NOTE(review): "both alus" predates the three-ALU resource list (ALU0..ALU2).
5268 pipe_class ialu_reg_long(rRegL dst)
5269 %{
5270     instruction_count(2);
5271     dst    : S4(write);
5272     dst    : S3(read);
5273     DECODE : S0(2);     // any 2 decoders
5274     ALU    : S3(2);     // both alus
5275 %}
5276 
5277 // Integer ALU reg operation using big decoder
// Single instruction: dst is read in S3 and written in S4; decode is
// restricted to D0.
5278 pipe_class ialu_reg_fat(rRegI dst)
5279 %{
5280     single_instruction;
5281     dst    : S4(write);
5282     dst    : S3(read);
5283     D0     : S0;        // big decoder only
5284     ALU    : S3;        // any alu
5285 %}
5286 
5287 // Long ALU reg operation using big decoder
// Counted as 2 instructions: dst is read in S3 and written in S4; decode is
// restricted to D0.
5288 pipe_class ialu_reg_long_fat(rRegL dst)
5289 %{
5290     instruction_count(2);
5291     dst    : S4(write);
5292     dst    : S3(read);
5293     D0     : S0(2);     // big decoder only; twice
5294     ALU    : S3(2);     // any 2 alus
5295 %}
5296 
5297 // Integer ALU reg-reg operation
// Single instruction: src is read in S3, dst is written in S4.
5298 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5299 %{
5300     single_instruction;
5301     dst    : S4(write);
5302     src    : S3(read);
5303     DECODE : S0;        // any decoder
5304     ALU    : S3;        // any alu
5305 %}
5306 
5307 // Long ALU reg-reg operation
// Counted as 2 instructions: src is read in S3, dst is written in S4.
// NOTE(review): "both alus" predates the three-ALU resource list (ALU0..ALU2).
5308 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5309 %{
5310     instruction_count(2);
5311     dst    : S4(write);
5312     src    : S3(read);
5313     DECODE : S0(2);     // any 2 decoders
5314     ALU    : S3(2);     // both alus
5315 %}
5316 
5317 // Integer ALU reg-reg operation using big decoder
// Single instruction: src is read in S3, dst is written in S4.
// NOTE(review): despite the reg_reg name, src is declared as a memory
// operand and no MEM resource is reserved -- confirm this is intended.
5318 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5319 %{
5320     single_instruction;
5321     dst    : S4(write);
5322     src    : S3(read);
5323     D0     : S0;        // big decoder only
5324     ALU    : S3;        // any alu
5325 %}
5326 
5327 // Long ALU reg-reg operation using big decoder
// Counted as 2 instructions: src is read in S3, dst is written in S4.
// NOTE(review): "both alus" predates the three-ALU resource list (ALU0..ALU2).
5328 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5329 %{
5330     instruction_count(2);
5331     dst    : S4(write);
5332     src    : S3(read);
5333     D0     : S0(2);     // big decoder only; twice
5334     ALU    : S3(2);     // both alus
5335 %}
5336 
5337 // Integer ALU reg-mem operation
// Single instruction: mem is read in S3 (memory unit), the ALU is used in
// S4 and dst is written in S5.
5338 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5339 %{
5340     single_instruction;
5341     dst    : S5(write);
5342     mem    : S3(read);
5343     D0     : S0;        // big decoder only
5344     ALU    : S4;        // any alu
5345     MEM    : S3;        // any mem
5346 %}
5347 
5348 // Integer mem operation (prefetch)
// Single instruction: mem is read in S3; no ALU and no register operand.
5349 pipe_class ialu_mem(memory mem)
5350 %{
5351     single_instruction;
5352     mem    : S3(read);
5353     D0     : S0;        // big decoder only
5354     MEM    : S3;        // any mem
5355 %}
5356 
5357 // Integer Store to Memory
// Single instruction: the address operand mem is read in S3 and the stored
// register src is read in S5.
5358 pipe_class ialu_mem_reg(memory mem, rRegI src)
5359 %{
5360     single_instruction;
5361     mem    : S3(read);
5362     src    : S5(read);
5363     D0     : S0;        // big decoder only
5364     ALU    : S4;        // any alu
5365     MEM    : S3;        // any mem
5366 %}
5367 
5368 // // Long Store to Memory
5369 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5370 // %{
5371 //     instruction_count(2);
5372 //     mem    : S3(read);
5373 //     src    : S5(read);
5374 //     D0     : S0(2);          // big decoder only; twice
5375 //     ALU    : S4(2);     // any 2 alus
5376 //     MEM    : S3(2);  // Both mems
5377 // %}
5378 
5379 // Integer Store to Memory
// Variant of ialu_mem_reg without a register source (presumably the stored
// value is an immediate, going by the class name).  mem is read in S3.
5380 pipe_class ialu_mem_imm(memory mem)
5381 %{
5382     single_instruction;
5383     mem    : S3(read);
5384     D0     : S0;        // big decoder only
5385     ALU    : S4;        // any alu
5386     MEM    : S3;        // any mem
5387 %}
5388 
5389 // Integer ALU0 reg-reg operation
// Single instruction pinned to ALU0 and the big decoder: src is read in S3,
// dst is written in S4.
5390 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5391 %{
5392     single_instruction;
5393     dst    : S4(write);
5394     src    : S3(read);
5395     D0     : S0;        // Big decoder only
5396     ALU0   : S3;        // only alu0
5397 %}
5398 
5399 // Integer ALU0 reg-mem operation
// Single instruction pinned to ALU0: mem is read in S3, ALU0 is used in S4
// and dst is written in S5.
5400 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5401 %{
5402     single_instruction;
5403     dst    : S5(write);
5404     mem    : S3(read);
5405     D0     : S0;        // big decoder only
5406     ALU0   : S4;        // ALU0 only
5407     MEM    : S3;        // any mem
5408 %}
5409 
5410 // Integer ALU reg-reg operation writing the flags register
// Single instruction: src1 and src2 are read in S3, cr is written in S4.
5411 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5412 %{
5413     single_instruction;
5414     cr     : S4(write);
5415     src1   : S3(read);
5416     src2   : S3(read);
5417     DECODE : S0;        // any decoder
5418     ALU    : S3;        // any alu
5419 %}
5420 
5421 // Integer ALU reg-imm operation writing the flags register
// Single instruction: src1 is read in S3, cr is written in S4.
5422 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5423 %{
5424     single_instruction;
5425     cr     : S4(write);
5426     src1   : S3(read);
5427     DECODE : S0;        // any decoder
5428     ALU    : S3;        // any alu
5429 %}
5430 
5431 // Integer ALU reg-mem operation writing the flags register
// Single instruction: src1 and the memory operand src2 are read in S3, the
// ALU is used in S4 and cr is written in S4.
5432 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5433 %{
5434     single_instruction;
5435     cr     : S4(write);
5436     src1   : S3(read);
5437     src2   : S3(read);
5438     D0     : S0;        // big decoder only
5439     ALU    : S4;        // any alu
5440     MEM    : S3;        // any mem
5441 %}
5442 
5443 // Conditional move reg-reg
// Counted as 4 instructions.  All three operands are declared as reads
// (p and q in S3, y in S4); no write and no ALU resource is listed.
5444 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5445 %{
5446     instruction_count(4);
5447     y      : S4(read);
5448     q      : S3(read);
5449     p      : S3(read);
5450     DECODE : S0(4);     // any decoder
5451 %}
5452 
5453 // Conditional move reg-reg
// Single instruction: src and the flags cr are read in S3, dst is written
// in S4.
5454 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5455 %{
5456     single_instruction;
5457     dst    : S4(write);
5458     src    : S3(read);
5459     cr     : S3(read);
5460     DECODE : S0;        // any decoder
5461 %}
5462 
5463 // Conditional move reg-mem
// Single instruction: the memory operand src and the flags cr are read in
// S3, dst is written in S4.
5464 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5465 %{
5466     single_instruction;
5467     dst    : S4(write);
5468     src    : S3(read);
5469     cr     : S3(read);
5470     DECODE : S0;        // any decoder
5471     MEM    : S3;        // any mem
5472 %}
5473 
5474 // Conditional move reg-reg long
// src and the flags cr are read in S3, dst is written in S4.
// NOTE(review): declared single_instruction although DECODE is reserved
// with a count of 2 -- confirm this combination is intended.
5475 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5476 %{
5477     single_instruction;
5478     dst    : S4(write);
5479     src    : S3(read);
5480     cr     : S3(read);
5481     DECODE : S0(2);     // any 2 decoders
5482 %}
5483 
5484 // XXX
5485 // // Conditional move double reg-reg
5486 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5487 // %{
5488 //     single_instruction;
5489 //     dst    : S4(write);
5490 //     src    : S3(read);
5491 //     cr     : S3(read);
5492 //     DECODE : S0;     // any decoder
5493 // %}
5494 
5495 // Float reg operation
// Counted as 2 instructions.  dst is only declared as a read (S3); no write
// stage is listed.
5496 pipe_class fpu_reg(regD dst)
5497 %{
5498     instruction_count(2);
5499     dst    : S3(read);
5500     DECODE : S0(2);     // any 2 decoders
5501     FPU    : S3;
5502 %}
5503 
5504 // Float reg-reg operation
// Counted as 2 instructions: src is read in S3, dst is written in S4.
5505 pipe_class fpu_reg_reg(regD dst, regD src)
5506 %{
5507     instruction_count(2);
5508     dst    : S4(write);
5509     src    : S3(read);
5510     DECODE : S0(2);     // any 2 decoders
5511     FPU    : S3;
5512 %}
5513 
5514 // Float reg-reg-reg operation
// Counted as 3 instructions: src1 and src2 are read in S3, dst is written
// in S4.
5515 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5516 %{
5517     instruction_count(3);
5518     dst    : S4(write);
5519     src1   : S3(read);
5520     src2   : S3(read);
5521     DECODE : S0(3);     // any 3 decoders
5522     FPU    : S3(2);
5523 %}
5524 
5525 // Float reg-reg-reg-reg operation
// Counted as 4 instructions: src1..src3 are read in S3, dst is written in S4.
// NOTE(review): the DECODE count below is 4 while its comment says 3 (and
// only three decoders are declared) -- confirm which is intended.
5526 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5527 %{
5528     instruction_count(4);
5529     dst    : S4(write);
5530     src1   : S3(read);
5531     src2   : S3(read);
5532     src3   : S3(read);
5533     DECODE : S0(4);     // any 3 decoders
5534     FPU    : S3(2);
5535 %}
5536 
5537 // Float reg-mem-reg-reg operation
// Counted as 4 instructions: the memory operand src1 and the registers src2
// and src3 are read in S3, dst is written in S4.  D0 is used in S0 and the
// remaining decode slots in S1.
5538 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5539 %{
5540     instruction_count(4);
5541     dst    : S4(write);
5542     src1   : S3(read);
5543     src2   : S3(read);
5544     src3   : S3(read);
5545     DECODE : S1(3);     // any 3 decoders
5546     D0     : S0;        // Big decoder only
5547     FPU    : S3(2);
5548     MEM    : S3;        // any mem
5549 %}
5550 
5551 // Float reg-mem operation
// Counted as 2 instructions: mem is read in S3, the FPU is used in S4 and
// dst is written in S5.
5552 pipe_class fpu_reg_mem(regD dst, memory mem)
5553 %{
5554     instruction_count(2);
5555     dst    : S5(write);
5556     mem    : S3(read);
5557     D0     : S0;        // big decoder only
5558     DECODE : S1;        // any decoder for FPU POP
5559     FPU    : S4;
5560     MEM    : S3;        // any mem
5561 %}
5562 
5563 // Float reg-reg-mem operation
// Counted as 3 instructions: src1 and mem are read in S3, the FPU is used
// in S4 and dst is written in S5.
5564 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5565 %{
5566     instruction_count(3);
5567     dst    : S5(write);
5568     src1   : S3(read);
5569     mem    : S3(read);
5570     D0     : S0;        // big decoder only
5571     DECODE : S1(2);     // any decoder for FPU POP
5572     FPU    : S4;
5573     MEM    : S3;        // any mem
5574 %}
5575 
5576 // Float mem-reg operation
// Counted as 2 instructions: the address operand mem is read in S3 and the
// register src is read in S5; no operand is declared as written.
5577 pipe_class fpu_mem_reg(memory mem, regD src)
5578 %{
5579     instruction_count(2);
5580     src    : S5(read);
5581     mem    : S3(read);
5582     DECODE : S0;        // any decoder for FPU PUSH
5583     D0     : S1;        // big decoder only
5584     FPU    : S4;
5585     MEM    : S3;        // any mem
5586 %}
5587 
// Float mem-reg-reg operation
// Counted as 3 instructions: src1, src2 and the address operand mem are all
// read in S3; no operand is declared as written.
5588 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5589 %{
5590     instruction_count(3);
5591     src1   : S3(read);
5592     src2   : S3(read);
5593     mem    : S3(read);
5594     DECODE : S0(2);     // any decoder for FPU PUSH
5595     D0     : S1;        // big decoder only
5596     FPU    : S4;
5597     MEM    : S3;        // any mem
5598 %}
5599 
// Float mem-reg-mem operation
// Counted as 3 instructions: src1 and the memory operand src2 are read in
// S3, the address operand mem in S4; the memory resource is reserved with a
// count of 2.
5600 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5601 %{
5602     instruction_count(3);
5603     src1   : S3(read);
5604     src2   : S3(read);
5605     mem    : S4(read);
5606     DECODE : S0;        // any decoder for FPU PUSH
5607     D0     : S0(2);     // big decoder only
5608     FPU    : S4;
5609     MEM    : S3(2);     // any mem
5610 %}
5611 
// Float mem-mem operation
// Counted as 2 instructions: src1 is read in S3 and dst (declared as a
// read of the address operand) in S4.  No FPU resource is reserved.
5612 pipe_class fpu_mem_mem(memory dst, memory src1)
5613 %{
5614     instruction_count(2);
5615     src1   : S3(read);
5616     dst    : S4(read);
5617     D0     : S0(2);     // big decoder only
5618     MEM    : S3(2);     // any mem
5619 %}
5620 
// Float mem-mem-mem operation
// Counted as 3 instructions: src1 and src2 are read in S3 and dst (declared
// as a read of the address operand) in S4.
5621 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5622 %{
5623     instruction_count(3);
5624     src1   : S3(read);
5625     src2   : S3(read);
5626     dst    : S4(read);
5627     D0     : S0(3);     // big decoder only
5628     FPU    : S4;
5629     MEM    : S3(3);     // any mem
5630 %}
5631 
// Memory operand and a register source, both read in S4. The constant implied
// by the class name has no operand of its own; MEM is requested with (2).
5632 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5633 %{
5634     instruction_count(3);
5635     src1   : S4(read);
5636     mem    : S4(read);
5637     DECODE : S0;        // any decoder for FPU PUSH
5638     D0     : S0(2);     // big decoder only
5639     FPU    : S4;
5640     MEM    : S3(2);     // any mem
5641 %}
5642 
5643 // Float load constant
// Two-instruction class: dst is written in S5; the load uses D0 in S0 and MEM
// in S3, followed by FPU in S4.
5644 pipe_class fpu_reg_con(regD dst)
5645 %{
5646     instruction_count(2);
5647     dst    : S5(write);
5648     D0     : S0;        // big decoder only for the load
5649     DECODE : S1;        // any decoder for FPU POP
5650     FPU    : S4;
5651     MEM    : S3;        // any mem
5652 %}
5653 
5654 // Float load constant
// Variant with an additional register source: dst is written in S5 and src is
// read in S3. Three-instruction class.
5655 pipe_class fpu_reg_reg_con(regD dst, regD src)
5656 %{
5657     instruction_count(3);
5658     dst    : S5(write);
5659     src    : S3(read);
5660     D0     : S0;        // big decoder only for the load
5661     DECODE : S1(2);     // any decoder for FPU POP
5662     FPU    : S4;
5663     MEM    : S3;        // any mem
5664 %}
5665 
5666 // UnConditional branch
// Single instruction; its only resource use is BR in S3. The label operand
// has no stage assignment.
5667 pipe_class pipe_jmp(label labl)
5668 %{
5669     single_instruction;
5670     BR   : S3;
5671 %}
5672 
5673 // Conditional branch
// Single instruction that reads the flags operand cr in S1 and uses BR in S3.
5674 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5675 %{
5676     single_instruction;
5677     cr    : S1(read);
5678     BR    : S3;
5679 %}
5680 
5681 // Allocation idiom
// Serializing single-instruction class with a fixed latency of 6: heap_ptr is
// read in S3 and dst is written in S5. Uses DECODE, D0, MEM, ALU and BR.
5682 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5683 %{
5684     instruction_count(1); force_serialization;
5685     fixed_latency(6);
5686     heap_ptr : S3(read);
5687     DECODE   : S0(3);
5688     D0       : S2;
5689     MEM      : S3;
5690     ALU      : S3(2);
5691     dst      : S5(write);
5692     BR       : S5;
5693 %}
5694 
5695 // Generic big/slow expanded idiom
// Catch-all class with no operands: modelled as 10 instructions spanning
// multiple bundles, serializing, with a fixed latency of 100.
5696 pipe_class pipe_slow()
5697 %{
5698     instruction_count(10); multiple_bundles; force_serialization;
5699     fixed_latency(100);
5700     D0  : S0(2);
5701     MEM : S3(2);
5702 %}
5703 
5704 // The real do-nothing guy
// Zero instructions, no operands and no resource usage.
5705 pipe_class empty()
5706 %{
5707     instruction_count(0);
5708 %}
5709 
5710 // Define the class for the Nop node
// MachNop is bound to the 'empty' pipe_class.
5711 define
5712 %{
5713    MachNop = empty;
5714 %}
5715 
5716 %}
5717 
5718 //----------INSTRUCTIONS-------------------------------------------------------
5719 //
5720 // match      -- States which machine-independent subtree may be replaced
5721 //               by this instruction.
5722 // ins_cost   -- The estimated cost of this instruction is used by instruction
5723 //               selection to identify a minimum cost tree of machine
5724 //               instructions that matches a tree of machine-independent
5725 //               instructions.
5726 // format     -- A string providing the disassembly for this instruction.
5727 //               The value of an instruction's operand may be inserted
5728 //               by referring to it with a '$' prefix.
5729 // opcode     -- Three instruction opcodes may be provided.  These are referred
5730 //               to within an encode class as $primary, $secondary, and $tertiary
5731 //               respectively.  The primary opcode is commonly used to
5732 //               indicate the type of machine instruction, while secondary
5733 //               and tertiary are often used for prefix options or addressing
5734 //               modes.
5735 // ins_encode -- A list of encode classes with parameters. The encode class
5736 //               name must have been defined in an 'enc_class' specification
5737 //               in the encode section of the architecture description.
5738 
5739 
5740 //----------Load/Store/Move Instructions---------------------------------------
5741 //----------Load Instructions--------------------------------------------------
5742 
5743 // Load Byte (8 bit signed)
// Matches LoadB into an int register and emits a single movsbl from $mem.
5744 instruct loadB(rRegI dst, memory mem)
5745 %{
5746   match(Set dst (LoadB mem));
5747
5748   ins_cost(125);
5749   format %{ "movsbl  $dst, $mem\t# byte" %}
5750
5751   ins_encode %{
5752     __ movsbl($dst$$Register, $mem$$Address);
5753   %}
5754
5755   ins_pipe(ialu_reg_mem);
5756 %}
5757 
5758 // Load Byte (8 bit signed) into Long Register
// Folds ConvI2L(LoadB mem) into one movsbq, so no separate int->long
// conversion instruction is needed.
5759 instruct loadB2L(rRegL dst, memory mem)
5760 %{
5761   match(Set dst (ConvI2L (LoadB mem)));
5762
5763   ins_cost(125);
5764   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5765
5766   ins_encode %{
5767     __ movsbq($dst$$Register, $mem$$Address);
5768   %}
5769
5770   ins_pipe(ialu_reg_mem);
5771 %}
5772 
5773 // Load Unsigned Byte (8 bit UNsigned)
// Matches LoadUB into an int register and emits a single movzbl from $mem.
5774 instruct loadUB(rRegI dst, memory mem)
5775 %{
5776   match(Set dst (LoadUB mem));
5777
5778   ins_cost(125);
5779   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5780
5781   ins_encode %{
5782     __ movzbl($dst$$Register, $mem$$Address);
5783   %}
5784
5785   ins_pipe(ialu_reg_mem);
5786 %}
5787 
5788 // Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUB mem) into one movzbq.
5789 instruct loadUB2L(rRegL dst, memory mem)
5790 %{
5791   match(Set dst (ConvI2L (LoadUB mem)));
5792
5793   ins_cost(125);
5794   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5795
5796   ins_encode %{
5797     __ movzbq($dst$$Register, $mem$$Address);
5798   %}
5799
5800   ins_pipe(ialu_reg_mem);
5801 %}
5802 
5803 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
// Folds ConvI2L(AndI(LoadUB mem, mask)) into a zero-extending byte load
// followed by a 32-bit and with the immediate. The and is why the flags
// operand cr is declared as killed.
5804 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5805   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5806   effect(KILL cr);
5807
5808   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5809             "andl    $dst, $mask" %}
5810   ins_encode %{
5811     Address byte_addr = $mem$$Address;
5812     __ movzbq($dst$$Register, byte_addr);
5813     __ andl($dst$$Register, $mask$$constant);
5814   %}
5815   ins_pipe(ialu_reg_mem);
5816 %}
5817 
5818 // Load Short (16 bit signed)
// Matches LoadS into an int register and emits a single movswl from $mem.
5819 instruct loadS(rRegI dst, memory mem)
5820 %{
5821   match(Set dst (LoadS mem));
5822
5823   ins_cost(125);
5824   format %{ "movswl $dst, $mem\t# short" %}
5825
5826   ins_encode %{
5827     __ movswl($dst$$Register, $mem$$Address);
5828   %}
5829
5830   ins_pipe(ialu_reg_mem);
5831 %}
5832 
5833 // Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (LoadS << 24) >> 24 idiom, where both shift counts are the same
// immI_24 operand, and replaces load + two shifts with one movsbl from $mem.
5834 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5835   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5836
5837   ins_cost(125);
5838   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5839   ins_encode %{
5840     __ movsbl($dst$$Register, $mem$$Address);
5841   %}
5842   ins_pipe(ialu_reg_mem);
5843 %}
5844 
5845 // Load Short (16 bit signed) into Long Register
// Folds ConvI2L(LoadS mem) into one movswq.
5846 instruct loadS2L(rRegL dst, memory mem)
5847 %{
5848   match(Set dst (ConvI2L (LoadS mem)));
5849
5850   ins_cost(125);
5851   format %{ "movswq $dst, $mem\t# short -> long" %}
5852
5853   ins_encode %{
5854     __ movswq($dst$$Register, $mem$$Address);
5855   %}
5856
5857   ins_pipe(ialu_reg_mem);
5858 %}
5859 
5860 // Load Unsigned Short/Char (16 bit UNsigned)
// Matches LoadUS into an int register and emits a single movzwl from $mem.
5861 instruct loadUS(rRegI dst, memory mem)
5862 %{
5863   match(Set dst (LoadUS mem));
5864
5865   ins_cost(125);
5866   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5867
5868   ins_encode %{
5869     __ movzwl($dst$$Register, $mem$$Address);
5870   %}
5871
5872   ins_pipe(ialu_reg_mem);
5873 %}
5874 
5875 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches the (LoadUS << 24) >> 24 idiom and replaces it with one movsbl
// from $mem; only the low byte survives the shifts, so signedness of the
// 16-bit load does not matter.
5876 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5877   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5878
5879   ins_cost(125);
5880   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5881   ins_encode %{
5882     __ movsbl($dst$$Register, $mem$$Address);
5883   %}
5884   ins_pipe(ialu_reg_mem);
5885 %}
5886 
5887 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUS mem) into one movzwq.
5888 instruct loadUS2L(rRegL dst, memory mem)
5889 %{
5890   match(Set dst (ConvI2L (LoadUS mem)));
5891
5892   ins_cost(125);
5893   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5894
5895   ins_encode %{
5896     __ movzwq($dst$$Register, $mem$$Address);
5897   %}
5898
5899   ins_pipe(ialu_reg_mem);
5900 %}
5901 
5902 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Masking with 0xFF keeps only the low byte, so the whole pattern becomes a
// single zero-extending byte load (movzbq). No and is emitted, hence no
// flags effect and no explicit ins_cost on this rule.
5903 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5904   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5905
5906   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5907   ins_encode %{
5908     __ movzbq($dst$$Register, $mem$$Address);
5909   %}
5910   ins_pipe(ialu_reg_mem);
5911 %}
5912 
5913 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
// Folds ConvI2L(AndI(LoadUS mem, mask)) into a zero-extending 16-bit load
// followed by a 32-bit and with the immediate. The and is why the flags
// operand cr is declared as killed.
5914 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5915   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5916   effect(KILL cr);
5917
5918   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5919             "andl    $dst, $mask" %}
5920   ins_encode %{
5921     const Register result = $dst$$Register;
5922     __ movzwq(result, $mem$$Address);
5923     __ andl(result, $mask$$constant);
5924   %}
5925   ins_pipe(ialu_reg_mem);
5926 %}
5927 
5928 // Load Integer
// Matches LoadI into an int register and emits a single movl from $mem.
5929 instruct loadI(rRegI dst, memory mem)
5930 %{
5931   match(Set dst (LoadI mem));
5932
5933   ins_cost(125);
5934   format %{ "movl    $dst, $mem\t# int" %}
5935
5936   ins_encode %{
5937     __ movl($dst$$Register, $mem$$Address);
5938   %}
5939
5940   ins_pipe(ialu_reg_mem);
5941 %}
5942 
5943 // Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches the (LoadI << 24) >> 24 idiom and replaces it with one movsbl
// from $mem.
5944 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5945   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5946
5947   ins_cost(125);
5948   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5949   ins_encode %{
5950     __ movsbl($dst$$Register, $mem$$Address);
5951   %}
5952   ins_pipe(ialu_reg_mem);
5953 %}
5954 
5955 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches LoadI & 0xFF (immI_255) and replaces load + and with one movzbl
// from $mem.
5956 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5957   match(Set dst (AndI (LoadI mem) mask));
5958
5959   ins_cost(125);
5960   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5961   ins_encode %{
5962     __ movzbl($dst$$Register, $mem$$Address);
5963   %}
5964   ins_pipe(ialu_reg_mem);
5965 %}
5966 
5967 // Load Integer (32 bit signed) to Short (16 bit signed)
// Matches the (LoadI << 16) >> 16 idiom, where both shift counts are the same
// immI_16 operand, and replaces it with one movswl from $mem.
5968 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5969   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5970
5971   ins_cost(125);
5972   format %{ "movswl  $dst, $mem\t# int -> short" %}
5973   ins_encode %{
5974     __ movswl($dst$$Register, $mem$$Address);
5975   %}
5976   ins_pipe(ialu_reg_mem);
5977 %}
5978 
5979 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches LoadI & 0xFFFF (immI_65535) and replaces load + and with one
// movzwl from $mem.
5980 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5981   match(Set dst (AndI (LoadI mem) mask));
5982
5983   ins_cost(125);
5984   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5985   ins_encode %{
5986     __ movzwl($dst$$Register, $mem$$Address);
5987   %}
5988   ins_pipe(ialu_reg_mem);
5989 %}
5990 
5991 // Load Integer into Long Register
// Folds ConvI2L(LoadI mem) into one sign-extending movslq.
5992 instruct loadI2L(rRegL dst, memory mem)
5993 %{
5994   match(Set dst (ConvI2L (LoadI mem)));
5995
5996   ins_cost(125);
5997   format %{ "movslq  $dst, $mem\t# int -> long" %}
5998
5999   ins_encode %{
6000     __ movslq($dst$$Register, $mem$$Address);
6001   %}
6002
6003   ins_pipe(ialu_reg_mem);
6004 %}
6005 
6006 // Load Integer with mask 0xFF into Long Register
// Masking with 0xFF keeps only the low byte, so the whole pattern becomes a
// single zero-extending byte load (movzbq); no and is emitted.
6007 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6008   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6009
6010   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6011   ins_encode %{
6012     __ movzbq($dst$$Register, $mem$$Address);
6013   %}
6014   ins_pipe(ialu_reg_mem);
6015 %}
6016 
6017 // Load Integer with mask 0xFFFF into Long Register
// Masking with 0xFFFF keeps only the low 16 bits, so the whole pattern
// becomes a single zero-extending 16-bit load (movzwq); no and is emitted.
6018 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6019   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6020
6021   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6022   ins_encode %{
6023     __ movzwq($dst$$Register, $mem$$Address);
6024   %}
6025   ins_pipe(ialu_reg_mem);
6026 %}
6027 
6028 // Load Integer with a 32-bit mask into Long Register
// Matches ConvI2L(AndI(LoadI mem, mask)) for an arbitrary 32-bit mask.
// The masked int is negative whenever both the loaded value and the mask
// have bit 31 set, so the long result must be sign-extended. The load is
// therefore sign-extending (movslq) and the mask is applied with a 64-bit
// and, whose imm32 is itself sign-extended: sext(v) & sext(m) == sext(v & m).
// A movl/andl pair would clear the upper 32 bits and produce a wrong
// positive long for such inputs.
6029 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6030   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6031   effect(KILL cr);  // the and modifies the flags
6032
6033   format %{ "movslq  $dst, $mem\t# int & 32-bit mask -> long\n\t"
6034             "andq    $dst, $mask" %}
6035   ins_encode %{
6036     Register Rdst = $dst$$Register;
6037     __ movslq(Rdst, $mem$$Address);
6038     __ andq(Rdst, $mask$$constant);
6039   %}
6040   ins_pipe(ialu_reg_mem);
6041 %}
6042 
6043 // Load Unsigned Integer into Long Register
// Matches the LoadUI2L ideal node and emits a plain movl from $mem into the
// long register.
6044 instruct loadUI2L(rRegL dst, memory mem)
6045 %{
6046   match(Set dst (LoadUI2L mem));
6047
6048   ins_cost(125);
6049   format %{ "movl    $dst, $mem\t# uint -> long" %}
6050
6051   ins_encode %{
6052     __ movl($dst$$Register, $mem$$Address);
6053   %}
6054
6055   ins_pipe(ialu_reg_mem);
6056 %}
6057 
6058 // Load Long
// Matches LoadL into a long register and emits a single movq from $mem.
6059 instruct loadL(rRegL dst, memory mem)
6060 %{
6061   match(Set dst (LoadL mem));
6062
6063   ins_cost(125);
6064   format %{ "movq    $dst, $mem\t# long" %}
6065
6066   ins_encode %{
6067     __ movq($dst$$Register, $mem$$Address);
6068   %}
6069
6070   ins_pipe(ialu_reg_mem); // XXX
6071 %}
6072 
6073 // Load Range
// Matches LoadRange into an int register. Encoded from enc_classes rather
// than an assembler call: REX_reg_mem, primary opcode 0x8B, then reg_mem.
6074 instruct loadRange(rRegI dst, memory mem)
6075 %{
6076   match(Set dst (LoadRange mem));
6077
6078   ins_cost(125); // XXX
6079   format %{ "movl    $dst, $mem\t# range" %}
6080   opcode(0x8B);
6081   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6082   ins_pipe(ialu_reg_mem);
6083 %}
6084 
6085 // Load Pointer
// Matches LoadP into a pointer register. Same primary opcode (0x8B) as the
// 32-bit loads, but uses the REX_reg_mem_wide enc_class for the movq form.
6086 instruct loadP(rRegP dst, memory mem)
6087 %{
6088   match(Set dst (LoadP mem));
6089
6090   ins_cost(125); // XXX
6091   format %{ "movq    $dst, $mem\t# ptr" %}
6092   opcode(0x8B);
6093   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6094   ins_pipe(ialu_reg_mem); // XXX
6095 %}
6096 
6097 // Load Compressed Pointer
// Matches LoadN into a narrow-oop register (rRegN) and emits a 32-bit movl.
6098 instruct loadN(rRegN dst, memory mem)
6099 %{
6100    match(Set dst (LoadN mem));
6101
6102    ins_cost(125); // XXX
6103    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6104    ins_encode %{
6105      __ movl($dst$$Register, $mem$$Address);
6106    %}
6107    ins_pipe(ialu_reg_mem); // XXX
6108 %}
6109 
6110 
6111 // Load Klass Pointer
// Matches LoadKlass into a pointer register; encoded exactly like loadP
// (REX_reg_mem_wide, primary opcode 0x8B, reg_mem).
6112 instruct loadKlass(rRegP dst, memory mem)
6113 %{
6114   match(Set dst (LoadKlass mem));
6115
6116   ins_cost(125); // XXX
6117   format %{ "movq    $dst, $mem\t# class" %}
6118   opcode(0x8B);
6119   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6120   ins_pipe(ialu_reg_mem); // XXX
6121 %}
6122 
6123 // Load narrow Klass Pointer
// Matches LoadNKlass into a narrow register (rRegN) and emits a 32-bit movl.
6124 instruct loadNKlass(rRegN dst, memory mem)
6125 %{
6126   match(Set dst (LoadNKlass mem));
6127
6128   ins_cost(125); // XXX
6129   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6130   ins_encode %{
6131     __ movl($dst$$Register, $mem$$Address);
6132   %}
6133   ins_pipe(ialu_reg_mem); // XXX
6134 %}
6135 
6136 // Load Float
// Matches LoadF into a float register (movss). The three opcode bytes are
// emitted as OpcP (0xF3) before the REX prefix, then OpcS/OpcT (0x0F 0x10).
6137 instruct loadF(regF dst, memory mem)
6138 %{
6139   match(Set dst (LoadF mem));
6140
6141   ins_cost(145); // XXX
6142   format %{ "movss   $dst, $mem\t# float" %}
6143   opcode(0xF3, 0x0F, 0x10);
6144   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6145   ins_pipe(pipe_slow); // XXX
6146 %}
6147 
6148 // Load Double
// Variant selected when UseXmmLoadAndClearUpper is false: matches LoadD and
// emits movlpd (0x66 0x0F 0x12). The loadD rule covers the opposite setting.
6149 instruct loadD_partial(regD dst, memory mem)
6150 %{
6151   predicate(!UseXmmLoadAndClearUpper);
6152   match(Set dst (LoadD mem));
6153
6154   ins_cost(145); // XXX
6155   format %{ "movlpd  $dst, $mem\t# double" %}
6156   opcode(0x66, 0x0F, 0x12);
6157   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6158   ins_pipe(pipe_slow); // XXX
6159 %}
6160 
// Load Double, variant selected when UseXmmLoadAndClearUpper is true:
// matches LoadD and emits movsd (0xF2 0x0F 0x10). Complements loadD_partial.
6161 instruct loadD(regD dst, memory mem)
6162 %{
6163   predicate(UseXmmLoadAndClearUpper);
6164   match(Set dst (LoadD mem));
6165
6166   ins_cost(145); // XXX
6167   format %{ "movsd   $dst, $mem\t# double" %}
6168   opcode(0xF2, 0x0F, 0x10);
6169   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6170   ins_pipe(pipe_slow); // XXX
6171 %}
6172 
6173 // Load Aligned Packed Byte to XMM register
// Matches Load8B into a regD operand; encoded by the movq_ld enc_class.
6174 instruct loadA8B(regD dst, memory mem) %{
6175   match(Set dst (Load8B mem));
6176   ins_cost(125);
6177   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6178   ins_encode( movq_ld(dst, mem));
6179   ins_pipe( pipe_slow );
6180 %}
6181 
6182 // Load Aligned Packed Short to XMM register
// Matches Load4S into a regD operand; encoded by the movq_ld enc_class.
6183 instruct loadA4S(regD dst, memory mem) %{
6184   match(Set dst (Load4S mem));
6185   ins_cost(125);
6186   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6187   ins_encode( movq_ld(dst, mem));
6188   ins_pipe( pipe_slow );
6189 %}
6190 
6191 // Load Aligned Packed Char to XMM register
// Matches Load4C into a regD operand; encoded by the movq_ld enc_class.
6192 instruct loadA4C(regD dst, memory mem) %{
6193   match(Set dst (Load4C mem));
6194   ins_cost(125);
6195   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6196   ins_encode( movq_ld(dst, mem));
6197   ins_pipe( pipe_slow );
6198 %}
6199 
6200 // Load Aligned Packed Integer to XMM register
// Matches Load2I into a regD operand; encoded by the movq_ld enc_class.
// NOTE(review): the name load2IU breaks the loadA<n><T> pattern of the
// sibling rules -- renaming would need a check for references elsewhere.
6201 instruct load2IU(regD dst, memory mem) %{
6202   match(Set dst (Load2I mem));
6203   ins_cost(125);
6204   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6205   ins_encode( movq_ld(dst, mem));
6206   ins_pipe( pipe_slow );
6207 %}
6208 
6209 // Load Aligned Packed Single to XMM
// Matches Load2F into a regD operand; encoded by the movq_ld enc_class.
// Costed at 145 where the integer packed loads use 125.
6210 instruct loadA2F(regD dst, memory mem) %{
6211   match(Set dst (Load2F mem));
6212   ins_cost(145);
6213   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6214   ins_encode( movq_ld(dst, mem));
6215   ins_pipe( pipe_slow );
6216 %}
6217 
6218 // Load Effective Address
// Matches an indOffset8 address operand used as a value (Set dst mem) and
// materializes it with leaq: REX_reg_mem_wide, primary opcode 0x8D, reg_mem.
6219 instruct leaP8(rRegP dst, indOffset8 mem)
6220 %{
6221   match(Set dst mem);
6222
6223   ins_cost(110); // XXX
6224   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6225   opcode(0x8D);
6226   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6227   ins_pipe(ialu_reg_reg_fat);
6228 %}
6229 
// Load Effective Address for an indOffset32 operand; same leaq encoding as
// leaP8 (REX_reg_mem_wide, primary opcode 0x8D, reg_mem).
6230 instruct leaP32(rRegP dst, indOffset32 mem)
6231 %{
6232   match(Set dst mem);
6233
6234   ins_cost(110);
6235   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6236   opcode(0x8D);
6237   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6238   ins_pipe(ialu_reg_reg_fat);
6239 %}
6240 
6241 // instruct leaPIdx(rRegP dst, indIndex mem)
6242 // %{
6243 //   match(Set dst mem);
6244 
6245 //   ins_cost(110);
6246 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6247 //   opcode(0x8D);
6248 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6249 //   ins_pipe(ialu_reg_reg_fat);
6250 // %}
6251 
// Load Effective Address for an indIndexOffset operand; leaq encoded via
// REX_reg_mem_wide, primary opcode 0x8D, reg_mem.
6252 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6253 %{
6254   match(Set dst mem);
6255
6256   ins_cost(110);
6257   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6258   opcode(0x8D);
6259   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6260   ins_pipe(ialu_reg_reg_fat);
6261 %}
6262 
// Load Effective Address for an indIndexScale operand; leaq encoded via
// REX_reg_mem_wide, primary opcode 0x8D, reg_mem.
6263 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6264 %{
6265   match(Set dst mem);
6266
6267   ins_cost(110);
6268   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6269   opcode(0x8D);
6270   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6271   ins_pipe(ialu_reg_reg_fat);
6272 %}
6273 
// Load Effective Address for an indIndexScaleOffset operand; leaq encoded
// via REX_reg_mem_wide, primary opcode 0x8D, reg_mem.
6274 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6275 %{
6276   match(Set dst mem);
6277
6278   ins_cost(110);
6279   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6280   opcode(0x8D);
6281   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6282   ins_pipe(ialu_reg_reg_fat);
6283 %}
6284 
// Load Effective Address for an indPosIndexScaleOffset operand; leaq encoded
// via REX_reg_mem_wide, primary opcode 0x8D, reg_mem.
6285 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6286 %{
6287   match(Set dst mem);
6288
6289   ins_cost(110);
6290   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6291   opcode(0x8D);
6292   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6293   ins_pipe(ialu_reg_reg_fat);
6294 %}
6295 
6296 // Load Effective Address which uses Narrow (32-bits) oop
// Only enabled when compressed oops are in use with a non-zero shift; the
// *Narrow lea rules are the ones predicated on the zero-shift case.
6297 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6298 %{
6299   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6300   match(Set dst mem);
6301
6302   ins_cost(110);
6303   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6304   opcode(0x8D);
6305   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6306   ins_pipe(ialu_reg_reg_fat);
6307 %}
6308 
// Load Effective Address for an indOffset8Narrow operand. Only enabled when
// Universe::narrow_oop_shift() is 0. Same leaq encoding as leaP8.
6309 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6310 %{
6311   predicate(Universe::narrow_oop_shift() == 0);
6312   match(Set dst mem);
6313
6314   ins_cost(110); // XXX
6315   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6316   opcode(0x8D);
6317   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6318   ins_pipe(ialu_reg_reg_fat);
6319 %}
6320 
// Load Effective Address for an indOffset32Narrow operand. Only enabled when
// Universe::narrow_oop_shift() is 0.
6321 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6322 %{
6323   predicate(Universe::narrow_oop_shift() == 0);
6324   match(Set dst mem);
6325
6326   ins_cost(110);
6327   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6328   opcode(0x8D);
6329   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6330   ins_pipe(ialu_reg_reg_fat);
6331 %}
6332 
// Load Effective Address for an indIndexOffsetNarrow operand. Only enabled
// when Universe::narrow_oop_shift() is 0.
6333 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6334 %{
6335   predicate(Universe::narrow_oop_shift() == 0);
6336   match(Set dst mem);
6337
6338   ins_cost(110);
6339   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6340   opcode(0x8D);
6341   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6342   ins_pipe(ialu_reg_reg_fat);
6343 %}
6344 
// Load Effective Address for an indIndexScaleNarrow operand. Only enabled
// when Universe::narrow_oop_shift() is 0.
6345 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6346 %{
6347   predicate(Universe::narrow_oop_shift() == 0);
6348   match(Set dst mem);
6349
6350   ins_cost(110);
6351   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6352   opcode(0x8D);
6353   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6354   ins_pipe(ialu_reg_reg_fat);
6355 %}
6356 
// Load Effective Address for an indIndexScaleOffsetNarrow operand. Only
// enabled when Universe::narrow_oop_shift() is 0.
6357 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6358 %{
6359   predicate(Universe::narrow_oop_shift() == 0);
6360   match(Set dst mem);
6361
6362   ins_cost(110);
6363   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6364   opcode(0x8D);
6365   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6366   ins_pipe(ialu_reg_reg_fat);
6367 %}
6368 
// Load Effective Address for an indPosIndexScaleOffsetNarrow operand. Only
// enabled when Universe::narrow_oop_shift() is 0.
6369 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6370 %{
6371   predicate(Universe::narrow_oop_shift() == 0);
6372   match(Set dst mem);
6373
6374   ins_cost(110);
6375   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6376   opcode(0x8D);
6377   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6378   ins_pipe(ialu_reg_reg_fat);
6379 %}
6380 
// Load an arbitrary int constant (immI) into an int register via the
// load_immI enc_class. No ins_cost is given, so the default cost applies;
// loadConI0 handles zero at an explicit cost of 50.
6381 instruct loadConI(rRegI dst, immI src)
6382 %{
6383   match(Set dst src);
6384
6385   format %{ "movl    $dst, $src\t# int" %}
6386   ins_encode(load_immI(dst, src));
6387   ins_pipe(ialu_reg_fat); // XXX
6388 %}
6389 
// Load int constant zero (immI0) by xor-ing the register with itself.
// The xor modifies the flags, hence KILL cr.
6390 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6391 %{
6392   match(Set dst src);
6393   effect(KILL cr);
6394
6395   ins_cost(50);
6396   format %{ "xorl    $dst, $dst\t# int" %}
6397   opcode(0x33); /* + rd */
6398   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6399   ins_pipe(ialu_reg);
6400 %}
6401 
// Load an arbitrary long constant (immL) via the load_immL enc_class.
// At cost 150 this is the most expensive long-constant rule; loadConL0,
// loadConUL32 and loadConL32 cover narrower constants at lower cost.
6402 instruct loadConL(rRegL dst, immL src)
6403 %{
6404   match(Set dst src);
6405
6406   ins_cost(150);
6407   format %{ "movq    $dst, $src\t# long" %}
6408   ins_encode(load_immL(dst, src));
6409   ins_pipe(ialu_reg);
6410 %}
6411 
// Load long constant zero (immL0) with a 32-bit xorl (non-wide REX_reg_reg).
// This relies on x86-64 zero-extending 32-bit results into the full 64-bit
// register. The xor modifies the flags, hence KILL cr.
6412 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6413 %{
6414   match(Set dst src);
6415   effect(KILL cr);
6416
6417   ins_cost(50);
6418   format %{ "xorl    $dst, $dst\t# long" %}
6419   opcode(0x33); /* + rd */
6420   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6421   ins_pipe(ialu_reg); // XXX
6422 %}
6423 
// Load a long constant that fits the immUL32 operand (unsigned 32-bit, per
// the format string) via the load_immUL32 enc_class. Cost 60.
6424 instruct loadConUL32(rRegL dst, immUL32 src)
6425 %{
6426   match(Set dst src);
6427
6428   ins_cost(60);
6429   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6430   ins_encode(load_immUL32(dst, src));
6431   ins_pipe(ialu_reg);
6432 %}
6433 
// Load a long constant that fits the immL32 operand via the load_immL32
// enc_class. Cost 70, i.e. slightly dearer than loadConUL32 (60).
6434 instruct loadConL32(rRegL dst, immL32 src)
6435 %{
6436   match(Set dst src);
6437
6438   ins_cost(70);
6439   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6440   ins_encode(load_immL32(dst, src));
6441   ins_pipe(ialu_reg);
6442 %}
6443 
// Load an arbitrary pointer constant (immP) via the load_immP enc_class.
// No ins_cost is given, so the default cost applies.
6444 instruct loadConP(rRegP dst, immP con) %{
6445   match(Set dst con);
6446
6447   format %{ "movq    $dst, $con\t# ptr" %}
6448   ins_encode(load_immP(dst, con));
6449   ins_pipe(ialu_reg_fat); // XXX
6450 %}
6451 
// Load the NULL pointer constant (immP0) with a 32-bit xorl, relying on
// x86-64 zero-extension of 32-bit results. The xor modifies the flags,
// hence KILL cr.
6452 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6453 %{
6454   match(Set dst src);
6455   effect(KILL cr);
6456
6457   ins_cost(50);
6458   format %{ "xorl    $dst, $dst\t# ptr" %}
6459   opcode(0x33); /* + rd */
6460   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6461   ins_pipe(ialu_reg);
6462 %}
6463 
// Load the address of the safepoint polling page. The immP_poll operand only
// selects the rule; the address itself comes from os::get_polling_page() and
// is wrapped in an AddressLiteral carrying poll_type relocation info.
6464 instruct loadConP_poll(rRegP dst, immP_poll src) %{
6465   match(Set dst src);
6466   format %{ "movq    $dst, $src\t!ptr" %}
6467   ins_encode %{
6468     Register Rdst = $dst$$Register;
6469     __ lea(Rdst, AddressLiteral(os::get_polling_page(), relocInfo::poll_type));
6470   %}
6471   ins_pipe(ialu_reg_fat);
6472 %}
6473 
// Load a pointer constant that fits the immP31 operand (positive 32-bit, per
// the format string) via the load_immP31 enc_class. Cost 60.
// NOTE(review): the format shows a plain movl, which does not touch the
// flags; KILL cr may be merely conservative -- confirm against load_immP31.
6474 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6475 %{
6476   match(Set dst src);
6477   effect(KILL cr);
6478
6479   ins_cost(60);
6480   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6481   ins_encode(load_immP31(dst, src));
6482   ins_pipe(ialu_reg);
6483 %}
6484 
// Load a float constant (immF) from the constant table: the encoder calls
// movflt with the address $constantaddress($con).
6485 instruct loadConF(regF dst, immF con) %{
6486   match(Set dst con);
6487   ins_cost(125);
6488   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6489   ins_encode %{
6490     __ movflt($dst$$XMMRegister, $constantaddress($con));
6491   %}
6492   ins_pipe(pipe_slow);
6493 %}
6494 
// Load the compressed NULL pointer (immN0) by xor-ing the register with
// itself. The format string names $dst for both operands so the printed
// disassembly matches the emitted xorq dst, dst. The xor modifies the flags,
// hence KILL cr.
6495 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6496   match(Set dst src);
6497   effect(KILL cr);
6498   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6499   ins_encode %{
6500     __ xorq($dst$$Register, $dst$$Register);
6501   %}
6502   ins_pipe(ialu_reg);
6503 %}
6504 
// Load a non-NULL compressed oop constant (immN) via set_narrow_oop.
// A NULL constant is not expected to reach this rule (loadConN0 matches the
// immN0 operand instead); if one does, code emission stops in
// ShouldNotReachHere().
6505 instruct loadConN(rRegN dst, immN src) %{
6506   match(Set dst src);
6507
6508   format %{ "movl    $dst, $src\t# compressed ptr" %}
6509   ins_cost(125);
6510   ins_encode %{
6511     jobject narrow_con = (jobject)$src$$constant;
6512     if (narrow_con != NULL) {
6513       __ set_narrow_oop($dst$$Register, narrow_con);
6514     } else {
6515       ShouldNotReachHere();
6516     }
6517   %}
6518   ins_pipe(ialu_reg_fat); // XXX
6519 %}
6520 
// Load float constant 0.0 (immF0) with xorps dst, dst (0x0F 0x57) instead
// of a constant-table load. Cost 100, below loadConF's 125.
6521 instruct loadConF0(regF dst, immF0 src)
6522 %{
6523   match(Set dst src);
6524   ins_cost(100);
6525
6526   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6527   opcode(0x0F, 0x57);
6528   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6529   ins_pipe(pipe_slow);
6530 %}
6531 
6532 // Use the same format since predicate() can not be used here.
// Load a double constant (immD) from the constant table: the encoder calls
// movdbl with the address $constantaddress($con).
// NOTE(review): the format always prints movsd; presumably movdbl may pick
// a different instruction depending on VM flags -- confirm in the assembler.
6533 instruct loadConD(regD dst, immD con) %{
6534   match(Set dst con);
6535   ins_cost(125);
6536   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6537   ins_encode %{
6538     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6539   %}
6540   ins_pipe(pipe_slow);
6541 %}
6542 
// Load double constant 0.0 (immD0) with xorpd dst, dst (0x66 0x0F 0x57)
// instead of a constant-table load. Cost 100, below loadConD's 125.
6543 instruct loadConD0(regD dst, immD0 src)
6544 %{
6545   match(Set dst src);
6546   ins_cost(100);
6547
6548   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6549   opcode(0x66, 0x0F, 0x57);
6550   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6551   ins_pipe(pipe_slow);
6552 %}
6553 
// Load an int from a stack slot operand (stackSlotI) into a register:
// movl encoded via REX_reg_mem, primary opcode 0x8B, reg_mem.
6554 instruct loadSSI(rRegI dst, stackSlotI src)
6555 %{
6556   match(Set dst src);
6557
6558   ins_cost(125);
6559   format %{ "movl    $dst, $src\t# int stk" %}
6560   opcode(0x8B);
6561   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6562   ins_pipe(ialu_reg_mem);
6563 %}
6564 
// Load a long from a stack slot operand (stackSlotL) into a register:
// movq encoded via REX_reg_mem_wide, primary opcode 0x8B, reg_mem.
6565 instruct loadSSL(rRegL dst, stackSlotL src)
6566 %{
6567   match(Set dst src);
6568
6569   ins_cost(125);
6570   format %{ "movq    $dst, $src\t# long stk" %}
6571   opcode(0x8B);
6572   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6573   ins_pipe(ialu_reg_mem);
6574 %}
6575 
// Load a pointer from a stack slot operand (stackSlotP) into a register:
// movq encoded via REX_reg_mem_wide, primary opcode 0x8B, reg_mem.
6576 instruct loadSSP(rRegP dst, stackSlotP src)
6577 %{
6578   match(Set dst src);
6579
6580   ins_cost(125);
6581   format %{ "movq    $dst, $src\t# ptr stk" %}
6582   opcode(0x8B);
6583   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6584   ins_pipe(ialu_reg_mem);
6585 %}
6586 
// Load a float from a stack slot operand (stackSlotF) into a float
// register: movss encoded as 0xF3, REX prefix, 0x0F 0x10, reg_mem.
6587 instruct loadSSF(regF dst, stackSlotF src)
6588 %{
6589   match(Set dst src);
6590
6591   ins_cost(125);
6592   format %{ "movss   $dst, $src\t# float stk" %}
6593   opcode(0xF3, 0x0F, 0x10);
6594   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6595   ins_pipe(pipe_slow); // XXX
6596 %}
6597 
6598 // Use the same format since predicate() can not be used here.
// Load a double from a stack slot operand (stackSlotD). The address is built
// explicitly as rsp + $src$$disp and passed to movdbl.
// NOTE(review): the format always prints movsd; presumably movdbl may pick
// a different instruction depending on VM flags -- confirm in the assembler.
6599 instruct loadSSD(regD dst, stackSlotD src)
6600 %{
6601   match(Set dst src);
6602
6603   ins_cost(125);
6604   format %{ "movsd   $dst, $src\t# double stk" %}
6605   ins_encode  %{
6606     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6607   %}
6608   ins_pipe(pipe_slow); // XXX
6609 %}
6610 
6611 // Prefetch instructions.
6612 // Must be safe to execute with invalid address (cannot fault).
6613 
// Read prefetch selected when ReadPrefetchInstr == 3: opcode bytes 0x0F 0x0D
// with /0 in the ModRM reg field (RM_opc_mem(0x00, mem)).
6614 instruct prefetchr( memory mem ) %{
6615   predicate(ReadPrefetchInstr==3);
6616   match(PrefetchRead mem);
6617   ins_cost(125);
6618
6619   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6620   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6621   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6622   ins_pipe(ialu_mem);
6623 %}
6624 
// Read prefetch selected when ReadPrefetchInstr == 0: PREFETCHNTA, opcode
// bytes 0x0F 0x18 with /0 in the ModRM reg field.
6625 instruct prefetchrNTA( memory mem ) %{
6626   predicate(ReadPrefetchInstr==0);
6627   match(PrefetchRead mem);
6628   ins_cost(125);
6629
6630   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6631   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6632   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6633   ins_pipe(ialu_mem);
6634 %}
6635 
// Read prefetch selected when ReadPrefetchInstr == 1: PREFETCHT0, opcode
// bytes 0x0F 0x18 with /1 in the ModRM reg field.
6636 instruct prefetchrT0( memory mem ) %{
6637   predicate(ReadPrefetchInstr==1);
6638   match(PrefetchRead mem);
6639   ins_cost(125);
6640
6641   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6642   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6643   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6644   ins_pipe(ialu_mem);
6645 %}
6646 
// Read prefetch selected when ReadPrefetchInstr == 2: PREFETCHT2, opcode
// bytes 0x0F 0x18 with /3 in the ModRM reg field.
6647 instruct prefetchrT2( memory mem ) %{
6648   predicate(ReadPrefetchInstr==2);
6649   match(PrefetchRead mem);
6650   ins_cost(125);
6651
6652   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6653   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6654   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6655   ins_pipe(ialu_mem);
6656 %}
6657 
// Write prefetch selected when AllocatePrefetchInstr == 3: PREFETCHW, opcode
// bytes 0x0F 0x0D with /1 in the ModRM reg field.
6658 instruct prefetchw( memory mem ) %{
6659   predicate(AllocatePrefetchInstr==3);
6660   match(PrefetchWrite mem);
6661   ins_cost(125);
6662
6663   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6664   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6665   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6666   ins_pipe(ialu_mem);
6667 %}
6668 
// Write prefetch selected when AllocatePrefetchInstr == 0: PREFETCHNTA,
// opcode bytes 0x0F 0x18 with /0 in the ModRM reg field.
6669 instruct prefetchwNTA( memory mem ) %{
6670   predicate(AllocatePrefetchInstr==0);
6671   match(PrefetchWrite mem);
6672   ins_cost(125);
6673
6674   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6675   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6676   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6677   ins_pipe(ialu_mem);
6678 %}
6679 
// Write prefetch selected when AllocatePrefetchInstr == 1: PREFETCHT0,
// opcode bytes 0x0F 0x18 with /1 in the ModRM reg field.
6680 instruct prefetchwT0( memory mem ) %{
6681   predicate(AllocatePrefetchInstr==1);
6682   match(PrefetchWrite mem);
6683   ins_cost(125);
6684
6685   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6686   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6687   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6688   ins_pipe(ialu_mem);
6689 %}
6690 
6691 instruct prefetchwT2( memory mem ) %{
       // Write prefetch chosen when AllocatePrefetchInstr==2: matches
       // PrefetchWrite and encodes opcode 0F 18 with extension /3 (PREFETCHT2).
6692   predicate(AllocatePrefetchInstr==2);
6693   match(PrefetchWrite mem);
6694   ins_cost(125);
6695 
6696   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6697   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6698   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6699   ins_pipe(ialu_mem);
6700 %}
6701 
6702 //----------Store Instructions-------------------------------------------------
6703 
6704 // Store Byte
     // Matches StoreB of an int register and emits opcode 0x88 (shown as movb).
     // Uses the byte-register prefix encoder REX_breg_mem, unlike the wider
     // register stores below which use REX_reg_mem.
6705 instruct storeB(memory mem, rRegI src)
6706 %{
6707   match(Set mem (StoreB mem src));
6708 
6709   ins_cost(125); // XXX
6710   format %{ "movb    $mem, $src\t# byte" %}
6711   opcode(0x88);
6712   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6713   ins_pipe(ialu_mem_reg);
6714 %}
6715 
6716 // Store Char/Short
     // Matches StoreC of an int register. Same opcode (0x89) as the int store,
     // but the encoding begins with SizePrefix (presumably the 16-bit
     // operand-size prefix; the enc_class is defined elsewhere in this file).
6717 instruct storeC(memory mem, rRegI src)
6718 %{
6719   match(Set mem (StoreC mem src));
6720 
6721   ins_cost(125); // XXX
6722   format %{ "movw    $mem, $src\t# char/short" %}
6723   opcode(0x89);
6724   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6725   ins_pipe(ialu_mem_reg);
6726 %}
6727 
6728 // Store Integer
     // Matches StoreI of an int register and emits opcode 0x89 (shown as movl)
     // with the non-wide REX_reg_mem prefix encoder.
6729 instruct storeI(memory mem, rRegI src)
6730 %{
6731   match(Set mem (StoreI mem src));
6732 
6733   ins_cost(125); // XXX
6734   format %{ "movl    $mem, $src\t# int" %}
6735   opcode(0x89);
6736   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6737   ins_pipe(ialu_mem_reg);
6738 %}
6739 
6740 // Store Long
     // Matches StoreL of a long register. Same opcode (0x89) as storeI, but the
     // prefix comes from REX_reg_mem_wide and the format shows movq.
6741 instruct storeL(memory mem, rRegL src)
6742 %{
6743   match(Set mem (StoreL mem src));
6744 
6745   ins_cost(125); // XXX
6746   format %{ "movq    $mem, $src\t# long" %}
6747   opcode(0x89);
6748   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6749   ins_pipe(ialu_mem_reg); // XXX
6750 %}
6751 
6752 // Store Pointer
     // Matches StoreP of a pointer register (operand class any_RegP) and emits
     // the same wide 0x89 store as storeL.
6753 instruct storeP(memory mem, any_RegP src)
6754 %{
6755   match(Set mem (StoreP mem src));
6756 
6757   ins_cost(125); // XXX
6758   format %{ "movq    $mem, $src\t# ptr" %}
6759   opcode(0x89);
6760   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6761   ins_pipe(ialu_mem_reg);
6762 %}
6763 
6764 instruct storeImmP0(memory mem, immP0 zero)
6765 %{
       // Stores a NULL pointer by writing register r12 instead of an immediate.
       // Only enabled with compressed oops and a NULL narrow-oop base; the
       // format string records the assumption that R12 (heap base) is then 0.
       // Its cost (125) is lower than the immediate form storeImmP (150).
6766   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6767   match(Set mem (StoreP mem zero));
6768 
6769   ins_cost(125); // XXX
6770   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6771   ins_encode %{
6772     __ movq($mem$$Address, r12);
6773   %}
6774   ins_pipe(ialu_mem_reg);
6775 %}
6776 
6777 // Store NULL Pointer, mark word, or other simple pointer constant.
     // Matches StoreP of an immP31 constant and emits opcode C7 /0 with a wide
     // REX prefix followed by a 32-bit immediate (Con32).
6778 instruct storeImmP(memory mem, immP31 src)
6779 %{
6780   match(Set mem (StoreP mem src));
6781 
6782   ins_cost(150); // XXX
6783   format %{ "movq    $mem, $src\t# ptr" %}
6784   opcode(0xC7); /* C7 /0 */
6785   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6786   ins_pipe(ialu_mem_imm);
6787 %}
6788 
6789 // Store Compressed Pointer
     // Matches StoreN of a narrow-oop register and emits a 32-bit movl through
     // the macro assembler.
6790 instruct storeN(memory mem, rRegN src)
6791 %{
6792   match(Set mem (StoreN mem src));
6793 
6794   ins_cost(125); // XXX
6795   format %{ "movl    $mem, $src\t# compressed ptr" %}
6796   ins_encode %{
6797     __ movl($mem$$Address, $src$$Register);
6798   %}
6799   ins_pipe(ialu_mem_reg);
6800 %}
6801 
6802 instruct storeImmN0(memory mem, immN0 zero)
6803 %{
       // Stores a zero narrow oop by writing the low 32 bits of r12. Enabled only
       // when the narrow-oop base is NULL; the format string records the
       // assumption that R12 (heap base) is then 0. Cheaper (125) than the
       // immediate form storeImmN (150).
6804   predicate(Universe::narrow_oop_base() == NULL);
6805   match(Set mem (StoreN mem zero));
6806 
6807   ins_cost(125); // XXX
6808   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6809   ins_encode %{
6810     __ movl($mem$$Address, r12);
6811   %}
6812   ins_pipe(ialu_mem_reg);
6813 %}
6814 
6815 instruct storeImmN(memory mem, immN src)
6816 %{
       // Stores a narrow-oop constant. A non-NULL constant is emitted through
       // set_narrow_oop; a NULL constant is written as a plain 32-bit zero.
6817   match(Set mem (StoreN mem src));
6818 
6819   ins_cost(150); // XXX
6820   format %{ "movl    $mem, $src\t# compressed ptr" %}
6821   ins_encode %{
6822     address oop_addr = (address)$src$$constant;
6823     if (oop_addr != NULL) {
6824       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6825     } else {
6826       __ movl($mem$$Address, (int32_t)0);
6827     }
6828   %}
6829   ins_pipe(ialu_mem_imm);
6830 %}
6831 
6832 // Store Integer Immediate
     // Zero variant: writes the low 32 bits of r12 instead of an immediate 0.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0.
6833 instruct storeImmI0(memory mem, immI0 zero)
6834 %{
6835   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6836   match(Set mem (StoreI mem zero));
6837 
6838   ins_cost(125); // XXX
6839   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6840   ins_encode %{
6841     __ movl($mem$$Address, r12);
6842   %}
6843   ins_pipe(ialu_mem_reg);
6844 %}
6845 
6846 instruct storeImmI(memory mem, immI src)
6847 %{
       // Stores an int constant: opcode C7 /0 followed by a 32-bit immediate
       // (Con32), using the non-wide REX_mem prefix encoder.
6848   match(Set mem (StoreI mem src));
6849 
6850   ins_cost(150);
6851   format %{ "movl    $mem, $src\t# int" %}
6852   opcode(0xC7); /* C7 /0 */
6853   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6854   ins_pipe(ialu_mem_imm);
6855 %}
6856 
6857 // Store Long Immediate
     // Zero variant: writes all 64 bits of r12 instead of an immediate 0.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0.
6858 instruct storeImmL0(memory mem, immL0 zero)
6859 %{
6860   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6861   match(Set mem (StoreL mem zero));
6862 
6863   ins_cost(125); // XXX
6864   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6865   ins_encode %{
6866     __ movq($mem$$Address, r12);
6867   %}
6868   ins_pipe(ialu_mem_reg);
6869 %}
6870 
6871 instruct storeImmL(memory mem, immL32 src)
6872 %{
       // Stores a long constant restricted to the immL32 operand class: opcode
       // C7 /0 with a wide REX prefix and a 32-bit immediate (Con32).
6873   match(Set mem (StoreL mem src));
6874 
6875   ins_cost(150);
6876   format %{ "movq    $mem, $src\t# long" %}
6877   opcode(0xC7); /* C7 /0 */
6878   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6879   ins_pipe(ialu_mem_imm);
6880 %}
6881 
6882 // Store Short/Char Immediate
     // Zero variant: writes the low 16 bits of r12 instead of an immediate 0.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0.
6883 instruct storeImmC0(memory mem, immI0 zero)
6884 %{
6885   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6886   match(Set mem (StoreC mem zero));
6887 
6888   ins_cost(125); // XXX
6889   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6890   ins_encode %{
6891     __ movw($mem$$Address, r12);
6892   %}
6893   ins_pipe(ialu_mem_reg);
6894 %}
6895 
6896 instruct storeImmI16(memory mem, immI16 src)
6897 %{
       // Stores a 16-bit constant for StoreC; only available when the
       // UseStoreImmI16 flag is set. Encoded as SizePrefix + C7 /0 with a 16-bit
       // immediate (Con16), per the opcode comment below.
6898   predicate(UseStoreImmI16);
6899   match(Set mem (StoreC mem src));
6900 
6901   ins_cost(150);
6902   format %{ "movw    $mem, $src\t# short/char" %}
6903   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6904   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6905   ins_pipe(ialu_mem_imm);
6906 %}
6907 
6908 // Store Byte Immediate
     // Zero variant: writes the low byte of r12 instead of an immediate 0.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0.
     // The format text now says "byte"; it previously said "short/char", copied
     // from storeImmC0, which mislabelled this store in disassembly listings.
6909 instruct storeImmB0(memory mem, immI0 zero)
6910 %{
6911   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6912   match(Set mem (StoreB mem zero));
6913 
6914   ins_cost(125); // XXX
6915   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6916   ins_encode %{
6917     __ movb($mem$$Address, r12);
6918   %}
6919   ins_pipe(ialu_mem_reg);
6920 %}
6921 
6922 instruct storeImmB(memory mem, immI8 src)
6923 %{
       // Stores an 8-bit constant for StoreB: opcode C6 /0 with the immediate
       // emitted by Con8or32.
6924   match(Set mem (StoreB mem src));
6925 
6926   ins_cost(150); // XXX
6927   format %{ "movb    $mem, $src\t# byte" %}
6928   opcode(0xC6); /* C6 /0 */
6929   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6930   ins_pipe(ialu_mem_imm);
6931 %}
6932 
6933 // Store Aligned Packed Byte XMM register to memory
     // Matches Store8B with the source in a regD operand and emits it through
     // the shared movq_st encoding (also used by storeA4C/storeA2I/storeA2F).
6934 instruct storeA8B(memory mem, regD src) %{
6935   match(Set mem (Store8B mem src));
6936   ins_cost(145);
6937   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6938   ins_encode( movq_st(mem, src));
6939   ins_pipe( pipe_slow );
6940 %}
6941 
6942 // Store Aligned Packed Char/Short XMM register to memory
     // Matches Store4C with the source in a regD operand and emits it through
     // the shared movq_st encoding.
6943 instruct storeA4C(memory mem, regD src) %{
6944   match(Set mem (Store4C mem src));
6945   ins_cost(145);
6946   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6947   ins_encode( movq_st(mem, src));
6948   ins_pipe( pipe_slow );
6949 %}
6950 
6951 // Store Aligned Packed Integer XMM register to memory
     // Matches Store2I with the source in a regD operand and emits it through
     // the shared movq_st encoding.
6952 instruct storeA2I(memory mem, regD src) %{
6953   match(Set mem (Store2I mem src));
6954   ins_cost(145);
6955   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6956   ins_encode( movq_st(mem, src));
6957   ins_pipe( pipe_slow );
6958 %}
6959 
6960 // Store CMS card-mark Immediate
     // Register variant: writes the low byte of r12 for a zero card mark.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0. Cheaper
     // (125) than the immediate form storeImmCM0 (150).
6961 instruct storeImmCM0_reg(memory mem, immI0 zero)
6962 %{
6963   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6964   match(Set mem (StoreCM mem zero));
6965 
6966   ins_cost(125); // XXX
6967   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6968   ins_encode %{
6969     __ movb($mem$$Address, r12);
6970   %}
6971   ins_pipe(ialu_mem_reg);
6972 %}
6973 
6974 instruct storeImmCM0(memory mem, immI0 src)
6975 %{
       // Immediate form of the zero card-mark store (StoreCM): opcode C6 /0 with
       // the immediate emitted by Con8or32. Has no predicate, so it is always
       // available as the fallback for storeImmCM0_reg.
6976   match(Set mem (StoreCM mem src));
6977 
6978   ins_cost(150); // XXX
6979   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6980   opcode(0xC6); /* C6 /0 */
6981   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6982   ins_pipe(ialu_mem_imm);
6983 %}
6984 
6985 // Store Aligned Packed Single Float XMM register to memory
     // Matches Store2F with the source in a regD operand and emits it through
     // the shared movq_st encoding.
6986 instruct storeA2F(memory mem, regD src) %{
6987   match(Set mem (Store2F mem src));
6988   ins_cost(145);
6989   format %{ "MOVQ  $mem,$src\t! packed2F" %}
6990   ins_encode( movq_st(mem, src));
6991   ins_pipe( pipe_slow );
6992 %}
6993 
6994 // Store Float
     // Matches StoreF of a regF operand. Opcode bytes are F3 0F 11 (shown as
     // movss); the first byte (OpcP) is emitted before the REX prefix and the
     // remaining two (OpcS, OpcT) after it.
6995 instruct storeF(memory mem, regF src)
6996 %{
6997   match(Set mem (StoreF mem src));
6998 
6999   ins_cost(95); // XXX
7000   format %{ "movss   $mem, $src\t# float" %}
7001   opcode(0xF3, 0x0F, 0x11);
7002   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7003   ins_pipe(pipe_slow); // XXX
7004 %}
7005 
7006 // Store immediate Float value (it is faster than store from XMM register)
     // Zero variant: writes the low 32 bits of r12 for a float 0 constant.
     // Only enabled with compressed oops and a NULL narrow-oop base; the format
     // string records the assumption that R12 (heap base) is then 0. Its cost
     // (25) is below both storeF_imm (50) and storeF (95).
7007 instruct storeF0(memory mem, immF0 zero)
7008 %{
7009   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7010   match(Set mem (StoreF mem zero));
7011 
7012   ins_cost(25); // XXX
7013   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7014   ins_encode %{
7015     __ movl($mem$$Address, r12);
7016   %}
7017   ins_pipe(ialu_mem_reg);
7018 %}
7019 
7020 instruct storeF_imm(memory mem, immF src)
7021 %{
       // Stores a float constant with an integer store: opcode C7 /0 followed by
       // the constant emitted by Con32F_as_bits (shown as movl in the format).
7022   match(Set mem (StoreF mem src));
7023 
7024   ins_cost(50);
7025   format %{ "movl    $mem, $src\t# float" %}
7026   opcode(0xC7); /* C7 /0 */
7027   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7028   ins_pipe(ialu_mem_imm);
7029 %}
7030 
7031 // Store Double
     // Matches StoreD of a regD operand. Opcode bytes are F2 0F 11 (shown as
     // movsd); the first byte (OpcP) is emitted before the REX prefix and the
     // remaining two (OpcS, OpcT) after it.
7032 instruct storeD(memory mem, regD src)
7033 %{
7034   match(Set mem (StoreD mem src));
7035 
7036   ins_cost(95); // XXX
7037   format %{ "movsd   $mem, $src\t# double" %}
7038   opcode(0xF2, 0x0F, 0x11);
7039   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7040   ins_pipe(pipe_slow); // XXX
7041 %}
7042 
7043 // Store immediate double 0.0 (it is faster than store from XMM register)
     // Immediate form: opcode C7 /0 with a wide REX prefix and the constant
     // emitted by Con32F_as_bits. Its predicate is the exact negation of
     // storeD0's, so the two rules are never both eligible.
7044 instruct storeD0_imm(memory mem, immD0 src)
7045 %{
7046   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7047   match(Set mem (StoreD mem src));
7048 
7049   ins_cost(50);
7050   format %{ "movq    $mem, $src\t# double 0." %}
7051   opcode(0xC7); /* C7 /0 */
7052   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7053   ins_pipe(ialu_mem_imm);
7054 %}
7055 
7056 instruct storeD0(memory mem, immD0 zero)
7057 %{
       // Stores a double 0 constant by writing all 64 bits of r12.
       // Only enabled with compressed oops and a NULL narrow-oop base; the
       // format string records the assumption that R12 (heap base) is then 0.
       // The predicate is the exact negation of storeD0_imm's.
7058   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7059   match(Set mem (StoreD mem zero));
7060 
7061   ins_cost(25); // XXX
7062   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7063   ins_encode %{
7064     __ movq($mem$$Address, r12);
7065   %}
7066   ins_pipe(ialu_mem_reg);
7067 %}
7068 
7069 instruct storeSSI(stackSlotI dst, rRegI src)
7070 %{
       // Moves an int register into an int stack slot (plain "Set dst src", no
       // ideal Store node). Encoded like storeI: opcode 0x89, non-wide REX.
7071   match(Set dst src);
7072 
7073   ins_cost(100);
7074   format %{ "movl    $dst, $src\t# int stk" %}
7075   opcode(0x89);
7076   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7077   ins_pipe( ialu_mem_reg );
7078 %}
7079 
7080 instruct storeSSL(stackSlotL dst, rRegL src)
7081 %{
       // Moves a long register into a long stack slot (plain "Set dst src").
       // Encoded like storeL: opcode 0x89 with the wide REX prefix encoder.
7082   match(Set dst src);
7083 
7084   ins_cost(100);
7085   format %{ "movq    $dst, $src\t# long stk" %}
7086   opcode(0x89);
7087   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7088   ins_pipe(ialu_mem_reg);
7089 %}
7090 
7091 instruct storeSSP(stackSlotP dst, rRegP src)
7092 %{
       // Moves a pointer register into a pointer stack slot (plain
       // "Set dst src"). Opcode 0x89 with the wide REX prefix encoder.
7093   match(Set dst src);
7094 
7095   ins_cost(100);
7096   format %{ "movq    $dst, $src\t# ptr stk" %}
7097   opcode(0x89);
7098   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7099   ins_pipe(ialu_mem_reg);
7100 %}
7101 
7102 instruct storeSSF(stackSlotF dst, regF src)
7103 %{
       // Moves a float register into a float stack slot (plain "Set dst src").
       // Same opcode bytes as storeF: F3 0F 11 (shown as movss).
7104   match(Set dst src);
7105 
7106   ins_cost(95); // XXX
7107   format %{ "movss   $dst, $src\t# float stk" %}
7108   opcode(0xF3, 0x0F, 0x11);
7109   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7110   ins_pipe(pipe_slow); // XXX
7111 %}
7112 
7113 instruct storeSSD(stackSlotD dst, regD src)
7114 %{
       // Moves a double register into a double stack slot (plain "Set dst src").
       // Same opcode bytes as storeD: F2 0F 11 (shown as movsd).
7115   match(Set dst src);
7116 
7117   ins_cost(95); // XXX
7118   format %{ "movsd   $dst, $src\t# double stk" %}
7119   opcode(0xF2, 0x0F, 0x11);
7120   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7121   ins_pipe(pipe_slow); // XXX
7122 %}
7123 
7124 //----------BSWAP Instructions-------------------------------------------------
7125 instruct bytes_reverse_int(rRegI dst) %{
       // In-place byte reversal of an int (ReverseBytesI): dst is both input and
       // result. Opcode 0F C8 with the register folded in by opc2_reg (bswapl).
7126   match(Set dst (ReverseBytesI dst));
7127 
7128   format %{ "bswapl  $dst" %}
7129   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7130   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7131   ins_pipe( ialu_reg );
7132 %}
7133 
7134 instruct bytes_reverse_long(rRegL dst) %{
       // In-place byte reversal of a long (ReverseBytesL): dst is both input and
       // result. Same opcode as the int form but with the wide REX prefix
       // encoder REX_reg_wide (bswapq).
7135   match(Set dst (ReverseBytesL dst));
7136 
7137   format %{ "bswapq  $dst" %}
7138 
7139   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7140   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7141   ins_pipe( ialu_reg);
7142 %}
7143 
7144 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // In-place byte reversal of an unsigned 16-bit value (ReverseBytesUS):
       // bswapl reverses all four bytes, then a logical right shift by 16 moves
       // the swapped halfword back to the low bits with zero fill.
       // shrl rewrites the condition flags, so the flags register is declared
       // killed; without this a compare result live across this instruction
       // could be silently destroyed.
7145   match(Set dst (ReverseBytesUS dst));
       effect(KILL cr);
7146 
7147   format %{ "bswapl  $dst\n\t"
7148             "shrl    $dst,16\n\t" %}
7149   ins_encode %{
7150     __ bswapl($dst$$Register);
7151     __ shrl($dst$$Register, 16);
7152   %}
7153   ins_pipe( ialu_reg );
7154 %}
7155 
7156 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // In-place byte reversal of a signed 16-bit value (ReverseBytesS):
       // bswapl reverses all four bytes, then an arithmetic right shift by 16
       // moves the swapped halfword back to the low bits with sign extension.
       // sarl rewrites the condition flags, so the flags register is declared
       // killed; without this a compare result live across this instruction
       // could be silently destroyed.
       // The format text uses "sarl" to match the instruction actually emitted.
7157   match(Set dst (ReverseBytesS dst));
       effect(KILL cr);
7158 
7159   format %{ "bswapl  $dst\n\t"
7160             "sarl    $dst,16\n\t" %}
7161   ins_encode %{
7162     __ bswapl($dst$$Register);
7163     __ sarl($dst$$Register, 16);
7164   %}
7165   ins_pipe( ialu_reg );
7166 %}
7167 
7168 //---------- Zeros Count Instructions ------------------------------------------
7169 
7170 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI as a single lzcntl; used only when
       // UseCountLeadingZerosInstruction is set (otherwise the _bsr rule
       // applies). The flags register is declared killed.
7171   predicate(UseCountLeadingZerosInstruction);
7172   match(Set dst (CountLeadingZerosI src));
7173   effect(KILL cr);
7174 
7175   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7176   ins_encode %{
7177     __ lzcntl($dst$$Register, $src$$Register);
7178   %}
7179   ins_pipe(ialu_reg);
7180 %}
7181 
7182 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI without lzcnt (UseCountLeadingZerosInstruction
       // off). Emits bsrl, then turns its result r into (BitsPerInt - 1) - r via
       // negl + addl. When the notZero branch is not taken, dst is first forced
       // to -1 so the same arithmetic yields BitsPerInt.
7183   predicate(!UseCountLeadingZerosInstruction);
7184   match(Set dst (CountLeadingZerosI src));
7185   effect(KILL cr);
7186 
7187   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7188             "jnz     skip\n\t"
7189             "movl    $dst, -1\n"
7190       "skip:\n\t"
7191             "negl    $dst\n\t"
7192             "addl    $dst, 31" %}
7193   ins_encode %{
7194     Register dst_reg = $dst$$Register;
7195     Register src_reg = $src$$Register;
7196     Label nonzero;
7197     __ bsrl(dst_reg, src_reg);
7198     __ jccb(Assembler::notZero, nonzero);
7199     __ movl(dst_reg, -1);
7200     __ bind(nonzero);
7201     __ negl(dst_reg);
7202     __ addl(dst_reg, BitsPerInt - 1);
7203   %}
7204   ins_pipe(ialu_reg);
7205 %}
7206 
7207 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL as a single lzcntq; the source is a long register
       // and the result an int register. Used only when
       // UseCountLeadingZerosInstruction is set. The flags register is killed.
7208   predicate(UseCountLeadingZerosInstruction);
7209   match(Set dst (CountLeadingZerosL src));
7210   effect(KILL cr);
7211 
7212   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7213   ins_encode %{
7214     __ lzcntq($dst$$Register, $src$$Register);
7215   %}
7216   ins_pipe(ialu_reg);
7217 %}
7218 
7219 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL without lzcnt (UseCountLeadingZerosInstruction
       // off). Emits bsrq, then turns its result r into (BitsPerLong - 1) - r
       // via negl + addl. When the notZero branch is not taken, dst is first
       // forced to -1 so the same arithmetic yields BitsPerLong.
7220   predicate(!UseCountLeadingZerosInstruction);
7221   match(Set dst (CountLeadingZerosL src));
7222   effect(KILL cr);
7223 
7224   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7225             "jnz     skip\n\t"
7226             "movl    $dst, -1\n"
7227       "skip:\n\t"
7228             "negl    $dst\n\t"
7229             "addl    $dst, 63" %}
7230   ins_encode %{
7231     Register dst_reg = $dst$$Register;
7232     Register src_reg = $src$$Register;
7233     Label nonzero;
7234     __ bsrq(dst_reg, src_reg);
7235     __ jccb(Assembler::notZero, nonzero);
7236     __ movl(dst_reg, -1);
7237     __ bind(nonzero);
7238     __ negl(dst_reg);
7239     __ addl(dst_reg, BitsPerLong - 1);
7240   %}
7241   ins_pipe(ialu_reg);
7242 %}
7243 
7244 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl. When the notZero branch is not taken,
       // dst is overwritten with BitsPerInt; otherwise the bsfl result is kept.
7245   match(Set dst (CountTrailingZerosI src));
7246   effect(KILL cr);
7247 
7248   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7249             "jnz     done\n\t"
7250             "movl    $dst, 32\n"
7251       "done:" %}
7252   ins_encode %{
7253     Register dst_reg = $dst$$Register;
7254     Label found;
7255     __ bsfl(dst_reg, $src$$Register);
7256     __ jccb(Assembler::notZero, found);
7257     __ movl(dst_reg, BitsPerInt);
7258     __ bind(found);
7259   %}
7260   ins_pipe(ialu_reg);
7261 %}
7262 
7263 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq (long source, int result). When the
       // notZero branch is not taken, dst is overwritten with BitsPerLong;
       // otherwise the bsfq result is kept.
7264   match(Set dst (CountTrailingZerosL src));
7265   effect(KILL cr);
7266 
7267   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7268             "jnz     done\n\t"
7269             "movl    $dst, 64\n"
7270       "done:" %}
7271   ins_encode %{
7272     Register dst_reg = $dst$$Register;
7273     Label found;
7274     __ bsfq(dst_reg, $src$$Register);
7275     __ jccb(Assembler::notZero, found);
7276     __ movl(dst_reg, BitsPerLong);
7277     __ bind(found);
7278   %}
7279   ins_pipe(ialu_reg);
7280 %}
7281 
7282 
7283 //---------- Population Count Instructions -------------------------------------
7284 
7285 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI of a register via popcntl; only available when
       // UsePopCountInstruction is set.
       // POPCNT rewrites the arithmetic flags, so the flags register is
       // declared killed; without this a compare result live across this
       // instruction could be silently destroyed.
7286   predicate(UsePopCountInstruction);
7287   match(Set dst (PopCountI src));
       effect(KILL cr);
7288 
7289   format %{ "popcnt  $dst, $src" %}
7290   ins_encode %{
7291     __ popcntl($dst$$Register, $src$$Register);
7292   %}
7293   ins_pipe(ialu_reg);
7294 %}
7295 
7296 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI with the LoadI folded into popcntl's memory operand; only
       // available when UsePopCountInstruction is set.
       // POPCNT rewrites the arithmetic flags, so the flags register is
       // declared killed; without this a compare result live across this
       // instruction could be silently destroyed.
7297   predicate(UsePopCountInstruction);
7298   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7299 
7300   format %{ "popcnt  $dst, $mem" %}
7301   ins_encode %{
7302     __ popcntl($dst$$Register, $mem$$Address);
7303   %}
7304   ins_pipe(ialu_reg);
7305 %}
7306 
7307 // Note: Long.bitCount(long) returns an int.
     // PopCountL of a long register via popcntq, result in an int register;
     // only available when UsePopCountInstruction is set.
     // POPCNT rewrites the arithmetic flags, so the flags register is declared
     // killed; without this a compare result live across this instruction
     // could be silently destroyed.
7308 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7309   predicate(UsePopCountInstruction);
7310   match(Set dst (PopCountL src));
       effect(KILL cr);
7311 
7312   format %{ "popcnt  $dst, $src" %}
7313   ins_encode %{
7314     __ popcntq($dst$$Register, $src$$Register);
7315   %}
7316   ins_pipe(ialu_reg);
7317 %}
7318 
7319 // Note: Long.bitCount(long) returns an int.
     // PopCountL with the LoadL folded into popcntq's memory operand, result in
     // an int register; only available when UsePopCountInstruction is set.
     // POPCNT rewrites the arithmetic flags, so the flags register is declared
     // killed; without this a compare result live across this instruction
     // could be silently destroyed.
7320 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7321   predicate(UsePopCountInstruction);
7322   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7323 
7324   format %{ "popcnt  $dst, $mem" %}
7325   ins_encode %{
7326     __ popcntq($dst$$Register, $mem$$Address);
7327   %}
7328   ins_pipe(ialu_reg);
7329 %}
7330 
7331 
7332 //----------MemBar Instructions-----------------------------------------------
7333 // Memory barrier flavors
7334 
7335 instruct membar_acquire()
7336 %{
       // MemBarAcquire produces no machine code: zero cost, zero size, empty
       // encoding. The rule exists so the ideal node has a match.
7337   match(MemBarAcquire);
7338   ins_cost(0);
7339 
7340   size(0);
7341   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7342   ins_encode();
7343   ins_pipe(empty);
7344 %}
7345 
7346 instruct membar_acquire_lock()
7347 %{
       // MemBarAcquireLock produces no machine code (zero cost, zero size,
       // empty encoding); per the format text, the preceding CMPXCHG in FastLock
       // already provides the ordering.
7348   match(MemBarAcquireLock);
7349   ins_cost(0);
7350 
7351   size(0);
7352   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7353   ins_encode();
7354   ins_pipe(empty);
7355 %}
7356 
7357 instruct membar_release()
7358 %{
       // MemBarRelease produces no machine code: zero cost, zero size, empty
       // encoding. The rule exists so the ideal node has a match.
7359   match(MemBarRelease);
7360   ins_cost(0);
7361 
7362   size(0);
7363   format %{ "MEMBAR-release ! (empty encoding)" %}
7364   ins_encode();
7365   ins_pipe(empty);
7366 %}
7367 
7368 instruct membar_release_lock()
7369 %{
       // MemBarReleaseLock produces no machine code (zero cost, zero size,
       // empty encoding); per the format text, a FastUnlock follows it.
7370   match(MemBarReleaseLock);
7371   ins_cost(0);
7372 
7373   size(0);
7374   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7375   ins_encode();
7376   ins_pipe(empty);
7377 %}
7378 
7379 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile: the encoding calls membar(Assembler::StoreLoad). The
       // flags register is declared killed and the cost is high (400), so the
       // zero-cost unnecessary_membar_volatile rule wins whenever its predicate
       // holds. The format template prints different text depending on
       // os::is_MP(); it only affects the listing, not the encoding.
7380   match(MemBarVolatile);
7381   effect(KILL cr);
7382   ins_cost(400);
7383 
7384   format %{
7385     $$template
7386     if (os::is_MP()) {
7387       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7388     } else {
7389       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7390     }
7391   %}
7392   ins_encode %{
7393     __ membar(Assembler::StoreLoad);
7394   %}
7395   ins_pipe(pipe_slow);
7396 %}
7397 
7398 instruct unnecessary_membar_volatile()
7399 %{
       // Zero-cost, zero-size replacement for membar_volatile, eligible only
       // when Matcher::post_store_load_barrier(n) reports true for this node.
7400   match(MemBarVolatile);
7401   predicate(Matcher::post_store_load_barrier(n));
7402   ins_cost(0);
7403 
7404   size(0);
7405   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7406   ins_encode();
7407   ins_pipe(empty);
7408 %}
7409 
7410 //----------Move Instructions--------------------------------------------------
7411 
7412 instruct castX2P(rRegP dst, rRegL src)
7413 %{
       // CastX2P (long reinterpreted as pointer): a register-to-register copy
       // emitted by the enc_copy_wide encoding.
7414   match(Set dst (CastX2P src));
7415 
7416   format %{ "movq    $dst, $src\t# long->ptr" %}
7417   ins_encode(enc_copy_wide(dst, src));
7418   ins_pipe(ialu_reg_reg); // XXX
7419 %}
7420 
7421 instruct castP2X(rRegL dst, rRegP src)
7422 %{
       // CastP2X (pointer reinterpreted as long): a register-to-register copy
       // emitted by the enc_copy_wide encoding.
7423   match(Set dst (CastP2X src));
7424 
7425   format %{ "movq    $dst, $src\t# ptr -> long" %}
7426   ins_encode(enc_copy_wide(dst, src));
7427   ins_pipe(ialu_reg_reg); // XXX
7428 %}
7429 
7430 
7431 // Convert oop pointer into compressed form
     // General form, used when the node's pointer type is not NotNull. The
     // source is copied into dst if they are different registers, then dst is
     // encoded in place. The flags register is declared killed.
7432 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7433   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7434   match(Set dst (EncodeP src));
7435   effect(KILL cr);
7436   format %{ "encode_heap_oop $dst,$src" %}
7437   ins_encode %{
7438     Register src_reg = $src$$Register;
7439     Register dst_reg = $dst$$Register;
7440     if (dst_reg != src_reg) {
7441       __ movq(dst_reg, src_reg);
7442     }
7443     __ encode_heap_oop(dst_reg);
7444   %}
7445   ins_pipe(ialu_reg_long);
7446 %}
7447 
7448 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP when the node's pointer type is exactly NotNull (the
       // complement of encodeHeapOop's predicate). Delegates to the two-register
       // encode_heap_oop_not_null; the flags register is declared killed.
7449   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7450   match(Set dst (EncodeP src));
7451   effect(KILL cr);
7452   format %{ "encode_heap_oop_not_null $dst,$src" %}
7453   ins_encode %{
7454     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7455   %}
7456   ins_pipe(ialu_reg_long);
7457 %}
7458 
7459 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN, general form: used when the node's oop type is neither NotNull
       // nor Constant. The source is copied into dst if they are different
       // registers, then dst is decoded in place. The flags register is killed.
7460   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7461             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7462   match(Set dst (DecodeN src));
7463   effect(KILL cr);
7464   format %{ "decode_heap_oop $dst,$src" %}
7465   ins_encode %{
7466     Register src_reg = $src$$Register;
7467     Register dst_reg = $dst$$Register;
7468     if (dst_reg != src_reg) {
7469       __ movq(dst_reg, src_reg);
7470     }
7471     __ decode_heap_oop(dst_reg);
7472   %}
7473   ins_pipe(ialu_reg_long);
7474 %}
7475 
7476 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN when the node's oop type is NotNull or Constant (the complement
       // of decodeHeapOop's predicate). Uses the in-place one-register decoder
       // when src and dst coincide, and the two-register form otherwise.
7477   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7478             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7479   match(Set dst (DecodeN src));
7480   effect(KILL cr);
7481   format %{ "decode_heap_oop_not_null $dst,$src" %}
7482   ins_encode %{
7483     Register src_reg = $src$$Register;
7484     Register dst_reg = $dst$$Register;
7485     if (src_reg == dst_reg) {
7486       __ decode_heap_oop_not_null(dst_reg);
7487     } else {
7488       __ decode_heap_oop_not_null(dst_reg, src_reg);
7489     }
7490   %}
7491   ins_pipe(ialu_reg_long);
7492 %}
7493 
7494 
7495 //----------Conditional Move---------------------------------------------------
7496 // Jump
7497 // dummy instruction for generating temp registers
     // Table jump for Jump(LShiftL switch_val shift). predicate(false) means
     // the matcher never selects this rule. The encoding loads the jump-table
     // address ($constantaddress) into the TEMP register dest and jumps through
     // [dest + switch_val << shift].
7498 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7499   match(Jump (LShiftL switch_val shift));
7500   ins_cost(350);
7501   predicate(false);
7502   effect(TEMP dest);
7503 
7504   format %{ "leaq    $dest, [$constantaddress]\n\t"
7505             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7506   ins_encode %{
7507     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7508     // to do that and the compiler is using that register as one it can allocate.
7509     // So we build it all by hand.
7510     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7511     // ArrayAddress dispatch(table, index);
7512     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7513     __ lea($dest$$Register, $constantaddress);
7514     __ jmp(dispatch);
7515   %}
7516   ins_pipe(pipe_jmp);
7517 %}
7518 
7519 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Table jump for Jump(AddL (LShiftL switch_val shift) offset): the scale
       // and the constant offset are folded into the addressing mode. The
       // encoding loads the jump-table address ($constantaddress) into the TEMP
       // register dest and jumps through [dest + switch_val << shift + offset].
       // The offset constant is narrowed to int for the Address displacement.
7520   match(Jump (AddL (LShiftL switch_val shift) offset));
7521   ins_cost(350);
7522   effect(TEMP dest);
7523 
7524   format %{ "leaq    $dest, [$constantaddress]\n\t"
7525             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7526   ins_encode %{
7527     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7528     // to do that and the compiler is using that register as one it can allocate.
7529     // So we build it all by hand.
7530     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7531     // ArrayAddress dispatch(table, index);
7532     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7533     __ lea($dest$$Register, $constantaddress);
7534     __ jmp(dispatch);
7535   %}
7536   ins_pipe(pipe_jmp);
7537 %}
7538 
7539 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Plain table jump for Jump(switch_val): loads the jump-table address
       // ($constantaddress) into the TEMP register dest and jumps through
       // [dest + switch_val] with scale factor times_1.
7540   match(Jump switch_val);
7541   ins_cost(350);
7542   effect(TEMP dest);
7543 
7544   format %{ "leaq    $dest, [$constantaddress]\n\t"
7545             "jmp     [$dest + $switch_val]\n\t" %}
7546   ins_encode %{
7547     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7548     // to do that and the compiler is using that register as one it can allocate.
7549     // So we build it all by hand.
7550     // Address index(noreg, switch_reg, Address::times_1);
7551     // ArrayAddress dispatch(table, index);
7552     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7553     __ lea($dest$$Register, $constantaddress);
7554     __ jmp(dispatch);
7555   %}
7556   ins_pipe(pipe_jmp);
7557 %}
7558 
7559 // Conditional move
     // Int register-to-register cmov on signed-compare flags (cmpOp/rFlagsReg).
     // dst is both the "keep" input and the result. 0F 40 is the base opcode;
     // enc_cmov(cop) emits the condition-specific form (defined elsewhere).
7560 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7561 %{
7562   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7563 
7564   ins_cost(200); // XXX
7565   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7566   opcode(0x0F, 0x40);
7567   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7568   ins_pipe(pipe_cmov_reg);
7569 %}
7570 
7571 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Int register-to-register cmov on unsigned-compare flags
       // (cmpOpU/rFlagsRegU). Same encoding as cmovI_reg; also the expansion
       // target of cmovI_regUCF.
7572   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7573 
7574   ins_cost(200); // XXX
7575   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7576   opcode(0x0F, 0x40);
7577   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7578   ins_pipe(pipe_cmov_reg);
7579 %}
7580 
7581 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Same match as cmovI_regU but for the cmpOpUCF/rFlagsRegUCF operand
       // classes; has no encoding of its own and simply expands to cmovI_regU.
7582   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7583   ins_cost(200);
7584   expand %{
7585     cmovI_regU(cop, cr, dst, src);
7586   %}
7587 %}
7588 
7589 // Conditional move
     // Int cmov on signed-compare flags with the LoadI folded into the memory
     // operand. Costs more (250) than the register form (200).
7590 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7591   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7592 
7593   ins_cost(250); // XXX
7594   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7595   opcode(0x0F, 0x40);
7596   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7597   ins_pipe(pipe_cmov_mem);
7598 %}
7599 
7600 // Conditional move
     // Int cmov on unsigned-compare flags with the LoadI folded into the memory
     // operand. Same encoding as cmovI_mem; also the expansion target of
     // cmovI_memUCF.
7601 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7602 %{
7603   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7604 
7605   ins_cost(250); // XXX
7606   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7607   opcode(0x0F, 0x40);
7608   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7609   ins_pipe(pipe_cmov_mem);
7610 %}
7611 
7612 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Same match as cmovI_memU but for the cmpOpUCF/rFlagsRegUCF operand
       // classes; has no encoding of its own and simply expands to cmovI_memU.
7613   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7614   ins_cost(250);
7615   expand %{
7616     cmovI_memU(cop, cr, dst, src);
7617   %}
7618 %}
7619 
7620 // Conditional move
     // Narrow-oop register-to-register cmov (CMoveN) on signed-compare flags.
     // Uses the same 32-bit encoding as cmovI_reg (shown as cmovl).
7621 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7622 %{
7623   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7624 
7625   ins_cost(200); // XXX
7626   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7627   opcode(0x0F, 0x40);
7628   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7629   ins_pipe(pipe_cmov_reg);
7630 %}
7631 
7632 // Conditional move
     // Narrow-oop register-to-register cmov (CMoveN) on unsigned-compare flags.
     // Same encoding as cmovN_reg; also the expansion target of cmovN_regUCF.
7633 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7634 %{
7635   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7636 
7637   ins_cost(200); // XXX
7638   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7639   opcode(0x0F, 0x40);
7640   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7641   ins_pipe(pipe_cmov_reg);
7642 %}
7643 
7644 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Same match as cmovN_regU but for the cmpOpUCF/rFlagsRegUCF operand
       // classes; has no encoding of its own and simply expands to cmovN_regU.
7645   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7646   ins_cost(200);
7647   expand %{
7648     cmovN_regU(cop, cr, dst, src);
7649   %}
7650 %}
7651 
7652 // Conditional move
     // Pointer register-to-register cmov (CMoveP) on signed-compare flags.
     // Uses the wide REX prefix encoder REX_reg_reg_wide (shown as cmovq).
7653 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7654 %{
7655   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7656 
7657   ins_cost(200); // XXX
7658   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7659   opcode(0x0F, 0x40);
7660   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7661   ins_pipe(pipe_cmov_reg);  // XXX
7662 %}
7663 
7664 // Conditional move
     // Pointer register-to-register cmov (CMoveP) on unsigned-compare flags.
     // Same encoding as cmovP_reg; also the expansion target of cmovP_regUCF.
7665 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7666 %{
7667   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7668 
7669   ins_cost(200); // XXX
7670   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7671   opcode(0x0F, 0x40);
7672   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7673   ins_pipe(pipe_cmov_reg); // XXX
7674 %}
7675 
7676 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Same match as cmovP_regU but for the cmpOpUCF/rFlagsRegUCF operand
       // classes; has no encoding of its own and simply expands to cmovP_regU.
7677   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7678   ins_cost(200);
7679   expand %{
7680     cmovP_regU(cop, cr, dst, src);
7681   %}
7682 %}
7683 
7684 // DISABLED: Requires the ADLC to emit a bottom_type call that
7685 // correctly meets the two pointer arguments; one is an incoming
7686 // register but the other is a memory operand.  ALSO appears to
7687 // be buggy with implicit null checks.
7688 //
7689 //// Conditional move
7690 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7691 //%{
7692 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7693 //  ins_cost(250);
7694 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7695 //  opcode(0x0F,0x40);
7696 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7697 //  ins_pipe( pipe_cmov_mem );
7698 //%}
7699 //
7700 //// Conditional move
7701 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7702 //%{
7703 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7704 //  ins_cost(250);
7705 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7706 //  opcode(0x0F,0x40);
7707 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7708 //  ins_pipe( pipe_cmov_mem );
7709 //%}
7710 
// Conditional move of a long (CMoveL), register to register, for a
// signed-compare condition.  Formatted as "cmovq" and encoded with the wide
// REX prefix class.
7711 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7712 %{
7713   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7714 
7715   ins_cost(200); // XXX
7716   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7717   opcode(0x0F, 0x40);
7718   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7719   ins_pipe(pipe_cmov_reg);  // XXX
7720 %}
7721 
// Conditional move of a long whose source is a memory operand: the match rule
// folds the LoadL into the move.  Signed-compare condition; uses the
// register/memory encoding classes (REX_reg_mem_wide, reg_mem).
7722 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7723 %{
7724   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7725 
7726   ins_cost(200); // XXX
7727   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7728   opcode(0x0F, 0x40);
7729   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7730   ins_pipe(pipe_cmov_mem);  // XXX
7731 %}
7732 
// Conditional move of a long, register to register, for an unsigned-compare
// condition (cmpOpU / rFlagsRegU).  Encoding is the same as cmovL_reg.
7733 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7734 %{
7735   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7736 
7737   ins_cost(200); // XXX
7738   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7739   opcode(0x0F, 0x40);
7740   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7741   ins_pipe(pipe_cmov_reg); // XXX
7742 %}
7743 
// CMoveL register variant for the cmpOpUCF / rFlagsRegUCF operand classes.
// No encoding of its own; expands into cmovL_regU.
7744 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7745   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7746   ins_cost(200);
7747   expand %{
7748     cmovL_regU(cop, cr, dst, src);
7749   %}
7750 %}
7751 
// Conditional move of a long from a memory operand (LoadL folded into the
// match) for an unsigned-compare condition.  Encoding is the same as
// cmovL_mem.
7752 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7753 %{
7754   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7755 
7756   ins_cost(200); // XXX
7757   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7758   opcode(0x0F, 0x40);
7759   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7760   ins_pipe(pipe_cmov_mem); // XXX
7761 %}
7762 
// CMoveL memory-source variant for the cmpOpUCF / rFlagsRegUCF operand
// classes.  No encoding of its own; expands into cmovL_memU.
7763 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7764   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7765   ins_cost(200);
7766   expand %{
7767     cmovL_memU(cop, cr, dst, src);
7768   %}
7769 %}
7770 
// Conditional move of a float (CMoveF) between regF registers for a
// signed-compare condition.  Per the format string this is not a single cmov:
// a conditional jump skips over a movss.  The bytes come from the
// enc_cmovf_branch encoding class (defined outside this section).
7771 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7772 %{
7773   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7774 
7775   ins_cost(200); // XXX
7776   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7777             "movss     $dst, $src\n"
7778     "skip:" %}
7779   ins_encode(enc_cmovf_branch(cop, dst, src));
7780   ins_pipe(pipe_slow);
7781 %}
7782 
7783 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7784 // %{
7785 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7786 
7787 //   ins_cost(200); // XXX
7788 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7789 //             "movss     $dst, $src\n"
7790 //     "skip:" %}
7791 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7792 //   ins_pipe(pipe_slow);
7793 // %}
7794 
// Conditional move of a float between regF registers for an unsigned-compare
// condition.  Same branch-over-movss shape and encoding class as cmovF_reg.
7795 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7796 %{
7797   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7798 
7799   ins_cost(200); // XXX
7800   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7801             "movss     $dst, $src\n"
7802     "skip:" %}
7803   ins_encode(enc_cmovf_branch(cop, dst, src));
7804   ins_pipe(pipe_slow);
7805 %}
7806 
// CMoveF variant for the cmpOpUCF / rFlagsRegUCF operand classes.  No
// encoding of its own; expands into cmovF_regU.
7807 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7808   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7809   ins_cost(200);
7810   expand %{
7811     cmovF_regU(cop, cr, dst, src);
7812   %}
7813 %}
7814 
// Conditional move of a double (CMoveD) between regD registers for a
// signed-compare condition.  Per the format string a conditional jump skips
// over a movsd; the bytes come from the enc_cmovd_branch encoding class.
7815 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7816 %{
7817   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7818 
7819   ins_cost(200); // XXX
7820   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7821             "movsd     $dst, $src\n"
7822     "skip:" %}
7823   ins_encode(enc_cmovd_branch(cop, dst, src));
7824   ins_pipe(pipe_slow);
7825 %}
7826 
// Conditional move of a double between regD registers for an
// unsigned-compare condition.  Same branch-over-movsd shape and encoding
// class as cmovD_reg.
7827 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7828 %{
7829   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7830 
7831   ins_cost(200); // XXX
7832   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7833             "movsd     $dst, $src\n"
7834     "skip:" %}
7835   ins_encode(enc_cmovd_branch(cop, dst, src));
7836   ins_pipe(pipe_slow);
7837 %}
7838 
// CMoveD variant for the cmpOpUCF / rFlagsRegUCF operand classes.  No
// encoding of its own; expands into cmovD_regU.
7839 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7840   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7841   ins_cost(200);
7842   expand %{
7843     cmovD_regU(cop, cr, dst, src);
7844   %}
7845 %}
7846 
7847 //----------Arithmetic Instructions--------------------------------------------
7848 //----------Addition Instructions----------------------------------------------
7849 
// Int add, register += register (AddI dst src, result in dst).
// The flags register operand is declared killed.
7850 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7851 %{
7852   match(Set dst (AddI dst src));
7853   effect(KILL cr);
7854 
7855   format %{ "addl    $dst, $src\t# int" %}
7856   opcode(0x03);
7857   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7858   ins_pipe(ialu_reg_reg);
7859 %}
7860 
// Int add, register += immediate (immI).  Flags are declared killed.
// The constant is emitted by Con8or32 and the opcode/ModRM by OpcSErm; both
// encoding classes are defined outside this section.
7861 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7862 %{
7863   match(Set dst (AddI dst src));
7864   effect(KILL cr);
7865 
7866   format %{ "addl    $dst, $src\t# int" %}
7867   opcode(0x81, 0x00); /* /0 id */
7868   ins_encode(OpcSErm(dst, src), Con8or32(src));
7869   ins_pipe( ialu_reg );
7870 %}
7871 
// Int add, register += memory: the LoadI of src is folded into the add.
// Flags are declared killed.
7872 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7873 %{
7874   match(Set dst (AddI dst (LoadI src)));
7875   effect(KILL cr);
7876 
7877   ins_cost(125); // XXX
7878   format %{ "addl    $dst, $src\t# int" %}
7879   opcode(0x03);
7880   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7881   ins_pipe(ialu_reg_mem);
7882 %}
7883 
// Int add, memory += register: matches a StoreI to dst of
// (LoadI dst) + src, i.e. a read-modify-write of the same memory operand.
// Flags are declared killed.
7884 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7885 %{
7886   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7887   effect(KILL cr);
7888 
7889   ins_cost(150); // XXX
7890   format %{ "addl    $dst, $src\t# int" %}
7891   opcode(0x01); /* Opcode 01 /r */
7892   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7893   ins_pipe(ialu_mem_reg);
7894 %}
7895 
// Int add, memory += immediate: read-modify-write of the same memory operand
// with an immI constant.  Flags are declared killed.  The /0 opcode extension
// is passed literally to RM_opc_mem.
7896 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7897 %{
7898   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7899   effect(KILL cr);
7900 
7901   ins_cost(125); // XXX
7902   format %{ "addl    $dst, $src\t# int" %}
7903   opcode(0x81); /* Opcode 81 /0 id */
7904   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7905   ins_pipe(ialu_mem_imm);
7906 %}
7907 
// Int increment of a register: AddI with the immI1 constant operand,
// formatted as "incl".  Only eligible when the UseIncDec predicate holds.
// The src operand is not referenced by the encoding.  Flags are declared
// killed.
7908 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7909 %{
7910   predicate(UseIncDec);
7911   match(Set dst (AddI dst src));
7912   effect(KILL cr);
7913 
7914   format %{ "incl    $dst\t# int" %}
7915   opcode(0xFF, 0x00); // FF /0
7916   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7917   ins_pipe(ialu_reg);
7918 %}
7919 
// Int increment of a memory operand: read-modify-write AddI with the immI1
// constant, formatted as "incl".  Only eligible when UseIncDec holds.
// Flags are declared killed.
7920 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7921 %{
7922   predicate(UseIncDec);
7923   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7924   effect(KILL cr);
7925 
7926   ins_cost(125); // XXX
7927   format %{ "incl    $dst\t# int" %}
7928   opcode(0xFF); /* Opcode FF /0 */
7929   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7930   ins_pipe(ialu_mem_imm);
7931 %}
7932 
7933 // XXX why does that use AddI
// Int decrement of a register, formatted as "decl".  The rule matches AddI
// with the immI_M1 constant operand rather than a SubI, which is what the
// note above asks about.  Only eligible when UseIncDec holds.  Flags are
// declared killed.
7934 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7935 %{
7936   predicate(UseIncDec);
7937   match(Set dst (AddI dst src));
7938   effect(KILL cr);
7939 
7940   format %{ "decl    $dst\t# int" %}
7941   opcode(0xFF, 0x01); // FF /1
7942   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7943   ins_pipe(ialu_reg);
7944 %}
7945 
7946 // XXX why does that use AddI
// Int decrement of a memory operand, formatted as "decl": read-modify-write
// AddI with the immI_M1 constant operand.  Only eligible when UseIncDec
// holds.  Flags are declared killed.
7947 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7948 %{
7949   predicate(UseIncDec);
7950   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7951   effect(KILL cr);
7952 
7953   ins_cost(125); // XXX
7954   format %{ "decl    $dst\t# int" %}
7955   opcode(0xFF); /* Opcode FF /1 */
7956   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7957   ins_pipe(ialu_mem_imm);
7958 %}
7959 
// Three-operand int add via lea: dst = src0 + src1 (immI), with dst separate
// from the source register.  No flags operand or KILL effect is declared.
// The encoding begins with a literal 0x67 byte, which the format string shows
// as "addr32".
7960 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7961 %{
7962   match(Set dst (AddI src0 src1));
7963 
7964   ins_cost(110);
7965   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7966   opcode(0x8D); /* 0x8D /r */
7967   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7968   ins_pipe(ialu_reg_reg);
7969 %}
7970 
// Long add, register += register (AddL dst src, result in dst).  Uses the
// wide REX prefix class.  Flags are declared killed.
7971 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7972 %{
7973   match(Set dst (AddL dst src));
7974   effect(KILL cr);
7975 
7976   format %{ "addq    $dst, $src\t# long" %}
7977   opcode(0x03);
7978   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7979   ins_pipe(ialu_reg_reg);
7980 %}
7981 
// Long add, register += immediate.  The constant operand class is immL32.
// Flags are declared killed.
7982 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7983 %{
7984   match(Set dst (AddL dst src));
7985   effect(KILL cr);
7986 
7987   format %{ "addq    $dst, $src\t# long" %}
7988   opcode(0x81, 0x00); /* /0 id */
7989   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7990   ins_pipe( ialu_reg );
7991 %}
7992 
// Long add, register += memory: the LoadL of src is folded into the add.
// Flags are declared killed.
7993 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7994 %{
7995   match(Set dst (AddL dst (LoadL src)));
7996   effect(KILL cr);
7997 
7998   ins_cost(125); // XXX
7999   format %{ "addq    $dst, $src\t# long" %}
8000   opcode(0x03);
8001   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8002   ins_pipe(ialu_reg_mem);
8003 %}
8004 
// Long add, memory += register: matches a StoreL to dst of
// (LoadL dst) + src, i.e. a read-modify-write of the same memory operand.
// Flags are declared killed.
8005 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8006 %{
8007   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8008   effect(KILL cr);
8009 
8010   ins_cost(150); // XXX
8011   format %{ "addq    $dst, $src\t# long" %}
8012   opcode(0x01); /* Opcode 01 /r */
8013   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8014   ins_pipe(ialu_mem_reg);
8015 %}
8016 
// Long add, memory += immediate (immL32): read-modify-write of the same
// memory operand.  Flags are declared killed.
8017 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8018 %{
8019   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8020   effect(KILL cr);
8021 
8022   ins_cost(125); // XXX
8023   format %{ "addq    $dst, $src\t# long" %}
8024   opcode(0x81); /* Opcode 81 /0 id */
8025   ins_encode(REX_mem_wide(dst),
8026              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8027   ins_pipe(ialu_mem_imm);
8028 %}
8029 
// Long increment of a register: AddL with the immL1 constant operand,
// formatted as "incq" and encoded with the wide REX prefix class.  Only
// eligible when the UseIncDec predicate holds.  Flags are declared killed.
//
// dst is a long register operand (rRegL): the rule consumes and produces the
// AddL value, and this matches the sibling decL_rReg rule.  It was previously
// declared rRegI, which disagreed with the AddL match rule.
8030 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8031 %{
8032   predicate(UseIncDec);
8033   match(Set dst (AddL dst src));
8034   effect(KILL cr);
8035 
8036   format %{ "incq    $dst\t# long" %}
8037   opcode(0xFF, 0x00); // FF /0
8038   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8039   ins_pipe(ialu_reg);
8040 %}
8041 
// Long increment of a memory operand: read-modify-write AddL with the immL1
// constant, formatted as "incq".  Only eligible when UseIncDec holds.
// Flags are declared killed.
8042 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8043 %{
8044   predicate(UseIncDec);
8045   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8046   effect(KILL cr);
8047 
8048   ins_cost(125); // XXX
8049   format %{ "incq    $dst\t# long" %}
8050   opcode(0xFF); /* Opcode FF /0 */
8051   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8052   ins_pipe(ialu_mem_imm);
8053 %}
8054 
8055 // XXX why does that use AddL
// Long decrement of a register, formatted as "decq".  The rule matches AddL
// with the immL_M1 constant operand rather than a SubL, which is what the
// note above asks about.  Only eligible when UseIncDec holds.  Flags are
// declared killed.
8056 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8057 %{
8058   predicate(UseIncDec);
8059   match(Set dst (AddL dst src));
8060   effect(KILL cr);
8061 
8062   format %{ "decq    $dst\t# long" %}
8063   opcode(0xFF, 0x01); // FF /1
8064   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8065   ins_pipe(ialu_reg);
8066 %}
8067 
8068 // XXX why does that use AddL
// Long decrement of a memory operand, formatted as "decq": read-modify-write
// AddL with the immL_M1 constant operand.  Only eligible when UseIncDec
// holds.  Flags are declared killed.
8069 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8070 %{
8071   predicate(UseIncDec);
8072   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8073   effect(KILL cr);
8074 
8075   ins_cost(125); // XXX
8076   format %{ "decq    $dst\t# long" %}
8077   opcode(0xFF); /* Opcode FF /1 */
8078   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8079   ins_pipe(ialu_mem_imm);
8080 %}
8081 
// Three-operand long add via lea: dst = src0 + src1 (immL32), with dst
// separate from the source register.  No flags operand or KILL effect is
// declared.
8082 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8083 %{
8084   match(Set dst (AddL src0 src1));
8085 
8086   ins_cost(110);
8087   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8088   opcode(0x8D); /* 0x8D /r */
8089   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8090   ins_pipe(ialu_reg_reg);
8091 %}
8092 
// Pointer add, pointer register += long register (AddP dst src, result in
// dst).  Formatted as "addq".  Flags are declared killed.
8093 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8094 %{
8095   match(Set dst (AddP dst src));
8096   effect(KILL cr);
8097 
8098   format %{ "addq    $dst, $src\t# ptr" %}
8099   opcode(0x03);
8100   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8101   ins_pipe(ialu_reg_reg);
8102 %}
8103 
// Pointer add, pointer register += immediate (immL32).  Formatted as "addq".
// Flags are declared killed.
8104 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8105 %{
8106   match(Set dst (AddP dst src));
8107   effect(KILL cr);
8108 
8109   format %{ "addq    $dst, $src\t# ptr" %}
8110   opcode(0x81, 0x00); /* /0 id */
8111   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8112   ins_pipe( ialu_reg );
8113 %}
8114 
8115 // XXX addP mem ops ????
8116 
// Three-operand pointer add via lea: dst = src0 + src1 (immL32), with dst
// separate from the source pointer register.  No flags operand or KILL effect
// is declared.
8117 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8118 %{
8119   match(Set dst (AddP src0 src1));
8120 
8121   ins_cost(110);
8122   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8123   opcode(0x8D); /* 0x8D /r */
8124   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8125   ins_pipe(ialu_reg_reg);
8126 %}
8127 
// CheckCastPP on a pointer register.  Declared with size(0) and an empty
// encoding, so the rule itself contributes no instruction bytes; dst is both
// input and result.
8128 instruct checkCastPP(rRegP dst)
8129 %{
8130   match(Set dst (CheckCastPP dst));
8131 
8132   size(0);
8133   format %{ "# checkcastPP of $dst" %}
8134   ins_encode(/* empty encoding */);
8135   ins_pipe(empty);
8136 %}
8137 
// CastPP on a pointer register.  Declared with size(0) and an empty encoding,
// so the rule itself contributes no instruction bytes; dst is both input and
// result.
8138 instruct castPP(rRegP dst)
8139 %{
8140   match(Set dst (CastPP dst));
8141 
8142   size(0);
8143   format %{ "# castPP of $dst" %}
8144   ins_encode(/* empty encoding */);
8145   ins_pipe(empty);
8146 %}
8147 
// CastII on an int register.  Declared with size(0), an empty encoding and an
// explicit ins_cost(0); dst is both input and result.
8148 instruct castII(rRegI dst)
8149 %{
8150   match(Set dst (CastII dst));
8151 
8152   size(0);
8153   format %{ "# castII of $dst" %}
8154   ins_encode(/* empty encoding */);
8155   ins_cost(0);
8156   ins_pipe(empty);
8157 %}
8158 
8159 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked from a memory operand into a pointer register and is
// formatted as a plain "movq" load; no lock prefix appears in the encoding.
8160 instruct loadPLocked(rRegP dst, memory mem)
8161 %{
8162   match(Set dst (LoadPLocked mem));
8163 
8164   ins_cost(125); // XXX
8165   format %{ "movq    $dst, $mem\t# ptr locked" %}
8166   opcode(0x8B);
8167   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8168   ins_pipe(ialu_reg_mem); // XXX
8169 %}
8170 
8171 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked from a memory operand into a long register and is
// formatted as a plain "movq" load; no lock prefix appears in the encoding.
8172 instruct loadLLocked(rRegL dst, memory mem)
8173 %{
8174   match(Set dst (LoadLLocked mem));
8175 
8176   ins_cost(125); // XXX
8177   format %{ "movq    $dst, $mem\t# long locked" %}
8178   opcode(0x8B);
8179   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8180   ins_pipe(ialu_reg_mem); // XXX
8181 %}
8182 
8183 // Conditional-store of the updated heap-top.
8184 // Used during allocation of the shared heap.
8185 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// The result of the match rule is the flags register itself (Set cr ...).
// oldval is constrained to the rax pointer operand class; the encoding is a
// lock prefix followed by the wide two-byte-opcode cmpxchg on heap_top_ptr.
// NOTE(review): unlike storeIConditional and storeLConditional, no
// effect(KILL oldval) is declared here -- confirm this is intentional.
8186 
8187 instruct storePConditional(memory heap_top_ptr,
8188                            rax_RegP oldval, rRegP newval,
8189                            rFlagsReg cr)
8190 %{
8191   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8192 
8193   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8194             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8195   opcode(0x0F, 0xB1);
8196   ins_encode(lock_prefix,
8197              REX_reg_mem_wide(newval, heap_top_ptr),
8198              OpcP, OpcS,
8199              reg_mem(newval, heap_top_ptr));
8200   ins_pipe(pipe_cmpxchg);
8201 %}
8202 
8203 // Conditional-store of an int value.
8204 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match rule is the flags register (Set cr ...).  oldval is
// constrained to the rax int operand class and is declared killed.  The
// encoding is a lock prefix followed by the non-wide cmpxchg on mem.
8205 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8206 %{
8207   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8208   effect(KILL oldval);
8209 
8210   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8211   opcode(0x0F, 0xB1);
8212   ins_encode(lock_prefix,
8213              REX_reg_mem(newval, mem),
8214              OpcP, OpcS,
8215              reg_mem(newval, mem));
8216   ins_pipe(pipe_cmpxchg);
8217 %}
8218 
8219 // Conditional-store of a long value.
8220 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match rule is the flags register (Set cr ...).  oldval is
// constrained to the rax long operand class and is declared killed.  The
// encoding is a lock prefix followed by the wide cmpxchg on mem.
8221 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8222 %{
8223   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8224   effect(KILL oldval);
8225 
8226   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8227   opcode(0x0F, 0xB1);
8228   ins_encode(lock_prefix,
8229              REX_reg_mem_wide(newval, mem),
8230              OpcP, OpcS,
8231              reg_mem(newval, mem));
8232   ins_pipe(pipe_cmpxchg);
8233 %}
8234 
8235 
8236 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap that produces an int result in res.  Per the format
// string the sequence is: locked cmpxchgq on mem_ptr, then sete into res, then
// movzbl to widen that byte.  oldval is constrained to the rax pointer class;
// both the flags and oldval are declared killed.
8237 instruct compareAndSwapP(rRegI res,
8238                          memory mem_ptr,
8239                          rax_RegP oldval, rRegP newval,
8240                          rFlagsReg cr)
8241 %{
8242   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8243   effect(KILL cr, KILL oldval);
8244 
8245   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8246             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8247             "sete    $res\n\t"
8248             "movzbl  $res, $res" %}
8249   opcode(0x0F, 0xB1);
8250   ins_encode(lock_prefix,
8251              REX_reg_mem_wide(newval, mem_ptr),
8252              OpcP, OpcS,
8253              reg_mem(newval, mem_ptr),
8254              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8255              REX_reg_breg(res, res), // movzbl
8256              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8257   ins_pipe( pipe_cmpxchg );
8258 %}
8259 
// Long compare-and-swap that produces an int result in res.  Per the format
// string the sequence is: locked cmpxchgq on mem_ptr, then sete into res, then
// movzbl to widen that byte.  oldval is constrained to the rax long class;
// both the flags and oldval are declared killed.
8260 instruct compareAndSwapL(rRegI res,
8261                          memory mem_ptr,
8262                          rax_RegL oldval, rRegL newval,
8263                          rFlagsReg cr)
8264 %{
8265   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8266   effect(KILL cr, KILL oldval);
8267 
8268   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8269             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8270             "sete    $res\n\t"
8271             "movzbl  $res, $res" %}
8272   opcode(0x0F, 0xB1);
8273   ins_encode(lock_prefix,
8274              REX_reg_mem_wide(newval, mem_ptr),
8275              OpcP, OpcS,
8276              reg_mem(newval, mem_ptr),
8277              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8278              REX_reg_breg(res, res), // movzbl
8279              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8280   ins_pipe( pipe_cmpxchg );
8281 %}
8282 
// Int compare-and-swap that produces an int result in res.  Per the format
// string the sequence is: locked cmpxchgl on mem_ptr (non-wide REX class),
// then sete into res, then movzbl to widen that byte.  oldval is constrained
// to the rax int class; both the flags and oldval are declared killed.
8283 instruct compareAndSwapI(rRegI res,
8284                          memory mem_ptr,
8285                          rax_RegI oldval, rRegI newval,
8286                          rFlagsReg cr)
8287 %{
8288   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8289   effect(KILL cr, KILL oldval);
8290 
8291   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8292             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8293             "sete    $res\n\t"
8294             "movzbl  $res, $res" %}
8295   opcode(0x0F, 0xB1);
8296   ins_encode(lock_prefix,
8297              REX_reg_mem(newval, mem_ptr),
8298              OpcP, OpcS,
8299              reg_mem(newval, mem_ptr),
8300              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8301              REX_reg_breg(res, res), // movzbl
8302              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8303   ins_pipe( pipe_cmpxchg );
8304 %}
8305 
8306 
// Compressed-pointer compare-and-swap that produces an int result in res.
// Per the format string the sequence is: locked cmpxchgl on mem_ptr (non-wide
// REX class), then sete into res, then movzbl to widen that byte.  oldval is
// constrained to the rax narrow-pointer class; both the flags and oldval are
// declared killed.
8307 instruct compareAndSwapN(rRegI res,
8308                           memory mem_ptr,
8309                           rax_RegN oldval, rRegN newval,
8310                           rFlagsReg cr) %{
8311   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8312   effect(KILL cr, KILL oldval);
8313 
8314   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8315             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8316             "sete    $res\n\t"
8317             "movzbl  $res, $res" %}
8318   opcode(0x0F, 0xB1);
8319   ins_encode(lock_prefix,
8320              REX_reg_mem(newval, mem_ptr),
8321              OpcP, OpcS,
8322              reg_mem(newval, mem_ptr),
8323              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8324              REX_reg_breg(res, res), // movzbl
8325              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8326   ins_pipe( pipe_cmpxchg );
8327 %}
8328 
8329 //----------Subtraction Instructions-------------------------------------------
8330 
8331 // Integer Subtraction Instructions
// Int subtract, register -= register (SubI dst src, result in dst).
// Flags are declared killed.
8332 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8333 %{
8334   match(Set dst (SubI dst src));
8335   effect(KILL cr);
8336 
8337   format %{ "subl    $dst, $src\t# int" %}
8338   opcode(0x2B);
8339   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8340   ins_pipe(ialu_reg_reg);
8341 %}
8342 
// Int subtract, register -= immediate (immI).  Flags are declared killed.
8343 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8344 %{
8345   match(Set dst (SubI dst src));
8346   effect(KILL cr);
8347 
8348   format %{ "subl    $dst, $src\t# int" %}
8349   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8350   ins_encode(OpcSErm(dst, src), Con8or32(src));
8351   ins_pipe(ialu_reg);
8352 %}
8353 
// Int subtract, register -= memory: the LoadI of src is folded into the
// subtract.  Flags are declared killed.
8354 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8355 %{
8356   match(Set dst (SubI dst (LoadI src)));
8357   effect(KILL cr);
8358 
8359   ins_cost(125);
8360   format %{ "subl    $dst, $src\t# int" %}
8361   opcode(0x2B);
8362   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8363   ins_pipe(ialu_reg_mem);
8364 %}
8365 
// Int subtract, memory -= register: matches a StoreI to dst of
// (LoadI dst) - src, i.e. a read-modify-write of the same memory operand.
// Flags are declared killed.
8366 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8367 %{
8368   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8369   effect(KILL cr);
8370 
8371   ins_cost(150);
8372   format %{ "subl    $dst, $src\t# int" %}
8373   opcode(0x29); /* Opcode 29 /r */
8374   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8375   ins_pipe(ialu_mem_reg);
8376 %}
8377 
// Int subtract, memory -= immediate (immI): read-modify-write of the same
// memory operand.  Flags are declared killed.  The /5 opcode extension is
// passed literally to RM_opc_mem.
8378 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8379 %{
8380   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8381   effect(KILL cr);
8382 
8383   ins_cost(125); // XXX
8384   format %{ "subl    $dst, $src\t# int" %}
8385   opcode(0x81); /* Opcode 81 /5 id */
8386   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8387   ins_pipe(ialu_mem_imm);
8388 %}
8389 
// Long subtract, register -= register (SubL dst src, result in dst).  Uses
// the wide REX prefix class.  Flags are declared killed.
8390 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8391 %{
8392   match(Set dst (SubL dst src));
8393   effect(KILL cr);
8394 
8395   format %{ "subq    $dst, $src\t# long" %}
8396   opcode(0x2B);
8397   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8398   ins_pipe(ialu_reg_reg);
8399 %}
8400 
// Long subtract, register -= immediate (immL32), formatted as "subq" and
// encoded with the wide OpcSErm_wide class.  Flags are declared killed.
//
// dst is a long register operand (rRegL): the rule consumes and produces the
// SubL value, and this matches the sibling subL_rReg and subL_rReg_mem rules.
// It was previously declared rRegI, which disagreed with the SubL match rule.
8401 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8402 %{
8403   match(Set dst (SubL dst src));
8404   effect(KILL cr);
8405 
8406   format %{ "subq    $dst, $src\t# long" %}
8407   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8408   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8409   ins_pipe(ialu_reg);
8410 %}
8411 
// Long subtract, register -= memory: the LoadL of src is folded into the
// subtract.  Flags are declared killed.
8412 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8413 %{
8414   match(Set dst (SubL dst (LoadL src)));
8415   effect(KILL cr);
8416 
8417   ins_cost(125);
8418   format %{ "subq    $dst, $src\t# long" %}
8419   opcode(0x2B);
8420   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8421   ins_pipe(ialu_reg_mem);
8422 %}
8423 
// Long subtract, memory -= register: matches a StoreL to dst of
// (LoadL dst) - src, i.e. a read-modify-write of the same memory operand.
// Flags are declared killed.
8424 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8425 %{
8426   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8427   effect(KILL cr);
8428 
8429   ins_cost(150);
8430   format %{ "subq    $dst, $src\t# long" %}
8431   opcode(0x29); /* Opcode 29 /r */
8432   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8433   ins_pipe(ialu_mem_reg);
8434 %}
8435 
// Long subtract, memory -= immediate (immL32): read-modify-write of the same
// memory operand.  Flags are declared killed.
8436 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8437 %{
8438   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8439   effect(KILL cr);
8440 
8441   ins_cost(125); // XXX
8442   format %{ "subq    $dst, $src\t# long" %}
8443   opcode(0x81); /* Opcode 81 /5 id */
8444   ins_encode(REX_mem_wide(dst),
8445              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8446   ins_pipe(ialu_mem_imm);
8447 %}
8448 
8449 // Subtract from a pointer
8450 // XXX hmpf???
// Matches AddP of dst with (SubI zero src), where zero is the immI0 constant
// operand -- i.e. a pointer plus a negated int -- and is formatted as a
// single "subq $dst, $src".  Flags are declared killed.
// NOTE(review): src is an int register (rRegI) while the encoding uses the
// wide REX class; confirm how the upper 32 bits of src are expected to be
// set when this rule matches.
8451 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8452 %{
8453   match(Set dst (AddP dst (SubI zero src)));
8454   effect(KILL cr);
8455 
8456   format %{ "subq    $dst, $src\t# ptr - int" %}
8457   opcode(0x2B);
8458   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8459   ins_pipe(ialu_reg_reg);
8460 %}
8461 
// Int negate of a register: matches (SubI zero dst) where zero is the immI0
// constant operand, formatted as "negl".  Flags are declared killed.
8462 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8463 %{
8464   match(Set dst (SubI zero dst));
8465   effect(KILL cr);
8466 
8467   format %{ "negl    $dst\t# int" %}
8468   opcode(0xF7, 0x03);  // Opcode F7 /3
8469   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8470   ins_pipe(ialu_reg);
8471 %}
8472 
// Int negate of a memory operand: matches a StoreI to dst of
// (0 - LoadI dst), formatted as "negl".  Flags are declared killed.  The
// opcode extension is taken from the secondary opcode value (0x03).
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory; other read-modify-write rules in this section use ialu_mem_imm or
// ialu_mem_reg -- confirm.
8473 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8474 %{
8475   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8476   effect(KILL cr);
8477 
8478   format %{ "negl    $dst\t# int" %}
8479   opcode(0xF7, 0x03);  // Opcode F7 /3
8480   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8481   ins_pipe(ialu_reg);
8482 %}
8483 
// Long negate of a register: matches (SubL zero dst) where zero is the immL0
// constant operand, formatted as "negq".  Flags are declared killed.
8484 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8485 %{
8486   match(Set dst (SubL zero dst));
8487   effect(KILL cr);
8488 
8489   format %{ "negq    $dst\t# long" %}
8490   opcode(0xF7, 0x03);  // Opcode F7 /3
8491   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8492   ins_pipe(ialu_reg);
8493 %}
8494 
// Long negate of a memory operand: matches a StoreL to dst of
// (0 - LoadL dst), formatted as "negq".  Flags are declared killed.  The
// opcode extension is taken from the secondary opcode value (0x03).
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory; other read-modify-write rules in this section use ialu_mem_imm or
// ialu_mem_reg -- confirm.
8495 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8496 %{
8497   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8498   effect(KILL cr);
8499 
8500   format %{ "negq    $dst\t# long" %}
8501   opcode(0xF7, 0x03);  // Opcode F7 /3
8502   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8503   ins_pipe(ialu_reg);
8504 %}
8505 
8506 
8507 //----------Multiplication/Division Instructions-------------------------------
8508 // Integer Multiplication Instructions
8509 // Multiply Register
8510 
// Int multiply, register *= register (MulI dst src, result in dst), formatted
// as two-operand "imull".  Flags are declared killed.
8511 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8512 %{
8513   match(Set dst (MulI dst src));
8514   effect(KILL cr);
8515 
8516   ins_cost(300);
8517   format %{ "imull   $dst, $src\t# int" %}
8518   opcode(0x0F, 0xAF);
8519   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8520   ins_pipe(ialu_reg_reg_alu0);
8521 %}
8522 
// Int multiply by an immediate in three-operand form: dst = src * imm, with
// dst separate from src.  Flags are declared killed.
8523 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8524 %{
8525   match(Set dst (MulI src imm));
8526   effect(KILL cr);
8527 
8528   ins_cost(300);
8529   format %{ "imull   $dst, $src, $imm\t# int" %}
8530   opcode(0x69); /* 69 /r id */
8531   ins_encode(REX_reg_reg(dst, src),
8532              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8533   ins_pipe(ialu_reg_reg_alu0);
8534 %}
8535 
// Int multiply, register *= memory: the LoadI of src is folded into the
// multiply.  Flags are declared killed.
8536 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8537 %{
8538   match(Set dst (MulI dst (LoadI src)));
8539   effect(KILL cr);
8540 
8541   ins_cost(350);
8542   format %{ "imull   $dst, $src\t# int" %}
8543   opcode(0x0F, 0xAF);
8544   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8545   ins_pipe(ialu_reg_mem_alu0);
8546 %}
8547 
// Int multiply of a loaded value by an immediate in three-operand form:
// dst = (LoadI src) * imm.  Flags are declared killed.
8548 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8549 %{
8550   match(Set dst (MulI (LoadI src) imm));
8551   effect(KILL cr);
8552 
8553   ins_cost(300);
8554   format %{ "imull   $dst, $src, $imm\t# int" %}
8555   opcode(0x69); /* 69 /r id */
8556   ins_encode(REX_reg_mem(dst, src),
8557              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8558   ins_pipe(ialu_reg_mem_alu0);
8559 %}
8560 
// Long multiply, register *= register (MulL dst src, result in dst),
// formatted as two-operand "imulq".  Flags are declared killed.
8561 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8562 %{
8563   match(Set dst (MulL dst src));
8564   effect(KILL cr);
8565 
8566   ins_cost(300);
8567   format %{ "imulq   $dst, $src\t# long" %}
8568   opcode(0x0F, 0xAF);
8569   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8570   ins_pipe(ialu_reg_reg_alu0);
8571 %}
8572 
// Long multiply by an immediate (immL32) in three-operand form:
// dst = src * imm, with dst separate from src.  Flags are declared killed.
8573 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8574 %{
8575   match(Set dst (MulL src imm));
8576   effect(KILL cr);
8577 
8578   ins_cost(300);
8579   format %{ "imulq   $dst, $src, $imm\t# long" %}
8580   opcode(0x69); /* 69 /r id */
8581   ins_encode(REX_reg_reg_wide(dst, src),
8582              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8583   ins_pipe(ialu_reg_reg_alu0);
8584 %}
8585 
// Long multiply, register *= memory: the LoadL of src is folded into the
// multiply.  Flags are declared killed.
8586 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8587 %{
8588   match(Set dst (MulL dst (LoadL src)));
8589   effect(KILL cr);
8590 
8591   ins_cost(350);
8592   format %{ "imulq   $dst, $src\t# long" %}
8593   opcode(0x0F, 0xAF);
8594   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8595   ins_pipe(ialu_reg_mem_alu0);
8596 %}
8597 
// Long multiply of a loaded value by an immediate (immL32) in three-operand
// form: dst = (LoadL src) * imm.  Flags are declared killed.
8598 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8599 %{
8600   match(Set dst (MulL (LoadL src) imm));
8601   effect(KILL cr);
8602 
8603   ins_cost(300);
8604   format %{ "imulq   $dst, $src, $imm\t# long" %}
8605   opcode(0x69); /* 69 /r id */
8606   ins_encode(REX_reg_mem_wide(dst, src),
8607              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8608   ins_pipe(ialu_reg_mem_alu0);
8609 %}
8610 
// High half of a long multiply (MulHiL).  The result operand is constrained
// to the rdx long class, one input to the rax long class, and src to a class
// that excludes rax.  rax is declared USE_KILL (read, then clobbered) and the
// flags killed.  Only src appears in the encoding; the format string shows
// the one-operand "imulq" form producing RDX:RAX.
8611 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8612 %{
8613   match(Set dst (MulHiL src rax));
8614   effect(USE_KILL rax, KILL cr);
8615 
8616   ins_cost(300);
8617   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8618   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8619   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8620   ins_pipe(ialu_reg_reg_alu0);
8621 %}
8622 
// Int divide: rax = rax / div.  The dividend/result is constrained to the rax
// int class, and the divisor to a class excluding rax and rdx; rdx and the
// flags are declared killed.
// The format string shows a guard ahead of the idivl: when rax is 0x80000000
// and $div is -1, rdx is zeroed and the idivl is skipped.  That guard and the
// cdql are presumably emitted by cdql_enc (defined outside this section);
// the rest of the encoding is the F7 /7 divide on $div.
8623 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8624                    rFlagsReg cr)
8625 %{
8626   match(Set rax (DivI rax div));
8627   effect(KILL rdx, KILL cr);
8628 
8629   ins_cost(30*100+10*100); // XXX
8630   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8631             "jne,s   normal\n\t"
8632             "xorl    rdx, rdx\n\t"
8633             "cmpl    $div, -1\n\t"
8634             "je,s    done\n"
8635     "normal: cdql\n\t"
8636             "idivl   $div\n"
8637     "done:"        %}
8638   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8639   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8640   ins_pipe(ialu_reg_reg_alu0);
8641 %}
8642 
// Long divide: rax = rax / div.  The dividend/result is constrained to the
// rax long class, and the divisor to a class excluding rax and rdx; rdx and
// the flags are declared killed.
// The format string shows a guard ahead of the idivq: when rax equals
// 0x8000000000000000 and $div is -1, rdx is zeroed and the idivq is skipped.
// That guard and the cdqq are presumably emitted by cdqq_enc (defined outside
// this section); the rest of the encoding is the wide F7 /7 divide on $div.
8643 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8644                    rFlagsReg cr)
8645 %{
8646   match(Set rax (DivL rax div));
8647   effect(KILL rdx, KILL cr);
8648 
8649   ins_cost(30*100+10*100); // XXX
8650   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8651             "cmpq    rax, rdx\n\t"
8652             "jne,s   normal\n\t"
8653             "xorl    rdx, rdx\n\t"
8654             "cmpq    $div, -1\n\t"
8655             "je,s    done\n"
8656     "normal: cdqq\n\t"
8657             "idivq   $div\n"
8658     "done:"        %}
8659   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8660   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8661   ins_pipe(ialu_reg_reg_alu0);
8662 %}
8663 
8664 // Integer DIVMOD with Register: one instruction supplying both quotient and remainder
8665 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8666                              rFlagsReg cr)
8667 %{
8668   match(DivModI rax div);  // no Set: the node has two results (rax and rdx operands above)
8669   effect(KILL cr);         // only the flags are killed; rdx is not killed here, unlike divI_rReg
8670 
8671   ins_cost(30*100+10*100); // XXX
8672   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8673             "jne,s   normal\n\t"
8674             "xorl    rdx, rdx\n\t"
8675             "cmpl    $div, -1\n\t"
8676             "je,s    done\n"
8677     "normal: cdql\n\t"
8678             "idivl   $div\n"
8679     "done:"        %}
8680   opcode(0xF7, 0x7);  /* Opcode F7 /7 */  // same sequence and encoding as divI_rReg
8681   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8682   ins_pipe(pipe_slow);
8683 %}
8684 
8685 // Long DIVMOD with Register: one instruction supplying both quotient and remainder
8686 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8687                              rFlagsReg cr)
8688 %{
8689   match(DivModL rax div);  // no Set: the node has two results (rax and rdx operands above)
8690   effect(KILL cr);         // only the flags are killed; rdx is not killed here, unlike divL_rReg
8691 
8692   ins_cost(30*100+10*100); // XXX
8693   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8694             "cmpq    rax, rdx\n\t"
8695             "jne,s   normal\n\t"
8696             "xorl    rdx, rdx\n\t"
8697             "cmpq    $div, -1\n\t"
8698             "je,s    done\n"
8699     "normal: cdqq\n\t"
8700             "idivq   $div\n"
8701     "done:"        %}
8702   opcode(0xF7, 0x7);  /* Opcode F7 /7 */  // same sequence and encoding as divL_rReg
8703   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8704   ins_pipe(pipe_slow);
8705 %}
8706 
8707 //----------- DivL-By-Constant-Expansions--------------------------------------
8708 // DivI cases are handled by the compiler
8709 
8710 // Magic constant (scaled reciprocal of 10) loaded into a register for the div-by-10 expansion
8711 instruct loadConL_0x6666666666666667(rRegL dst)
8712 %{
8713   effect(DEF dst);  // no match rule: this instruct is only instantiated from an expand block (see divL_10)
8714 
8715   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8716   ins_encode(load_immL(dst, 0x6666666666666667));  // the format text above must show this same 16-digit constant
8717   ins_pipe(ialu_reg);
8718 %}
8719 
8720 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8721 %{
8722   effect(DEF dst, USE src, USE_KILL rax, KILL cr);  // helper without a match rule (used by divL_10): defines dst, reads src, consumes rax, kills flags
8723 
8724   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8725   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8726   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // same encoding list as mulHiL_rReg
8727   ins_pipe(ialu_reg_reg_alu0);
8728 %}
8729 
8730 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8731 %{
8732   effect(USE_DEF dst, KILL cr);  // helper without a match rule (used by divL_10): dst is shifted in place, flags killed
8733 
8734   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8735   opcode(0xC1, 0x7); /* C1 /7 ib */
8736   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // 0x3F == 63, the shift count shown in the format
8737   ins_pipe(ialu_reg);
8738 %}
8739 
8740 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8741 %{
8742   effect(USE_DEF dst, KILL cr);  // helper without a match rule (used by divL_10): dst is shifted in place, flags killed
8743 
8744   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8745   opcode(0xC1, 0x7); /* C1 /7 ib */
8746   ins_encode(reg_opc_imm_wide(dst, 0x2));  // fixed shift count of 2
8747   ins_pipe(ialu_reg);
8748 %}
8749 
8750 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8751 %{
8752   match(Set dst (DivL src div));  // long division by the immL10 constant, replaced by the multiply/shift sequence below
8753 
8754   ins_cost((5+8)*100);
8755   expand %{
8756     rax_RegL rax;                     // Killed temp
8757     rFlagsReg cr;                     // Killed
8758     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8759     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8760     sarL_rReg_63(src, cr);            // sarq  src, 63
8761     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8762     subL_rReg(dst, src, cr);          // subq  rdx, src
8763   %}
8764 %}
8765 
8766 //-----------------------------------------------------------------------------
8767 
8768 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8769                    rFlagsReg cr)
8770 %{
8771   match(Set rdx (ModI rax div));  // int remainder: rdx = rax % div
8772   effect(KILL rax, KILL cr);      // rax and the flags are clobbered
8773 
8774   ins_cost(300); // XXX
8775   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8776             "jne,s   normal\n\t"
8777             "xorl    rdx, rdx\n\t"
8778             "cmpl    $div, -1\n\t"
8779             "je,s    done\n"
8780     "normal: cdql\n\t"
8781             "idivl   $div\n"
8782     "done:"        %}
8783   opcode(0xF7, 0x7);  /* Opcode F7 /7 */  // per the format, rdx is left at 0 when rax == 0x80000000 and $div == -1
8784   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // same encoding list as divI_rReg
8785   ins_pipe(ialu_reg_reg_alu0);
8786 %}
8787 
8788 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8789                    rFlagsReg cr)
8790 %{
8791   match(Set rdx (ModL rax div));  // long remainder: rdx = rax % div
8792   effect(KILL rax, KILL cr);      // rax and the flags are clobbered
8793 
8794   ins_cost(300); // XXX
8795   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8796             "cmpq    rax, rdx\n\t"
8797             "jne,s   normal\n\t"
8798             "xorl    rdx, rdx\n\t"
8799             "cmpq    $div, -1\n\t"
8800             "je,s    done\n"
8801     "normal: cdqq\n\t"
8802             "idivq   $div\n"
8803     "done:"        %}
8804   opcode(0xF7, 0x7);  /* Opcode F7 /7 */  // per the format, rdx is left at 0 when rax == 0x8000000000000000 and $div == -1
8805   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // same encoding list as divL_rReg
8806   ins_pipe(ialu_reg_reg_alu0);
8807 %}
8808 
8809 // Integer Shift Instructions
8810 // Shift Left by one (int, register operand)
8811 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8812 %{
8813   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8814   effect(KILL cr);                     // flags are clobbered
8815 
8816   format %{ "sall    $dst, $shift" %}
8817   opcode(0xD1, 0x4); /* D1 /4 */
8818   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded for this form
8819   ins_pipe(ialu_reg);
8820 %}
8821 
8822 // Shift Left by one (int, memory operand updated in place)
8823 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8824 %{
8825   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8826   effect(KILL cr);                                          // flags are clobbered
8827 
8828   format %{ "sall    $dst, $shift\t" %}
8829   opcode(0xD1, 0x4); /* D1 /4 */
8830   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
8831   ins_pipe(ialu_mem_imm);
8832 %}
8833 
8834 // Shift Left by 8-bit immediate (int, register operand)
8835 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8836 %{
8837   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8838   effect(KILL cr);                     // flags are clobbered
8839 
8840   format %{ "sall    $dst, $shift" %}
8841   opcode(0xC1, 0x4); /* C1 /4 ib */
8842   ins_encode(reg_opc_imm(dst, shift));
8843   ins_pipe(ialu_reg);
8844 %}
8845 
8846 // Shift Left by 8-bit immediate (int, memory operand updated in place)
8847 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8848 %{
8849   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8850   effect(KILL cr);                                          // flags are clobbered
8851 
8852   format %{ "sall    $dst, $shift" %}
8853   opcode(0xC1, 0x4); /* C1 /4 ib */
8854   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
8855   ins_pipe(ialu_mem_imm);
8856 %}
8857 
8858 // Shift Left by variable count (int, register operand; count in the rcx-class operand)
8859 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8860 %{
8861   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8862   effect(KILL cr);                     // flags are clobbered
8863 
8864   format %{ "sall    $dst, $shift" %}
8865   opcode(0xD3, 0x4); /* D3 /4 */
8866   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
8867   ins_pipe(ialu_reg_reg);
8868 %}
8869 
8870 // Shift Left by variable count (int, memory operand updated in place; count in the rcx-class operand)
8871 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8872 %{
8873   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8874   effect(KILL cr);                                          // flags are clobbered
8875 
8876   format %{ "sall    $dst, $shift" %}
8877   opcode(0xD3, 0x4); /* D3 /4 */
8878   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
8879   ins_pipe(ialu_mem_reg);
8880 %}
8881 
8882 // Arithmetic shift right by one (int, register operand)
8883 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8884 %{
8885   match(Set dst (RShiftI dst shift));  // dst = dst >> shift (RShiftI), in place
8886   effect(KILL cr);                     // flags are clobbered
8887 
8888   format %{ "sarl    $dst, $shift" %}
8889   opcode(0xD1, 0x7); /* D1 /7 */
8890   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded for this form
8891   ins_pipe(ialu_reg);
8892 %}
8893 
8894 // Arithmetic shift right by one (int, memory operand updated in place)
8895 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8896 %{
8897   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8898   effect(KILL cr);                                          // flags are clobbered
8899 
8900   format %{ "sarl    $dst, $shift" %}
8901   opcode(0xD1, 0x7); /* D1 /7 */
8902   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
8903   ins_pipe(ialu_mem_imm);
8904 %}
8905 
8906 // Arithmetic Shift Right by 8-bit immediate (int, register operand)
8907 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8908 %{
8909   match(Set dst (RShiftI dst shift));  // dst = dst >> shift (RShiftI), in place
8910   effect(KILL cr);                     // flags are clobbered
8911 
8912   format %{ "sarl    $dst, $shift" %}
8913   opcode(0xC1, 0x7); /* C1 /7 ib */
8914   ins_encode(reg_opc_imm(dst, shift));
8915   ins_pipe(ialu_reg);  // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
8916 %}
8917 
8918 // Arithmetic Shift Right by 8-bit immediate (int, memory operand updated in place)
8919 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8920 %{
8921   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8922   effect(KILL cr);                                          // flags are clobbered
8923 
8924   format %{ "sarl    $dst, $shift" %}
8925   opcode(0xC1, 0x7); /* C1 /7 ib */
8926   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
8927   ins_pipe(ialu_mem_imm);
8928 %}
8929 
8930 // Arithmetic Shift Right by variable count (int, register operand; count in the rcx-class operand)
8931 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8932 %{
8933   match(Set dst (RShiftI dst shift));  // dst = dst >> shift (RShiftI), in place
8934   effect(KILL cr);                     // flags are clobbered
8935 
8936   format %{ "sarl    $dst, $shift" %}
8937   opcode(0xD3, 0x7); /* D3 /7 */
8938   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
8939   ins_pipe(ialu_reg_reg);
8940 %}
8941 
8942 // Arithmetic Shift Right by variable count (int, memory operand updated in place; count in the rcx-class operand)
8943 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8944 %{
8945   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8946   effect(KILL cr);                                          // flags are clobbered
8947 
8948   format %{ "sarl    $dst, $shift" %}
8949   opcode(0xD3, 0x7); /* D3 /7 */
8950   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
8951   ins_pipe(ialu_mem_reg);
8952 %}
8953 
8954 // Logical shift right by one (int, register operand)
8955 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8956 %{
8957   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift (URShiftI), in place
8958   effect(KILL cr);                      // flags are clobbered
8959 
8960   format %{ "shrl    $dst, $shift" %}
8961   opcode(0xD1, 0x5); /* D1 /5 */
8962   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is encoded for this form
8963   ins_pipe(ialu_reg);
8964 %}
8965 
8966 // Logical shift right by one (int, memory operand updated in place)
8967 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8968 %{
8969   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8970   effect(KILL cr);                                           // flags are clobbered
8971 
8972   format %{ "shrl    $dst, $shift" %}
8973   opcode(0xD1, 0x5); /* D1 /5 */
8974   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
8975   ins_pipe(ialu_mem_imm);
8976 %}
8977 
8978 // Logical Shift Right by 8-bit immediate (int, register operand)
8979 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8980 %{
8981   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift (URShiftI), in place
8982   effect(KILL cr);                      // flags are clobbered
8983 
8984   format %{ "shrl    $dst, $shift" %}
8985   opcode(0xC1, 0x5); /* C1 /5 ib */
8986   ins_encode(reg_opc_imm(dst, shift));
8987   ins_pipe(ialu_reg);
8988 %}
8989 
8990 // Logical Shift Right by 8-bit immediate (int, memory operand updated in place)
8991 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8992 %{
8993   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
8994   effect(KILL cr);                                           // flags are clobbered
8995 
8996   format %{ "shrl    $dst, $shift" %}
8997   opcode(0xC1, 0x5); /* C1 /5 ib */
8998   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
8999   ins_pipe(ialu_mem_imm);
9000 %}
9001 
9002 // Logical Shift Right by variable count (int, register operand; count in the rcx-class operand)
9003 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9004 %{
9005   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift (URShiftI), in place
9006   effect(KILL cr);                      // flags are clobbered
9007 
9008   format %{ "shrl    $dst, $shift" %}
9009   opcode(0xD3, 0x5); /* D3 /5 */
9010   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
9011   ins_pipe(ialu_reg_reg);
9012 %}
9013 
9014 // Logical Shift Right by variable count (int, memory operand updated in place; count in the rcx-class operand)
9015 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9016 %{
9017   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load, shift and store back to the same address
9018   effect(KILL cr);                                           // flags are clobbered
9019 
9020   format %{ "shrl    $dst, $shift" %}
9021   opcode(0xD3, 0x5); /* D3 /5 */
9022   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
9023   ins_pipe(ialu_mem_reg);
9024 %}
9025 
9026 // Long Shift Instructions
9027 // Shift Left by one (long, register operand)
9028 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9029 %{
9030   match(Set dst (LShiftL dst shift));  // dst = dst << shift, in place
9031   effect(KILL cr);                     // flags are clobbered
9032 
9033   format %{ "salq    $dst, $shift" %}
9034   opcode(0xD1, 0x4); /* D1 /4 */
9035   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // _wide REX encoder is used for the long form
9036   ins_pipe(ialu_reg);
9037 %}
9038 
9039 // Shift Left by one (long, memory operand updated in place)
9040 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9041 %{
9042   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9043   effect(KILL cr);                                          // flags are clobbered
9044 
9045   format %{ "salq    $dst, $shift" %}
9046   opcode(0xD1, 0x4); /* D1 /4 */
9047   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
9048   ins_pipe(ialu_mem_imm);
9049 %}
9050 
9051 // Shift Left by 8-bit immediate (long, register operand)
9052 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9053 %{
9054   match(Set dst (LShiftL dst shift));  // dst = dst << shift, in place
9055   effect(KILL cr);                     // flags are clobbered
9056 
9057   format %{ "salq    $dst, $shift" %}
9058   opcode(0xC1, 0x4); /* C1 /4 ib */
9059   ins_encode(reg_opc_imm_wide(dst, shift));
9060   ins_pipe(ialu_reg);
9061 %}
9062 
9063 // Shift Left by 8-bit immediate (long, memory operand updated in place)
9064 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9065 %{
9066   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9067   effect(KILL cr);                                          // flags are clobbered
9068 
9069   format %{ "salq    $dst, $shift" %}
9070   opcode(0xC1, 0x4); /* C1 /4 ib */
9071   ins_encode(REX_mem_wide(dst), OpcP,
9072              RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
9073   ins_pipe(ialu_mem_imm);
9074 %}
9075 
9076 // Shift Left by variable count (long, register operand; count in the rcx-class int operand)
9077 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9078 %{
9079   match(Set dst (LShiftL dst shift));  // dst = dst << shift, in place
9080   effect(KILL cr);                     // flags are clobbered
9081 
9082   format %{ "salq    $dst, $shift" %}
9083   opcode(0xD3, 0x4); /* D3 /4 */
9084   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
9085   ins_pipe(ialu_reg_reg);
9086 %}
9087 
9088 // Shift Left by variable count (long, memory operand updated in place; count in the rcx-class int operand)
9089 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9090 %{
9091   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9092   effect(KILL cr);                                          // flags are clobbered
9093 
9094   format %{ "salq    $dst, $shift" %}
9095   opcode(0xD3, 0x4); /* D3 /4 */
9096   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
9097   ins_pipe(ialu_mem_reg);
9098 %}
9099 
9100 // Arithmetic shift right by one (long, register operand)
9101 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9102 %{
9103   match(Set dst (RShiftL dst shift));  // dst = dst >> shift (RShiftL), in place
9104   effect(KILL cr);                     // flags are clobbered
9105 
9106   format %{ "sarq    $dst, $shift" %}
9107   opcode(0xD1, 0x7); /* D1 /7 */
9108   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no immediate is encoded for this form
9109   ins_pipe(ialu_reg);
9110 %}
9111 
9112 // Arithmetic shift right by one (long, memory operand updated in place)
9113 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9114 %{
9115   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9116   effect(KILL cr);                                          // flags are clobbered
9117 
9118   format %{ "sarq    $dst, $shift" %}
9119   opcode(0xD1, 0x7); /* D1 /7 */
9120   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
9121   ins_pipe(ialu_mem_imm);
9122 %}
9123 
9124 // Arithmetic Shift Right by 8-bit immediate (long, register operand)
9125 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9126 %{
9127   match(Set dst (RShiftL dst shift));  // dst = dst >> shift (RShiftL), in place
9128   effect(KILL cr);                     // flags are clobbered
9129 
9130   format %{ "sarq    $dst, $shift" %}
9131   opcode(0xC1, 0x7); /* C1 /7 ib */
9132   ins_encode(reg_opc_imm_wide(dst, shift));
9133   ins_pipe(ialu_reg);  // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
9134 %}
9135 
9136 // Arithmetic Shift Right by 8-bit immediate (long, memory operand updated in place)
9137 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9138 %{
9139   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9140   effect(KILL cr);                                          // flags are clobbered
9141 
9142   format %{ "sarq    $dst, $shift" %}
9143   opcode(0xC1, 0x7); /* C1 /7 ib */
9144   ins_encode(REX_mem_wide(dst), OpcP,
9145              RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
9146   ins_pipe(ialu_mem_imm);
9147 %}
9148 
9149 // Arithmetic Shift Right by variable count (long, register operand; count in the rcx-class int operand)
9150 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9151 %{
9152   match(Set dst (RShiftL dst shift));  // dst = dst >> shift (RShiftL), in place
9153   effect(KILL cr);                     // flags are clobbered
9154 
9155   format %{ "sarq    $dst, $shift" %}
9156   opcode(0xD3, 0x7); /* D3 /7 */
9157   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
9158   ins_pipe(ialu_reg_reg);
9159 %}
9160 
9161 // Arithmetic Shift Right by variable count (long, memory operand updated in place; count in the rcx-class int operand)
9162 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9163 %{
9164   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9165   effect(KILL cr);                                          // flags are clobbered
9166 
9167   format %{ "sarq    $dst, $shift" %}
9168   opcode(0xD3, 0x7); /* D3 /7 */
9169   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
9170   ins_pipe(ialu_mem_reg);
9171 %}
9172 
9173 // Logical shift right by one (long, register operand)
9174 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9175 %{
9176   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (URShiftL), in place
9177   effect(KILL cr);                      // flags are clobbered
9178 
9179   format %{ "shrq    $dst, $shift" %}
9180   opcode(0xD1, 0x5); /* D1 /5 */
9181   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // no immediate is encoded for this form
9182   ins_pipe(ialu_reg);
9183 %}
9184 
9185 // Logical shift right by one (long, memory operand updated in place)
9186 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9187 %{
9188   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9189   effect(KILL cr);                                           // flags are clobbered
9190 
9191   format %{ "shrq    $dst, $shift" %}
9192   opcode(0xD1, 0x5); /* D1 /5 */
9193   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is encoded for this form
9194   ins_pipe(ialu_mem_imm);
9195 %}
9196 
9197 // Logical Shift Right by 8-bit immediate (long, register operand)
9198 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9199 %{
9200   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (URShiftL), in place
9201   effect(KILL cr);                      // flags are clobbered
9202 
9203   format %{ "shrq    $dst, $shift" %}
9204   opcode(0xC1, 0x5); /* C1 /5 ib */
9205   ins_encode(reg_opc_imm_wide(dst, shift));
9206   ins_pipe(ialu_reg);
9207 %}
9208 
9209 
9210 // Logical Shift Right by 8-bit immediate (long, memory operand updated in place)
9211 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9212 %{
9213   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9214   effect(KILL cr);                                           // flags are clobbered
9215 
9216   format %{ "shrq    $dst, $shift" %}
9217   opcode(0xC1, 0x5); /* C1 /5 ib */
9218   ins_encode(REX_mem_wide(dst), OpcP,
9219              RM_opc_mem(secondary, dst), Con8or32(shift));  // shift count follows as an immediate
9220   ins_pipe(ialu_mem_imm);
9221 %}
9222 
9223 // Logical Shift Right by variable count (long, register operand; count in the rcx-class int operand)
9224 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9225 %{
9226   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (URShiftL), in place
9227   effect(KILL cr);                      // flags are clobbered
9228 
9229   format %{ "shrq    $dst, $shift" %}
9230   opcode(0xD3, 0x5); /* D3 /5 */
9231   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift register is not encoded; it is fixed by the rcx_RegI class
9232   ins_pipe(ialu_reg_reg);
9233 %}
9234 
9235 // Logical Shift Right by variable count (long, memory operand updated in place; count in the rcx-class int operand)
9236 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9237 %{
9238   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load, shift and store back to the same address
9239   effect(KILL cr);                                           // flags are clobbered
9240 
9241   format %{ "shrq    $dst, $shift" %}
9242   opcode(0xD3, 0x5); /* D3 /5 */
9243   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift register is not encoded
9244   ins_pipe(ialu_mem_reg);
9245 %}
9246 
9247 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9248 // This idiom is used by the compiler for the i2b bytecode.
9249 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9250 %{
9251   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // (src << 24) >> 24 collapsed into one sign-extending byte move
9252 
9253   format %{ "movsbl  $dst, $src\t# i2b" %}
9254   opcode(0x0F, 0xBE);
9255   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // no flags operand: this instruct declares no KILL effect
9256   ins_pipe(ialu_reg_reg);
9257 %}
9258 
9259 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9260 // This idiom is used by the compiler for the i2s bytecode.
9261 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9262 %{
9263   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // (src << 16) >> 16 collapsed into one sign-extending word move
9264 
9265   format %{ "movswl  $dst, $src\t# i2s" %}
9266   opcode(0x0F, 0xBF);
9267   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // no flags operand: this instruct declares no KILL effect
9268   ins_pipe(ialu_reg_reg);
9269 %}
9270 
9271 // ROL/ROR instructions
9272 
9273 // ROL expand
9274 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9275   effect(KILL cr, USE_DEF dst);  // helper without a match rule (expanded from rolI_rReg_i1): rotates dst in place, kills flags
9276 
9277   format %{ "roll    $dst" %}
9278   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9279   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no count is encoded for this form
9280   ins_pipe(ialu_reg);
9281 %}
9282 
9283 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9284   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rolI_rReg_i8): rotates dst in place by the immediate, kills flags
9285 
9286   format %{ "roll    $dst, $shift" %}
9287   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9288   ins_encode( reg_opc_imm(dst, shift) );
9289   ins_pipe(ialu_reg);
9290 %}
9291 
9292 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9293 %{
9294   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rolI_rReg_Var_C0/_C32): dst is taken from the no_rcx class, count from the rcx class
9295 
9296   format %{ "roll    $dst, $shift" %}
9297   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9298   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift register is not encoded
9299   ins_pipe(ialu_reg_reg);
9300 %}
9301 // end of ROL expand
9302 
9303 // Rotate Left by one (int): recognizes the shift/or idiom and expands to a single roll
9304 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9305 %{
9306   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // (dst << lshift) | (dst >>> rshift)
9307 
9308   expand %{
9309     rolI_rReg_imm1(dst, cr);  // roll $dst
9310   %}
9311 %}
9312 
9313 // Rotate Left by 8-bit immediate (int): recognizes the shift/or idiom and expands to a single roll
9314 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9315 %{
9316   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift counts sum to a multiple of 32
9317   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // (dst << lshift) | (dst >>> rshift)
9318 
9319   expand %{
9320     rolI_rReg_imm8(dst, lshift, cr);  // roll $dst, $lshift
9321   %}
9322 %}
9323 
9324 // Rotate Left by variable (int), idiom written with a right-shift count of (0 - shift)
9325 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9326 %{
9327   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));  // (dst << shift) | (dst >>> (zero - shift))
9328 
9329   expand %{
9330     rolI_rReg_CL(dst, shift, cr);  // roll $dst, $shift
9331   %}
9332 %}
9333 
9334 // Rotate Left by variable (int), idiom written with a right-shift count of (32 - shift)
9335 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9336 %{
9337   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));  // (dst << shift) | (dst >>> (c32 - shift))
9338 
9339   expand %{
9340     rolI_rReg_CL(dst, shift, cr);  // roll $dst, $shift
9341   %}
9342 %}
9343 
9344 // ROR expand
9345 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9346 %{
9347   effect(USE_DEF dst, KILL cr);  // helper without a match rule (expanded from rorI_rReg_i1): rotates dst in place, kills flags
9348 
9349   format %{ "rorl    $dst" %}
9350   opcode(0xD1, 0x1); /* D1 /1 */
9351   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no count is encoded for this form
9352   ins_pipe(ialu_reg);
9353 %}
9354 
9355 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9356 %{
9357   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rorI_rReg_i8): rotates dst in place by the immediate, kills flags
9358 
9359   format %{ "rorl    $dst, $shift" %}
9360   opcode(0xC1, 0x1); /* C1 /1 ib */
9361   ins_encode(reg_opc_imm(dst, shift));
9362   ins_pipe(ialu_reg);
9363 %}
9364 
9365 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9366 %{
9367   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rorI_rReg_Var_C0/_C32): dst is taken from the no_rcx class, count from the rcx class
9368 
9369   format %{ "rorl    $dst, $shift" %}
9370   opcode(0xD3, 0x1); /* D3 /1 */
9371   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift register is not encoded
9372   ins_pipe(ialu_reg_reg);
9373 %}
9374 // end of ROR expand
9375 
9376 // Rotate Right by one (int): recognizes the shift/or idiom and expands to a single rorl
9377 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9378 %{
9379   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // (dst >>> rshift) | (dst << lshift)
9380 
9381   expand %{
9382     rorI_rReg_imm1(dst, cr);  // rorl $dst
9383   %}
9384 %}
9385 
9386 // Rotate Right by 8-bit immediate (int): recognizes the shift/or idiom and expands to a single rorl
9387 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9388 %{
9389   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // only when the two shift counts sum to a multiple of 32
9390   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // (dst >>> rshift) | (dst << lshift)
9391 
9392   expand %{
9393     rorI_rReg_imm8(dst, rshift, cr);  // rorl $dst, $rshift
9394   %}
9395 %}
9396 
9397 // Rotate Right by variable (int), idiom written with a left-shift count of (0 - shift)
9398 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9399 %{
9400   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));  // (dst >>> shift) | (dst << (zero - shift))
9401 
9402   expand %{
9403     rorI_rReg_CL(dst, shift, cr);  // rorl $dst, $shift
9404   %}
9405 %}
9406 
9407 // Rotate Right by variable (int), idiom written with a left-shift count of (32 - shift)
9408 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9409 %{
9410   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));  // (dst >>> shift) | (dst << (c32 - shift))
9411 
9412   expand %{
9413     rorI_rReg_CL(dst, shift, cr);  // rorl $dst, $shift
9414   %}
9415 %}
9416 
9417 // for long rotate
9418 // ROL expand
9419 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9420   effect(USE_DEF dst, KILL cr);  // helper without a match rule (expanded from rolL_rReg_i1): rotates dst in place, kills flags
9421 
9422   format %{ "rolq    $dst" %}
9423   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9424   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no count is encoded for this form
9425   ins_pipe(ialu_reg);
9426 %}
9427 
9428 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9429   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rolL_rReg_i8): rotates dst in place by the immediate, kills flags
9430 
9431   format %{ "rolq    $dst, $shift" %}
9432   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9433   ins_encode( reg_opc_imm_wide(dst, shift) );
9434   ins_pipe(ialu_reg);
9435 %}
9436 
9437 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9438 %{
9439   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rolL_rReg_Var_C0/_C64): dst is taken from the no_rcx class, count from the rcx class
9440 
9441   format %{ "rolq    $dst, $shift" %}
9442   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9443   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift register is not encoded
9444   ins_pipe(ialu_reg_reg);
9445 %}
9446 // end of ROL expand
9447 
9448 // Rotate Left by one (long): recognizes the shift/or idiom and expands to a single rolq
9449 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9450 %{
9451   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // (dst << lshift) | (dst >>> rshift)
9452 
9453   expand %{
9454     rolL_rReg_imm1(dst, cr);  // rolq $dst
9455   %}
9456 %}
9457 
9458 // Rotate Left by 8-bit immediate (long): recognizes the shift/or idiom and expands to a single rolq
9459 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9460 %{
9461   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when the two shift counts sum to a multiple of 64
9462   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // (dst << lshift) | (dst >>> rshift)
9463 
9464   expand %{
9465     rolL_rReg_imm8(dst, lshift, cr);  // rolq $dst, $lshift
9466   %}
9467 %}
9468 
9469 // Rotate Left by variable (long), idiom written with a right-shift count of (0 - shift)
9470 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9471 %{
9472   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));  // (dst << shift) | (dst >>> (zero - shift))
9473 
9474   expand %{
9475     rolL_rReg_CL(dst, shift, cr);  // rolq $dst, $shift
9476   %}
9477 %}
9478 
9479 // Rotate Left by variable (long), idiom written with a right-shift count of (64 - shift)
9480 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9481 %{
9482   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));  // (dst << shift) | (dst >>> (c64 - shift))
9483 
9484   expand %{
9485     rolL_rReg_CL(dst, shift, cr);  // rolq $dst, $shift
9486   %}
9487 %}
9488 
9489 // ROR expand
9490 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9491 %{
9492   effect(USE_DEF dst, KILL cr);  // helper without a match rule (expanded from rorL_rReg_i1): rotates dst in place, kills flags
9493 
9494   format %{ "rorq    $dst" %}
9495   opcode(0xD1, 0x1); /* D1 /1 */
9496   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no count is encoded for this form
9497   ins_pipe(ialu_reg);
9498 %}
9499 
9500 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9501 %{
9502   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rorL_rReg_i8): rotates dst in place by the immediate, kills flags
9503 
9504   format %{ "rorq    $dst, $shift" %}
9505   opcode(0xC1, 0x1); /* C1 /1 ib */
9506   ins_encode(reg_opc_imm_wide(dst, shift));
9507   ins_pipe(ialu_reg);
9508 %}
9509 
9510 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9511 %{
9512   effect(USE_DEF dst, USE shift, KILL cr);  // helper without a match rule (expanded from rorL_rReg_Var_C0/_C64): dst is taken from the no_rcx class, count from the rcx class
9513 
9514   format %{ "rorq    $dst, $shift" %}
9515   opcode(0xD3, 0x1); /* D3 /1 */
9516   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift register is not encoded
9517   ins_pipe(ialu_reg_reg);
9518 %}
9519 // end of ROR expand
9520 
9521 // Rotate Right by one (long): recognizes the shift/or idiom and expands to a single rorq
9522 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9523 %{
9524   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // (dst >>> rshift) | (dst << lshift)
9525 
9526   expand %{
9527     rorL_rReg_imm1(dst, cr);  // rorq $dst
9528   %}
9529 %}
9530 
9531 // Rotate Right by 8-bit immediate (long): recognizes the shift/or idiom and expands to a single rorq
9532 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9533 %{
9534   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // only when the two shift counts sum to a multiple of 64
9535   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // (dst >>> rshift) | (dst << lshift)
9536 
9537   expand %{
9538     rorL_rReg_imm8(dst, rshift, cr);  // rorq $dst, $rshift
9539   %}
9540 %}
9541 
9542 // Rotate Right by variable (long), idiom written with a left-shift count of (0 - shift)
9543 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9544 %{
9545   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));  // (dst >>> shift) | (dst << (zero - shift))
9546 
9547   expand %{
9548     rorL_rReg_CL(dst, shift, cr);  // rorq $dst, $shift
9549   %}
9550 %}
9551 
9552 // Rotate Right by variable (long), idiom written with a left-shift count of (64 - shift)
9553 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9554 %{
9555   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));  // (dst >>> shift) | (dst << (c64 - shift))
9556 
9557   expand %{
9558     rorL_rReg_CL(dst, shift, cr);  // rorq $dst, $shift
9559   %}
9560 %}
9561 
9562 // Logical Instructions
9563 
9564 // Integer Logical Instructions
9565 
9566 // And Instructions
9567 // And Register with Register (int)
9568 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9569 %{
9570   match(Set dst (AndI dst src));  // dst = dst & src, in place
9571   effect(KILL cr);                // flags are clobbered
9572 
9573   format %{ "andl    $dst, $src\t# int" %}
9574   opcode(0x23);
9575   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9576   ins_pipe(ialu_reg_reg);
9577 %}
9578 
9579 // And Register with Immediate 255, emitted as a zero-extending byte move of dst onto itself
9580 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9581 %{
9582   match(Set dst (AndI dst src));  // dst = dst & src; no flags operand and no KILL effect in this form
9583 
9584   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9585   opcode(0x0F, 0xB6);
9586   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is both source and destination
9587   ins_pipe(ialu_reg);
9588 %}
9589 
9590 // And Register with Immediate 255 and promote to long, emitted as one zero-extending byte move
9591 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9592 %{
9593   match(Set dst (ConvI2L (AndI src mask)));  // dst = (long)(src & mask); no KILL effect in this form
9594 
9595   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9596   opcode(0x0F, 0xB6);
9597   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // same opcode bytes as andI_rReg_imm255, separate src register
9598   ins_pipe(ialu_reg);
9599 %}
9600 
9601 // And Register with Immediate 65535, emitted as a zero-extending word move of dst onto itself
9602 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9603 %{
9604   match(Set dst (AndI dst src));  // dst = dst & src; no flags operand and no KILL effect in this form
9605 
9606   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9607   opcode(0x0F, 0xB7);
9608   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is both source and destination
9609   ins_pipe(ialu_reg);
9610 %}
9611 
9612 // And Register with Immediate 65535 and promote to long, emitted as one zero-extending word move
9613 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9614 %{
9615   match(Set dst (ConvI2L (AndI src mask)));  // dst = (long)(src & mask); no KILL effect in this form
9616 
9617   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9618   opcode(0x0F, 0xB7);
9619   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // same opcode bytes as andI_rReg_imm65535, separate src register
9620   ins_pipe(ialu_reg);
9621 %}
9622 
9623 // And Register with Immediate (int, general immI operand)
9624 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9625 %{
9626   match(Set dst (AndI dst src));  // dst = dst & src, in place
9627   effect(KILL cr);                // flags are clobbered
9628 
9629   format %{ "andl    $dst, $src\t# int" %}
9630   opcode(0x81, 0x04); /* Opcode 81 /4 */
9631   ins_encode(OpcSErm(dst, src), Con8or32(src));  // encoders defined elsewhere; presumably choose between an 8-bit and 32-bit immediate form -- confirm
9632   ins_pipe(ialu_reg);
9633 %}
9634 
9635 // And Register with Memory (int): the second operand is loaded from memory
9636 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9637 %{
9638   match(Set dst (AndI dst (LoadI src)));  // dst = dst & (int loaded from src)
9639   effect(KILL cr);                        // flags are clobbered
9640 
9641   ins_cost(125);
9642   format %{ "andl    $dst, $src\t# int" %}
9643   opcode(0x23);
9644   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9645   ins_pipe(ialu_reg_mem);
9646 %}
9647 
9648 // And Memory with Register
9649 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9650 %{ // Read-modify-write on memory: matches a StoreI to dst of ((LoadI dst) & src), same address for load and store.
9651   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9652   effect(KILL cr); // the flags register operand is declared clobbered
9653 
9654   ins_cost(150);
9655   format %{ "andl    $dst, $src\t# int" %}
9656   opcode(0x21); /* Opcode 21 /r */
9657   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9658   ins_pipe(ialu_mem_reg);
9659 %}
9660 
9661 // And Memory with Immediate
9662 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9663 %{ // Read-modify-write on memory with a constant: matches a StoreI to dst of ((LoadI dst) & src).
9664   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9665   effect(KILL cr); // the flags register operand is declared clobbered
9666 
9667   ins_cost(125);
9668   format %{ "andl    $dst, $src\t# int" %}
9669   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9670   ins_encode(REX_mem(dst), OpcSE(src),
9671              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x4, the /4 digit in the comment above)
9672   ins_pipe(ialu_mem_imm);
9673 %}
9674 
9675 // Or Instructions
9676 // Or Register with Register
9677 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9678 %{ // Two-operand int OR: dst = dst | src.
9679   match(Set dst (OrI dst src));
9680   effect(KILL cr); // the flags register operand is declared clobbered
9681 
9682   format %{ "orl     $dst, $src\t# int" %}
9683   opcode(0x0B); /* Opcode 0B /r (OR r32, r/m32) */
9684   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9685   ins_pipe(ialu_reg_reg);
9686 %}
9687 
9688 // Or Register with Immediate
9689 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9690 %{ // Int OR with a constant: dst = dst | src.
9691   match(Set dst (OrI dst src));
9692   effect(KILL cr); // the flags register operand is declared clobbered
9693 
9694   format %{ "orl     $dst, $src\t# int" %}
9695   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9696   ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): both encoding classes live outside this chunk; the names suggest an imm8-vs-imm32 choice -- confirm
9697   ins_pipe(ialu_reg);
9698 %}
9699 
9700 // Or Register with Memory
9701 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9702 %{ // dst = dst | (int loaded from src); the LoadI is folded into the instruction's memory operand.
9703   match(Set dst (OrI dst (LoadI src)));
9704   effect(KILL cr); // the flags register operand is declared clobbered
9705 
9706   ins_cost(125);
9707   format %{ "orl     $dst, $src\t# int" %}
9708   opcode(0x0B); /* Opcode 0B /r (OR r32, r/m32) */
9709   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9710   ins_pipe(ialu_reg_mem);
9711 %}
9712 
9713 // Or Memory with Register
9714 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9715 %{ // Read-modify-write on memory: matches a StoreI to dst of ((LoadI dst) | src), same address for load and store.
9716   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9717   effect(KILL cr); // the flags register operand is declared clobbered
9718 
9719   ins_cost(150);
9720   format %{ "orl     $dst, $src\t# int" %}
9721   opcode(0x09); /* Opcode 09 /r */
9722   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9723   ins_pipe(ialu_mem_reg);
9724 %}
9725 
9726 // Or Memory with Immediate
9727 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9728 %{ // Read-modify-write on memory with a constant: matches a StoreI to dst of ((LoadI dst) | src).
9729   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9730   effect(KILL cr); // the flags register operand is declared clobbered
9731 
9732   ins_cost(125);
9733   format %{ "orl     $dst, $src\t# int" %}
9734   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9735   ins_encode(REX_mem(dst), OpcSE(src),
9736              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x1, the /1 digit in the comment above)
9737   ins_pipe(ialu_mem_imm);
9738 %}
9739 
9740 // Xor Instructions
9741 // Xor Register with Register
9742 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9743 %{ // Two-operand int XOR: dst = dst ^ src.
9744   match(Set dst (XorI dst src));
9745   effect(KILL cr); // the flags register operand is declared clobbered
9746 
9747   format %{ "xorl    $dst, $src\t# int" %}
9748   opcode(0x33); /* Opcode 33 /r (XOR r32, r/m32) */
9749   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9750   ins_pipe(ialu_reg_reg);
9751 %}
9752 
9753 // Xor Register with Immediate -1
9754 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{ // dst ^ -1 is a bitwise complement, so a single notl is emitted
9755   match(Set dst (XorI dst imm));
9756   // No rFlagsReg operand / KILL effect is declared for this rule.
9757   format %{ "notl   $dst" %}
9758   ins_encode %{
9759      __ notl($dst$$Register); // the format text above now names the same mnemonic (it used to print "not")
9760   %}
9761   ins_pipe(ialu_reg);
9762 %}
9763 
9764 // Xor Register with Immediate
9765 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9766 %{ // Int XOR with a constant: dst = dst ^ src.
9767   match(Set dst (XorI dst src));
9768   effect(KILL cr); // the flags register operand is declared clobbered
9769 
9770   format %{ "xorl    $dst, $src\t# int" %}
9771   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9772   ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): both encoding classes live outside this chunk; the names suggest an imm8-vs-imm32 choice -- confirm
9773   ins_pipe(ialu_reg);
9774 %}
9775 
9776 // Xor Register with Memory
9777 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9778 %{ // dst = dst ^ (int loaded from src); the LoadI is folded into the instruction's memory operand.
9779   match(Set dst (XorI dst (LoadI src)));
9780   effect(KILL cr); // the flags register operand is declared clobbered
9781 
9782   ins_cost(125);
9783   format %{ "xorl    $dst, $src\t# int" %}
9784   opcode(0x33); /* Opcode 33 /r (XOR r32, r/m32) */
9785   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9786   ins_pipe(ialu_reg_mem);
9787 %}
9788 
9789 // Xor Memory with Register
9790 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9791 %{ // Read-modify-write on memory: matches a StoreI to dst of ((LoadI dst) ^ src), same address for load and store.
9792   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9793   effect(KILL cr); // the flags register operand is declared clobbered
9794 
9795   ins_cost(150);
9796   format %{ "xorl    $dst, $src\t# int" %}
9797   opcode(0x31); /* Opcode 31 /r */
9798   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9799   ins_pipe(ialu_mem_reg);
9800 %}
9801 
9802 // Xor Memory with Immediate
9803 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9804 %{ // Read-modify-write on memory with a constant: matches a StoreI to dst of ((LoadI dst) ^ src).
9805   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9806   effect(KILL cr); // the flags register operand is declared clobbered
9807 
9808   ins_cost(125);
9809   format %{ "xorl    $dst, $src\t# int" %}
9810   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9811   ins_encode(REX_mem(dst), OpcSE(src),
9812              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x6, the /6 digit in the comment above)
9813   ins_pipe(ialu_mem_imm);
9814 %}
9815 
9816 
9817 // Long Logical Instructions
9818 
9819 // And Instructions
9820 // And Register with Register
9821 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9822 %{ // Two-operand long AND: dst = dst & src.
9823   match(Set dst (AndL dst src));
9824   effect(KILL cr); // the flags register operand is declared clobbered
9825 
9826   format %{ "andq    $dst, $src\t# long" %}
9827   opcode(0x23); /* Opcode REX.W + 23 /r (AND r64, r/m64) */
9828   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // the *_wide REX class is what distinguishes this from the int rule with the same opcode
9829   ins_pipe(ialu_reg_reg);
9830 %}
9831 
9832 // And Register with Immediate 255
9833 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9834 %{ // dst = dst & 0xFF on a long, printed and encoded as a zero-extending byte move of dst onto itself.
9835   match(Set dst (AndL dst src));
9836   // No rFlagsReg operand / KILL effect is declared for this rule.
9837   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9838   opcode(0x0F, 0xB6); /* Opcode REX.W + 0F B6 /r (MOVZX r64, r/m8) */
9839   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9840   ins_pipe(ialu_reg);
9841 %}
9842 
9843 // And Register with Immediate 65535
9844 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9845 %{ // dst = dst & 0xFFFF on a long, printed and encoded as a 16-bit zero-extending move of dst onto itself.
9846   match(Set dst (AndL dst src));
9847   // No rFlagsReg operand / KILL effect is declared for this rule.
9848   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9849   opcode(0x0F, 0xB7); /* Opcode REX.W + 0F B7 /r (MOVZX r64, r/m16) */
9850   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9851   ins_pipe(ialu_reg);
9852 %}
9853 
9854 // And Register with Immediate
9855 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9856 %{ // Long AND with a constant operand of class immL32: dst = dst & src.
9857   match(Set dst (AndL dst src));
9858   effect(KILL cr); // the flags register operand is declared clobbered
9859 
9860   format %{ "andq    $dst, $src\t# long" %}
9861   opcode(0x81, 0x04); /* Opcode 81 /4 */
9862   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): both encoding classes live outside this chunk; the names suggest an imm8-vs-imm32 choice -- confirm
9863   ins_pipe(ialu_reg);
9864 %}
9865 
9866 // And Register with Memory
9867 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9868 %{ // dst = dst & (long loaded from src); the LoadL is folded into the instruction's memory operand.
9869   match(Set dst (AndL dst (LoadL src)));
9870   effect(KILL cr); // the flags register operand is declared clobbered
9871 
9872   ins_cost(125);
9873   format %{ "andq    $dst, $src\t# long" %}
9874   opcode(0x23); /* Opcode REX.W + 23 /r (AND r64, r/m64) */
9875   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9876   ins_pipe(ialu_reg_mem);
9877 %}
9878 
9879 // And Memory with Register
9880 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9881 %{ // Read-modify-write on memory: matches a StoreL to dst of ((LoadL dst) & src), same address for load and store.
9882   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9883   effect(KILL cr); // the flags register operand is declared clobbered
9884 
9885   ins_cost(150);
9886   format %{ "andq    $dst, $src\t# long" %}
9887   opcode(0x21); /* Opcode 21 /r */
9888   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9889   ins_pipe(ialu_mem_reg);
9890 %}
9891 
9892 // And Memory with Immediate
9893 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9894 %{ // Read-modify-write on memory with an immL32 constant: matches a StoreL to dst of ((LoadL dst) & src).
9895   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9896   effect(KILL cr); // the flags register operand is declared clobbered
9897 
9898   ins_cost(125);
9899   format %{ "andq    $dst, $src\t# long" %}
9900   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9901   ins_encode(REX_mem_wide(dst), OpcSE(src),
9902              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x4, the /4 digit in the comment above)
9903   ins_pipe(ialu_mem_imm);
9904 %}
9905 
9906 // Or Instructions
9907 // Or Register with Register
9908 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9909 %{ // Two-operand long OR: dst = dst | src.
9910   match(Set dst (OrL dst src));
9911   effect(KILL cr); // the flags register operand is declared clobbered
9912 
9913   format %{ "orq     $dst, $src\t# long" %}
9914   opcode(0x0B); /* Opcode REX.W + 0B /r (OR r64, r/m64) */
9915   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9916   ins_pipe(ialu_reg_reg);
9917 %}
9918 
9919 // Use any_RegP to match R15 (TLS register) without spilling.
9920 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{ // dst = dst | (pointer src reinterpreted as an integer via CastP2X)
9921   match(Set dst (OrL dst (CastP2X src)));
9922   effect(KILL cr); // the flags register operand is declared clobbered
9923 
9924   format %{ "orq     $dst, $src\t# long" %}
9925   opcode(0x0B); /* Opcode REX.W + 0B /r (OR r64, r/m64) */
9926   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same encoding sequence as orL_rReg; only the src operand class differs
9927   ins_pipe(ialu_reg_reg);
9928 %}
9929 
9930 
9931 // Or Register with Immediate
9932 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9933 %{ // Long OR with a constant operand of class immL32: dst = dst | src.
9934   match(Set dst (OrL dst src));
9935   effect(KILL cr); // the flags register operand is declared clobbered
9936 
9937   format %{ "orq     $dst, $src\t# long" %}
9938   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9939   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): both encoding classes live outside this chunk; the names suggest an imm8-vs-imm32 choice -- confirm
9940   ins_pipe(ialu_reg);
9941 %}
9942 
9943 // Or Register with Memory
9944 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9945 %{ // dst = dst | (long loaded from src); the LoadL is folded into the instruction's memory operand.
9946   match(Set dst (OrL dst (LoadL src)));
9947   effect(KILL cr); // the flags register operand is declared clobbered
9948 
9949   ins_cost(125);
9950   format %{ "orq     $dst, $src\t# long" %}
9951   opcode(0x0B); /* Opcode REX.W + 0B /r (OR r64, r/m64) */
9952   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9953   ins_pipe(ialu_reg_mem);
9954 %}
9955 
9956 // Or Memory with Register
9957 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9958 %{ // Read-modify-write on memory: matches a StoreL to dst of ((LoadL dst) | src), same address for load and store.
9959   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9960   effect(KILL cr); // the flags register operand is declared clobbered
9961 
9962   ins_cost(150);
9963   format %{ "orq     $dst, $src\t# long" %}
9964   opcode(0x09); /* Opcode 09 /r */
9965   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
9966   ins_pipe(ialu_mem_reg);
9967 %}
9968 
9969 // Or Memory with Immediate
9970 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9971 %{ // Read-modify-write on memory with an immL32 constant: matches a StoreL to dst of ((LoadL dst) | src).
9972   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9973   effect(KILL cr); // the flags register operand is declared clobbered
9974 
9975   ins_cost(125);
9976   format %{ "orq     $dst, $src\t# long" %}
9977   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9978   ins_encode(REX_mem_wide(dst), OpcSE(src),
9979              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x1, the /1 digit in the comment above)
9980   ins_pipe(ialu_mem_imm);
9981 %}
9982 
9983 // Xor Instructions
9984 // Xor Register with Register
9985 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9986 %{ // Two-operand long XOR: dst = dst ^ src.
9987   match(Set dst (XorL dst src));
9988   effect(KILL cr); // the flags register operand is declared clobbered
9989 
9990   format %{ "xorq    $dst, $src\t# long" %}
9991   opcode(0x33); /* Opcode REX.W + 33 /r (XOR r64, r/m64) */
9992   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9993   ins_pipe(ialu_reg_reg);
9994 %}
9995 
9996 // Xor Register with Immediate -1
9997 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{ // dst ^ -1 is a bitwise complement, so a single notq is emitted
9998   match(Set dst (XorL dst imm));
9999   // No rFlagsReg operand / KILL effect is declared for this rule.
10000   format %{ "notq   $dst" %}
10001   ins_encode %{
10002      __ notq($dst$$Register); // emitted through the assembler call rather than opcode()/enc_class pieces
10003   %}
10004   ins_pipe(ialu_reg);
10005 %}
10006 
10007 // Xor Register with Immediate
10008 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10009 %{ // Long XOR with a constant operand of class immL32: dst = dst ^ src.
10010   match(Set dst (XorL dst src));
10011   effect(KILL cr); // the flags register operand is declared clobbered
10012 
10013   format %{ "xorq    $dst, $src\t# long" %}
10014   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10015   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // NOTE(review): both encoding classes live outside this chunk; the names suggest an imm8-vs-imm32 choice -- confirm
10016   ins_pipe(ialu_reg);
10017 %}
10018 
10019 // Xor Register with Memory
10020 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10021 %{ // dst = dst ^ (long loaded from src); the LoadL is folded into the instruction's memory operand.
10022   match(Set dst (XorL dst (LoadL src)));
10023   effect(KILL cr); // the flags register operand is declared clobbered
10024 
10025   ins_cost(125);
10026   format %{ "xorq    $dst, $src\t# long" %}
10027   opcode(0x33); /* Opcode REX.W + 33 /r (XOR r64, r/m64) */
10028   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10029   ins_pipe(ialu_reg_mem);
10030 %}
10031 
10032 // Xor Memory with Register
10033 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10034 %{ // Read-modify-write on memory: matches a StoreL to dst of ((LoadL dst) ^ src), same address for load and store.
10035   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10036   effect(KILL cr); // the flags register operand is declared clobbered
10037 
10038   ins_cost(150);
10039   format %{ "xorq    $dst, $src\t# long" %}
10040   opcode(0x31); /* Opcode 31 /r */
10041   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand is passed first, memory operand second
10042   ins_pipe(ialu_mem_reg);
10043 %}
10044 
10045 // Xor Memory with Immediate
10046 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10047 %{ // Read-modify-write on memory with an immL32 constant: matches a StoreL to dst of ((LoadL dst) ^ src).
10048   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10049   effect(KILL cr); // the flags register operand is declared clobbered
10050 
10051   ins_cost(125);
10052   format %{ "xorq    $dst, $src\t# long" %}
10053   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10054   ins_encode(REX_mem_wide(dst), OpcSE(src),
10055              RM_opc_mem(secondary, dst), Con8or32(src)); // `secondary` names the second opcode() value (0x6, the /6 digit in the comment above)
10056   ins_pipe(ialu_mem_imm);
10057 %}
10058 
10059 // Convert Int to Boolean
10060 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10061 %{ // Conv2B of an int: per the format text, dst becomes 1 when src is non-zero and 0 otherwise.
10062   match(Set dst (Conv2B src));
10063   effect(KILL cr); // the testl below sets the flags
10064 
10065   format %{ "testl   $src, $src\t# ci2b\n\t"
10066             "setnz   $dst\n\t"
10067             "movzbl  $dst, $dst" %}
10068   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10069              setNZ_reg(dst),
10070              REX_reg_breg(dst, dst), // movzbl
10071              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)); // widens the byte written by setnz to a full int
10072   ins_pipe(pipe_slow); // XXX
10073 %}
10074 
10075 // Convert Pointer to Boolean
10076 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10077 %{ // Conv2B of a pointer: per the format text, dst becomes 1 when src is non-null (non-zero) and 0 otherwise.
10078   match(Set dst (Conv2B src));
10079   effect(KILL cr); // the testq below sets the flags
10080 
10081   format %{ "testq   $src, $src\t# cp2b\n\t"
10082             "setnz   $dst\n\t"
10083             "movzbl  $dst, $dst" %}
10084   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10085              setNZ_reg(dst),
10086              REX_reg_breg(dst, dst), // movzbl
10087              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)); // widens the byte written by setnz to a full int
10088   ins_pipe(pipe_slow); // XXX
10089 %}
10090 
10091 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10092 %{ // CmpLTMask into a register: per the format text, dst = -(p < q), i.e. all ones when p < q and 0 otherwise.
10093   match(Set dst (CmpLTMask p q));
10094   effect(KILL cr); // cmpl and negl both write the flags
10095 
10096   ins_cost(400); // XXX
10097   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10098             "setlt   $dst\n\t"
10099             "movzbl  $dst, $dst\n\t"
10100             "negl    $dst" %}
10101   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10102              setLT_reg(dst),
10103              REX_reg_breg(dst, dst), // movzbl
10104              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10105              neg_reg(dst)); // turns the 0/1 from setlt+movzbl into 0/-1
10106   ins_pipe(pipe_slow);
10107 %}
10108 
10109 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10110 %{ // CmpLTMask against constant zero: an arithmetic right shift by 31 replicates the sign bit, giving -1 for negative dst and 0 otherwise.
10111   match(Set dst (CmpLTMask dst zero));
10112   effect(KILL cr); // the flags register operand is declared clobbered
10113 
10114   ins_cost(100); // XXX
10115   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10116   opcode(0xC1, 0x7);  /* C1 /7 ib */
10117   ins_encode(reg_opc_imm(dst, 0x1F)); // 0x1F == 31, the shift count shown in the format text
10118   ins_pipe(ialu_reg);
10119 %}
10120 
10121 
10122 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr)
10123 %{ // Conditional add: p = (p - q) + ((p < q) ? y : 0), with a SIGNED p < q as CmpLTMask requires.
10124   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10125   effect(TEMP tmp, KILL cr);
10126   // The mask used to come from "sbbl tmp, tmp", i.e. from CF, which is the UNSIGNED borrow of p - q (e.g. p = -1, q = 1 gave mask 0). The signed "less" condition (SF != OF) after the subl is used instead.
10127   ins_cost(400); // XXX
10128   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10129             "movl    $tmp, #0\n\t"
10130             "cmovlt  $tmp, $y\n\t"
10131             "addl    $p, $tmp" %}
10132   ins_encode %{
10133     Register Rp = $p$$Register;
10134     Register Rq = $q$$Register;
10135     Register Ry = $y$$Register;
10136     Register Rt = $tmp$$Register;
10137     __ subl(Rp, Rq);                       // p -= q; leaves SF/OF describing the signed compare of the old p with q
10138     __ movl(Rt, 0);                        // mov does not modify the flags (an xorl would), so the subl flags survive
10139     __ cmovl(Assembler::less, Rt, Ry);     // tmp = (old p < q, signed) ? y : 0
10140     __ addl(Rp, Rt);                       // p += tmp
10141   %}
10142   ins_pipe(pipe_cmplt);
10143 %}
10144 
10145 //---------- FP Instructions------------------------------------------------
10146 
10147 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10148 %{ // Float compare of two registers into the unsigned-flags operand, followed by a NaN fixup sequence (see format text).
10149   match(Set cr (CmpF src1 src2));
10150 
10151   ins_cost(145);
10152   format %{ "ucomiss $src1, $src2\n\t"
10153             "jnp,s   exit\n\t"
10154             "pushfq\t# saw NaN, set CF\n\t"
10155             "andq    [rsp], #0xffffff2b\n\t"
10156             "popfq\n"
10157     "exit:   nop\t# avoid branch to branch" %}
10158   opcode(0x0F, 0x2E); /* Opcode 0F 2E /r (UCOMISS) */
10159   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10160              cmpfp_fixup); // enc_class defined outside this chunk; the format text above describes the sequence it is expected to emit
10161   ins_pipe(pipe_slow);
10162 %}
10163 
10164 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{ // Float register compare into rFlagsRegUCF: a bare ucomiss with no NaN fixup sequence
10165   match(Set cr (CmpF src1 src2));
10166 
10167   ins_cost(100); // was 145 (same as the fixup-carrying cmpF_cc_reg); now matches cmpF_cc_memCF, cmpF_cc_immCF and cmpD_cc_reg_CF
10168   format %{ "ucomiss $src1, $src2" %}
10169   ins_encode %{
10170     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10171   %}
10172   ins_pipe(pipe_slow);
10173 %}
10174 
10175 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10176 %{ // Float compare of a register with a loaded float (LoadF folded into the memory operand), followed by a NaN fixup sequence.
10177   match(Set cr (CmpF src1 (LoadF src2)));
10178 
10179   ins_cost(145);
10180   format %{ "ucomiss $src1, $src2\n\t"
10181             "jnp,s   exit\n\t"
10182             "pushfq\t# saw NaN, set CF\n\t"
10183             "andq    [rsp], #0xffffff2b\n\t"
10184             "popfq\n"
10185     "exit:   nop\t# avoid branch to branch" %}
10186   opcode(0x0F, 0x2E); /* Opcode 0F 2E /r (UCOMISS) */
10187   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10188              cmpfp_fixup); // enc_class defined outside this chunk; the format text above describes the sequence it is expected to emit
10189   ins_pipe(pipe_slow);
10190 %}
10191 
10192 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{ // Float register-vs-memory compare into rFlagsRegUCF: only the ucomiss, no fixup enc_class
10193   match(Set cr (CmpF src1 (LoadF src2)));
10194 
10195   ins_cost(100);
10196   format %{ "ucomiss $src1, $src2" %}
10197   opcode(0x0F, 0x2E); /* Opcode 0F 2E /r (UCOMISS) */
10198   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10199   ins_pipe(pipe_slow);
10200 %}
10201 
10202 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{ // Float compare of a register with a constant-table float, followed by the NaN fixup
10203   match(Set cr (CmpF src con));
10204 
10205   ins_cost(145);
10206   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10207             "jnp,s   exit\n\t"
10208             "pushfq\t# saw NaN, set CF\n\t"
10209             "andq    [rsp], #0xffffff2b\n\t"
10210             "popfq\n"
10211     "exit:   nop\t# avoid branch to branch" %}
10212   ins_encode %{
10213     __ ucomiss($src$$XMMRegister, $constantaddress($con)); // compares against the constant in memory
10214     emit_cmpfp_fixup(_masm); // helper defined outside this chunk; the format text above describes the sequence it is expected to emit
10215   %}
10216   ins_pipe(pipe_slow);
10217 %}
10218 
10219 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{ // Float register-vs-constant compare into rFlagsRegUCF: a bare ucomiss, no fixup call
10220   match(Set cr (CmpF src con));
10221   ins_cost(100);
10222   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10223   ins_encode %{
10224     __ ucomiss($src$$XMMRegister, $constantaddress($con)); // compares against the constant in memory
10225   %}
10226   ins_pipe(pipe_slow);
10227 %}
10228 
10229 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10230 %{ // Double compare of two registers into the unsigned-flags operand, followed by a NaN fixup sequence (see format text).
10231   match(Set cr (CmpD src1 src2));
10232 
10233   ins_cost(145);
10234   format %{ "ucomisd $src1, $src2\n\t"
10235             "jnp,s   exit\n\t"
10236             "pushfq\t# saw NaN, set CF\n\t"
10237             "andq    [rsp], #0xffffff2b\n\t"
10238             "popfq\n"
10239     "exit:   nop\t# avoid branch to branch" %}
10240   opcode(0x66, 0x0F, 0x2E); /* Opcode 66 0F 2E /r (UCOMISD) */
10241   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), // OpcP comes before the REX class here, so the 0x66 prefix precedes REX
10242              cmpfp_fixup); // enc_class defined outside this chunk; the format text above describes the sequence it is expected to emit
10243   ins_pipe(pipe_slow);
10244 %}
10245 
10246 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{ // Double register compare into rFlagsRegUCF: a bare ucomisd with no NaN fixup sequence
10247   match(Set cr (CmpD src1 src2));
10248 
10249   ins_cost(100);
10250   format %{ "ucomisd $src1, $src2" %}
10251   ins_encode %{
10252     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); // the format text above no longer carries a stray " test" suffix
10253   %}
10254   ins_pipe(pipe_slow);
10255 %}
10256 
10257 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10258 %{ // Double compare of a register with a loaded double (LoadD folded into the memory operand), followed by a NaN fixup sequence.
10259   match(Set cr (CmpD src1 (LoadD src2)));
10260 
10261   ins_cost(145);
10262   format %{ "ucomisd $src1, $src2\n\t"
10263             "jnp,s   exit\n\t"
10264             "pushfq\t# saw NaN, set CF\n\t"
10265             "andq    [rsp], #0xffffff2b\n\t"
10266             "popfq\n"
10267     "exit:   nop\t# avoid branch to branch" %}
10268   opcode(0x66, 0x0F, 0x2E); /* Opcode 66 0F 2E /r (UCOMISD) */
10269   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), // OpcP comes before the REX class here, so the 0x66 prefix precedes REX
10270              cmpfp_fixup); // enc_class defined outside this chunk; the format text above describes the sequence it is expected to emit
10271   ins_pipe(pipe_slow);
10272 %}
10273 
10274 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{ // Double register-vs-memory compare into rFlagsRegUCF: only the ucomisd, no fixup enc_class
10275   match(Set cr (CmpD src1 (LoadD src2)));
10276 
10277   ins_cost(100);
10278   format %{ "ucomisd $src1, $src2" %}
10279   opcode(0x66, 0x0F, 0x2E); /* Opcode 66 0F 2E /r (UCOMISD) */
10280   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2)); // OpcP comes before the REX class here, so the 0x66 prefix precedes REX
10281   ins_pipe(pipe_slow);
10282 %}
10283 
10284 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{ // Double compare of a register with a constant-table double, followed by the NaN fixup
10285   match(Set cr (CmpD src con));
10286 
10287   ins_cost(145);
10288   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10289             "jnp,s   exit\n\t"
10290             "pushfq\t# saw NaN, set CF\n\t"
10291             "andq    [rsp], #0xffffff2b\n\t"
10292             "popfq\n"
10293     "exit:   nop\t# avoid branch to branch" %}
10294   ins_encode %{
10295     __ ucomisd($src$$XMMRegister, $constantaddress($con)); // compares against the constant in memory
10296     emit_cmpfp_fixup(_masm); // helper defined outside this chunk; the format text above describes the sequence it is expected to emit
10297   %}
10298   ins_pipe(pipe_slow);
10299 %}
10300 
10301 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{ // Double register-vs-constant compare into rFlagsRegUCF: a bare ucomisd, no fixup call
10302   match(Set cr (CmpD src con));
10303   ins_cost(100);
10304   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10305   ins_encode %{
10306     __ ucomisd($src$$XMMRegister, $constantaddress($con)); // compares against the constant in memory
10307   %}
10308   ins_pipe(pipe_slow);
10309 %}
10310 
10311 // Compare into -1,0,1
10312 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10313 %{ // Three-way float compare (CmpF3) of two registers into int dst; per the format text -1 is kept for unordered (jp) or below (jb), else setne gives 0 or 1.
10314   match(Set dst (CmpF3 src1 src2));
10315   effect(KILL cr); // the compare writes the flags
10316 
10317   ins_cost(275);
10318   format %{ "ucomiss $src1, $src2\n\t"
10319             "movl    $dst, #-1\n\t"
10320             "jp,s    done\n\t"
10321             "jb,s    done\n\t"
10322             "setne   $dst\n\t"
10323             "movzbl  $dst, $dst\n"
10324     "done:" %}
10325 
10326   opcode(0x0F, 0x2E); /* Opcode 0F 2E /r (UCOMISS) */
10327   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10328              cmpfp3(dst)); // enc_class defined outside this chunk; expected to emit the movl/jp/jb/setne/movzbl tail shown in the format text
10329   ins_pipe(pipe_slow);
10330 %}
10331 
10332 // Compare into -1,0,1
10333 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10334 %{ // Three-way float compare (CmpF3) of a register with a loaded float; per the format text -1 is kept for unordered or below, else setne gives 0 or 1.
10335   match(Set dst (CmpF3 src1 (LoadF src2)));
10336   effect(KILL cr); // the compare writes the flags
10337 
10338   ins_cost(275);
10339   format %{ "ucomiss $src1, $src2\n\t"
10340             "movl    $dst, #-1\n\t"
10341             "jp,s    done\n\t"
10342             "jb,s    done\n\t"
10343             "setne   $dst\n\t"
10344             "movzbl  $dst, $dst\n"
10345     "done:" %}
10346 
10347   opcode(0x0F, 0x2E); /* Opcode 0F 2E /r (UCOMISS) */
10348   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10349              cmpfp3(dst)); // enc_class defined outside this chunk; expected to emit the movl/jp/jb/setne/movzbl tail shown in the format text
10350   ins_pipe(pipe_slow);
10351 %}
10352 
10353 // Compare into -1,0,1
10354 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{ // Three-way float compare (CmpF3) of a register with a constant-table float into int dst
10355   match(Set dst (CmpF3 src con));
10356   effect(KILL cr); // the compare writes the flags
10357 
10358   ins_cost(275);
10359   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10360             "movl    $dst, #-1\n\t"
10361             "jp,s    done\n\t"
10362             "jb,s    done\n\t"
10363             "setne   $dst\n\t"
10364             "movzbl  $dst, $dst\n"
10365     "done:" %}
10366   ins_encode %{
10367     Label L_done;
10368     Register Rdst = $dst$$Register;
10369     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10370     __ movl(Rdst, -1);                      // preload the "less or unordered" answer
10371     __ jcc(Assembler::parity, L_done);      // parity set: unordered compare, keep -1
10372     __ jcc(Assembler::below, L_done);       // below: keep -1
10373     __ setb(Assembler::notEqual, Rdst);     // low byte = 1 if not equal (i.e. greater here), 0 if equal
10374     __ movzbl(Rdst, Rdst);                  // clear the upper bytes left over from the -1 preload
10375     __ bind(L_done);
10376   %}
10377   ins_pipe(pipe_slow);
10378 %}
10379 
10380 // Compare into -1,0,1
10381 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10382 %{ // Three-way double compare (CmpD3) of two registers into int dst; per the format text -1 is kept for unordered (jp) or below (jb), else setne gives 0 or 1.
10383   match(Set dst (CmpD3 src1 src2));
10384   effect(KILL cr); // the compare writes the flags
10385 
10386   ins_cost(275);
10387   format %{ "ucomisd $src1, $src2\n\t"
10388             "movl    $dst, #-1\n\t"
10389             "jp,s    done\n\t"
10390             "jb,s    done\n\t"
10391             "setne   $dst\n\t"
10392             "movzbl  $dst, $dst\n"
10393     "done:" %}
10394 
10395   opcode(0x66, 0x0F, 0x2E); /* Opcode 66 0F 2E /r (UCOMISD) */
10396   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), // OpcP comes before the REX class here, so the 0x66 prefix precedes REX
10397              cmpfp3(dst)); // enc_class defined outside this chunk; expected to emit the movl/jp/jb/setne/movzbl tail shown in the format text
10398   ins_pipe(pipe_slow);
10399 %}
10400 
10401 // Compare into -1,0,1
10402 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10403 %{ // Three-way double compare (CmpD3) of a register with a loaded double; per the format text -1 is kept for unordered or below, else setne gives 0 or 1.
10404   match(Set dst (CmpD3 src1 (LoadD src2)));
10405   effect(KILL cr); // the compare writes the flags
10406 
10407   ins_cost(275);
10408   format %{ "ucomisd $src1, $src2\n\t"
10409             "movl    $dst, #-1\n\t"
10410             "jp,s    done\n\t"
10411             "jb,s    done\n\t"
10412             "setne   $dst\n\t"
10413             "movzbl  $dst, $dst\n"
10414     "done:" %}
10415 
10416   opcode(0x66, 0x0F, 0x2E); /* Opcode 66 0F 2E /r (UCOMISD) */
10417   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), // OpcP comes before the REX class here, so the 0x66 prefix precedes REX
10418              cmpfp3(dst)); // enc_class defined outside this chunk; expected to emit the movl/jp/jb/setne/movzbl tail shown in the format text
10419   ins_pipe(pipe_slow);
10420 %}
10421 
10422 // Compare into -1,0,1
10423 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{ // Three-way double compare (CmpD3) of a register with a constant-table double into int dst
10424   match(Set dst (CmpD3 src con));
10425   effect(KILL cr); // the compare writes the flags
10426 
10427   ins_cost(275);
10428   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10429             "movl    $dst, #-1\n\t"
10430             "jp,s    done\n\t"
10431             "jb,s    done\n\t"
10432             "setne   $dst\n\t"
10433             "movzbl  $dst, $dst\n"
10434     "done:" %}
10435   ins_encode %{
10436     Register Rdst = $dst$$Register;
10437     Label L_done;
10438     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10439     __ movl(Rdst, -1);                      // preload the "less or unordered" answer
10440     __ jcc(Assembler::parity, L_done);      // parity set: unordered compare, keep -1
10441     __ jcc(Assembler::below, L_done);       // below: keep -1
10442     __ setb(Assembler::notEqual, Rdst);     // low byte = 1 if not equal (i.e. greater here), 0 if equal
10443     __ movzbl(Rdst, Rdst);                  // clear the upper bytes left over from the -1 preload
10444     __ bind(L_done);
10445   %}
10446   ins_pipe(pipe_slow);
10447 %}
10448 
10449 instruct addF_reg(regF dst, regF src)
10450 %{ // Scalar float add, two-operand form: dst = dst + src. No flags operand is declared.
10451   match(Set dst (AddF dst src));
10452 
10453   format %{ "addss   $dst, $src" %}
10454   ins_cost(150); // XXX
10455   opcode(0xF3, 0x0F, 0x58); /* Opcode F3 0F 58 /r (ADDSS) */
10456   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10457   ins_pipe(pipe_slow);
10458 %}
10459 
10460 instruct addF_mem(regF dst, memory src)
10461 %{ // Scalar float add with the LoadF folded into the memory operand: dst = dst + [src].
10462   match(Set dst (AddF dst (LoadF src)));
10463 
10464   format %{ "addss   $dst, $src" %}
10465   ins_cost(150); // XXX
10466   opcode(0xF3, 0x0F, 0x58); /* Opcode F3 0F 58 /r (ADDSS) */
10467   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10468   ins_pipe(pipe_slow);
10469 %}
10470 
10471 instruct addF_imm(regF dst, immF con) %{ // Scalar float add of a constant: dst = dst + con, with con read from the constant table
10472   match(Set dst (AddF dst con));
10473   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10474   ins_cost(150); // XXX
10475   ins_encode %{
10476     __ addss($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10477   %}
10478   ins_pipe(pipe_slow);
10479 %}
10480 
10481 instruct addD_reg(regD dst, regD src)
10482 %{ // Scalar double add, two-operand form: dst = dst + src. No flags operand is declared.
10483   match(Set dst (AddD dst src));
10484 
10485   format %{ "addsd   $dst, $src" %}
10486   ins_cost(150); // XXX
10487   opcode(0xF2, 0x0F, 0x58); /* Opcode F2 0F 58 /r (ADDSD) */
10488   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10489   ins_pipe(pipe_slow);
10490 %}
10491 
10492 instruct addD_mem(regD dst, memory src)
10493 %{ // Scalar double add with the LoadD folded into the memory operand: dst = dst + [src].
10494   match(Set dst (AddD dst (LoadD src)));
10495 
10496   format %{ "addsd   $dst, $src" %}
10497   ins_cost(150); // XXX
10498   opcode(0xF2, 0x0F, 0x58); /* Opcode F2 0F 58 /r (ADDSD) */
10499   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10500   ins_pipe(pipe_slow);
10501 %}
10502 
10503 instruct addD_imm(regD dst, immD con) %{ // Scalar double add of a constant: dst = dst + con, with con read from the constant table
10504   match(Set dst (AddD dst con));
10505   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10506   ins_cost(150); // XXX
10507   ins_encode %{
10508     __ addsd($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10509   %}
10510   ins_pipe(pipe_slow);
10511 %}
10512 
10513 instruct subF_reg(regF dst, regF src)
10514 %{ // Scalar float subtract, two-operand form: dst = dst - src. No flags operand is declared.
10515   match(Set dst (SubF dst src));
10516 
10517   format %{ "subss   $dst, $src" %}
10518   ins_cost(150); // XXX
10519   opcode(0xF3, 0x0F, 0x5C); /* Opcode F3 0F 5C /r (SUBSS) */
10520   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10521   ins_pipe(pipe_slow);
10522 %}
10523 
10524 instruct subF_mem(regF dst, memory src)
10525 %{ // Scalar float subtract with the LoadF folded into the memory operand: dst = dst - [src].
10526   match(Set dst (SubF dst (LoadF src)));
10527 
10528   format %{ "subss   $dst, $src" %}
10529   ins_cost(150); // XXX
10530   opcode(0xF3, 0x0F, 0x5C); /* Opcode F3 0F 5C /r (SUBSS) */
10531   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10532   ins_pipe(pipe_slow);
10533 %}
10534 
10535 instruct subF_imm(regF dst, immF con) %{ // Scalar float subtract of a constant: dst = dst - con, with con read from the constant table
10536   match(Set dst (SubF dst con));
10537   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10538   ins_cost(150); // XXX
10539   ins_encode %{
10540     __ subss($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10541   %}
10542   ins_pipe(pipe_slow);
10543 %}
10544 
10545 instruct subD_reg(regD dst, regD src)
10546 %{ // Scalar double subtract, two-operand form: dst = dst - src. No flags operand is declared.
10547   match(Set dst (SubD dst src));
10548 
10549   format %{ "subsd   $dst, $src" %}
10550   ins_cost(150); // XXX
10551   opcode(0xF2, 0x0F, 0x5C); /* Opcode F2 0F 5C /r (SUBSD) */
10552   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10553   ins_pipe(pipe_slow);
10554 %}
10555 
10556 instruct subD_mem(regD dst, memory src)
10557 %{ // Scalar double subtract with the LoadD folded into the memory operand: dst = dst - [src].
10558   match(Set dst (SubD dst (LoadD src)));
10559 
10560   format %{ "subsd   $dst, $src" %}
10561   ins_cost(150); // XXX
10562   opcode(0xF2, 0x0F, 0x5C); /* Opcode F2 0F 5C /r (SUBSD) */
10563   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10564   ins_pipe(pipe_slow);
10565 %}
10566 
10567 instruct subD_imm(regD dst, immD con) %{ // Scalar double subtract of a constant: dst = dst - con, with con read from the constant table
10568   match(Set dst (SubD dst con));
10569   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10570   ins_cost(150); // XXX
10571   ins_encode %{
10572     __ subsd($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10573   %}
10574   ins_pipe(pipe_slow);
10575 %}
10576 
10577 instruct mulF_reg(regF dst, regF src)
10578 %{ // Scalar float multiply, two-operand form: dst = dst * src. No flags operand is declared.
10579   match(Set dst (MulF dst src));
10580 
10581   format %{ "mulss   $dst, $src" %}
10582   ins_cost(150); // XXX
10583   opcode(0xF3, 0x0F, 0x59); /* Opcode F3 0F 59 /r (MULSS) */
10584   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10585   ins_pipe(pipe_slow);
10586 %}
10587 
10588 instruct mulF_mem(regF dst, memory src)
10589 %{ // Scalar float multiply with the LoadF folded into the memory operand: dst = dst * [src].
10590   match(Set dst (MulF dst (LoadF src)));
10591 
10592   format %{ "mulss   $dst, $src" %}
10593   ins_cost(150); // XXX
10594   opcode(0xF3, 0x0F, 0x59); /* Opcode F3 0F 59 /r (MULSS) */
10595   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F3 prefix precedes REX
10596   ins_pipe(pipe_slow);
10597 %}
10598 
10599 instruct mulF_imm(regF dst, immF con) %{ // Scalar float multiply by a constant: dst = dst * con, with con read from the constant table
10600   match(Set dst (MulF dst con));
10601   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10602   ins_cost(150); // XXX
10603   ins_encode %{
10604     __ mulss($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10605   %}
10606   ins_pipe(pipe_slow);
10607 %}
10608 
10609 instruct mulD_reg(regD dst, regD src)
10610 %{ // Scalar double multiply, two-operand form: dst = dst * src. No flags operand is declared.
10611   match(Set dst (MulD dst src));
10612 
10613   format %{ "mulsd   $dst, $src" %}
10614   ins_cost(150); // XXX
10615   opcode(0xF2, 0x0F, 0x59); /* Opcode F2 0F 59 /r (MULSD) */
10616   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10617   ins_pipe(pipe_slow);
10618 %}
10619 
10620 instruct mulD_mem(regD dst, memory src)
10621 %{ // Scalar double multiply with the LoadD folded into the memory operand: dst = dst * [src].
10622   match(Set dst (MulD dst (LoadD src)));
10623 
10624   format %{ "mulsd   $dst, $src" %}
10625   ins_cost(150); // XXX
10626   opcode(0xF2, 0x0F, 0x59); /* Opcode F2 0F 59 /r (MULSD) */
10627   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // OpcP comes before the REX class, so the F2 prefix precedes REX
10628   ins_pipe(pipe_slow);
10629 %}
10630 
10631 instruct mulD_imm(regD dst, immD con) %{ // Scalar double multiply by a constant: dst = dst * con, with con read from the constant table
10632   match(Set dst (MulD dst con));
10633   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10634   ins_cost(150); // XXX
10635   ins_encode %{
10636     __ mulsd($dst$$XMMRegister, $constantaddress($con)); // memory-operand form; $constantaddress supplies the constant's address
10637   %}
10638   ins_pipe(pipe_slow);
10639 %}
10640 
// Float divide, register/register form.
// Matches DivF with dst as both dividend and result (dst = dst / src).
// Encoded from the opcode bytes F3 0F 5E; the format string prints it as divss.
instruct divF_reg(regF dst, regF src)
%{
  match(Set dst (DivF dst src));

  format %{ "divss   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10651 
// Float divide, register/memory form.
// Matches DivF whose divisor is a LoadF, folding the load into the
// instruction's memory operand (dst = dst / [src]). Opcode bytes F3 0F 5E.
instruct divF_mem(regF dst, memory src)
%{
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10662 
// Float divide by a float constant (dst = dst / con).
// The constant is addressed through $constantaddress($con); the format string
// describes this as a load from the constant table.
instruct divF_imm(regF dst, immF con) %{
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150); // XXX
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10672 
// Double divide, register/register form.
// Matches DivD with dst as both dividend and result (dst = dst / src).
// Encoded from the opcode bytes F2 0F 5E; the format string prints it as divsd.
instruct divD_reg(regD dst, regD src)
%{
  match(Set dst (DivD dst src));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10683 
// Double divide, register/memory form.
// Matches DivD whose divisor is a LoadD, folding the load into the
// instruction's memory operand (dst = dst / [src]). Opcode bytes F2 0F 5E.
instruct divD_mem(regD dst, memory src)
%{
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10694 
// Double divide by a double constant (dst = dst / con).
// The constant is addressed through $constantaddress($con); the format string
// describes this as a load from the constant table.
instruct divD_imm(regD dst, immD con) %{
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150); // XXX
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10704 
// Float square root, register/register form.
// There is no float sqrt node in the match rule: it matches the three-node chain
// ConvD2F(SqrtD(ConvF2D src)) and covers it with one instruction, printed as
// sqrtss (opcode bytes F3 0F 51). Unlike the arithmetic rules, dst is not an input.
instruct sqrtF_reg(regF dst, regF src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10715 
// Float square root, register/memory form.
// Matches ConvD2F(SqrtD(ConvF2D(LoadF src))), folding the float load into the
// memory operand of the instruction printed as sqrtss (opcode bytes F3 0F 51).
instruct sqrtF_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss  $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10726 
// Float square root of a float constant.
// Matches ConvD2F(SqrtD(ConvF2D con)); the constant is addressed through
// $constantaddress($con), described by the format string as a constant-table load.
instruct sqrtF_imm(regF dst, immF con) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150); // XXX
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10736 
// Double square root, register/register form (dst = sqrt(src)).
// Matches SqrtD directly; opcode bytes F2 0F 51, printed as sqrtsd.
instruct sqrtD_reg(regD dst, regD src)
%{
  match(Set dst (SqrtD src));

  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10747 
// Double square root, register/memory form (dst = sqrt([src])).
// Matches SqrtD of a LoadD, folding the load into the memory operand.
// Opcode bytes F2 0F 51, printed as sqrtsd.
instruct sqrtD_mem(regD dst, memory src)
%{
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150); // XXX
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10758 
// Double square root of a double constant (dst = sqrt(con)).
// The constant is addressed through $constantaddress($con); the format string
// describes this as a load from the constant table.
instruct sqrtD_imm(regD dst, immD con) %{
  match(Set dst (SqrtD con));
  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150); // XXX
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10768 
// Float absolute value, in place (dst = |dst|).
// The bytes come from the absF_encoding enc_class, which is defined outside this
// part of the file; per the format string it is an andps with a 0x7fffffff mask,
// i.e. the sign bit is masked off. No ins_cost is given for this rule.
instruct absF_reg(regF dst)
%{
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode(absF_encoding(dst));
  ins_pipe(pipe_slow);
%}
10777 
// Double absolute value, in place (dst = |dst|).
// The bytes come from the absD_encoding enc_class (defined outside this part of
// the file); per the format string it is an andpd with a 0x7fffffffffffffff mask.
instruct absD_reg(regD dst)
%{
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode(absD_encoding(dst));
  ins_pipe(pipe_slow);
%}
10787 
// Float negate, in place (dst = -dst).
// The bytes come from the negF_encoding enc_class (defined outside this part of
// the file); per the format string it is an xorps with 0x80000000, which flips
// the sign bit.
instruct negF_reg(regF dst)
%{
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode(negF_encoding(dst));
  ins_pipe(pipe_slow);
%}
10796 
// Double negate, in place (dst = -dst).
// The bytes come from the negD_encoding enc_class (defined outside this part of
// the file); per the format string it is an xorpd with 0x8000000000000000.
instruct negD_reg(regD dst)
%{
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode(negD_encoding(dst));
  ins_pipe(pipe_slow);
%}
10806 
// -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place (dst = cos(dst)).
// The encoding brackets the two opcode bytes D9 FF (the x87 fcos encoding) with
// Push_SrcXD(dst) and Push_ResultXD(dst). Those enc_classes are defined outside
// this part of the file; presumably they move the value between the XMM
// register and the x87 stack -- confirm against their definitions.
instruct cosD_reg(regD dst) %{
  match(Set dst (CosD dst));

  format %{ "dcos   $dst\n\t" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10816 
// Double sine, in place (dst = sin(dst)).
// The encoding brackets the two opcode bytes D9 FE (the x87 fsin encoding) with
// Push_SrcXD(dst) and Push_ResultXD(dst). Those enc_classes are defined outside
// this part of the file; presumably they move the value between the XMM
// register and the x87 stack -- confirm against their definitions.
instruct sinD_reg(regD dst) %{
  match(Set dst (SinD dst));

  format %{ "dsin   $dst\n\t" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10825 
// Double tangent, in place (dst = tan(dst)).
// Unlike cos/sin this rule has no opcode() clause; the four bytes are listed
// directly in ins_encode: fptan followed by fstp. The x87 fptan instruction
// leaves an extra 1.0 on top of the FPU stack above the result, which the fstp
// discards before Push_ResultXD(dst) runs.
instruct tanD_reg(regD dst) %{
  match(Set dst (TanD dst));

  format %{ "dtan   $dst\n\t" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2),   //fptan
              Opcode(0xDD), Opcode(0xD8),   //fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10836 
// Base-10 logarithm of a double, in place (dst = log10(dst)).
// Encoding order matters: the fldlg2 constant is pushed first, then the source
// value (Push_SrcXD), and fyl2x combines the two before Push_ResultXD stores
// the result back to dst.
instruct log10D_reg(regD dst) %{
  // The source and result Double operands in XMM registers
  match(Set dst (Log10D dst));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "fldlg2\t\t\t#Log10\n\t"
            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
         %}
   ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}
10852 
// Natural logarithm of a double, in place (dst = ln(dst)).
// Same shape as log10D_reg, but the constant pushed first is fldln2 (D9 ED)
// instead of fldlg2; fyl2x then combines it with the source value.
instruct logD_reg(regD dst) %{
  // The source and result Double operands in XMM registers
  match(Set dst (LogD dst));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "fldln2\t\t\t#Log_e\n\t"
            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10867 
10868 
10869 
10870 //----------Arithmetic Conversion Instructions---------------------------------
10871 
// RoundFloat as a no-op: the node is matched in place on dst, but the rule has
// an empty ins_encode(), zero cost and the 'empty' pipeline class, so no code
// is emitted for it. It also has no format string.
instruct roundFloat_nop(regF dst)
%{
  match(Set dst (RoundFloat dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
10880 
// RoundDouble as a no-op: the node is matched in place on dst, but the rule has
// an empty ins_encode(), zero cost and the 'empty' pipeline class, so no code
// is emitted for it. It also has no format string.
instruct roundDouble_nop(regD dst)
%{
  match(Set dst (RoundDouble dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
10889 
// Float -> double conversion, register source.
// Opcode bytes F3 0F 5A, printed as cvtss2sd. No ins_cost is specified.
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
10899 
// Float -> double conversion with the float loaded from memory.
// Matches ConvF2D of a LoadF, folding the load into the memory operand.
// Opcode bytes F3 0F 5A, printed as cvtss2sd.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
10909 
// Double -> float conversion, register source.
// Opcode bytes F2 0F 5A, printed as cvtsd2ss. No ins_cost is specified.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
10919 
// Double -> float conversion with the double loaded from memory.
// Matches ConvD2F of a LoadD, folding the load into the memory operand.
// Opcode bytes F2 0F 5A, printed as cvtsd2ss.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
10929 
10930 // XXX do mem variants
// Float -> int conversion.
// Two encoding parts: the conversion itself (opcode bytes F3 0F 2C, printed as
// cvttss2sil) followed by the f2i_fixup enc_class, which is defined outside this
// part of the file. The format string spells out the intended sequence: if the
// converted value equals 0x80000000, the source is stored to the stack,
// f2i_fixup is called and the result is popped into dst; otherwise the fix-up
// is skipped. The flags register is declared killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF3, 0x0F, 0x2C);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
             f2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
10949 
// Float -> long conversion.
// Same structure as the float -> int rule, but the REX prefix is produced by
// REX_reg_reg_wide and the fix-up enc_class is f2l_fixup (defined outside this
// part of the file). Per the format string, the fix-up path is taken when the
// converted value equals 0x8000000000000000. The flags register is declared killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF3, 0x0F, 0x2C);
  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
             f2l_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
10968 
// Double -> int conversion.
// The conversion (opcode bytes F2 0F 2C, printed as cvttsd2sil) is followed by
// the d2i_fixup enc_class, defined outside this part of the file. Per the
// format string, when the converted value equals 0x80000000 the source is
// stored to the stack, d2i_fixup is called and the result is popped into dst.
// The flags register is declared killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF2, 0x0F, 0x2C);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
             d2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
10987 
// Double -> long conversion.
// Same structure as the double -> int rule, but the REX prefix is produced by
// REX_reg_reg_wide and the fix-up enc_class is d2l_fixup (defined outside this
// part of the file). Per the format string, the fix-up path is taken when the
// converted value equals 0x8000000000000000. The flags register is declared killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF2, 0x0F, 0x2C);
  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
             d2l_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11006 
// Int -> float conversion from a general register.
// Only selectable when the UseXmmI2F flag is off; convXI2F_reg matches the same
// ConvI2F pattern when the flag is on. Opcode bytes F3 0F 2A, printed as cvtsi2ssl.
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  opcode(0xF3, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11017 
// Int -> float conversion with the int loaded from memory.
// Matches ConvI2F of a LoadI, folding the load into the memory operand.
// Unlike the register form, this rule carries no UseXmmI2F predicate.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  opcode(0xF3, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11027 
// Int -> double conversion from a general register.
// Only selectable when the UseXmmI2D flag is off; convXI2D_reg matches the same
// ConvI2D pattern when the flag is on. Opcode bytes F2 0F 2A, printed as cvtsi2sdl.
instruct convI2D_reg_reg(regD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11038 
// Int -> double conversion with the int loaded from memory.
// Matches ConvI2D of a LoadI, folding the load into the memory operand.
// Unlike the register form, this rule carries no UseXmmI2D predicate.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11048 
// Int -> float conversion done entirely in the XMM register.
// Selected instead of convI2F_reg_reg when the UseXmmI2F flag is on.
// Emits two instructions: movdl copies the int into dst, then cvtdq2ps converts
// it there (dst is both source and destination of the second instruction).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11062 
// Int -> double conversion done entirely in the XMM register.
// Selected instead of convI2D_reg_reg when the UseXmmI2D flag is on.
// Emits two instructions: movdl copies the int into dst, then cvtdq2pd converts
// it there (dst is both source and destination of the second instruction).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11076 
// Long -> float conversion from a general register.
// Same opcode bytes as the int -> float rule (F3 0F 2A); the difference is the
// REX_reg_reg_wide prefix encoding, and the format string prints cvtsi2ssq.
instruct convL2F_reg_reg(regF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  opcode(0xF3, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11086 
// Long -> float conversion with the long loaded from memory.
// Matches ConvL2F of a LoadL, folding the load into the memory operand.
// Uses the REX_reg_mem_wide prefix encoding; printed as cvtsi2ssq.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  opcode(0xF3, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11096 
// Long -> double conversion from a general register.
// Same opcode bytes as the int -> double rule (F2 0F 2A); the difference is the
// REX_reg_reg_wide prefix encoding, and the format string prints cvtsi2sdq.
instruct convL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11106 
// Long -> double conversion with the long loaded from memory.
// Matches ConvL2D of a LoadL, folding the load into the memory operand.
// Uses the REX_reg_mem_wide prefix encoding; printed as cvtsi2sdq.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11116 
// Int -> long conversion (sign-extending), register to register.
// Emitted through the assembler's movslq; cost 125.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11128 
11129 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11130 // %{
11131 //   match(Set dst (ConvI2L src));
11132 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11133 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11134 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11135 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11136 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11137 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11138 
11139 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11140 //   ins_encode(enc_copy(dst, src));
11141 // //   opcode(0x63); // needs REX.W
11142 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11143 //   ins_pipe(ialu_reg_reg);
11144 // %}
11145 
// Zero-extend convert int to long
// Matches the two-node pattern AndL(ConvI2L src, mask), where mask is an
// immL_32bits constant, and covers it with a single register copy (enc_copy),
// printed as movl. NOTE(review): immL_32bits is presumably the 0xFFFFFFFF
// constant -- confirm against the operand definition.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode(enc_copy(dst, src));
  ins_pipe(ialu_reg_reg);
%}
11155 
// Zero-extend convert int to long, with the int loaded from memory.
// Matches AndL(ConvI2L(LoadI src), mask) with an immL_32bits mask and covers the
// whole pattern with one load: opcode byte 8B, printed as movl.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
11166 
// Zero-extend the low 32 bits of a long: matches AndL(src, mask) with an
// immL_32bits mask and implements it as a register copy printed as movl.
// Uses enc_copy_always rather than enc_copy. NOTE(review): presumably so the
// move is emitted even when dst and src are the same register -- confirm
// against the enc_class definitions.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode(enc_copy_always(dst, src));
  ins_pipe(ialu_reg_reg);
%}
11175 
// Long -> int conversion, register to register.
// Implemented as a register copy via enc_copy_always, printed as movl.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode(enc_copy_always(dst, src));
  ins_pipe(ialu_reg_reg);
%}
11184 
11185 
// MoveF2I, stack slot -> general register.
// The source is a float stack slot and the destination an int register; the
// value is loaded with opcode byte 8B (printed as movl). Cost 125.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
11196 
// MoveI2F, stack slot -> float register.
// The source is an int stack slot and the destination a float register; the
// value is loaded with opcode bytes F3 0F 10 (printed as movss). Cost 125.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  opcode(0xF3, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11207 
// MoveD2L, stack slot -> general register.
// The source is a double stack slot and the destination a long register; the
// value is loaded with opcode byte 8B under the REX_reg_mem_wide prefix
// encoding (printed as movq). Cost 125.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
11218 
// MoveL2D, stack slot -> double register, variant used when the
// UseXmmLoadAndClearUpper flag is off (MoveL2D_stack_reg covers the flag-on case).
// Loads with opcode bytes 66 0F 12, printed as movlpd. Note that the format
// string's trailing tag reads "MoveL2D_stack_reg", the same as the sibling rule.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  opcode(0x66, 0x0F, 0x12);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11230 
// MoveL2D, stack slot -> double register, variant used when the
// UseXmmLoadAndClearUpper flag is on (MoveL2D_stack_reg_partial covers the
// flag-off case). Loads with opcode bytes F2 0F 10, printed as movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  opcode(0xF2, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11242 
11243 
// MoveF2I, float register -> int stack slot.
// This is a store (opcode bytes F3 0F 11, printed as movss), so the encoding
// helpers are passed (src, dst): the register operand comes first and the
// stack slot takes the memory-operand position. Cost 95.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow);
%}
11254 
// MoveI2F, int register -> float stack slot.
// This is a store (opcode byte 89, printed as movl), so the encoding helpers
// are passed (src, dst): the register operand comes first and the stack slot
// takes the memory-operand position. Cost 100.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
11265 
// MoveD2L, double register -> long stack slot.
// This is a store (opcode bytes F2 0F 11, printed as movsd), so the encoding
// helpers are passed (src, dst): the register operand comes first and the
// stack slot takes the memory-operand position. Cost 95.
// The format tag now names this rule; it previously read "MoveL2D_reg_stack",
// which made the printed listing indistinguishable from the sibling rule.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow);
%}
11276 
// MoveL2D, long register -> double stack slot.
// This is a store (opcode byte 89 under the REX_reg_mem_wide prefix encoding,
// printed as movq), so the encoding helpers are passed (src, dst). Cost 100.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
11287 
// MoveF2I, XMM register -> general register, without going through memory.
// Emitted via the assembler's movdl. Cost 85, lower than the stack-based
// MoveF2I rules in this file.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}
11296 
// MoveD2L, XMM register -> general register, without going through memory.
// Emitted via the assembler's movdq (the format string prints it as "movd").
// Cost 85, lower than the stack-based MoveD2L rules in this file.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
  ins_pipe( pipe_slow );
%}
11305 
11306 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F, general register -> XMM register, via the assembler's movdl.
// Cost is set to 300, well above the stack-based MoveI2F rules (100 and 125),
// so the matcher favours those when they apply.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(300);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
  ins_pipe( pipe_slow );
%}
11315 
// MoveL2D, general register -> XMM register, via the assembler's movdq
// (the format string prints it as "movd").
// Cost is set to 300, well above the stack-based MoveL2D rules (100 and 125),
// so the matcher favours those when they apply.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(300);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
  ins_pipe( pipe_slow );
%}
11324 
// Replicate scalar to packed byte (1 byte) values in xmm
// Source already lives in an XMM register (regD). All code comes from the
// pshufd_8x8 enc_class, defined outside this part of the file; the format
// string lists the sequence as MOVDQA, PUNPCKLBW, PSHUFLW.
instruct Repl8B_reg(regD dst, regD src) %{
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA  $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}
11334 
// Replicate scalar to packed byte (1 byte) values in xmm
// Source is a general register: mov_i2x first copies it into dst, then
// pshufd_8x8 is applied with dst as both source and destination.
instruct Repl8B_rRegI(regD dst, rRegI src) %{
  match(Set dst (Replicate8B src));
  format %{ "MOVD    $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}
11344 
// Replicate scalar zero to packed byte (1 byte) values in xmm
// An all-zero vector needs no shuffle: dst is simply XORed with itself (PXOR).
instruct Repl8B_immI0(regD dst, immI0 zero) %{
  match(Set dst (Replicate8B zero));
  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11352 
// Replicate scalar to packed short (2 byte) values in xmm
// Source already lives in an XMM register (regD). Encoded by the pshufd_4x16
// enc_class (defined outside this part of the file); printed as PSHUFLW with
// selector 0x00.
instruct Repl4S_reg(regD dst, regD src) %{
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}
11360 
// Replicate scalar to packed short (2 byte) values in xmm
// Source is a general register: mov_i2x first copies it into dst, then
// pshufd_4x16 is applied with dst as both source and destination.
instruct Repl4S_rRegI(regD dst, rRegI src) %{
  match(Set dst (Replicate4S src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11369 
// Replicate scalar zero to packed short (2 byte) values in xmm
// An all-zero vector needs no shuffle: dst is simply XORed with itself (PXOR).
instruct Repl4S_immI0(regD dst, immI0 zero) %{
  match(Set dst (Replicate4S zero));
  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11377 
// Replicate scalar to packed char (2 byte) values in xmm
// Source already lives in an XMM register (regD). Uses the same pshufd_4x16
// encoding as the packed-short rule; only the matched node (Replicate4C) differs.
instruct Repl4C_reg(regD dst, regD src) %{
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}
11385 
// Replicate scalar to packed char (2 byte) values in xmm
// Source is a general register: mov_i2x first copies it into dst, then
// pshufd_4x16 is applied with dst as both source and destination.
instruct Repl4C_rRegI(regD dst, rRegI src) %{
  match(Set dst (Replicate4C src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11394 
// Replicate scalar zero to packed char (2 byte) values in xmm
// An all-zero vector needs no shuffle: dst is simply XORed with itself (PXOR).
instruct Repl4C_immI0(regD dst, immI0 zero) %{
  match(Set dst (Replicate4C zero));
  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11402 
// Replicate scalar to packed integer (4 byte) values in xmm
// Source already lives in an XMM register (regD). Encoded by the pshufd
// enc_class with selector 0x00; printed as PSHUFD.
instruct Repl2I_reg(regD dst, regD src) %{
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}
11410 
// Replicate scalar to packed integer (4 byte) values in xmm
// Source is a general register: mov_i2x first copies it into dst, then pshufd
// with selector 0x00 is applied with dst as both source and destination.
instruct Repl2I_rRegI(regD dst, rRegI src) %{
  match(Set dst (Replicate2I src));
  format %{ "MOVD   $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}
11419 
// Replicate scalar zero to packed integer (4 byte) values in xmm
// An all-zero vector needs no shuffle: dst is simply XORed with itself (PXOR).
instruct Repl2I_immI0(regD dst, immI0 zero) %{
  match(Set dst (Replicate2I zero));
  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11427 
// Replicate scalar to packed single precision floating point values in xmm
// Variant whose source operand is declared as regD. Encoded by the pshufd
// enc_class with selector 0xe0 (the integer rules use 0x00); printed as PSHUFD.
instruct Repl2F_reg(regD dst, regD src) %{
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}
11435 
// Replicate scalar to packed single precision floating point values in xmm
// Variant whose source operand is declared as regF; match rule, format and
// encoding (pshufd with selector 0xe0) are otherwise the same as Repl2F_reg.
instruct Repl2F_regF(regD dst, regF src) %{
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}
11443 
// Replicate scalar zero to packed single precision floating point values in xmm
// The operand is the float zero constant (immF0), so dst is simply XORed with
// itself (PXOR) instead of shuffling.
instruct Repl2F_immF0(regD dst, immF0 zero) %{
  match(Set dst (Replicate2F zero));
  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11451 
11452 
11453 // =======================================================================
// fast clearing of an array
// Matches ClearArray(cnt, base) and implements it with "rep stosq".
// The operand classes pin the registers the string instruction needs: the count
// in rcx, the base pointer in rdi and the stored value in rax. rax is zeroed by
// the leading xorl. cnt and base are consumed and destroyed (USE_KILL); rax and
// the flags are clobbered (KILL).
// NOTE(review): since stosq stores 8 bytes per iteration, cnt is presumably a
// count of 8-byte words rather than bytes -- confirm against the ClearArray node.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
                  rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ "xorl    rax, rax\t# ClearArray:\n\t"
            "rep stosq\t# Store rax to *rdi++ while rcx--" %}
  ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
             Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
  ins_pipe(pipe_slow);
%}
11467 
// String compare intrinsic: matches StrComp over two (pointer, count) pairs and
// delegates all code generation to the assembler's string_compare routine.
// Every operand is pinned to a fixed register by its operand class (str1=rdi,
// cnt1=rcx, str2=rsi, cnt2=rdx, result=rax). The four inputs are destroyed
// (USE_KILL), tmp1 is an XMM scratch register (TEMP) and the flags are clobbered.
// Note the call passes both pointers first, then both counts.
instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                        rax_RegI result, regD tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11482 
// fast search of substring with known size.
// Matches StrIndexOf where the substring length is a compile-time int constant
// (int_cnt2). Only available when UseSSE42Intrinsics is set.
// Because the length is a constant, cnt2 (rax) is not an input here: it is
// declared KILL and handed to the assembler routine as a scratch register,
// alongside tmp (rcx) and the XMM register vec (TEMP).
// The constant selects the routine: string_indexofC8 for lengths >= 8,
// string_indexof otherwise; both receive the constant as icnt2.
instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
11511 
// Substring search where the substring length is a runtime value.
// Matches StrIndexOf with cnt2 as a real input in rax (USE_KILL), in contrast
// to string_indexof_con where the length is a constant. Only available when
// UseSSE42Intrinsics is set. All inputs are destroyed; tmp (rcx), the XMM
// register vec and the flags are clobbered as well.
instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) occupies the argument slot where string_indexof_con passes its
    // constant length. NOTE(review): presumably -1 tells the routine the length
    // is not a compile-time constant -- confirm in MacroAssembler::string_indexof.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11528 
// fast string equals
// Matches StrEquals over two string pointers and a shared count, and delegates
// to the assembler's char_arrays_equals routine with its first argument false
// (array_equals passes true to the same routine).
// Registers are pinned by operand class (str1=rdi, str2=rsi, cnt=rcx,
// result=rax, tmp3=rbx). The three inputs are destroyed (USE_KILL); tmp1/tmp2
// are XMM scratch registers (TEMP); tmp3 and the flags are clobbered.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11544 
11545 // fast array equals
// Matches AryEq over two array pointers pinned to rdi/rsi (USE_KILL), with
// the result in rax.  No count operand is matched: rcx (tmp3) is passed in
// the count position of char_arrays_equals() and, like rbx (tmp4) and the
// flags, is declared KILL.  The first argument is true here, whereas
// string_equals above passes false.
11546 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11547                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11548 %{
11549   match(Set result (AryEq ary1 ary2));
11550   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11551   //ins_cost(300);
11552 
11553   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11554   ins_encode %{
11555     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11556                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11557                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11558   %}
11559   ins_pipe( pipe_slow );
11560 %}
11561 
11562 //----------Control Flow Instructions------------------------------------------
11563 // Signed compare Instructions
11564 
11565 // XXX more variants!!
// Signed 32-bit register/register compare.  Matches CmpI op1 op2 and defines
// the signed flags register cr (DEF cr, USE op1, USE op2).  The format string
// shows "cmpl op1, op2"; the encoding is the REX_reg_reg prefix class, the
// primary opcode 0x3B (OpcP) and a reg/reg operand byte.
11566 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11567 %{
11568   match(Set cr (CmpI op1 op2));
11569   effect(DEF cr, USE op1, USE op2);
11570 
11571   format %{ "cmpl    $op1, $op2" %}
11572   opcode(0x3B);  /* Opcode 3B /r */
11573   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11574   ins_pipe(ialu_cr_reg_reg);
11575 %}
11576 
// Signed 32-bit compare of a register against an immI constant.  Matches
// CmpI op1 op2 and shows as "cmpl op1, op2".  Uses opcode 0x81 with /7 as
// the secondary opcode; the OpcSErm / Con8or32 encoding classes presumably
// choose between an 8-bit and a 32-bit immediate form -- see their
// definitions in the encode block of this file.
11577 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11578 %{
11579   match(Set cr (CmpI op1 op2));
11580 
11581   format %{ "cmpl    $op1, $op2" %}
11582   opcode(0x81, 0x07); /* Opcode 81 /7 */
11583   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11584   ins_pipe(ialu_cr_reg_imm);
11585 %}
11586 
// Signed 32-bit compare of a register against an int loaded from memory.
// Matches CmpI op1 (LoadI op2), folding the load into the compare; shown as
// "cmpl op1, op2" and encoded with opcode 0x3B in its reg/mem form.  Carries
// an explicit cost of 500 (marked XXX by the original author).
11587 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11588 %{
11589   match(Set cr (CmpI op1 (LoadI op2)));
11590 
11591   ins_cost(500); // XXX
11592   format %{ "cmpl    $op1, $op2" %}
11593   opcode(0x3B); /* Opcode 3B /r */
11594   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11595   ins_pipe(ialu_cr_reg_mem);
11596 %}
11597 
// Compare of a 32-bit register against the constant zero (immI0).  Matches
// CmpI src zero but, per the format string, emits "testl src, src" (opcode
// 0x85 with src used for both register operands) instead of a cmp.
11598 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11599 %{
11600   match(Set cr (CmpI src zero));
11601 
11602   format %{ "testl   $src, $src" %}
11603   opcode(0x85);
11604   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11605   ins_pipe(ialu_cr_reg_imm);
11606 %}
11607 
// Folds "(src & con) compared with zero" into one instruction.  Matches
// CmpI (AndI src con) zero and shows as "testl src, con": opcode 0xF7 with
// secondary opcode 0x00, followed by the constant emitted through Con32.
// src is not listed in any effect and is not a result of the match.
11608 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11609 %{
11610   match(Set cr (CmpI (AndI src con) zero));
11611 
11612   format %{ "testl   $src, $con" %}
11613   opcode(0xF7, 0x00);
11614   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11615   ins_pipe(ialu_cr_reg_imm);
11616 %}
11617 
// Folds "(src & [mem]) compared with zero" into one instruction.  Matches
// CmpI (AndI src (LoadI mem)) zero and shows as "testl src, mem", encoded
// with opcode 0x85 in its reg/mem form.
11618 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11619 %{
11620   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11621 
11622   format %{ "testl   $src, $mem" %}
11623   opcode(0x85);
11624   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11625   ins_pipe(ialu_cr_reg_mem);
11626 %}
11627 
11628 // Unsigned compare Instructions; really, same as signed except they
11629 // produce an rFlagsRegU instead of rFlagsReg.
// compU_rReg: register/register form.  Matches CmpU op1 op2 and uses the
// same opcode (0x3B) and encoding classes as compI_rReg; only the flags
// operand class and the "# unsigned" annotation in the format differ.
11630 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11631 %{
11632   match(Set cr (CmpU op1 op2));
11633 
11634   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11635   opcode(0x3B); /* Opcode 3B /r */
11636   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11637   ins_pipe(ialu_cr_reg_reg);
11638 %}
11639 
// Unsigned 32-bit compare of a register against an immI constant.  Matches
// CmpU op1 op2 into rFlagsRegU; opcode 0x81 /7 with the same OpcSErm /
// Con8or32 encoding classes as the signed compI_rReg_imm.
11640 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11641 %{
11642   match(Set cr (CmpU op1 op2));
11643 
11644   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11645   opcode(0x81,0x07); /* Opcode 81 /7 */
11646   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11647   ins_pipe(ialu_cr_reg_imm);
11648 %}
11649 
// Unsigned 32-bit compare of a register against an int loaded from memory.
// Matches CmpU op1 (LoadI op2) into rFlagsRegU; opcode 0x3B in reg/mem form,
// with the same explicit cost of 500 as the signed compI_rReg_mem.
11650 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11651 %{
11652   match(Set cr (CmpU op1 (LoadI op2)));
11653 
11654   ins_cost(500); // XXX
11655   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11656   opcode(0x3B); /* Opcode 3B /r */
11657   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11658   ins_pipe(ialu_cr_reg_mem);
11659 %}
11660 
11661 // // // Cisc-spilled version of cmpU_rReg
11662 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11663 // //%{
11664 // //  match(Set cr (CmpU (LoadI op1) op2));
11665 // //
11666 // //  format %{ "CMPu   $op1,$op2" %}
11667 // //  ins_cost(500);
11668 // //  opcode(0x39);  /* Opcode 39 /r */
11669 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11670 // //%}
11671 
// Unsigned compare of a 32-bit register against the constant zero.  Matches
// CmpU src zero into rFlagsRegU and, per the format string, emits
// "testl src, src" (opcode 0x85 with src as both register operands).
11672 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11673 %{
11674   match(Set cr (CmpU src zero));
11675 
11676   format %{ "testl  $src, $src\t# unsigned" %}
11677   opcode(0x85);
11678   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11679   ins_pipe(ialu_cr_reg_imm);
11680 %}
11681 
// Pointer register/register compare.  Matches CmpP op1 op2 and produces
// unsigned flags (rFlagsRegU).  Shown as "cmpq op1, op2": opcode 0x3B with
// the REX_reg_reg_wide prefix class, i.e. the 64-bit form of the compare.
11682 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11683 %{
11684   match(Set cr (CmpP op1 op2));
11685 
11686   format %{ "cmpq    $op1, $op2\t# ptr" %}
11687   opcode(0x3B); /* Opcode 3B /r */
11688   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11689   ins_pipe(ialu_cr_reg_reg);
11690 %}
11691 
// Compare of a pointer register against a pointer loaded from memory.
// Matches CmpP op1 (LoadP op2) into rFlagsRegU, shown as "cmpq op1, op2"
// (opcode 0x3B, wide reg/mem form).  Has no predicate and an explicit cost
// of 500; compP_mem_rReg below matches the same tree under a predicate and
// without the explicit cost.
11692 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11693 %{
11694   match(Set cr (CmpP op1 (LoadP op2)));
11695 
11696   ins_cost(500); // XXX
11697   format %{ "cmpq    $op1, $op2\t# ptr" %}
11698   opcode(0x3B); /* Opcode 3B /r */
11699   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11700   ins_pipe(ialu_cr_reg_mem);
11701 %}
11702 
11703 // // // Cisc-spilled version of cmpP_rReg
11704 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11705 // //%{
11706 // //  match(Set cr (CmpP (LoadP op1) op2));
11707 // //
11708 // //  format %{ "CMPu   $op1,$op2" %}
11709 // //  ins_cost(500);
11710 // //  opcode(0x39);  /* Opcode 39 /r */
11711 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11712 // //%}
11713 
11714 // XXX this is generalized by compP_rReg_mem???
11715 // Compare raw pointer (used in out-of-heap check).
11716 // Only works because non-oop pointers must be raw pointers
11717 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but without the explicit
// ins_cost(500) and guarded by a predicate: it applies only when the type
// reached via n->in(2)->in(2) is not an oop pointer (presumably the address
// input of the LoadP -- confirm against the ideal graph layout).
11718 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11719 %{
11720   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11721   match(Set cr (CmpP op1 (LoadP op2)));
11722 
11723   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11724   opcode(0x3B); /* Opcode 3B /r */
11725   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11726   ins_pipe(ialu_cr_reg_mem);
11727 %}
11728 
11729 // This will generate a signed flags result. This should be OK since
11730 // any compare to a zero should be eq/neq.
// Null test of a pointer register.  Matches CmpP src zero (immP0) and emits
// "testq src, src" per the format string: opcode 0x85 with the wide REX
// prefix class and src used for both register operands.
11731 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11732 %{
11733   match(Set cr (CmpP src zero));
11734 
11735   format %{ "testq   $src, $src\t# ptr" %}
11736   opcode(0x85);
11737   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11738   ins_pipe(ialu_cr_reg_imm);
11739 %}
11740 
11741 // This will generate a signed flags result. This should be OK since
11742 // any compare to a zero should be eq/neq.
// Null test of a pointer held in memory.  Matches CmpP (LoadP op) zero when
// compressed oops are off or the narrow-oop base is non-NULL (the
// complementary case is handled by testP_mem_reg0 below).  Encoded as opcode
// 0xF7 /0 on the memory operand with a 32-bit immediate of 0xFFFFFFFF; the
// format string prints it as a 64-bit all-ones mask, so the immediate is
// presumably sign-extended by the wide form -- confirm in the ISA manual.
11743 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11744 %{
11745   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11746   match(Set cr (CmpP (LoadP op) zero));
11747 
11748   ins_cost(500); // XXX
11749   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11750   opcode(0xF7); /* Opcode F7 /0 */
11751   ins_encode(REX_mem_wide(op),
11752              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11753   ins_pipe(ialu_cr_reg_imm);
11754 %}
11755 
// Null test of a pointer held in memory for the configuration where
// compressed oops are enabled and the narrow-oop base is NULL.  Instead of
// an immediate it compares r12 with the memory operand; as the format string
// notes, this relies on r12 (the heap-base register) holding zero in that
// configuration.
11756 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11757 %{
11758   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11759   match(Set cr (CmpP (LoadP mem) zero));
11760 
11761   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11762   ins_encode %{
11763     __ cmpq(r12, $mem$$Address);
11764   %}
11765   ins_pipe(ialu_cr_reg_mem);
11766 %}
11767 
// Compare of two compressed (narrow) oop registers.  Matches CmpN op1 op2
// into rFlagsRegU and emits a 32-bit "cmpl op1, op2" through the assembler.
11768 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11769 %{
11770   match(Set cr (CmpN op1 op2));
11771 
11772   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11773   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11774   ins_pipe(ialu_cr_reg_reg);
11775 %}
11776 
// Compare of a compressed oop register against a compressed oop loaded from
// memory.  Matches CmpN src (LoadN mem) into rFlagsRegU and emits a 32-bit
// "cmpl src, mem" with the load folded into the compare.
11777 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11778 %{
11779   match(Set cr (CmpN src (LoadN mem)));
11780 
11781   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11782   ins_encode %{
11783     __ cmpl($src$$Register, $mem$$Address);
11784   %}
11785   ins_pipe(ialu_cr_reg_mem);
11786 %}
11787 
// Compare of a compressed oop register against a compressed-oop constant
// (immN).  Matches CmpN op1 op2 into rFlagsRegU; the constant is handed to
// cmp_narrow_oop() as a jobject.
11788 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11789   match(Set cr (CmpN op1 op2));
11790 
11791   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11792   ins_encode %{
11793     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11794   %}
11795   ins_pipe(ialu_cr_reg_imm);
11796 %}
11797 
// Compare of a compressed-oop constant against a compressed oop loaded from
// memory.  Matches CmpN src (LoadN mem) into rFlagsRegU and emits a compare
// of the memory operand with the constant via cmp_narrow_oop().
// NOTE(review): the match rule has the constant as the first CmpN input,
// while the emitted compare has memory as its first operand, so the operand
// order is swapped.  That is harmless for eq/ne tests only -- confirm that
// CmpN results are never consumed by ordered conditions.
11798 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11799 %{
11800   match(Set cr (CmpN src (LoadN mem)));
11801 
11802   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11803   ins_encode %{
11804     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11805   %}
11806   ins_pipe(ialu_cr_reg_mem);
11807 %}
11808 
// Null test of a compressed oop register.  Matches CmpN src zero (immN0)
// and emits a 32-bit "testl src, src" through the assembler.
11809 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11810   match(Set cr (CmpN src zero));
11811 
11812   format %{ "testl   $src, $src\t# compressed ptr" %}
11813   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11814   ins_pipe(ialu_cr_reg_imm);
11815 %}
11816 
// Null test of a compressed oop held in memory, for the configuration where
// the narrow-oop base is non-NULL (testN_mem_reg0 covers the NULL-base case).
// Matches CmpN (LoadN mem) zero, so the flags must report "equal" exactly
// when the 32-bit value in memory is 0.  The compare is therefore made
// against 0: comparing against 0xFFFFFFFF would set ZF only when memory
// holds -1.  "cmpl mem, 0" yields the same ZF/SF/CF/OF as "testl mem, -1".
11817 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11818 %{
11819   predicate(Universe::narrow_oop_base() != NULL);
11820   match(Set cr (CmpN (LoadN mem) zero));
11821 
11822   ins_cost(500); // XXX
11823   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11824   ins_encode %{
11825     __ cmpl($mem$$Address, 0);
11826   %}
11827   ins_pipe(ialu_cr_reg_mem);
11828 %}
11829 
// Null test of a compressed oop held in memory for the configuration where
// the narrow-oop base is NULL.  Compares r12 with the 32-bit memory operand;
// as the format string notes, this relies on r12 (the heap-base register)
// holding zero in that configuration.
11830 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11831 %{
11832   predicate(Universe::narrow_oop_base() == NULL);
11833   match(Set cr (CmpN (LoadN mem) zero));
11834 
11835   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11836   ins_encode %{
11837     __ cmpl(r12, $mem$$Address);
11838   %}
11839   ins_pipe(ialu_cr_reg_mem);
11840 %}
11841 
11842 // Yanked all unsigned pointer compare operations.
11843 // Pointer compares are done with CmpP which is already unsigned.
11844 
// Signed 64-bit register/register compare.  Matches CmpL op1 op2 and shows
// as "cmpq op1, op2": opcode 0x3B with the wide REX prefix class.
11845 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11846 %{
11847   match(Set cr (CmpL op1 op2));
11848 
11849   format %{ "cmpq    $op1, $op2" %}
11850   opcode(0x3B);  /* Opcode 3B /r */
11851   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11852   ins_pipe(ialu_cr_reg_reg);
11853 %}
11854 
// Signed 64-bit compare of a register against a long constant of operand
// class immL32.  Matches CmpL op1 op2, shown as "cmpq op1, op2": opcode
// 0x81 /7 through the wide OpcSErm_wide class with a Con8or32 immediate.
11855 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11856 %{
11857   match(Set cr (CmpL op1 op2));
11858 
11859   format %{ "cmpq    $op1, $op2" %}
11860   opcode(0x81, 0x07); /* Opcode 81 /7 */
11861   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11862   ins_pipe(ialu_cr_reg_imm);
11863 %}
11864 
// Signed 64-bit compare of a register against a long loaded from memory.
// Matches CmpL op1 (LoadL op2), shown as "cmpq op1, op2": opcode 0x3B in
// wide reg/mem form.  Unlike the int and pointer reg/mem compares above, no
// explicit ins_cost is given here.
11865 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11866 %{
11867   match(Set cr (CmpL op1 (LoadL op2)));
11868 
11869   format %{ "cmpq    $op1, $op2" %}
11870   opcode(0x3B); /* Opcode 3B /r */
11871   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11872   ins_pipe(ialu_cr_reg_mem);
11873 %}
11874 
// Compare of a 64-bit register against the constant zero (immL0).  Matches
// CmpL src zero and emits "testq src, src" per the format string: opcode
// 0x85, wide REX prefix class, src used for both register operands.
11875 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11876 %{
11877   match(Set cr (CmpL src zero));
11878 
11879   format %{ "testq   $src, $src" %}
11880   opcode(0x85);
11881   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11882   ins_pipe(ialu_cr_reg_imm);
11883 %}
11884 
// Folds "(src & con) compared with zero" for longs into one instruction.
// Matches CmpL (AndL src con) zero with con restricted to operand class
// immL32; shown as "testq src, con": opcode 0xF7 /0 with the wide REX
// prefix class and the constant emitted through Con32.
11885 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11886 %{
11887   match(Set cr (CmpL (AndL src con) zero));
11888 
11889   format %{ "testq   $src, $con\t# long" %}
11890   opcode(0xF7, 0x00);
11891   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11892   ins_pipe(ialu_cr_reg_imm);
11893 %}
11894 
// Folds "(src & [mem]) compared with zero" for longs into one instruction.
// Matches CmpL (AndL src (LoadL mem)) zero, shown as "testq src, mem":
// opcode 0x85 in wide reg/mem form.
11895 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11896 %{
11897   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11898 
11899   format %{ "testq   $src, $mem" %}
11900   opcode(0x85);
11901   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11902   ins_pipe(ialu_cr_reg_mem);
11903 %}
11904 
11905 // Manifest a CmpL result in an integer register.  Very painful.
11906 // This is the test to avoid.
// Matches CmpL3 src1 src2 into the int register dst and kills the flags.
// The format string lists the sequence: compare, preload dst with -1, skip
// to "done" when less, otherwise setne + zero-extend so dst becomes 0 or 1.
// The bytes themselves come from the cmpl3_flag encoding class, defined
// elsewhere in this file.
11907 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11908 %{
11909   match(Set dst (CmpL3 src1 src2));
11910   effect(KILL flags);
11911 
11912   ins_cost(275); // XXX
11913   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11914             "movl    $dst, -1\n\t"
11915             "jl,s    done\n\t"
11916             "setne   $dst\n\t"
11917             "movzbl  $dst, $dst\n\t"
11918     "done:" %}
11919   ins_encode(cmpl3_flag(src1, src2, dst));
11920   ins_pipe(pipe_slow);
11921 %}
11922 
11923 //----------Max and Min--------------------------------------------------------
11924 // Min Instructions
11925 
// Helper without a match rule; within this section it is instantiated only
// by the expand block of minI_rReg below.  Conditionally moves src into dst
// ("cmovlgt" in the format string, two-byte opcode 0x0F 0x4F), reading the
// flags in cr.  dst is USE_DEF because it keeps its old value when the
// condition does not hold.
11926 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11927 %{
11928   effect(USE_DEF dst, USE src, USE cr);
11929 
11930   format %{ "cmovlgt $dst, $src\t# min" %}
11931   opcode(0x0F, 0x4F);
11932   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11933   ins_pipe(pipe_cmov_reg);
11934 %}
11935 
11936 
// Integer minimum, computed in place: matches MinI dst src with the result
// in dst.  Expands into two instructions sharing a fresh flags operand:
// compI_rReg compares dst with src, then cmovI_reg_g replaces dst with src
// when the compare found dst greater.
11937 instruct minI_rReg(rRegI dst, rRegI src)
11938 %{
11939   match(Set dst (MinI dst src));
11940 
11941   ins_cost(200);
11942   expand %{
11943     rFlagsReg cr;
11944     compI_rReg(cr, dst, src);
11945     cmovI_reg_g(dst, src, cr);
11946   %}
11947 %}
11948 
// Helper without a match rule; within this section it is instantiated only
// by the expand block of maxI_rReg below.  Conditionally moves src into dst
// ("cmovllt" in the format string, two-byte opcode 0x0F 0x4C), reading the
// flags in cr.  dst is USE_DEF because it keeps its old value when the
// condition does not hold.
11949 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11950 %{
11951   effect(USE_DEF dst, USE src, USE cr);
11952 
11953   format %{ "cmovllt $dst, $src\t# max" %}
11954   opcode(0x0F, 0x4C);
11955   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11956   ins_pipe(pipe_cmov_reg);
11957 %}
11958 
11959 
// Integer maximum, computed in place: matches MaxI dst src with the result
// in dst.  Expands into two instructions sharing a fresh flags operand:
// compI_rReg compares dst with src, then cmovI_reg_l replaces dst with src
// when the compare found dst less.
11960 instruct maxI_rReg(rRegI dst, rRegI src)
11961 %{
11962   match(Set dst (MaxI dst src));
11963 
11964   ins_cost(200);
11965   expand %{
11966     rFlagsReg cr;
11967     compI_rReg(cr, dst, src);
11968     cmovI_reg_l(dst, src, cr);
11969   %}
11970 %}
11971 
11972 // ============================================================================
11973 // Branch Instructions
11974 
11975 // Jump Direct - Label defines a relative address from JMP+1
// Long form of the unconditional branch: matches Goto and always emits the
// long jump (third argument of jmp() is false), declared as exactly 5 bytes
// via size(5).  jmpDir_short further below is the 2-byte replacement.
11976 instruct jmpDir(label labl)
11977 %{
11978   match(Goto);
11979   effect(USE labl);
11980 
11981   ins_cost(300);
11982   format %{ "jmp     $labl" %}
11983   size(5);
11984   ins_encode %{
11985     Label* L = $labl$$label;
11986     __ jmp(*L, false); // Always long jump
11987   %}
11988   ins_pipe(pipe_jmp);
11989 %}
11990 
11991 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form of the conditional branch on signed flags: matches If cop cr.
// The condition code comes from the cmpOp operand ($cop$$cmpcode) and the
// long jcc is always emitted, declared as exactly 6 bytes via size(6).
11992 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11993 %{
11994   match(If cop cr);
11995   effect(USE labl);
11996 
11997   ins_cost(300);
11998   format %{ "j$cop     $labl" %}
11999   size(6);
12000   ins_encode %{
12001     Label* L = $labl$$label;
12002     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12003   %}
12004   ins_pipe(pipe_jcc);
12005 %}
12006 
12007 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-branch on signed flags: identical in encoding to jmpCon, but
// matches the CountedLoopEnd node instead of If and annotates the format
// with "# loop end".
12008 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12009 %{
12010   match(CountedLoopEnd cop cr);
12011   effect(USE labl);
12012 
12013   ins_cost(300);
12014   format %{ "j$cop     $labl\t# loop end" %}
12015   size(6);
12016   ins_encode %{
12017     Label* L = $labl$$label;
12018     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12019   %}
12020   ins_pipe(pipe_jcc);
12021 %}
12022 
12023 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-branch on unsigned flags: matches CountedLoopEnd with a cmpOpU
// condition and an rFlagsRegU input.  Always emits the long 6-byte jcc.
12024 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12025   match(CountedLoopEnd cop cmp);
12026   effect(USE labl);
12027 
12028   ins_cost(300);
12029   format %{ "j$cop,u   $labl\t# loop end" %}
12030   size(6);
12031   ins_encode %{
12032     Label* L = $labl$$label;
12033     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12034   %}
12035   ins_pipe(pipe_jcc);
12036 %}
12037 
// Loop back-branch for the cmpOpUCF / rFlagsRegUCF operand classes (both
// defined elsewhere in this file).  Same 6-byte long jcc as jmpLoopEndU,
// but with a lower cost (200 instead of 300).
12038 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12039   match(CountedLoopEnd cop cmp);
12040   effect(USE labl);
12041 
12042   ins_cost(200);
12043   format %{ "j$cop,u   $labl\t# loop end" %}
12044   size(6);
12045   ins_encode %{
12046     Label* L = $labl$$label;
12047     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12048   %}
12049   ins_pipe(pipe_jcc);
12050 %}
12051 
12052 // Jump Direct Conditional - using unsigned comparison
// Long form of the conditional branch on unsigned flags: matches If with a
// cmpOpU condition and an rFlagsRegU input.  Always emits the long 6-byte
// jcc with the condition code taken from $cop$$cmpcode.
12053 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12054   match(If cop cmp);
12055   effect(USE labl);
12056 
12057   ins_cost(300);
12058   format %{ "j$cop,u  $labl" %}
12059   size(6);
12060   ins_encode %{
12061     Label* L = $labl$$label;
12062     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12063   %}
12064   ins_pipe(pipe_jcc);
12065 %}
12066 
// Conditional branch for the cmpOpUCF / rFlagsRegUCF operand classes (both
// defined elsewhere in this file).  Same 6-byte long jcc as jmpConU, but
// with a lower cost (200 instead of 300).
12067 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12068   match(If cop cmp);
12069   effect(USE labl);
12070 
12071   ins_cost(200);
12072   format %{ "j$cop,u  $labl" %}
12073   size(6);
12074   ins_encode %{
12075     Label* L = $labl$$label;
12076     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12077   %}
12078   ins_pipe(pipe_jcc);
12079 %}
12080 
// Two-branch conditional jump for the cmpOpUCF2 operand class, which per the
// encoding below may only carry notEqual or equal (anything else hits
// ShouldNotReachHere).  The parity flag is tested first:
//   notEqual: jump to the label on parity, then also on notEqual;
//   equal:    skip over the jump on parity, otherwise jump on equal.
// Presumably parity marks an unordered floating-point compare -- confirm
// against the instructions that produce rFlagsRegUCF.  No size() is
// declared for this long form.
12081 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12082   match(If cop cmp);
12083   effect(USE labl);
12084 
12085   ins_cost(200);
12086   format %{ $$template
12087     if ($cop$$cmpcode == Assembler::notEqual) {
12088       $$emit$$"jp,u   $labl\n\t"
12089       $$emit$$"j$cop,u   $labl"
12090     } else {
12091       $$emit$$"jp,u   done\n\t"
12092       $$emit$$"j$cop,u   $labl\n\t"
12093       $$emit$$"done:"
12094     }
12095   %}
12096   ins_encode %{
12097     Label* l = $labl$$label;
12098     if ($cop$$cmpcode == Assembler::notEqual) {
12099       __ jcc(Assembler::parity, *l, false);
12100       __ jcc(Assembler::notEqual, *l, false);
12101     } else if ($cop$$cmpcode == Assembler::equal) {
12102       Label done;
12103       __ jccb(Assembler::parity, done);  // short hop over the jcc that follows
12104       __ jcc(Assembler::equal, *l, false);
12105       __ bind(done);
12106     } else {
12107        ShouldNotReachHere();
12108     }
12109   %}
12110   ins_pipe(pipe_jcc);
12111 %}
12112 
12113 // ============================================================================
12114 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12115 // superklass array for an instance of the superklass.  Set a hidden
12116 // internal cache on a hit (cache is checked with exposed code in
12117 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12118 // encoding ALSO sets flags.
12119 
// Slow half of the subtype check that materializes its result in a register.
// Matches PartialSubtypeCheck sub super with fixed registers: sub=rsi,
// super=rax, result=rdi; rcx and the flags are killed.  The format string
// documents the emitted sequence (scan of the secondary-supers array with
// repne scasq, cache update on a hit).  opcode(0x1) asks the shared
// enc_PartialSubtypeCheck encoding to also zero rdi on a hit, which is what
// distinguishes this variant from partialSubtypeCheck_vs_Zero.
12120 instruct partialSubtypeCheck(rdi_RegP result,
12121                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12122                              rFlagsReg cr)
12123 %{
12124   match(Set result (PartialSubtypeCheck sub super));
12125   effect(KILL rcx, KILL cr);
12126 
12127   ins_cost(1100);  // slightly larger than the next version
12128   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12129             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12130             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12131             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12132             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12133             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12134             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12135     "miss:\t" %}
12136 
12137   opcode(0x1); // Force a XOR of RDI
12138   ins_encode(enc_PartialSubtypeCheck());
12139   ins_pipe(pipe_slow);
12140 %}
12141 
// Slow half of the subtype check when only the flags are consumed.  Matches
// CmpP (PartialSubtypeCheck sub super) zero and defines cr directly, so the
// register result is not needed: rdi is merely killed (along with rcx), and
// opcode(0x0) tells the shared enc_PartialSubtypeCheck encoding to skip the
// final XOR of rdi.  Its cost (1000) is lower than the register-producing
// variant (1100).
12142 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12143                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12144                                      immP0 zero,
12145                                      rdi_RegP result)
12146 %{
12147   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12148   effect(KILL rcx, KILL result);
12149 
12150   ins_cost(1000);
12151   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12152             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12153             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12154             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12155             "jne,s   miss\t\t# Missed: flags nz\n\t"
12156             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12157     "miss:\t" %}
12158 
12159   opcode(0x0); // No need to XOR RDI
12160   ins_encode(enc_PartialSubtypeCheck());
12161   ins_pipe(pipe_slow);
12162 %}
12163 
12164 // ============================================================================
12165 // Branch Instructions -- short offset versions
12166 //
12167 // These instructions are used to replace jumps of a long offset (the default
12168 // match) with jumps of a shorter offset.  These instructions are all tagged
12169 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12170 // match rules in general matching.  Instead, the ADLC generates a conversion
12171 // method in the MachNode which can be used to do in-place replacement of the
12172 // long variant with the shorter variant.  The compiler will determine if a
12173 // branch can be taken by the is_short_branch_offset() predicate in the machine
12174 // specific code section of the file.
12175 
12176 // Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte) replacement for jmpDir: same match rule, but emitted with
// jmpb() and tagged ins_short_branch(1) so that it is only substituted for
// the long form as described in the section comment above.
12177 instruct jmpDir_short(label labl) %{
12178   match(Goto);
12179   effect(USE labl);
12180 
12181   ins_cost(300);
12182   format %{ "jmp,s   $labl" %}
12183   size(2);
12184   ins_encode %{
12185     Label* L = $labl$$label;
12186     __ jmpb(*L);
12187   %}
12188   ins_pipe(pipe_jmp);
12189   ins_short_branch(1);
12190 %}
12191 
12192 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) replacement for jmpCon: same match rule (If on signed
// flags), emitted with jccb() and tagged ins_short_branch(1).
12193 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12194   match(If cop cr);
12195   effect(USE labl);
12196 
12197   ins_cost(300);
12198   format %{ "j$cop,s   $labl" %}
12199   size(2);
12200   ins_encode %{
12201     Label* L = $labl$$label;
12202     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12203   %}
12204   ins_pipe(pipe_jcc);
12205   ins_short_branch(1);
12206 %}
12207 
12208 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) replacement for jmpLoopEnd: same match rule
// (CountedLoopEnd on signed flags), emitted with jccb() and tagged
// ins_short_branch(1).
12209 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12210   match(CountedLoopEnd cop cr);
12211   effect(USE labl);
12212 
12213   ins_cost(300);
12214   format %{ "j$cop,s   $labl\t# loop end" %}
12215   size(2);
12216   ins_encode %{
12217     Label* L = $labl$$label;
12218     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12219   %}
12220   ins_pipe(pipe_jcc);
12221   ins_short_branch(1);
12222 %}
12223 
12224 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) replacement for jmpLoopEndU: same match rule
// (CountedLoopEnd on unsigned flags), emitted with jccb() and tagged
// ins_short_branch(1).
12225 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12226   match(CountedLoopEnd cop cmp);
12227   effect(USE labl);
12228 
12229   ins_cost(300);
12230   format %{ "j$cop,us  $labl\t# loop end" %}
12231   size(2);
12232   ins_encode %{
12233     Label* L = $labl$$label;
12234     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12235   %}
12236   ins_pipe(pipe_jcc);
12237   ins_short_branch(1);
12238 %}
12239 
// Short (2-byte) replacement for jmpLoopEndUCF: same match rule, emitted
// with jccb() and tagged ins_short_branch(1).
// NOTE(review): ins_cost is 300 here but 200 on the long jmpLoopEndUCF;
// since short-branch match rules are suppressed in general matching (see
// the section comment above), the difference may be immaterial -- confirm.
12240 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12241   match(CountedLoopEnd cop cmp);
12242   effect(USE labl);
12243 
12244   ins_cost(300);
12245   format %{ "j$cop,us  $labl\t# loop end" %}
12246   size(2);
12247   ins_encode %{
12248     Label* L = $labl$$label;
12249     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12250   %}
12251   ins_pipe(pipe_jcc);
12252   ins_short_branch(1);
12253 %}
12254 
12255 // Jump Direct Conditional - using unsigned comparison
// Short (2-byte) replacement for jmpConU: same match rule (If on unsigned
// flags), emitted with jccb() and tagged ins_short_branch(1).
12256 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12257   match(If cop cmp);
12258   effect(USE labl);
12259 
12260   ins_cost(300);
12261   format %{ "j$cop,us  $labl" %}
12262   size(2);
12263   ins_encode %{
12264     Label* L = $labl$$label;
12265     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12266   %}
12267   ins_pipe(pipe_jcc);
12268   ins_short_branch(1);
12269 %}
12270 
// Short (2-byte) replacement for jmpConUCF: same match rule, emitted with
// jccb() and tagged ins_short_branch(1).
// NOTE(review): ins_cost is 300 here but 200 on the long jmpConUCF; since
// short-branch match rules are suppressed in general matching (see the
// section comment above), the difference may be immaterial -- confirm.
12271 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12272   match(If cop cmp);
12273   effect(USE labl);
12274 
12275   ins_cost(300);
12276   format %{ "j$cop,us  $labl" %}
12277   size(2);
12278   ins_encode %{
12279     Label* L = $labl$$label;
12280     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12281   %}
12282   ins_pipe(pipe_jcc);
12283   ins_short_branch(1);
12284 %}
12285 
// Short replacement for jmpConUCF2: the same parity-then-condition pair of
// branches, but both emitted with jccb(), giving a fixed size of 4 bytes
// (two 2-byte short jumps).  Only notEqual and equal are accepted; any
// other condition code hits ShouldNotReachHere.
12286 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12287   match(If cop cmp);
12288   effect(USE labl);
12289 
12290   ins_cost(300);
12291   format %{ $$template
12292     if ($cop$$cmpcode == Assembler::notEqual) {
12293       $$emit$$"jp,u,s   $labl\n\t"
12294       $$emit$$"j$cop,u,s   $labl"
12295     } else {
12296       $$emit$$"jp,u,s   done\n\t"
12297       $$emit$$"j$cop,u,s  $labl\n\t"
12298       $$emit$$"done:"
12299     }
12300   %}
12301   size(4);
12302   ins_encode %{
12303     Label* l = $labl$$label;
12304     if ($cop$$cmpcode == Assembler::notEqual) {
12305       __ jccb(Assembler::parity, *l);
12306       __ jccb(Assembler::notEqual, *l);
12307     } else if ($cop$$cmpcode == Assembler::equal) {
12308       Label done;
12309       __ jccb(Assembler::parity, done);  // hop over the jump that follows
12310       __ jccb(Assembler::equal, *l);
12311       __ bind(done);
12312     } else {
12313        ShouldNotReachHere();
12314     }
12315   %}
12316   ins_pipe(pipe_jcc);
12317   ins_short_branch(1);
12318 %}
12319 
12320 // ============================================================================
12321 // inlined locking and unlocking
12322 
// Inlined monitor-enter fast path.  Matches FastLock object box and reports
// its outcome through the flags register cr.  tmp is pinned to rax and,
// like scr, is reserved as TEMP.  The code itself is produced by the
// Fast_Lock encoding class, defined elsewhere in this file.
12323 instruct cmpFastLock(rFlagsReg cr,
12324                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12325 %{
12326   match(Set cr (FastLock object box));
12327   effect(TEMP tmp, TEMP scr);
12328 
12329   ins_cost(300);
12330   format %{ "fastlock $object,$box,$tmp,$scr" %}
12331   ins_encode(Fast_Lock(object, box, tmp, scr));
12332   ins_pipe(pipe_slow);
12333 %}
12334 
// Inlined monitor-exit fast path.  Matches FastUnlock object box and reports
// its outcome through the flags register cr.  Here the box operand is the
// one pinned to rax, and a single pointer register tmp is reserved as TEMP.
// The code itself is produced by the Fast_Unlock encoding class.
12335 instruct cmpFastUnlock(rFlagsReg cr,
12336                        rRegP object, rax_RegP box, rRegP tmp)
12337 %{
12338   match(Set cr (FastUnlock object box));
12339   effect(TEMP tmp);
12340 
12341   ins_cost(300);
12342   format %{ "fastunlock $object, $box, $tmp" %}
12343   ins_encode(Fast_Unlock(object, box, tmp));
12344   ins_pipe(pipe_slow);
12345 %}
12346 
12347 
12348 // ============================================================================
12349 // Safepoint Instructions
// Safepoint poll used when the polling page is not "far"
// (!Assembler::is_polling_page_far()).  Matches SafePoint with no inputs and
// emits a testl of rax against the polling-page address, built as an
// AddressLiteral carrying a poll_type relocation; the format string shows
// this as a rip-relative access.  Only the flags are killed.
12350 instruct safePoint_poll(rFlagsReg cr)
12351 %{
12352   predicate(!Assembler::is_polling_page_far());
12353   match(SafePoint);
12354   effect(KILL cr);
12355 
12356   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12357             "# Safepoint: poll for GC" %}
12358   ins_cost(125);
12359   ins_encode %{
12360     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12361     __ testl(rax, addr);
12362   %}
12363   ins_pipe(ialu_reg_mem);
12364 %}
12365 
// Safepoint poll used when the polling page is "far"
// (Assembler::is_polling_page_far()).  Here SafePoint is matched with an
// input: the polling-page address is supplied in the pointer register poll.
// A poll_type relocation is recorded and then rax is tested against the
// memory at [poll + 0].  Only the flags are killed.
12366 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12367 %{
12368   predicate(Assembler::is_polling_page_far());
12369   match(SafePoint poll);
12370   effect(KILL cr, USE poll);
12371 
12372   format %{ "testl  rax, [$poll]\t"
12373             "# Safepoint: poll for GC" %}
12374   ins_cost(125);
12375   ins_encode %{
12376     __ relocate(relocInfo::poll_type);
12377     __ testl(rax, Address($poll$$Register, 0));
12378   %}
12379   ins_pipe(ialu_reg_mem);
12380 %}
12381 
12382 // ============================================================================
12383 // Procedure Call/Return Instructions
12384 // Call Java Static Instruction
12385 // Note: If this code changes, the corresponding ret_addr_offset() and
12386 //       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava nodes that are NOT method-handle invokes (the
// method-handle case is covered by CallStaticJavaHandle below).  Emits the
// Java_Static_Call encoding (opcode 0xE8) followed by call_epilog, and
// requests ins_alignment(4).
12387 instruct CallStaticJavaDirect(method meth) %{
12388   match(CallStaticJava);
12389   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12390   effect(USE meth);
12391 
12392   ins_cost(300);
12393   format %{ "call,static " %}
12394   opcode(0xE8); /* E8 cd */
12395   ins_encode(Java_Static_Call(meth), call_epilog);
12396   ins_pipe(pipe_slow);
12397   ins_alignment(4);
12398 %}
12399 
12400 // Call Java Static Instruction (method handle version)
12401 // Note: If this code changes, the corresponding ret_addr_offset() and
12402 //       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava nodes that ARE method-handle invokes.  The call is
// bracketed by the preserve_SP / restore_SP encodings; the rbp_mh_SP_save
// operand pins RBP for that purpose but is not mentioned in effect() or
// passed to any encoding class.
12403 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12404   match(CallStaticJava);
12405   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12406   effect(USE meth);
12407   // RBP is saved by all callees (for interpreter stack correction).
12408   // We use it here for a similar purpose, in {preserve,restore}_SP.
12409 
12410   ins_cost(300);
12411   format %{ "call,static/MethodHandle " %}
12412   opcode(0xE8); /* E8 cd */
12413   ins_encode(preserve_SP,
12414              Java_Static_Call(meth),
12415              restore_SP,
12416              call_epilog);
12417   ins_pipe(pipe_slow);
12418   ins_alignment(4);
12419 %}
12420 
12421 // Call Java Dynamic Instruction
12422 // Note: If this code changes, the corresponding ret_addr_offset() and
12423 //       compute_padding() functions will have to be adjusted.
// Matches CallDynamicJava.  Per the format string the call is preceded by a
// load of Universe::non_oop_word() into rax; both are produced by the
// Java_Dynamic_Call encoding, followed by call_epilog.  Requests
// ins_alignment(4).
12424 instruct CallDynamicJavaDirect(method meth)
12425 %{
12426   match(CallDynamicJava);
12427   effect(USE meth);
12428 
12429   ins_cost(300);
12430   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12431             "call,dynamic " %}
12432   opcode(0xE8); /* E8 cd */
12433   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12434   ins_pipe(pipe_slow);
12435   ins_alignment(4);
12436 %}
12437 
12438 // Call Runtime Instruction
// Matches CallRuntime and emits the Java_To_Runtime encoding (opcode 0xE8).
// Unlike the Java call forms above there is no call_epilog and no
// ins_alignment request.
12439 instruct CallRuntimeDirect(method meth)
12440 %{
12441   match(CallRuntime);
12442   effect(USE meth);
12443 
12444   ins_cost(300);
12445   format %{ "call,runtime " %}
12446   opcode(0xE8); /* E8 cd */
12447   ins_encode(Java_To_Runtime(meth));
12448   ins_pipe(pipe_slow);
12449 %}
12450 
12451 // Call runtime without safepoint
// Matches CallLeaf.  Uses the same Java_To_Runtime encoding as
// CallRuntimeDirect; only the matched node and the format text differ.
12452 instruct CallLeafDirect(method meth)
12453 %{
12454   match(CallLeaf);
12455   effect(USE meth);
12456 
12457   ins_cost(300);
12458   format %{ "call_leaf,runtime " %}
12459   opcode(0xE8); /* E8 cd */
12460   ins_encode(Java_To_Runtime(meth));
12461   ins_pipe(pipe_slow);
12462 %}
12463 
12464 // Call runtime without safepoint (CallLeafNoFP nodes; NOTE(review): "NoFP" presumably means no floating point -- confirm)
12465 instruct CallLeafNoFPDirect(method meth)
12466 %{
12467   match(CallLeafNoFP);  // ideal node handled by this rule
12468   effect(USE meth);
12469 
12470   ins_cost(300);
12471   format %{ "call_leaf_nofp,runtime " %}
12472   opcode(0xE8); /* E8 cd */
12473   ins_encode(Java_To_Runtime(meth));  // same encoding class as CallLeafDirect; only the matched node and format differ
12474   ins_pipe(pipe_slow);
12475 %}
12476 
12477 // Return Instruction
12478 // Remove the return address & jump to it.
12479 // Notice: We always emit a nop after a ret to make sure there is room
12480 // for safepoint patching
12481 instruct Ret()
12482 %{
12483   match(Return);  // ideal node handled by this rule
12484 
12485   format %{ "ret" %}
12486   opcode(0xC3);  // x86 near return
12487   ins_encode(OpcP);  // NOTE(review): only OpcP is listed; no nop encoding is visible in this rule -- confirm the notice above is still accurate
12488   ins_pipe(pipe_jmp);
12489 %}
12490 
12491 // Tail Call; Jump from runtime stub to Java code.
12492 // Also known as an 'interprocedural jump'.
12493 // Target of jump will eventually return to caller.
12494 // TailJump below removes the return address.
12495 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12496 %{
12497   match(TailCall jump_target method_oop);  // both operands are inputs; no result is set
12498 
12499   ins_cost(300);
12500   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12501   opcode(0xFF, 0x4); /* Opcode FF /4 */
12502   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));  // indirect jmp through jump_target; method_oop is not referenced by the encoding
12503   ins_pipe(pipe_jmp);
12504 %}
12505 
12506 // Tail Jump; remove the return address; jump to target.
12507 // TailCall above leaves the return address around.
12508 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12509 %{
12510   match(TailJump jump_target ex_oop);  // NOTE(review): the pop below overwrites rdx; confirm jump_target can never be allocated to rdx
12511 
12512   ins_cost(300);
12513   format %{ "popq    rdx\t# pop return address\n\t"
12514             "jmp     $jump_target" %}
12515   opcode(0xFF, 0x4); /* Opcode FF /4 */
12516   ins_encode(Opcode(0x5a), // popq rdx (hard-coded register; discards the return address from the stack)
12517              REX_reg(jump_target), OpcP, reg_opc(jump_target));  // then the same indirect jmp sequence as TailCalljmpInd
12518   ins_pipe(pipe_jmp);
12519 %}
12520 
12521 // Create exception oop: created by stack-crawling runtime code.
12522 // Created exception is now available to this handler, and is setup
12523 // just prior to jumping to this handler.  No code emitted.
12524 instruct CreateException(rax_RegP ex_oop)
12525 %{
12526   match(Set ex_oop (CreateEx));  // result operand is ex_oop, declared with the rax_RegP operand class
12527 
12528   size(0);  // zero-size instruction, consistent with "no code emitted"
12529   // The format string below is only descriptive text; nothing is encoded.
12530   format %{ "# exception oop is in rax; no code emitted" %}
12531   ins_encode();  // intentionally empty encoding
12532   ins_pipe(empty);
12533 %}
12534 
12535 // Rethrow exception:
12536 // The exception oop will come in the first argument position.
12537 // Then JUMP (not call) to the rethrow stub code.
12538 instruct RethrowException()
12539 %{
12540   match(Rethrow);  // ideal node handled by this rule; no operands
12541 
12542   // The format string below describes the emitted jump.
12543   format %{ "jmp     rethrow_stub" %}
12544   ins_encode(enc_rethrow);  // enc_rethrow is defined outside this section; per the format string it emits the jump to the rethrow stub
12545   ins_pipe(pipe_jmp);
12546 %}
12547 
12548 
12549 //----------PEEPHOLE RULES-----------------------------------------------------
12550 // These must follow all instruction definitions as they use the names
12551 // defined in the instructions definitions.
12552 //
12553 // peepmatch ( root_instr_name [preceding_instruction]* );
12554 //
12555 // peepconstraint %{
12556 // (instruction_number.operand_name relational_op instruction_number.operand_name
12557 //  [, ...] );
12558 // // instruction numbers are zero-based using left to right order in peepmatch
12559 //
12560 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12561 // // provide an instruction_number.operand_name for each operand that appears
12562 // // in the replacement instruction's match rule
12563 //
12564 // ---------VM FLAGS---------------------------------------------------------
12565 //
12566 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12567 //
12568 // Each peephole rule is given an identifying number starting with zero and
12569 // increasing by one in the order seen by the parser.  An individual peephole
12570 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12571 // on the command-line.
12572 //
12573 // ---------CURRENT LIMITATIONS----------------------------------------------
12574 //
12575 // Only match adjacent instructions in same basic block
12576 // Only equality constraints
12577 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12578 // Only one replacement instruction
12579 //
12580 // ---------EXAMPLE----------------------------------------------------------
12581 //
12582 // // pertinent parts of existing instructions in architecture description
12583 // instruct movI(rRegI dst, rRegI src)
12584 // %{
12585 //   match(Set dst (CopyI src));
12586 // %}
12587 //
12588 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12589 // %{
12590 //   match(Set dst (AddI dst src));
12591 //   effect(KILL cr);
12592 // %}
12593 //
12594 // // Change (inc mov) to lea
12595 // peephole %{
12596 //   // increment preceded by register-register move
12597 //   peepmatch ( incI_rReg movI );
12598 //   // require that the destination register of the increment
12599 //   // match the destination register of the move
12600 //   peepconstraint ( 0.dst == 1.dst );
12601 //   // construct a replacement instruction that sets
12602 //   // the destination to ( move's source register + one )
12603 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12604 // %}
12605 //
12606 
12607 // Implementation no longer uses movX instructions since
12608 // machine-independent system no longer uses CopyX nodes.
12609 //
12610 // peephole
12611 // %{
12612 //   peepmatch (incI_rReg movI);
12613 //   peepconstraint (0.dst == 1.dst);
12614 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12615 // %}
12616 
12617 // peephole
12618 // %{
12619 //   peepmatch (decI_rReg movI);
12620 //   peepconstraint (0.dst == 1.dst);
12621 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12622 // %}
12623 
12624 // peephole
12625 // %{
12626 //   peepmatch (addI_rReg_imm movI);
12627 //   peepconstraint (0.dst == 1.dst);
12628 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12629 // %}
12630 
12631 // peephole
12632 // %{
12633 //   peepmatch (incL_rReg movL);
12634 //   peepconstraint (0.dst == 1.dst);
12635 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12636 // %}
12637 
12638 // peephole
12639 // %{
12640 //   peepmatch (decL_rReg movL);
12641 //   peepconstraint (0.dst == 1.dst);
12642 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12643 // %}
12644 
12645 // peephole
12646 // %{
12647 //   peepmatch (addL_rReg_imm movL);
12648 //   peepconstraint (0.dst == 1.dst);
12649 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12650 // %}
12651 
12652 // peephole
12653 // %{
12654 //   peepmatch (addP_rReg_imm movP);
12655 //   peepconstraint (0.dst == 1.dst);
12656 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12657 // %}
12658 
12659 // // Change load of spilled value to only a spill
12660 // instruct storeI(memory mem, rRegI src)
12661 // %{
12662 //   match(Set mem (StoreI mem src));
12663 // %}
12664 //
12665 // instruct loadI(rRegI dst, memory mem)
12666 // %{
12667 //   match(Set dst (LoadI mem));
12668 // %}
12669 //
12670 
12671 peephole
12672 %{
12673   peepmatch (loadI storeI);  // instruction 0 = loadI (root), instruction 1 = the storeI preceding it
12674   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // load reads back, into the same register, the location just stored
12675   peepreplace (storeI(1.mem 1.mem 1.src));  // keep only the store; one operand per operand of storeI's match rule
12676 %}
12677 
12678 peephole
12679 %{
12680   peepmatch (loadL storeL);  // instruction 0 = loadL (root), instruction 1 = the storeL preceding it
12681   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // load reads back, into the same register, the location just stored
12682   peepreplace (storeL(1.mem 1.mem 1.src));  // keep only the store; same operand layout as the storeI rule above
12683 %}
12684 
12685 //----------SMARTSPILL RULES---------------------------------------------------
12686 // These must follow all instruction definitions as they use the names
12687 // defined in the instructions definitions.