1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
// Class for all pointer registers except RBP (RSP, R12 and R15 are not
// listed either)
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP, R12 and R15)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP, R12 and R15)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short names for the Assembler operand-format tags.  RELOC_DISP32 is passed
// as the 'format' argument of the emit_d32_reloc helpers in this block.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand so that "__ insn(...)" expands to "_masm.insn(...)"; it requires
// a MacroAssembler object named _masm to be in scope at the point of use.
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
 613 #ifndef PRODUCT
// Non-PRODUCT listing text for a breakpoint node: prints the mnemonic "INT3"
// to the given stream.  The register-allocator argument is unused.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("INT3");
}
 618 #endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
// EMIT_D16()
// Append d16 to the instruction section as a 16-bit value via emit_int16.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
 654 
// EMIT_D32()
// Append d32 to the instruction section as a 32-bit value via emit_int32.
// No relocation is recorded; use emit_d32_reloc for that.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}
 659 
// EMIT_D64()
// Append d64 to the instruction section as a 64-bit value via emit_int64.
// No relocation is recorded; use emit_d64_reloc for that.
void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  cbuf.insts()->emit_int64(d64);
}
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM byte, the optional SIB byte and the optional displacement
// for a "register, memory" operand pair.  Only these addressing bytes are
// produced; no prefix or opcode byte is emitted here.
//
//   reg          register number; only its low 3 bits are encoded
//   base         base register number, or -1 as a flag for an absolute
//                address (see the NOTE in the no-SIB branch)
//   index/scale  index register number and scale field; index == 0x4 with
//                scale == 0 is treated as "no index"
//   disp         displacement; emitted as 8 or 32 bits, or omitted when zero
//   disp_is_oop  must be false (asserted below)
void encode_RegMem(CodeBuffer &cbuf,
                   int reg,
                   int base, int index, int scale, int disp, bool disp_is_oop)
{
  // With this assert enabled, every disp_is_oop branch further down is
  // unreachable.
  assert(!disp_is_oop, "cannot have disp");
  // Only the low three bits of each register number fit in ModRM/SIB fields.
  // NOTE(review): the high bit presumably travels in a REX prefix emitted by
  // the caller -- confirm at the call sites.
  int regenc = reg & 7;
  int baseenc = base & 7;
  int indexenc = index & 7;

  // There is no index & no scale, use form without SIB byte
  // (RSP and R12 as base are excluded and fall through to the SIB form.)
  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      emit_rm(cbuf, 0x0, regenc, baseenc); // *
    } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
      // If 8-bit displacement, mode 0x1
      emit_rm(cbuf, 0x1, regenc, baseenc); // *
      emit_d8(cbuf, disp);
    } else {
      // If 32-bit displacement
      // NOTE(review): the base == -1 test is only reached when disp did not
      // take the zero or 8-bit paths above; with a small disp and base == -1
      // those paths encode baseenc (== 7) instead -- confirm this is intended.
      if (base == -1) { // Special flag for absolute address
        emit_rm(cbuf, 0x0, regenc, 0x5); // *
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      } else {
        // Normal base + offset
        emit_rm(cbuf, 0x2, regenc, baseenc); // *
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  } else {
    // Else, encode with the SIB byte
    // (ModRM r/m field is 0x4 in every case below, followed by a SIB byte
    //  built as scale/index/base.)
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      // If no displacement
      emit_rm(cbuf, 0x0, regenc, 0x4); // *
      emit_rm(cbuf, scale, indexenc, baseenc);
    } else {
      if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
        // If 8-bit displacement, mode 0x1
        emit_rm(cbuf, 0x1, regenc, 0x4); // *
        emit_rm(cbuf, scale, indexenc, baseenc);
        emit_d8(cbuf, disp);
      } else {
        // If 32-bit displacement
        // Both arms below emit the same bytes: when base == 0x04, baseenc
        // (base & 7) is also 0x04.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
        } else {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, baseenc); // *
        }
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  }
}
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   // Emits opcode 0x8B (MOV r, r/m) with a register-direct ModRM byte that
 803   // copies srcenc into dstenc.  A self-copy emits nothing.
 804   if (dstenc == srcenc) {
 805     return;
 806   }
 807 
 808   // Encodings of 8 and above need a REX prefix; the ModRM byte then
 809   // carries the encoding reduced by 8.
 810   const bool dst_ext = (dstenc >= 8);
 811   const bool src_ext = (srcenc >= 8);
 812   if (dst_ext) {
 813     emit_opcode(cbuf, src_ext ? Assembler::REX_RB : Assembler::REX_R);
 814   } else if (src_ext) {
 815     emit_opcode(cbuf, Assembler::REX_B);
 816   }
 817   const int reg_bits = dst_ext ? dstenc - 8 : dstenc;
 818   const int rm_bits = src_ext ? srcenc - 8 : srcenc;
 819   emit_opcode(cbuf, 0x8B);
 820   emit_rm(cbuf, 0x3, reg_bits, rm_bits);
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   // A register copied onto itself needs no instruction at all.
 825   if( dst_encoding == src_encoding ) {
 826     return;
 827   }
 828 
 829   MacroAssembler _masm(&cbuf);
 830   __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 831 }
 832 
 833 // This could be in MacroAssembler but it's fairly C2 specific
     //
     // Emits a flags fixup that only takes effect when the parity flag is set:
     // the flags are pushed, masked in place on the stack, and popped back.
     // With parity clear the short branch skips the whole sequence.
     // NOTE(review): presumably used after a floating-point compare, where
     // parity set signals an unordered result -- confirm at the call sites.
 834 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 835   Label exit;
 836   __ jccb(Assembler::noParity, exit);
 837   __ pushf();
       // 0xffffff2b clears bits 2, 4, 6 and 7 of the saved flags image (PF, AF,
       // ZF and SF in the x86 EFLAGS layout) and leaves bit 0 (CF) untouched.
 838   __ andq(Address(rsp, 0), 0xffffff2b);
 839   __ popf();
 840   __ bind(exit);
 841   __ nop(); // (target for branch to avoid branch to branch)
 842 }
 843 
 844 
 845 //=============================================================================
 846 const bool Matcher::constant_table_absolute_addressing = true; // NOTE(review): presumably means constants are reached by absolute address rather than via a base register -- confirm
     // The constant base node's output register mask is the empty mask.
 847 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 848 
 849 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 850   // Empty encoding
       // Nothing is written to cbuf; neither parameter is used.
 851 }
 852 
 853 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
       // The node has an empty encoding, so it is reported as 0 bytes.
 854   return 0;
 855 }
 856 
 857 #ifndef PRODUCT
     // Compiled only when PRODUCT is not defined.  Prints a fixed one-line
     // placeholder to st, since the node has no instruction to show.
 858 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 859   st->print("# MachConstantBaseNode (empty encoding)");
 860 }
 861 #endif
 862 
 863 
 864 //=============================================================================
 865 #ifndef PRODUCT
 866 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 867 {
 868   // Debug listing of the method prolog: optional stack bang, push of RBP,
 869   // optional stack-depth cookie, then the rsp adjustment for the frame.
 870   Compile* C = ra_->C;
 871   const int frame_bytes = C->frame_slots() << LogBytesPerInt;
 872   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 873 
 874   // The return address is already on the stack and RBP is pushed below, so
 875   // neither of those two words belongs to the explicit rsp adjustment.
 876   int to_alloc = frame_bytes - 2*wordSize;
 877 
 878   // Callers bang the stack on behalf of C2R adapters, which often cannot
 879   // take exceptional returns.  See bugs 4446381, 4468289, 4497237.
 880   const bool banged = C->need_stack_bang(to_alloc);
 881   if (banged) {
 882     st->print_cr("# stack bang");
 883     st->print("\t");
 884   }
 885   st->print_cr("pushq   rbp");
 886   st->print("\t");
 887 
 888   if (VerifyStackAtCalls) {
 889     // Majik cookie to verify stack depth; it takes one more word.
 890     st->print_cr("pushq   0xffffffffbadb100d"
 891                   "\t# Majik cookie for stack depth check");
 892     st->print("\t");
 893     to_alloc -= wordSize;
 894   }
 895 
 896   if (to_alloc != 0) {
 897     st->print("subq    rsp, #%d\t# Create frame", to_alloc);
 898     // The padding nop is listed only after a bare short-form subq.
 899     if (to_alloc < 0x80 && !banged && !VerifyStackAtCalls) {
 900       st->print("\n\tnop\t# nop for patch_verified_entry");
 901     }
 902   }
 903 }
 904 #endif
 905 
 906 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 907 {
       // Emits the method prolog into cbuf: an optional stack overflow check,
       // a push of RBP, an optional stack-depth cookie (VerifyStackAtCalls) and
       // the rsp adjustment that allocates the rest of the frame.  The code
       // size reached at that point is then recorded as "frame complete".
 908   Compile* C = ra_->C;
 909 
 910   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 911   // NativeJump::patch_verified_entry will be able to patch out the entry
 912   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 913   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 914   // 6 bytes. So if we don't do the fldcw or the push then we must
 915   // use the 6 byte frame allocation even if we have no frame. :-(
 916   // If method sets FPU control word do it now
       //
       // NOTE(review): no fldcw is emitted in this function, and the frame
       // allocation below is REX.W + opcode + ModRM + imm8 or imm32, so the
       // byte counts quoted above look stale -- confirm against
       // NativeJump::patch_verified_entry.
 917 
 918   int framesize = C->frame_slots() << LogBytesPerInt;
 919   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 920   // Remove wordSize for return adr already pushed
 921   // and another for the RBP we are going to save
 922   framesize -= 2*wordSize;
       // Stays true only if neither the stack bang nor the cookie push is emitted.
 923   bool need_nop = true;
 924 
 925   // Calls to C2R adapters often do not accept exceptional returns.
 926   // We require that their callers must bang for them.  But be
 927   // careful, because some VM calls (such as call site linkage) can
 928   // use several kilobytes of stack.  But the stack safety zone should
 929   // account for that.  See bugs 4446381, 4468289, 4497237.
 930   if (C->need_stack_bang(framesize)) {
 931     MacroAssembler masm(&cbuf);
 932     masm.generate_stack_overflow_check(framesize);
 933     need_nop = false;
 934   }
 935 
 936   // We always push rbp so that on return to interpreter rbp will be
 937   // restored correctly and we can correct the stack.
 938   emit_opcode(cbuf, 0x50 | RBP_enc);
 939 
 940   if (VerifyStackAtCalls) {
 941     // Majik cookie to verify stack depth
 942     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 943     emit_d32(cbuf, 0xbadb100d);
 944     framesize -= wordSize; // Remove one word (wordSize bytes) for the cookie just pushed
 945     need_nop = false;
 946   }
 947 
 948   if (framesize) {
 949     emit_opcode(cbuf, Assembler::REX_W);
 950     if (framesize < 0x80) {
           // Short form: the frame size fits the one-byte immediate.
 951       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 952       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 953       emit_d8(cbuf, framesize);
 954       if (need_nop) {
             // Pad the short form when nothing else preceded it (see WARNING above).
 955         emit_opcode(cbuf, 0x90); // nop
 956       }
 957     } else {
           // Long form: 32-bit immediate.
 958       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 959       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 960       emit_d32(cbuf, framesize);
 961     }
 962   }
 963 
       // Record the instruction-section size reached once the frame is set up.
 964   C->set_frame_complete(cbuf.insts_size());
 965 
 966 #ifdef ASSERT
 967   if (VerifyStackAtCalls) {
         // Debug-only check: with rax temporarily pushed, rsp masked by
         // StackAlignmentInBytes-1 must equal StackAlignmentInBytes-wordSize;
         // otherwise masm.stop() is reached with the message below.
 968     Label L;
 969     MacroAssembler masm(&cbuf);
 970     masm.push(rax);
 971     masm.mov(rax, rsp);
 972     masm.andptr(rax, StackAlignmentInBytes-1);
 973     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 974     masm.pop(rax);
 975     masm.jcc(Assembler::equal, L);
 976     masm.stop("Stack is not properly aligned!");
 977     masm.bind(L);
 978   }
 979 #endif
 980 }
 981 
 982 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 983 {
       // No closed-form size is computed here; the call is forwarded to the
       // generic MachNode::size().
 984   return MachNode::size(ra_); // too many variables; just compute it
 985                               // the hard way
 986 }
 987 
 988 int MachPrologNode::reloc() const
 989 {
       // NOTE(review): the trailing comment says "large enough", yet the value
       // returned is 0.  emit() adds no explicit relocation itself; whether the
       // MacroAssembler helpers it calls add any is not visible here -- confirm.
 990   return 0; // a large enough number
 991 }
 992 
 993 //=============================================================================
 994 #ifndef PRODUCT
 995 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 996 {
 997   // Debug listing of the epilog: frame teardown, RBP restore, optional poll.
 998   Compile* C = ra_->C;
 999   const int frame_bytes = C->frame_slots() << LogBytesPerInt;
1000   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1001   // The return address and the saved RBP are not released by the addq.
1002   const int to_pop = frame_bytes - 2*wordSize;
1003 
1004   if (to_pop != 0) {
1005     st->print_cr("addq    rsp, %d\t# Destroy frame", to_pop);
1006     st->print("\t");
1007   }
1008   st->print_cr("popq   rbp");
1009 
1010   if (!do_polling() || !C->is_method_compilation()) {
1011     return; // no return poll to show
1012   }
1013   st->print("\t");
1014   const char* poll_text = Assembler::is_polling_page_far()
1015     ? "movq   rscratch1, #polling_page_address\n\t"
1016       "testl  rax, [rscratch1]\t"
1017       "# Safepoint: poll for GC"
1018     : "testl  rax, [rip + #offset_to_poll_page]\t"
1019       "# Safepoint: poll for GC";
1020   st->print_cr("%s", poll_text);
1021 }
1022 #endif
1023 
1024 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1025 {
       // Emits the method epilog into cbuf: an addq that releases the frame
       // (skipped when nothing is left to release), a pop of RBP and, when
       // polling is enabled for a method compilation, a poll of the polling
       // page.
1026   Compile* C = ra_->C;
1027   int framesize = C->frame_slots() << LogBytesPerInt;
1028   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1029   // Remove word for return adr already pushed
1030   // and RBP
1031   framesize -= 2*wordSize;
1032 
1033   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1034 
1035   if (framesize) {
1036     emit_opcode(cbuf, Assembler::REX_W);
1037     if (framesize < 0x80) {
           // Short form: the frame size fits the one-byte immediate.
1038       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1039       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1040       emit_d8(cbuf, framesize);
1041     } else {
           // Long form: 32-bit immediate.
1042       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1043       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1044       emit_d32(cbuf, framesize);
1045     }
1046   }
1047 
1048   // popq rbp
1049   emit_opcode(cbuf, 0x58 | RBP_enc);
1050 
1051   if (do_polling() && C->is_method_compilation()) {
         // The poll is a testl against the polling page, tagged with a
         // poll_return_type relocation.
1052     MacroAssembler _masm(&cbuf);
1053     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1054     if (Assembler::is_polling_page_far()) {
           // Far page: load its address into rscratch1 and test through it.
1055       __ lea(rscratch1, polling_page);
1056       __ relocate(relocInfo::poll_return_type);
1057       __ testl(rax, Address(rscratch1, 0));
1058     } else {
           // Near page: test against the address literal directly.
1059       __ testl(rax, polling_page);
1060     }
1061   }
1062 }
1063 
1064 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1065 {
       // No closed-form size is computed here; the call is forwarded to the
       // generic MachNode::size().
1066   return MachNode::size(ra_); // too many variables; just compute it
1067                               // the hard way
1068 }
1069 
1070 int MachEpilogNode::reloc() const
1071 {
       // NOTE(review): presumably an upper bound covering the poll_return_type
       // relocation(s) that emit() can produce for the return poll -- confirm.
1072   return 2; // a large enough number
1073 }
1074 
1075 const Pipeline* MachEpilogNode::pipeline() const
1076 {
       // The epilog has no pipeline description of its own; it returns
       // whatever MachNode::pipeline_class() provides.
1077   return MachNode::pipeline_class();
1078 }
1079 
1080 int MachEpilogNode::safepoint_offset() const
1081 {
       // Always reports offset 0.
       // NOTE(review): how callers interpret this offset is not visible here.
1082   return 0;
1083 }
1084 
1085 //=============================================================================
1086 
1087 enum RC {
       // Register classes produced by rc_class() and used when choosing a
       // spill-copy encoding.
1088   rc_bad,    // the OptoReg name is not valid
1089   rc_int,    // general purpose register (VMReg::is_Register())
1090   rc_float,  // XMM register (VMReg::is_XMMRegister())
1091   rc_stack   // stack slot (OptoReg::is_stack())
1092 };
1093 
1094 static enum RC rc_class(OptoReg::Name reg)
1095 {
1096   // Classify an allocator register name: invalid, stack slot, general
1097   // purpose register, or (the only remaining possibility) XMM register.
1098   if (!OptoReg::is_valid(reg)) {
1099     return rc_bad;
1100   } else if (OptoReg::is_stack(reg)) {
1101     return rc_stack;
1102   }
1103   VMReg vmreg = OptoReg::as_VMReg(reg);
1104   assert(vmreg->is_Register() || vmreg->is_XMMRegister(), "must be");
1105   return vmreg->is_Register() ? rc_int : rc_float;
1106 }
1107 
1108 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1109                                        PhaseRegAlloc* ra_,
1110                                        bool do_size,
1111                                        outputStream* st) const
1112 {
1113 
1114   // Get registers to move
1115   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1116   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1117   OptoReg::Name dst_second = ra_->get_reg_second(this);
1118   OptoReg::Name dst_first = ra_->get_reg_first(this);
1119 
1120   enum RC src_second_rc = rc_class(src_second);
1121   enum RC src_first_rc = rc_class(src_first);
1122   enum RC dst_second_rc = rc_class(dst_second);
1123   enum RC dst_first_rc = rc_class(dst_first);
1124 
1125   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1126          "must move at least 1 register" );
1127 
1128   if (src_first == dst_first && src_second == dst_second) {
1129     // Self copy, no move
1130     return 0;
1131   } else if (src_first_rc == rc_stack) {
1132     // mem ->
1133     if (dst_first_rc == rc_stack) {
1134       // mem -> mem
1135       assert(src_second != dst_first, "overlap");
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int src_offset = ra_->reg2offset(src_first);
1140         int dst_offset = ra_->reg2offset(dst_first);
1141         if (cbuf) {
1142           emit_opcode(*cbuf, 0xFF);
1143           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1144 
1145           emit_opcode(*cbuf, 0x8F);
1146           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1147 
1148 #ifndef PRODUCT
1149         } else if (!do_size) {
1150           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1151                      "popq    [rsp + #%d]",
1152                      src_offset,
1153                      dst_offset);
1154 #endif
1155         }
1156         return
1157           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1158           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1159       } else {
1160         // 32-bit
1161         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1162         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1163         // No pushl/popl, so:
1164         int src_offset = ra_->reg2offset(src_first);
1165         int dst_offset = ra_->reg2offset(dst_first);
1166         if (cbuf) {
1167           emit_opcode(*cbuf, Assembler::REX_W);
1168           emit_opcode(*cbuf, 0x89);
1169           emit_opcode(*cbuf, 0x44);
1170           emit_opcode(*cbuf, 0x24);
1171           emit_opcode(*cbuf, 0xF8);
1172 
1173           emit_opcode(*cbuf, 0x8B);
1174           encode_RegMem(*cbuf,
1175                         RAX_enc,
1176                         RSP_enc, 0x4, 0, src_offset,
1177                         false);
1178 
1179           emit_opcode(*cbuf, 0x89);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, dst_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, Assembler::REX_W);
1186           emit_opcode(*cbuf, 0x8B);
1187           emit_opcode(*cbuf, 0x44);
1188           emit_opcode(*cbuf, 0x24);
1189           emit_opcode(*cbuf, 0xF8);
1190 
1191 #ifndef PRODUCT
1192         } else if (!do_size) {
1193           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1194                      "movl    rax, [rsp + #%d]\n\t"
1195                      "movl    [rsp + #%d], rax\n\t"
1196                      "movq    rax, [rsp - #8]",
1197                      src_offset,
1198                      dst_offset);
1199 #endif
1200         }
1201         return
1202           5 + // movq
1203           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1204           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1205           5; // movq
1206       }
1207     } else if (dst_first_rc == rc_int) {
1208       // mem -> gpr
1209       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1210           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1211         // 64-bit
1212         int offset = ra_->reg2offset(src_first);
1213         if (cbuf) {
1214           if (Matcher::_regEncode[dst_first] < 8) {
1215             emit_opcode(*cbuf, Assembler::REX_W);
1216           } else {
1217             emit_opcode(*cbuf, Assembler::REX_WR);
1218           }
1219           emit_opcode(*cbuf, 0x8B);
1220           encode_RegMem(*cbuf,
1221                         Matcher::_regEncode[dst_first],
1222                         RSP_enc, 0x4, 0, offset,
1223                         false);
1224 #ifndef PRODUCT
1225         } else if (!do_size) {
1226           st->print("movq    %s, [rsp + #%d]\t# spill",
1227                      Matcher::regName[dst_first],
1228                      offset);
1229 #endif
1230         }
1231         return
1232           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1233       } else {
1234         // 32-bit
1235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1237         int offset = ra_->reg2offset(src_first);
1238         if (cbuf) {
1239           if (Matcher::_regEncode[dst_first] >= 8) {
1240             emit_opcode(*cbuf, Assembler::REX_R);
1241           }
1242           emit_opcode(*cbuf, 0x8B);
1243           encode_RegMem(*cbuf,
1244                         Matcher::_regEncode[dst_first],
1245                         RSP_enc, 0x4, 0, offset,
1246                         false);
1247 #ifndef PRODUCT
1248         } else if (!do_size) {
1249           st->print("movl    %s, [rsp + #%d]\t# spill",
1250                      Matcher::regName[dst_first],
1251                      offset);
1252 #endif
1253         }
1254         return
1255           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1256           ((Matcher::_regEncode[dst_first] < 8)
1257            ? 3
1258            : 4); // REX
1259       }
1260     } else if (dst_first_rc == rc_float) {
1261       // mem-> xmm
1262       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1263           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1264         // 64-bit
1265         int offset = ra_->reg2offset(src_first);
1266         if (cbuf) {
1267           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1268           if (Matcher::_regEncode[dst_first] >= 8) {
1269             emit_opcode(*cbuf, Assembler::REX_R);
1270           }
1271           emit_opcode(*cbuf, 0x0F);
1272           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1273           encode_RegMem(*cbuf,
1274                         Matcher::_regEncode[dst_first],
1275                         RSP_enc, 0x4, 0, offset,
1276                         false);
1277 #ifndef PRODUCT
1278         } else if (!do_size) {
1279           st->print("%s  %s, [rsp + #%d]\t# spill",
1280                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1281                      Matcher::regName[dst_first],
1282                      offset);
1283 #endif
1284         }
1285         return
1286           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1287           ((Matcher::_regEncode[dst_first] < 8)
1288            ? 5
1289            : 6); // REX
1290       } else {
1291         // 32-bit
1292         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1293         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1294         int offset = ra_->reg2offset(src_first);
1295         if (cbuf) {
1296           emit_opcode(*cbuf, 0xF3);
1297           if (Matcher::_regEncode[dst_first] >= 8) {
1298             emit_opcode(*cbuf, Assembler::REX_R);
1299           }
1300           emit_opcode(*cbuf, 0x0F);
1301           emit_opcode(*cbuf, 0x10);
1302           encode_RegMem(*cbuf,
1303                         Matcher::_regEncode[dst_first],
1304                         RSP_enc, 0x4, 0, offset,
1305                         false);
1306 #ifndef PRODUCT
1307         } else if (!do_size) {
1308           st->print("movss   %s, [rsp + #%d]\t# spill",
1309                      Matcher::regName[dst_first],
1310                      offset);
1311 #endif
1312         }
1313         return
1314           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1315           ((Matcher::_regEncode[dst_first] < 8)
1316            ? 5
1317            : 6); // REX
1318       }
1319     }
1320   } else if (src_first_rc == rc_int) {
1321     // gpr ->
1322     if (dst_first_rc == rc_stack) {
1323       // gpr -> mem
1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1326         // 64-bit
1327         int offset = ra_->reg2offset(dst_first);
1328         if (cbuf) {
1329           if (Matcher::_regEncode[src_first] < 8) {
1330             emit_opcode(*cbuf, Assembler::REX_W);
1331           } else {
1332             emit_opcode(*cbuf, Assembler::REX_WR);
1333           }
1334           emit_opcode(*cbuf, 0x89);
1335           encode_RegMem(*cbuf,
1336                         Matcher::_regEncode[src_first],
1337                         RSP_enc, 0x4, 0, offset,
1338                         false);
1339 #ifndef PRODUCT
1340         } else if (!do_size) {
1341           st->print("movq    [rsp + #%d], %s\t# spill",
1342                      offset,
1343                      Matcher::regName[src_first]);
1344 #endif
1345         }
1346         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1347       } else {
1348         // 32-bit
1349         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1350         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1351         int offset = ra_->reg2offset(dst_first);
1352         if (cbuf) {
1353           if (Matcher::_regEncode[src_first] >= 8) {
1354             emit_opcode(*cbuf, Assembler::REX_R);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movl    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return
1369           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1370           ((Matcher::_regEncode[src_first] < 8)
1371            ? 3
1372            : 4); // REX
1373       }
1374     } else if (dst_first_rc == rc_int) {
1375       // gpr -> gpr
1376       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1377           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1378         // 64-bit
1379         if (cbuf) {
1380           if (Matcher::_regEncode[dst_first] < 8) {
1381             if (Matcher::_regEncode[src_first] < 8) {
1382               emit_opcode(*cbuf, Assembler::REX_W);
1383             } else {
1384               emit_opcode(*cbuf, Assembler::REX_WB);
1385             }
1386           } else {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_WR);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WRB);
1391             }
1392           }
1393           emit_opcode(*cbuf, 0x8B);
1394           emit_rm(*cbuf, 0x3,
1395                   Matcher::_regEncode[dst_first] & 7,
1396                   Matcher::_regEncode[src_first] & 7);
1397 #ifndef PRODUCT
1398         } else if (!do_size) {
1399           st->print("movq    %s, %s\t# spill",
1400                      Matcher::regName[dst_first],
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404         return 3; // REX
1405       } else {
1406         // 32-bit
1407         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1408         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1409         if (cbuf) {
1410           if (Matcher::_regEncode[dst_first] < 8) {
1411             if (Matcher::_regEncode[src_first] >= 8) {
1412               emit_opcode(*cbuf, Assembler::REX_B);
1413             }
1414           } else {
1415             if (Matcher::_regEncode[src_first] < 8) {
1416               emit_opcode(*cbuf, Assembler::REX_R);
1417             } else {
1418               emit_opcode(*cbuf, Assembler::REX_RB);
1419             }
1420           }
1421           emit_opcode(*cbuf, 0x8B);
1422           emit_rm(*cbuf, 0x3,
1423                   Matcher::_regEncode[dst_first] & 7,
1424                   Matcher::_regEncode[src_first] & 7);
1425 #ifndef PRODUCT
1426         } else if (!do_size) {
1427           st->print("movl    %s, %s\t# spill",
1428                      Matcher::regName[dst_first],
1429                      Matcher::regName[src_first]);
1430 #endif
1431         }
1432         return
1433           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1434           ? 2
1435           : 3; // REX
1436       }
1437     } else if (dst_first_rc == rc_float) {
1438       // gpr -> xmm
1439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1441         // 64-bit
1442         if (cbuf) {
1443           emit_opcode(*cbuf, 0x66);
1444           if (Matcher::_regEncode[dst_first] < 8) {
1445             if (Matcher::_regEncode[src_first] < 8) {
1446               emit_opcode(*cbuf, Assembler::REX_W);
1447             } else {
1448               emit_opcode(*cbuf, Assembler::REX_WB);
1449             }
1450           } else {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_WR);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WRB);
1455             }
1456           }
1457           emit_opcode(*cbuf, 0x0F);
1458           emit_opcode(*cbuf, 0x6E);
1459           emit_rm(*cbuf, 0x3,
1460                   Matcher::_regEncode[dst_first] & 7,
1461                   Matcher::_regEncode[src_first] & 7);
1462 #ifndef PRODUCT
1463         } else if (!do_size) {
1464           st->print("movdq   %s, %s\t# spill",
1465                      Matcher::regName[dst_first],
1466                      Matcher::regName[src_first]);
1467 #endif
1468         }
1469         return 5; // REX
1470       } else {
1471         // 32-bit
1472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1474         if (cbuf) {
1475           emit_opcode(*cbuf, 0x66);
1476           if (Matcher::_regEncode[dst_first] < 8) {
1477             if (Matcher::_regEncode[src_first] >= 8) {
1478               emit_opcode(*cbuf, Assembler::REX_B);
1479             }
1480           } else {
1481             if (Matcher::_regEncode[src_first] < 8) {
1482               emit_opcode(*cbuf, Assembler::REX_R);
1483             } else {
1484               emit_opcode(*cbuf, Assembler::REX_RB);
1485             }
1486           }
1487           emit_opcode(*cbuf, 0x0F);
1488           emit_opcode(*cbuf, 0x6E);
1489           emit_rm(*cbuf, 0x3,
1490                   Matcher::_regEncode[dst_first] & 7,
1491                   Matcher::_regEncode[src_first] & 7);
1492 #ifndef PRODUCT
1493         } else if (!do_size) {
1494           st->print("movdl   %s, %s\t# spill",
1495                      Matcher::regName[dst_first],
1496                      Matcher::regName[src_first]);
1497 #endif
1498         }
1499         return
1500           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1501           ? 4
1502           : 5; // REX
1503       }
1504     }
1505   } else if (src_first_rc == rc_float) {
1506     // xmm ->
1507     if (dst_first_rc == rc_stack) {
1508       // xmm -> mem
1509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1511         // 64-bit
1512         int offset = ra_->reg2offset(dst_first);
1513         if (cbuf) {
1514           emit_opcode(*cbuf, 0xF2);
1515           if (Matcher::_regEncode[src_first] >= 8) {
1516               emit_opcode(*cbuf, Assembler::REX_R);
1517           }
1518           emit_opcode(*cbuf, 0x0F);
1519           emit_opcode(*cbuf, 0x11);
1520           encode_RegMem(*cbuf,
1521                         Matcher::_regEncode[src_first],
1522                         RSP_enc, 0x4, 0, offset,
1523                         false);
1524 #ifndef PRODUCT
1525         } else if (!do_size) {
1526           st->print("movsd   [rsp + #%d], %s\t# spill",
1527                      offset,
1528                      Matcher::regName[src_first]);
1529 #endif
1530         }
1531         return
1532           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1533           ((Matcher::_regEncode[src_first] < 8)
1534            ? 5
1535            : 6); // REX
1536       } else {
1537         // 32-bit
1538         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1539         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1540         int offset = ra_->reg2offset(dst_first);
1541         if (cbuf) {
1542           emit_opcode(*cbuf, 0xF3);
1543           if (Matcher::_regEncode[src_first] >= 8) {
1544               emit_opcode(*cbuf, Assembler::REX_R);
1545           }
1546           emit_opcode(*cbuf, 0x0F);
1547           emit_opcode(*cbuf, 0x11);
1548           encode_RegMem(*cbuf,
1549                         Matcher::_regEncode[src_first],
1550                         RSP_enc, 0x4, 0, offset,
1551                         false);
1552 #ifndef PRODUCT
1553         } else if (!do_size) {
1554           st->print("movss   [rsp + #%d], %s\t# spill",
1555                      offset,
1556                      Matcher::regName[src_first]);
1557 #endif
1558         }
1559         return
1560           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1561           ((Matcher::_regEncode[src_first] < 8)
1562            ? 5
1563            : 6); // REX
1564       }
1565     } else if (dst_first_rc == rc_int) {
1566       // xmm -> gpr
1567       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1568           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1569         // 64-bit
1570         if (cbuf) {
1571           emit_opcode(*cbuf, 0x66);
1572           if (Matcher::_regEncode[dst_first] < 8) {
1573             if (Matcher::_regEncode[src_first] < 8) {
1574               emit_opcode(*cbuf, Assembler::REX_W);
1575             } else {
1576               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1577             }
1578           } else {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WRB);
1583             }
1584           }
1585           emit_opcode(*cbuf, 0x0F);
1586           emit_opcode(*cbuf, 0x7E);
1587           emit_rm(*cbuf, 0x3,
1588                   Matcher::_regEncode[src_first] & 7,
1589                   Matcher::_regEncode[dst_first] & 7);
1590 #ifndef PRODUCT
1591         } else if (!do_size) {
1592           st->print("movdq   %s, %s\t# spill",
1593                      Matcher::regName[dst_first],
1594                      Matcher::regName[src_first]);
1595 #endif
1596         }
1597         return 5; // REX
1598       } else {
1599         // 32-bit
1600         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1601         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1602         if (cbuf) {
1603           emit_opcode(*cbuf, 0x66);
1604           if (Matcher::_regEncode[dst_first] < 8) {
1605             if (Matcher::_regEncode[src_first] >= 8) {
1606               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1607             }
1608           } else {
1609             if (Matcher::_regEncode[src_first] < 8) {
1610               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1611             } else {
1612               emit_opcode(*cbuf, Assembler::REX_RB);
1613             }
1614           }
1615           emit_opcode(*cbuf, 0x0F);
1616           emit_opcode(*cbuf, 0x7E);
1617           emit_rm(*cbuf, 0x3,
1618                   Matcher::_regEncode[src_first] & 7,
1619                   Matcher::_regEncode[dst_first] & 7);
1620 #ifndef PRODUCT
1621         } else if (!do_size) {
1622           st->print("movdl   %s, %s\t# spill",
1623                      Matcher::regName[dst_first],
1624                      Matcher::regName[src_first]);
1625 #endif
1626         }
1627         return
1628           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1629           ? 4
1630           : 5; // REX
1631       }
1632     } else if (dst_first_rc == rc_float) {
1633       // xmm -> xmm
1634       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1635           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1636         // 64-bit
1637         if (cbuf) {
1638           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1639           if (Matcher::_regEncode[dst_first] < 8) {
1640             if (Matcher::_regEncode[src_first] >= 8) {
1641               emit_opcode(*cbuf, Assembler::REX_B);
1642             }
1643           } else {
1644             if (Matcher::_regEncode[src_first] < 8) {
1645               emit_opcode(*cbuf, Assembler::REX_R);
1646             } else {
1647               emit_opcode(*cbuf, Assembler::REX_RB);
1648             }
1649           }
1650           emit_opcode(*cbuf, 0x0F);
1651           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1652           emit_rm(*cbuf, 0x3,
1653                   Matcher::_regEncode[dst_first] & 7,
1654                   Matcher::_regEncode[src_first] & 7);
1655 #ifndef PRODUCT
1656         } else if (!do_size) {
1657           st->print("%s  %s, %s\t# spill",
1658                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1659                      Matcher::regName[dst_first],
1660                      Matcher::regName[src_first]);
1661 #endif
1662         }
1663         return
1664           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1665           ? 4
1666           : 5; // REX
1667       } else {
1668         // 32-bit
1669         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1670         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1671         if (cbuf) {
1672           if (!UseXmmRegToRegMoveAll)
1673             emit_opcode(*cbuf, 0xF3);
1674           if (Matcher::_regEncode[dst_first] < 8) {
1675             if (Matcher::_regEncode[src_first] >= 8) {
1676               emit_opcode(*cbuf, Assembler::REX_B);
1677             }
1678           } else {
1679             if (Matcher::_regEncode[src_first] < 8) {
1680               emit_opcode(*cbuf, Assembler::REX_R);
1681             } else {
1682               emit_opcode(*cbuf, Assembler::REX_RB);
1683             }
1684           }
1685           emit_opcode(*cbuf, 0x0F);
1686           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1687           emit_rm(*cbuf, 0x3,
1688                   Matcher::_regEncode[dst_first] & 7,
1689                   Matcher::_regEncode[src_first] & 7);
1690 #ifndef PRODUCT
1691         } else if (!do_size) {
1692           st->print("%s  %s, %s\t# spill",
1693                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1694                      Matcher::regName[dst_first],
1695                      Matcher::regName[src_first]);
1696 #endif
1697         }
1698         return
1699           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1700           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1701           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1702       }
1703     }
1704   }
1705 
1706   assert(0," foo ");
1707   Unimplemented();
1708 
1709   return 0;
1710 }
1711 
1712 #ifndef PRODUCT
1713 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1714 {
       // Print-only use of the shared spill-copy routine: no code buffer is
       // supplied (NULL), so nothing is emitted and the text goes to st.
1715   implementation(NULL, ra_, false, st);
1716 }
1717 #endif
1718 
1719 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1720 {
       // Emitting use of the shared spill-copy routine: the code buffer is
       // supplied and no output stream is passed (NULL).
1721   implementation(&cbuf, ra_, false, NULL);
1722 }
1723 
1724 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1725 {
       // Size-only use: neither a code buffer nor a stream is passed, and the
       // value returned by implementation() is handed back unchanged.
1726   return implementation(NULL, ra_, true, NULL);
1727 }
1728 
1729 //=============================================================================
1730 #ifndef PRODUCT
1731 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1732 {
       // Prints a one-line description that includes the pad size _count.
1733   st->print("nop \t# %d bytes pad for loops and calls", _count);
1734 }
1735 #endif
1736 
1737 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1738 {
       // Emits the padding through a MacroAssembler bound to cbuf.  size()
       // reports _count bytes, so nop(_count) is expected to emit exactly
       // that many bytes.
1739   MacroAssembler _masm(&cbuf);
1740   __ nop(_count);
1741 }
1742 
1743 uint MachNopNode::size(PhaseRegAlloc*) const
1744 {
       // The node's size is the requested pad count; the register allocator
       // argument is not used.
1745   return _count;
1746 }
1747 
1748 
1749 //=============================================================================
1750 #ifndef PRODUCT
1751 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1752 {
       // offset: stack offset derived from the first element of input 0's
       // register mask.  reg: first register assigned to this node, used
       // only to look up its printable name.
1753   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1754   int reg = ra_->get_reg_first(this);
1755   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1756             Matcher::regName[reg], offset);
1757 }
1758 #endif
1759 
1760 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1761 {
1762   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1763   int reg = ra_->get_encode(this);
1764   if (offset >= 0x80) {
1765     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1766     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1767     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1768     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1769     emit_d32(cbuf, offset);
1770   } else {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d8(cbuf, offset);
1776   }
1777 }
1778 
1779 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1780 {
       // Must agree with emit(): prefix + opcode + two emit_rm bytes is 4
       // bytes, followed by a 1-byte displacement when offset < 0x80 or a
       // 4-byte displacement otherwise.
1781   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1782   return (offset < 0x80) ? 5 : 8; // REX
1783 }
1784 
1785 //=============================================================================
1786 
1787 // emit call stub, compiled java to interpreter
1788 void emit_java_to_interp(CodeBuffer& cbuf)
1789 {
1790   // Stub is fixed up when the corresponding call is converted from
1791   // calling compiled code to calling interpreted code.
1792   // movq rbx, 0
1793   // jmp -5 # to self
1794 
       // The mark is captured before the stub is started so the relocation
       // below can refer back to the call site in the main section.
1795   address mark = cbuf.insts_mark();  // get mark within main instrs section
1796 
1797   // Note that the code buffer's insts_mark is always relative to insts.
1798   // That's why we must use the macroassembler to generate a stub.
1799   MacroAssembler _masm(&cbuf);
1800 
1801   address base =
1802   __ start_a_stub(Compile::MAX_stubs_size);
       // On failure nothing has been emitted and the function simply returns.
1803   if (base == NULL)  return;  // CodeBuffer::expand failed
1804   // static stub relocation stores the instruction address of the call
1805   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1806   // static stub relocation also tags the methodOop in the code-stream.
1807   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1808   // This is recognized as unresolved by relocs/nativeinst/ic code
       // (the jump targets its own address, i.e. the current pc).
1809   __ jump(RuntimeAddress(__ pc()));
1810 
1811   // Update current stubs pointer and restore insts_end.
1812   __ end_a_stub();
1813 }
1814 
1815 // size of call stub, compiled java to interpreter
     // (byte count of the two instructions written by emit_java_to_interp)
1816 uint size_java_to_interp()
1817 {
1818   return 15;  // movq (1+1+8); jmp (1+4)
1819 }
1820 
1821 // relocation entries for call stub, compiled java to interpreter
1822 uint reloc_java_to_interp()
1823 {
1824   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1825 }
1826 
1827 //=============================================================================
1828 #ifndef PRODUCT
1829 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1830 {
       // Prints a textual rendering of the inline-cache check.  With
       // UseCompressedOops the klass is first loaded into rscratch1 (and a
       // decode line is printed only when the narrow-oop shift is non-zero);
       // otherwise the compare is printed against memory directly.
1831   if (UseCompressedOops) {
1832     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1833     if (Universe::narrow_oop_shift() != 0) {
1834       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1835     }
1836     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1837   } else {
1838     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1839                  "# Inline cache check");
1840   }
1841   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1842   st->print_cr("\tnop\t# nops to align entry point");
1843 }
1844 #endif
1845 
1846 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1847 {
       // Emits the inline-cache check: compare rax with the receiver's klass
       // (receiver in j_rarg0), jump to the IC-miss stub when they differ,
       // then pad with nops.
1848   MacroAssembler masm(&cbuf);
       // Remember the section size on entry so the number of bytes emitted
       // by this node can be computed below.
1849   uint insts_size = cbuf.insts_size();
1850   if (UseCompressedOops) {
1851     masm.load_klass(rscratch1, j_rarg0);
1852     masm.cmpptr(rax, rscratch1);
1853   } else {
1854     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1855   }
1856 
1857   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1858 
1859   /* WARNING these NOPs are critical so that verified entry point is properly
1860      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
       // 4 - (emitted & 3) is in 1..4; the final "& 0x3" turns 4 into 0.
1861   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1862   if (OptoBreakpoint) {
1863     // Leave space for int3
1864     nops_cnt -= 1;
1865   }
1866   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1867   if (nops_cnt > 0)
1868     masm.nop(nops_cnt);
1869 }
1870 
1871 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1872 {
       // No fixed byte count is coded here (emit() varies with
       // UseCompressedOops, OptoBreakpoint and the padding), so the generic
       // MachNode::size() is used instead.
1873   return MachNode::size(ra_); // too many variables; just compute it
1874                               // the hard way
1875 }
1876 
1877 
1878 //=============================================================================
1879 uint size_exception_handler()
1880 {
       // Stub space reserved by emit_exception_handler(), which emits a
       // single jump.
1881   // NativeCall instruction size is the same as NativeJump.
1882   // Note that this value is also credited (in output.cpp) to
1883   // the size of the code section.
1884   return NativeJump::instruction_size;
1885 }
1886 
1887 // Emit exception handler code.
     // Returns the assembler offset at which the handler starts, or 0 when the
     // stub could not be started.  The handler is a single jump to the
     // exception blob's entry point.
1888 int emit_exception_handler(CodeBuffer& cbuf)
1889 {
1890 
1891   // Note that the code buffer's insts_mark is always relative to insts.
1892   // That's why we must use the macroassembler to generate a handler.
1893   MacroAssembler _masm(&cbuf);
1894   address base =
1895   __ start_a_stub(size_exception_handler());
1896   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1897   int offset = __ offset();
1898   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
       // The emitted bytes must fit in the space reserved above.
1899   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1900   __ end_a_stub();
1901   return offset;
1902 }
1903 
1904 uint size_deopt_handler()
1905 {
       // Stub space reserved by emit_deopt_handler() (call, subptr, jump).
1906   // three 5 byte instructions
1907   return 15;
1908 }
1909 
1910 // Emit deopt handler code.
     // Returns the assembler offset at which the handler starts, or 0 when the
     // stub could not be started.
1911 int emit_deopt_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's insts_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_deopt_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
       // NOTE(review): the_pc is not referenced again in this function; it
       // only names the value that the comments below call "the_pc".
1921   address the_pc = (address) __ pc();
1922   Label next;
1923   // push a "the_pc" on the stack without destroying any registers
1924   // as they all may be live.
1925 
1926   // push address of "next"
1927   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1928   __ bind(next);
1929   // adjust it so it matches "the_pc"
       // (subtract the number of bytes emitted since the handler start from
       // the word at the top of the stack)
1930   __ subptr(Address(rsp, 0), __ offset() - offset);
1931   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1932   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1933   __ end_a_stub();
1934   return offset;
1935 }
1936 
1937 
1938 const bool Matcher::match_rule_supported(int opcode) {
       // A rule is reported as supported exactly when has_match_rule(opcode)
       // is true; no further platform-specific filtering is done here.
1939   if (!has_match_rule(opcode))
1940     return false;
1941 
1942   return true;  // Per default match rules are supported.
1943 }
1944 
1945 int Matcher::regnum_to_fpu_offset(int regnum)
1946 {
       // Rebases a register number by subtracting 32.
1947   return regnum - 32; // The FP registers are in the second chunk
1948 }
1949 
1950 // This is UltraSparc specific, true just means we have fast l2f conversion
     // (this platform always answers true).
1951 const bool Matcher::convL2FSupported(void) {
1952   return true;
1953 }
1954 
1955 // Vector width in bytes (a fixed 8 on this platform)
1956 const uint Matcher::vector_width_in_bytes(void) {
1957   return 8;
1958 }
1959 
1960 // Vector ideal reg (always Op_RegD here)
1961 const uint Matcher::vector_ideal_reg(void) {
1962   return Op_RegD;
1963 }
1964 
1965 // Is this branch offset short enough that a short branch can be used?
1966 //
1967 // NOTE: If the platform does not provide any short branch variants, then
1968 //       this method should return false for offset 0.
1969 bool Matcher::is_short_branch_offset(int rule, int offset) {
1970   // the short version of jmpConUCF2 contains multiple branches,
1971   // making the reach slightly less
1972   if (rule == jmpConUCF2_rule)
1973     return (-126 <= offset && offset <= 125);
1974   return (-128 <= offset && offset <= 127);
1975 }
1976 
1977 const bool Matcher::isSimpleConstant64(jlong value) {
       // The value argument is ignored: every 64-bit constant is reported as
       // simple.  The narrower 32-bit test is kept below, commented out.
1978   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1979   //return value == (int) value;  // Cf. storeImmL and immL32.
1980 
1981   // Probably always true, even if a temp register is required.
1982   return true;
1983 }
1984 
1985 // The ecx parameter to rep stosq for the ClearArray node is in words.
     // (hence the count is not in bytes)
1986 const bool Matcher::init_array_count_is_in_bytes = false;
1987 
1988 // Threshold size for cleararray.
     // Defined as eight times BytesPerLong.
1989 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1990 
1991 // Should the Matcher clone shifts on addressing modes, expecting them
1992 // to be subsumed into complex addressing expressions or compute them
1993 // into registers?  True for Intel but false for most RISCs
1994 const bool Matcher::clone_shift_expressions = true;
1995 
1996 // Do we need to mask the count passed to shift instructions or does
1997 // the cpu only look at the lower 5/6 bits anyway?
     // false: no explicit masking is requested on this platform.
1998 const bool Matcher::need_masked_shift_count = false;
1999 
2000 bool Matcher::narrow_oop_use_complex_address() {
       // Only meaningful with compressed oops (asserted).  Answers true when
       // LogMinObjAlignmentInBytes is at most 3.
2001   assert(UseCompressedOops, "only for compressed oops code");
2002   return (LogMinObjAlignmentInBytes <= 3);
2003 }
2004 
2005 // Is it better to copy float constants, or load them directly from
2006 // memory?  Intel can load a float constant from a direct address,
2007 // requiring no extra registers.  Most RISCs will have to materialize
2008 // an address into a register first, so they would do better to copy
2009 // the constant from stack.
2010 const bool Matcher::rematerialize_float_constants = true; // XXX
2011 
2012 // If CPU can load and store mis-aligned doubles directly then no
2013 // fixup is needed.  Else we split the double into 2 integer pieces
2014 // and move it piece-by-piece.  Only happens when passing doubles into
2015 // C code as the Java calling convention forces doubles to be aligned.
     // true: no such fixup is requested on this platform.
2016 const bool Matcher::misaligned_doubles_ok = true;
2017 
2018 // No-op on amd64: both arguments are ignored and the body is empty.
2019 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2020 
2021 // Advertise here if the CPU requires explicit rounding operations to
2022 // implement the UseStrictFP mode.
2023 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2024 
2025 // Are floats converted to double when stored to stack during deoptimization?
2026 // On x64 it is stored without conversion so we can use normal access.
2027 bool Matcher::float_in_double() { return false; }
2028 
2029 // Do ints take an entire long register or just half?
2030 const bool Matcher::int_in_long = true;
2031 
2032 // Return whether or not this register is ever used as an argument.
2033 // This function is used on startup to build the trampoline stubs in
2034 // generateOptoStub.  Registers not mentioned will be killed by the VM
2035 // call in the trampoline, and arguments in those registers will not be
2036 // available to the callee.
     // The test is a plain membership check against the listed register
     // numbers; each register appears with both its base and _H number.
2037 bool Matcher::can_be_java_arg(int reg)
2038 {
2039   return
2040     reg ==  RDI_num || reg ==  RDI_H_num ||
2041     reg ==  RSI_num || reg ==  RSI_H_num ||
2042     reg ==  RDX_num || reg ==  RDX_H_num ||
2043     reg ==  RCX_num || reg ==  RCX_H_num ||
2044     reg ==   R8_num || reg ==   R8_H_num ||
2045     reg ==   R9_num || reg ==   R9_H_num ||
2046     reg ==  R12_num || reg ==  R12_H_num ||
2047     reg == XMM0_num || reg == XMM0_H_num ||
2048     reg == XMM1_num || reg == XMM1_H_num ||
2049     reg == XMM2_num || reg == XMM2_H_num ||
2050     reg == XMM3_num || reg == XMM3_H_num ||
2051     reg == XMM4_num || reg == XMM4_H_num ||
2052     reg == XMM5_num || reg == XMM5_H_num ||
2053     reg == XMM6_num || reg == XMM6_H_num ||
2054     reg == XMM7_num || reg == XMM7_H_num;
2055 }
2056 
2057 bool Matcher::is_spillable_arg(int reg)
2058 {
       // Same register set as can_be_java_arg(): the answer is delegated.
2059   return can_be_java_arg(reg);
2060 }
2061 
2062 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2063   // In 64-bit mode, code which uses a multiply when the
2064   // divisor is constant is faster than the hardware
2065   // DIV instruction (it uses MulHiL).
       // The divisor argument is not inspected; the answer is always false.
2066   return false;
2067 }
2068 
2069 // Register for DIVI projection of divmodI (returns INT_RAX_REG_mask)
2070 RegMask Matcher::divI_proj_mask() {
2071   return INT_RAX_REG_mask;
2072 }
2073 
2074 // Register for MODI projection of divmodI (returns INT_RDX_REG_mask)
2075 RegMask Matcher::modI_proj_mask() {
2076   return INT_RDX_REG_mask;
2077 }
2078 
2079 // Register for DIVL projection of divmodL (returns LONG_RAX_REG_mask)
2080 RegMask Matcher::divL_proj_mask() {
2081   return LONG_RAX_REG_mask;
2082 }
2083 
2084 // Register for MODL projection of divmodL (returns LONG_RDX_REG_mask)
2085 RegMask Matcher::modL_proj_mask() {
2086   return LONG_RDX_REG_mask;
2087 }
2088 
2089 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
       // Returns PTR_RBP_REG_mask; judging by the function name this is the
       // register used to save SP around method-handle invokes.
2090   return PTR_RBP_REG_mask;
2091 }
2092 
2093 static Address build_address(int b, int i, int s, int d) {
2094   Register index = as_Register(i);
2095   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2096   if (index == rsp) {
2097     index = noreg;
2098     scale = Address::no_scale;
2099   }
2100   Address addr(as_Register(b), index, scale, d);
2101   return addr;
2102 }
2103 
2104 %}
2105 
2106 //----------ENCODING BLOCK-----------------------------------------------------
2107 // This block specifies the encoding classes used by the compiler to
2108 // output byte streams.  Encoding classes are parameterized macros
2109 // used by Machine Instruction Nodes in order to generate the bit
2110 // encoding of the instruction.  Operands specify their base encoding
2111 // interface with the interface keyword.  Four interfaces are
2112 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2113 // COND_INTER.  REG_INTER causes an operand to generate a function
2114 // which returns its register number when queried.  CONST_INTER causes
2115 // an operand to generate a function which returns the value of the
2116 // constant when queried.  MEMORY_INTER causes an operand to generate
2117 // four functions which return the Base Register, the Index Register,
2118 // the Scale Value, and the Offset Value of the operand when queried.
2119 // COND_INTER causes an operand to generate six functions which return
2120 // the encoding code (ie - encoding bits for the instruction)
2121 // associated with each basic boolean condition for a conditional
2122 // instruction.
2123 //
2124 // Instructions specify two basic values for encoding.  Again, a
2125 // function is available to check if the constant displacement is an
2126 // oop. They use the ins_encode keyword to specify their encoding
2127 // classes (which must be a sequence of enc_class names, and their
2128 // parameters, specified in the encoding block), and they use the
2129 // opcode keyword to specify, in order, their primary, secondary, and
2130 // tertiary opcode.  Only the opcode sections which a particular
2131 // instruction needs for encoding need to be specified.
2132 encode %{
2133   // Build emit functions for each basic byte or larger field in the
2134   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2135   // from C++ code in the enc_class source block.  Emit functions will
2136   // live in the main source block for now.  In future, we can
2137   // generalize this by adding a syntax that specifies the sizes of
2138   // fields in an order, so that the adlc can build the emit functions
2139   // automagically
2140 
2141   // Emit primary opcode
       // ($primary is supplied by the instruction that uses this class)
2142   enc_class OpcP
2143   %{
2144     emit_opcode(cbuf, $primary);
2145   %}
2146 
2147   // Emit secondary opcode
       // ($secondary is supplied by the instruction that uses this class)
2148   enc_class OpcS
2149   %{
2150     emit_opcode(cbuf, $secondary);
2151   %}
2152 
2153   // Emit tertiary opcode
       // ($tertiary is supplied by the instruction that uses this class)
2154   enc_class OpcT
2155   %{
2156     emit_opcode(cbuf, $tertiary);
2157   %}
2158 
2159   // Emit opcode directly
       // (the byte comes from the constant operand d8, not from the
       // instruction's opcode declaration)
2160   enc_class Opcode(immI d8)
2161   %{
2162     emit_opcode(cbuf, $d8$$constant);
2163   %}
2164 
2165   // Emit size prefix
       // (the single byte 0x66)
2166   enc_class SizePrefix
2167   %{
2168     emit_opcode(cbuf, 0x66);
2169   %}
2170 
2171   enc_class reg(rRegI reg)
2172   %{
         // One emit_rm byte: mode 0x3, middle field 0, and the low three bits
         // of the register encoding.  Any prefix needed for encodings >= 8 is
         // not emitted here.
2173     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2174   %}
2175 
2176   enc_class reg_reg(rRegI dst, rRegI src)
2177   %{
         // One emit_rm byte: mode 0x3, dst in the middle field, src in the
         // last field (low three bits of each encoding only).
2178     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2179   %}
2180 
2181   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2182   %{
         // The opcode byte given as a constant operand, followed by the same
         // register-register byte that reg_reg emits.
2183     emit_opcode(cbuf, $opcode$$constant);
2184     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2185   %}
2186 
2187   enc_class cmpfp_fixup() %{
           // Delegates to emit_cmpfp_fixup(), defined elsewhere in this
           // file, using a MacroAssembler bound to cbuf.
2188       MacroAssembler _masm(&cbuf);
2189       emit_cmpfp_fixup(_masm);
2190   %}
2191 
2192   enc_class cmpfp3(rRegI dst)
2193   %{
         // Emits: dst = -1; on parity or below, skip to the end (dst stays
         // -1); otherwise dst = setne result, zero-extended (0 or 1).
         // The two jump displacements are hard-coded byte counts of the
         // instructions that follow them, so they must change together with
         // the setne/movzbl encodings below.
2194     int dstenc = $dst$$reg;
2195 
2196     // movl $dst, -1
2197     if (dstenc >= 8) {
2198       emit_opcode(cbuf, Assembler::REX_B);
2199     }
2200     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2201     emit_d32(cbuf, -1);
2202 
2203     // jp,s done
         // skips jb (2) + setne (3 or 4) + movzbl (3 or 4) bytes
2204     emit_opcode(cbuf, 0x7A);
2205     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2206 
2207     // jb,s done
         // skips setne (3 or 4) + movzbl (3 or 4) bytes
2208     emit_opcode(cbuf, 0x72);
2209     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2210 
2211     // setne $dst
         // encodings >= 4 get a one-byte prefix, which is where the extra
         // byte in the displacements above comes from
2212     if (dstenc >= 4) {
2213       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2214     }
2215     emit_opcode(cbuf, 0x0F);
2216     emit_opcode(cbuf, 0x95);
2217     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2218 
2219     // movzbl $dst, $dst
2220     if (dstenc >= 4) {
2221       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2222     }
2223     emit_opcode(cbuf, 0x0F);
2224     emit_opcode(cbuf, 0xB6);
2225     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2226   %}
2227 
2228   enc_class cdql_enc(no_rax_rdx_RegI div)
2229   %{
2230     // Full implementation of Java idiv and irem; checks for
2231     // special case as described in JVM spec., p.243 & p.271.
2232     //
2233     //         normal case                           special case
2234     //
2235     // input : rax: dividend                         min_int
2236     //         reg: divisor                          -1
2237     //
2238     // output: rax: quotient  (= rax idiv reg)       min_int
2239     //         rdx: remainder (= rax irem reg)       0
2240     //
2241     //  Code sequence:
2242     //
2243     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2244     //    5:   75 07/08                jne    e <normal>
2245     //    7:   33 d2                   xor    %edx,%edx
2246     //  [div >= 8 -> offset + 1]
2247     //  [REX_B]
2248     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2249     //    c:   74 03/04                je     11 <done>
2250     // 000000000000000e <normal>:
2251     //    e:   99                      cltd
2252     //  [div >= 8 -> offset + 1]
2253     //  [REX_B]
2254     //    f:   f7 f9                   idiv   $div
2255     // 0000000000000011 <done>:
         //
         // This class emits everything up to and including cltd; the idiv
         // itself is not emitted here, but the "je" displacement below
         // already counts its bytes.
2256 
2257     // cmp    $0x80000000,%eax
2258     emit_opcode(cbuf, 0x3d);
2259     emit_d8(cbuf, 0x00);
2260     emit_d8(cbuf, 0x00);
2261     emit_d8(cbuf, 0x00);
2262     emit_d8(cbuf, 0x80);
2263 
2264     // jne    e <normal>
         // skips xor (2) + cmp (3, or 4 with prefix) + je (2)
2265     emit_opcode(cbuf, 0x75);
2266     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2267 
2268     // xor    %edx,%edx
2269     emit_opcode(cbuf, 0x33);
2270     emit_d8(cbuf, 0xD2);
2271 
2272     // cmp    $0xffffffffffffffff,$div
2273     if ($div$$reg >= 8) {
2274       emit_opcode(cbuf, Assembler::REX_B);
2275     }
2276     emit_opcode(cbuf, 0x83);
2277     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2278     emit_d8(cbuf, 0xFF);
2279 
2280     // je     11 <done>
         // skips cltd (1) + the idiv emitted by the user (2, or 3 with prefix)
2281     emit_opcode(cbuf, 0x74);
2282     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2283 
2284     // <normal>
2285     // cltd
2286     emit_opcode(cbuf, 0x99);
2287 
2288     // idivl (note: must be emitted by the user of this rule)
2289     // <done>
2290   %}
2291 
2292   enc_class cdqq_enc(no_rax_rdx_RegL div)
2293   %{
2294     // Full implementation of Java ldiv and lrem; checks for
2295     // special case as described in JVM spec., p.243 & p.271.
2296     //
2297     //         normal case                           special case
2298     //
2299     // input : rax: dividend                         min_long
2300     //         reg: divisor                          -1
2301     //
2302     // output: rax: quotient  (= rax idiv reg)       min_long
2303     //         rdx: remainder (= rax irem reg)       0
2304     //
2305     //  Code sequence:
2306     //
2307     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2308     //    7:   00 00 80
2309     //    a:   48 39 d0                cmp    %rdx,%rax
2310     //    d:   75 08                   jne    17 <normal>
2311     //    f:   33 d2                   xor    %edx,%edx
2312     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2313     //   15:   74 05                   je     1c <done>
2314     // 0000000000000017 <normal>:
2315     //   17:   48 99                   cqto
2316     //   19:   48 f7 f9                idiv   $div
2317     // 000000000000001c <done>:
         //
         // Every instruction here carries a prefix byte regardless of the
         // register, so the two jump displacements are constants.  The idiv
         // is not emitted here, but "je" already counts its three bytes.
2318 
2319     // mov    $0x8000000000000000,%rdx
2320     emit_opcode(cbuf, Assembler::REX_W);
2321     emit_opcode(cbuf, 0xBA);
2322     emit_d8(cbuf, 0x00);
2323     emit_d8(cbuf, 0x00);
2324     emit_d8(cbuf, 0x00);
2325     emit_d8(cbuf, 0x00);
2326     emit_d8(cbuf, 0x00);
2327     emit_d8(cbuf, 0x00);
2328     emit_d8(cbuf, 0x00);
2329     emit_d8(cbuf, 0x80);
2330 
2331     // cmp    %rdx,%rax
2332     emit_opcode(cbuf, Assembler::REX_W);
2333     emit_opcode(cbuf, 0x39);
2334     emit_d8(cbuf, 0xD0);
2335 
2336     // jne    17 <normal>
         // skips xor (2) + cmp (4) + je (2)
2337     emit_opcode(cbuf, 0x75);
2338     emit_d8(cbuf, 0x08);
2339 
2340     // xor    %edx,%edx
2341     emit_opcode(cbuf, 0x33);
2342     emit_d8(cbuf, 0xD2);
2343 
2344     // cmp    $0xffffffffffffffff,$div
2345     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2346     emit_opcode(cbuf, 0x83);
2347     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2348     emit_d8(cbuf, 0xFF);
2349 
2350     // je     1c <done>
         // skips cqto (2) + the idiv emitted by the user (3)
2351     emit_opcode(cbuf, 0x74);
2352     emit_d8(cbuf, 0x05);
2353 
2354     // <normal>
2355     // cqto
2356     emit_opcode(cbuf, Assembler::REX_W);
2357     emit_opcode(cbuf, 0x99);
2358 
2359     // idivq (note: must be emitted by the user of this rule)
2360     // <done>
2361   %}
2362 
2363   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
       // Emits only the opcode byte; the immediate itself is emitted by a
       // separate class (see Con8or32, which uses the same range test).
2364   enc_class OpcSE(immI imm)
2365   %{
2366     // Emit primary opcode and set sign-extend bit
2367     // Check for 8-bit immediate, and set sign extend bit in opcode
2368     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2369       emit_opcode(cbuf, $primary | 0x02);
2370     } else {
2371       // 32-bit immediate
2372       emit_opcode(cbuf, $primary);
2373     }
2374   %}
2375 
2376   enc_class OpcSErm(rRegI dst, immI imm)
2377   %{
         // Like OpcSE, but also emits an optional REX_B prefix (for register
         // encodings >= 8) before the opcode and an emit_rm byte after it.
2378     // OpcSEr/m
2379     int dstenc = $dst$$reg;
2380     if (dstenc >= 8) {
2381       emit_opcode(cbuf, Assembler::REX_B);
           // keep only the low three bits for the emit_rm byte below
2382       dstenc -= 8;
2383     }
2384     // Emit primary opcode and set sign-extend bit
2385     // Check for 8-bit immediate, and set sign extend bit in opcode
2386     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2387       emit_opcode(cbuf, $primary | 0x02);
2388     } else {
2389       // 32-bit immediate
2390       emit_opcode(cbuf, $primary);
2391     }
2392     // Emit r/m byte with secondary opcode, after primary opcode.
2393     emit_rm(cbuf, 0x3, $secondary, dstenc);
2394   %}
2395 
2396   enc_class OpcSErm_wide(rRegL dst, immI imm)
2397   %{
         // Long-register variant of OpcSErm: a prefix is always emitted
         // (REX_W, or REX_WB for register encodings >= 8).
2398     // OpcSEr/m
2399     int dstenc = $dst$$reg;
2400     if (dstenc < 8) {
2401       emit_opcode(cbuf, Assembler::REX_W);
2402     } else {
2403       emit_opcode(cbuf, Assembler::REX_WB);
2404       dstenc -= 8;
2405     }
2406     // Emit primary opcode and set sign-extend bit
2407     // Check for 8-bit immediate, and set sign extend bit in opcode
2408     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2409       emit_opcode(cbuf, $primary | 0x02);
2410     } else {
2411       // 32-bit immediate
2412       emit_opcode(cbuf, $primary);
2413     }
2414     // Emit r/m byte with secondary opcode, after primary opcode.
2415     emit_rm(cbuf, 0x3, $secondary, dstenc);
2416   %}
2417 
2418   enc_class Con8or32(immI imm)
2419   %{
         // Emits the immediate itself: one byte when it lies in [-0x80, 0x80),
         // four bytes otherwise.  The range test matches the one in the OpcSE
         // classes.
2420     // Check for 8-bit immediate, and set sign extend bit in opcode
2421     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2422       $$$emit8$imm$$constant;
2423     } else {
2424       // 32-bit immediate
2425       $$$emit32$imm$$constant;
2426     }
2427   %}
2428 
2429   enc_class Lbl(label labl)
2430   %{
         // Emits a 32-bit displacement: the label position minus the section
         // size after these four bytes (insts_size + 4).
2431     // GOTO
2432     Label* l = $labl$$label;
2433     emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4)));
2434   %}
2435 
2436   enc_class LblShort(label labl)
2437   %{
         // Emits an 8-bit displacement: the label position minus the section
         // size after this one byte (insts_size + 1).  The range is checked
         // only by the assert.
2438     // GOTO
2439     Label* l = $labl$$label;
2440     int disp = l->loc_pos() - (cbuf.insts_size() + 1);
2441     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2442     emit_d8(cbuf, disp);
2443   %}
2444 
2445   enc_class opc2_reg(rRegI dst)
2446   %{
         // Passes the secondary opcode and the register encoding to emit_cc().
         // NOTE(review): presumably emit_cc() merges the two into one byte;
         // confirm against its definition.
2447     // BSWAP
2448     emit_cc(cbuf, $secondary, $dst$$reg);
2449   %}
2450 
2451   enc_class opc3_reg(rRegI dst)
2452   %{
         // Same as opc2_reg, but uses the tertiary opcode.
2453     // BSWAP
2454     emit_cc(cbuf, $tertiary, $dst$$reg);
2455   %}
2456 
2457   enc_class reg_opc(rRegI div)
2458   %{
         // One emit_rm byte: mode 0x3, the secondary opcode in the middle
         // field, and the low three bits of the register encoding.
2459     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2460     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2461   %}
2462 
2463   enc_class Jcc(cmpOp cop, label labl)
2464   %{
         // Emits the primary opcode byte, then the secondary opcode combined
         // with the condition code via emit_cc(), then a 32-bit displacement
         // measured from the end of those four bytes.
2465     // JCC
2466     Label* l = $labl$$label;
2467     $$$emit8$primary;
2468     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2469     emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4)));
2470   %}
2471 
2472   enc_class JccShort (cmpOp cop, label labl)
2473   %{
         // Short form of Jcc: the primary opcode combined with the condition
         // code, then an 8-bit displacement.  The displacement is computed
         // after the opcode byte has been emitted, and its range is checked
         // only by the assert.
2474   // JCC
2475     Label *l = $labl$$label;
2476     emit_cc(cbuf, $primary, $cop$$cmpcode);
2477     int disp = l->loc_pos() - (cbuf.insts_size() + 1);
2478     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2479     emit_d8(cbuf, disp);
2480   %}
2481 
2482   enc_class enc_cmov(cmpOp cop)
2483   %{
         // Emits the primary opcode byte, then the secondary opcode combined
         // with the condition code.  Operand bytes are not emitted here.
2484     // CMOV
2485     $$$emit8$primary;
2486     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2487   %}
2488 
2489   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2490   %{
         // Conditional float move built from a short branch: jump over a
         // register-to-register move when the inverted condition holds.  The
         // jump displacement is the byte length of the move that follows, so
         // it depends on UseXmmRegToRegMoveAll (whether the 0xF3 prefix is
         // emitted) and on whether either register needs a REX prefix.
2491     // Invert sense of branch from sense of cmov
2492     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2493     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2494                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2495                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2496     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2497     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2498     if ($dst$$reg < 8) {
2499       if ($src$$reg >= 8) {
2500         emit_opcode(cbuf, Assembler::REX_B);
2501       }
2502     } else {
2503       if ($src$$reg < 8) {
2504         emit_opcode(cbuf, Assembler::REX_R);
2505       } else {
2506         emit_opcode(cbuf, Assembler::REX_RB);
2507       }
2508     }
2509     emit_opcode(cbuf, 0x0F);
2510     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2511     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2512   %}
2513 
2514   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2515   %{
2516     // Invert sense of branch from sense of cmov
2517     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2518     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2519 
2520     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2521     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2522     if ($dst$$reg < 8) {
2523       if ($src$$reg >= 8) {
2524         emit_opcode(cbuf, Assembler::REX_B);
2525       }
2526     } else {
2527       if ($src$$reg < 8) {
2528         emit_opcode(cbuf, Assembler::REX_R);
2529       } else {
2530         emit_opcode(cbuf, Assembler::REX_RB);
2531       }
2532     }
2533     emit_opcode(cbuf, 0x0F);
2534     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2535     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2536   %}
2537 
2538   enc_class enc_PartialSubtypeCheck()
2539   %{
2540     Register Rrdi = as_Register(RDI_enc); // result register
2541     Register Rrax = as_Register(RAX_enc); // super class
2542     Register Rrcx = as_Register(RCX_enc); // killed
2543     Register Rrsi = as_Register(RSI_enc); // sub class
2544     Label miss;
2545     const bool set_cond_codes = true;
2546 
2547     MacroAssembler _masm(&cbuf);
2548     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2549                                      NULL, &miss,
2550                                      /*set_cond_codes:*/ true);
2551     if ($primary) {
2552       __ xorptr(Rrdi, Rrdi);
2553     }
2554     __ bind(miss);
2555   %}
2556 
2557   enc_class Java_To_Interpreter(method meth)
2558   %{
2559     // CALL Java_To_Interpreter
2560     // This is the instruction starting address for relocation info.
2561     cbuf.set_insts_mark();
2562     $$$emit8$primary;
2563     // CALL directly to the runtime
2564     emit_d32_reloc(cbuf,
2565                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2566                    runtime_call_Relocation::spec(),
2567                    RELOC_DISP32);
2568   %}
2569 
2570   enc_class preserve_SP %{
2571     debug_only(int off0 = cbuf.insts_size());
2572     MacroAssembler _masm(&cbuf);
2573     // RBP is preserved across all calls, even compiled calls.
2574     // Use it to preserve RSP in places where the callee might change the SP.
2575     __ movptr(rbp_mh_SP_save, rsp);
2576     debug_only(int off1 = cbuf.insts_size());
2577     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2578   %}
2579 
2580   enc_class restore_SP %{
2581     MacroAssembler _masm(&cbuf);
2582     __ movptr(rsp, rbp_mh_SP_save);
2583   %}
2584 
2585   enc_class Java_Static_Call(method meth)
2586   %{
2587     // JAVA STATIC CALL
2588     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2589     // determine who we intended to call.
2590     cbuf.set_insts_mark();
2591     $$$emit8$primary;
2592 
2593     if (!_method) {
2594       emit_d32_reloc(cbuf,
2595                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2596                      runtime_call_Relocation::spec(),
2597                      RELOC_DISP32);
2598     } else if (_optimized_virtual) {
2599       emit_d32_reloc(cbuf,
2600                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2601                      opt_virtual_call_Relocation::spec(),
2602                      RELOC_DISP32);
2603     } else {
2604       emit_d32_reloc(cbuf,
2605                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2606                      static_call_Relocation::spec(),
2607                      RELOC_DISP32);
2608     }
2609     if (_method) {
2610       // Emit stub for static call
2611       emit_java_to_interp(cbuf);
2612     }
2613   %}
2614 
2615   enc_class Java_Dynamic_Call(method meth)
2616   %{
2617     // JAVA DYNAMIC CALL
2618     // !!!!!
2619     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2620     // emit_call_dynamic_prologue( cbuf );
2621     cbuf.set_insts_mark();
2622 
2623     // movq rax, -1
2624     emit_opcode(cbuf, Assembler::REX_W);
2625     emit_opcode(cbuf, 0xB8 | RAX_enc);
2626     emit_d64_reloc(cbuf,
2627                    (int64_t) Universe::non_oop_word(),
2628                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2629     address virtual_call_oop_addr = cbuf.insts_mark();
2630     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2631     // who we intended to call.
2632     cbuf.set_insts_mark();
2633     $$$emit8$primary;
2634     emit_d32_reloc(cbuf,
2635                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2636                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2637                    RELOC_DISP32);
2638   %}
2639 
2640   enc_class Java_Compiled_Call(method meth)
2641   %{
2642     // JAVA COMPILED CALL
2643     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2644 
2645     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2646     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2647 
2648     // callq *disp(%rax)
2649     cbuf.set_insts_mark();
2650     $$$emit8$primary;
2651     if (disp < 0x80) {
2652       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2653       emit_d8(cbuf, disp); // Displacement
2654     } else {
2655       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2656       emit_d32(cbuf, disp); // Displacement
2657     }
2658   %}
2659 
2660   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2661   %{
2662     // SAL, SAR, SHR
2663     int dstenc = $dst$$reg;
2664     if (dstenc >= 8) {
2665       emit_opcode(cbuf, Assembler::REX_B);
2666       dstenc -= 8;
2667     }
2668     $$$emit8$primary;
2669     emit_rm(cbuf, 0x3, $secondary, dstenc);
2670     $$$emit8$shift$$constant;
2671   %}
2672 
2673   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2674   %{
2675     // SAL, SAR, SHR
2676     int dstenc = $dst$$reg;
2677     if (dstenc < 8) {
2678       emit_opcode(cbuf, Assembler::REX_W);
2679     } else {
2680       emit_opcode(cbuf, Assembler::REX_WB);
2681       dstenc -= 8;
2682     }
2683     $$$emit8$primary;
2684     emit_rm(cbuf, 0x3, $secondary, dstenc);
2685     $$$emit8$shift$$constant;
2686   %}
2687 
2688   enc_class load_immI(rRegI dst, immI src)
2689   %{
2690     int dstenc = $dst$$reg;
2691     if (dstenc >= 8) {
2692       emit_opcode(cbuf, Assembler::REX_B);
2693       dstenc -= 8;
2694     }
2695     emit_opcode(cbuf, 0xB8 | dstenc);
2696     $$$emit32$src$$constant;
2697   %}
2698 
2699   enc_class load_immL(rRegL dst, immL src)
2700   %{
2701     int dstenc = $dst$$reg;
2702     if (dstenc < 8) {
2703       emit_opcode(cbuf, Assembler::REX_W);
2704     } else {
2705       emit_opcode(cbuf, Assembler::REX_WB);
2706       dstenc -= 8;
2707     }
2708     emit_opcode(cbuf, 0xB8 | dstenc);
2709     emit_d64(cbuf, $src$$constant);
2710   %}
2711 
2712   enc_class load_immUL32(rRegL dst, immUL32 src)
2713   %{
2714     // same as load_immI, but this time we care about zeroes in the high word
2715     int dstenc = $dst$$reg;
2716     if (dstenc >= 8) {
2717       emit_opcode(cbuf, Assembler::REX_B);
2718       dstenc -= 8;
2719     }
2720     emit_opcode(cbuf, 0xB8 | dstenc);
2721     $$$emit32$src$$constant;
2722   %}
2723 
2724   enc_class load_immL32(rRegL dst, immL32 src)
2725   %{
2726     int dstenc = $dst$$reg;
2727     if (dstenc < 8) {
2728       emit_opcode(cbuf, Assembler::REX_W);
2729     } else {
2730       emit_opcode(cbuf, Assembler::REX_WB);
2731       dstenc -= 8;
2732     }
2733     emit_opcode(cbuf, 0xC7);
2734     emit_rm(cbuf, 0x03, 0x00, dstenc);
2735     $$$emit32$src$$constant;
2736   %}
2737 
2738   enc_class load_immP31(rRegP dst, immP32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     int dstenc = $dst$$reg;
2742     if (dstenc >= 8) {
2743       emit_opcode(cbuf, Assembler::REX_B);
2744       dstenc -= 8;
2745     }
2746     emit_opcode(cbuf, 0xB8 | dstenc);
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immP(rRegP dst, immP src)
2751   %{
2752     int dstenc = $dst$$reg;
2753     if (dstenc < 8) {
2754       emit_opcode(cbuf, Assembler::REX_W);
2755     } else {
2756       emit_opcode(cbuf, Assembler::REX_WB);
2757       dstenc -= 8;
2758     }
2759     emit_opcode(cbuf, 0xB8 | dstenc);
2760     // This next line should be generated from ADLC
2761     if ($src->constant_is_oop()) {
2762       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2763     } else {
2764       emit_d64(cbuf, $src$$constant);
2765     }
2766   %}
2767 
2768   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2769   enc_class enc_copy(rRegI dst, rRegI src)
2770   %{
2771     encode_copy(cbuf, $dst$$reg, $src$$reg);
2772   %}
2773 
2774   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2775   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2776     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2777   %}
2778 
2779   enc_class enc_copy_always(rRegI dst, rRegI src)
2780   %{
2781     int srcenc = $src$$reg;
2782     int dstenc = $dst$$reg;
2783 
2784     if (dstenc < 8) {
2785       if (srcenc >= 8) {
2786         emit_opcode(cbuf, Assembler::REX_B);
2787         srcenc -= 8;
2788       }
2789     } else {
2790       if (srcenc < 8) {
2791         emit_opcode(cbuf, Assembler::REX_R);
2792       } else {
2793         emit_opcode(cbuf, Assembler::REX_RB);
2794         srcenc -= 8;
2795       }
2796       dstenc -= 8;
2797     }
2798 
2799     emit_opcode(cbuf, 0x8B);
2800     emit_rm(cbuf, 0x3, dstenc, srcenc);
2801   %}
2802 
2803   enc_class enc_copy_wide(rRegL dst, rRegL src)
2804   %{
2805     int srcenc = $src$$reg;
2806     int dstenc = $dst$$reg;
2807 
2808     if (dstenc != srcenc) {
2809       if (dstenc < 8) {
2810         if (srcenc < 8) {
2811           emit_opcode(cbuf, Assembler::REX_W);
2812         } else {
2813           emit_opcode(cbuf, Assembler::REX_WB);
2814           srcenc -= 8;
2815         }
2816       } else {
2817         if (srcenc < 8) {
2818           emit_opcode(cbuf, Assembler::REX_WR);
2819         } else {
2820           emit_opcode(cbuf, Assembler::REX_WRB);
2821           srcenc -= 8;
2822         }
2823         dstenc -= 8;
2824       }
2825       emit_opcode(cbuf, 0x8B);
2826       emit_rm(cbuf, 0x3, dstenc, srcenc);
2827     }
2828   %}
2829 
  enc_class Con32(immI src)
  %{
    // Output immediate: the constant operand is written as a 32-bit value
    // through the ADLC emit32 directive.
    $$$emit32$src$$constant;
  %}
2835 
2836   enc_class Con64(immL src)
2837   %{
2838     // Output immediate
2839     emit_d64($src$$constant);
2840   %}
2841 
2842   enc_class Con32F_as_bits(immF src)
2843   %{
2844     // Output Float immediate bits
2845     jfloat jf = $src$$constant;
2846     jint jf_as_bits = jint_cast(jf);
2847     emit_d32(cbuf, jf_as_bits);
2848   %}
2849 
  enc_class Con16(immI src)
  %{
    // Output immediate: the constant operand is written as a 16-bit value
    // through the ADLC emit16 directive.
    $$$emit16$src$$constant;
  %}
2855 
2856   // How is this different from Con32??? XXX
2857   enc_class Con_d32(immI src)
2858   %{
2859     emit_d32(cbuf,$src$$constant);
2860   %}
2861 
2862   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2863     // Output immediate memory reference
2864     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2865     emit_d32(cbuf, 0x00);
2866   %}
2867 
2868   enc_class lock_prefix()
2869   %{
2870     if (os::is_MP()) {
2871       emit_opcode(cbuf, 0xF0); // lock
2872     }
2873   %}
2874 
2875   enc_class REX_mem(memory mem)
2876   %{
2877     if ($mem$$base >= 8) {
2878       if ($mem$$index < 8) {
2879         emit_opcode(cbuf, Assembler::REX_B);
2880       } else {
2881         emit_opcode(cbuf, Assembler::REX_XB);
2882       }
2883     } else {
2884       if ($mem$$index >= 8) {
2885         emit_opcode(cbuf, Assembler::REX_X);
2886       }
2887     }
2888   %}
2889 
2890   enc_class REX_mem_wide(memory mem)
2891   %{
2892     if ($mem$$base >= 8) {
2893       if ($mem$$index < 8) {
2894         emit_opcode(cbuf, Assembler::REX_WB);
2895       } else {
2896         emit_opcode(cbuf, Assembler::REX_WXB);
2897       }
2898     } else {
2899       if ($mem$$index < 8) {
2900         emit_opcode(cbuf, Assembler::REX_W);
2901       } else {
2902         emit_opcode(cbuf, Assembler::REX_WX);
2903       }
2904     }
2905   %}
2906 
2907   // for byte regs
2908   enc_class REX_breg(rRegI reg)
2909   %{
2910     if ($reg$$reg >= 4) {
2911       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2912     }
2913   %}
2914 
2915   // for byte regs
2916   enc_class REX_reg_breg(rRegI dst, rRegI src)
2917   %{
2918     if ($dst$$reg < 8) {
2919       if ($src$$reg >= 4) {
2920         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2921       }
2922     } else {
2923       if ($src$$reg < 8) {
2924         emit_opcode(cbuf, Assembler::REX_R);
2925       } else {
2926         emit_opcode(cbuf, Assembler::REX_RB);
2927       }
2928     }
2929   %}
2930 
2931   // for byte regs
2932   enc_class REX_breg_mem(rRegI reg, memory mem)
2933   %{
2934     if ($reg$$reg < 8) {
2935       if ($mem$$base < 8) {
2936         if ($mem$$index >= 8) {
2937           emit_opcode(cbuf, Assembler::REX_X);
2938         } else if ($reg$$reg >= 4) {
2939           emit_opcode(cbuf, Assembler::REX);
2940         }
2941       } else {
2942         if ($mem$$index < 8) {
2943           emit_opcode(cbuf, Assembler::REX_B);
2944         } else {
2945           emit_opcode(cbuf, Assembler::REX_XB);
2946         }
2947       }
2948     } else {
2949       if ($mem$$base < 8) {
2950         if ($mem$$index < 8) {
2951           emit_opcode(cbuf, Assembler::REX_R);
2952         } else {
2953           emit_opcode(cbuf, Assembler::REX_RX);
2954         }
2955       } else {
2956         if ($mem$$index < 8) {
2957           emit_opcode(cbuf, Assembler::REX_RB);
2958         } else {
2959           emit_opcode(cbuf, Assembler::REX_RXB);
2960         }
2961       }
2962     }
2963   %}
2964 
2965   enc_class REX_reg(rRegI reg)
2966   %{
2967     if ($reg$$reg >= 8) {
2968       emit_opcode(cbuf, Assembler::REX_B);
2969     }
2970   %}
2971 
2972   enc_class REX_reg_wide(rRegI reg)
2973   %{
2974     if ($reg$$reg < 8) {
2975       emit_opcode(cbuf, Assembler::REX_W);
2976     } else {
2977       emit_opcode(cbuf, Assembler::REX_WB);
2978     }
2979   %}
2980 
2981   enc_class REX_reg_reg(rRegI dst, rRegI src)
2982   %{
2983     if ($dst$$reg < 8) {
2984       if ($src$$reg >= 8) {
2985         emit_opcode(cbuf, Assembler::REX_B);
2986       }
2987     } else {
2988       if ($src$$reg < 8) {
2989         emit_opcode(cbuf, Assembler::REX_R);
2990       } else {
2991         emit_opcode(cbuf, Assembler::REX_RB);
2992       }
2993     }
2994   %}
2995 
2996   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2997   %{
2998     if ($dst$$reg < 8) {
2999       if ($src$$reg < 8) {
3000         emit_opcode(cbuf, Assembler::REX_W);
3001       } else {
3002         emit_opcode(cbuf, Assembler::REX_WB);
3003       }
3004     } else {
3005       if ($src$$reg < 8) {
3006         emit_opcode(cbuf, Assembler::REX_WR);
3007       } else {
3008         emit_opcode(cbuf, Assembler::REX_WRB);
3009       }
3010     }
3011   %}
3012 
3013   enc_class REX_reg_mem(rRegI reg, memory mem)
3014   %{
3015     if ($reg$$reg < 8) {
3016       if ($mem$$base < 8) {
3017         if ($mem$$index >= 8) {
3018           emit_opcode(cbuf, Assembler::REX_X);
3019         }
3020       } else {
3021         if ($mem$$index < 8) {
3022           emit_opcode(cbuf, Assembler::REX_B);
3023         } else {
3024           emit_opcode(cbuf, Assembler::REX_XB);
3025         }
3026       }
3027     } else {
3028       if ($mem$$base < 8) {
3029         if ($mem$$index < 8) {
3030           emit_opcode(cbuf, Assembler::REX_R);
3031         } else {
3032           emit_opcode(cbuf, Assembler::REX_RX);
3033         }
3034       } else {
3035         if ($mem$$index < 8) {
3036           emit_opcode(cbuf, Assembler::REX_RB);
3037         } else {
3038           emit_opcode(cbuf, Assembler::REX_RXB);
3039         }
3040       }
3041     }
3042   %}
3043 
3044   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3045   %{
3046     if ($reg$$reg < 8) {
3047       if ($mem$$base < 8) {
3048         if ($mem$$index < 8) {
3049           emit_opcode(cbuf, Assembler::REX_W);
3050         } else {
3051           emit_opcode(cbuf, Assembler::REX_WX);
3052         }
3053       } else {
3054         if ($mem$$index < 8) {
3055           emit_opcode(cbuf, Assembler::REX_WB);
3056         } else {
3057           emit_opcode(cbuf, Assembler::REX_WXB);
3058         }
3059       }
3060     } else {
3061       if ($mem$$base < 8) {
3062         if ($mem$$index < 8) {
3063           emit_opcode(cbuf, Assembler::REX_WR);
3064         } else {
3065           emit_opcode(cbuf, Assembler::REX_WRX);
3066         }
3067       } else {
3068         if ($mem$$index < 8) {
3069           emit_opcode(cbuf, Assembler::REX_WRB);
3070         } else {
3071           emit_opcode(cbuf, Assembler::REX_WRXB);
3072         }
3073       }
3074     }
3075   %}
3076 
3077   enc_class reg_mem(rRegI ereg, memory mem)
3078   %{
3079     // High registers handle in encode_RegMem
3080     int reg = $ereg$$reg;
3081     int base = $mem$$base;
3082     int index = $mem$$index;
3083     int scale = $mem$$scale;
3084     int disp = $mem$$disp;
3085     bool disp_is_oop = $mem->disp_is_oop();
3086 
3087     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3088   %}
3089 
3090   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3091   %{
3092     int rm_byte_opcode = $rm_opcode$$constant;
3093 
3094     // High registers handle in encode_RegMem
3095     int base = $mem$$base;
3096     int index = $mem$$index;
3097     int scale = $mem$$scale;
3098     int displace = $mem$$disp;
3099 
3100     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3101                                             // working with static
3102                                             // globals
3103     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3104                   disp_is_oop);
3105   %}
3106 
3107   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3108   %{
3109     int reg_encoding = $dst$$reg;
3110     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3111     int index        = 0x04;            // 0x04 indicates no index
3112     int scale        = 0x00;            // 0x00 indicates no scale
3113     int displace     = $src1$$constant; // 0x00 indicates no displacement
3114     bool disp_is_oop = false;
3115     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3116                   disp_is_oop);
3117   %}
3118 
3119   enc_class neg_reg(rRegI dst)
3120   %{
3121     int dstenc = $dst$$reg;
3122     if (dstenc >= 8) {
3123       emit_opcode(cbuf, Assembler::REX_B);
3124       dstenc -= 8;
3125     }
3126     // NEG $dst
3127     emit_opcode(cbuf, 0xF7);
3128     emit_rm(cbuf, 0x3, 0x03, dstenc);
3129   %}
3130 
3131   enc_class neg_reg_wide(rRegI dst)
3132   %{
3133     int dstenc = $dst$$reg;
3134     if (dstenc < 8) {
3135       emit_opcode(cbuf, Assembler::REX_W);
3136     } else {
3137       emit_opcode(cbuf, Assembler::REX_WB);
3138       dstenc -= 8;
3139     }
3140     // NEG $dst
3141     emit_opcode(cbuf, 0xF7);
3142     emit_rm(cbuf, 0x3, 0x03, dstenc);
3143   %}
3144 
3145   enc_class setLT_reg(rRegI dst)
3146   %{
3147     int dstenc = $dst$$reg;
3148     if (dstenc >= 8) {
3149       emit_opcode(cbuf, Assembler::REX_B);
3150       dstenc -= 8;
3151     } else if (dstenc >= 4) {
3152       emit_opcode(cbuf, Assembler::REX);
3153     }
3154     // SETLT $dst
3155     emit_opcode(cbuf, 0x0F);
3156     emit_opcode(cbuf, 0x9C);
3157     emit_rm(cbuf, 0x3, 0x0, dstenc);
3158   %}
3159 
3160   enc_class setNZ_reg(rRegI dst)
3161   %{
3162     int dstenc = $dst$$reg;
3163     if (dstenc >= 8) {
3164       emit_opcode(cbuf, Assembler::REX_B);
3165       dstenc -= 8;
3166     } else if (dstenc >= 4) {
3167       emit_opcode(cbuf, Assembler::REX);
3168     }
3169     // SETNZ $dst
3170     emit_opcode(cbuf, 0x0F);
3171     emit_opcode(cbuf, 0x95);
3172     emit_rm(cbuf, 0x3, 0x0, dstenc);
3173   %}
3174 
3175 
  // Compare the longs and set -1, 0, or 1 into dst
3177   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3178   %{
3179     int src1enc = $src1$$reg;
3180     int src2enc = $src2$$reg;
3181     int dstenc = $dst$$reg;
3182 
3183     // cmpq $src1, $src2
3184     if (src1enc < 8) {
3185       if (src2enc < 8) {
3186         emit_opcode(cbuf, Assembler::REX_W);
3187       } else {
3188         emit_opcode(cbuf, Assembler::REX_WB);
3189       }
3190     } else {
3191       if (src2enc < 8) {
3192         emit_opcode(cbuf, Assembler::REX_WR);
3193       } else {
3194         emit_opcode(cbuf, Assembler::REX_WRB);
3195       }
3196     }
3197     emit_opcode(cbuf, 0x3B);
3198     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3199 
3200     // movl $dst, -1
3201     if (dstenc >= 8) {
3202       emit_opcode(cbuf, Assembler::REX_B);
3203     }
3204     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3205     emit_d32(cbuf, -1);
3206 
3207     // jl,s done
3208     emit_opcode(cbuf, 0x7C);
3209     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3210 
3211     // setne $dst
3212     if (dstenc >= 4) {
3213       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3214     }
3215     emit_opcode(cbuf, 0x0F);
3216     emit_opcode(cbuf, 0x95);
3217     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3218 
3219     // movzbl $dst, $dst
3220     if (dstenc >= 4) {
3221       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3222     }
3223     emit_opcode(cbuf, 0x0F);
3224     emit_opcode(cbuf, 0xB6);
3225     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3226   %}
3227 
3228   enc_class Push_ResultXD(regD dst) %{
3229     int dstenc = $dst$$reg;
3230 
3231     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3232 
3233     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3234     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3235     if (dstenc >= 8) {
3236       emit_opcode(cbuf, Assembler::REX_R);
3237     }
3238     emit_opcode  (cbuf, 0x0F );
3239     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3240     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3241 
3242     // add rsp,8
3243     emit_opcode(cbuf, Assembler::REX_W);
3244     emit_opcode(cbuf,0x83);
3245     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3246     emit_d8(cbuf,0x08);
3247   %}
3248 
3249   enc_class Push_SrcXD(regD src) %{
3250     int srcenc = $src$$reg;
3251 
3252     // subq rsp,#8
3253     emit_opcode(cbuf, Assembler::REX_W);
3254     emit_opcode(cbuf, 0x83);
3255     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3256     emit_d8(cbuf, 0x8);
3257 
3258     // movsd [rsp],src
3259     emit_opcode(cbuf, 0xF2);
3260     if (srcenc >= 8) {
3261       emit_opcode(cbuf, Assembler::REX_R);
3262     }
3263     emit_opcode(cbuf, 0x0F);
3264     emit_opcode(cbuf, 0x11);
3265     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3266 
3267     // fldd [rsp]
3268     emit_opcode(cbuf, 0x66);
3269     emit_opcode(cbuf, 0xDD);
3270     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3271   %}
3272 
3273 
3274   enc_class movq_ld(regD dst, memory mem) %{
3275     MacroAssembler _masm(&cbuf);
3276     __ movq($dst$$XMMRegister, $mem$$Address);
3277   %}
3278 
3279   enc_class movq_st(memory mem, regD src) %{
3280     MacroAssembler _masm(&cbuf);
3281     __ movq($mem$$Address, $src$$XMMRegister);
3282   %}
3283 
3284   enc_class pshufd_8x8(regF dst, regF src) %{
3285     MacroAssembler _masm(&cbuf);
3286 
3287     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3288     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3289     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3290   %}
3291 
3292   enc_class pshufd_4x16(regF dst, regF src) %{
3293     MacroAssembler _masm(&cbuf);
3294 
3295     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3296   %}
3297 
3298   enc_class pshufd(regD dst, regD src, int mode) %{
3299     MacroAssembler _masm(&cbuf);
3300 
3301     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3302   %}
3303 
3304   enc_class pxor(regD dst, regD src) %{
3305     MacroAssembler _masm(&cbuf);
3306 
3307     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3308   %}
3309 
3310   enc_class mov_i2x(regD dst, rRegI src) %{
3311     MacroAssembler _masm(&cbuf);
3312 
3313     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3314   %}
3315 
3316   // obj: object to lock
3317   // box: box address (header location) -- killed
3318   // tmp: rax -- killed
3319   // scr: rbx -- killed
3320   //
3321   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3322   // from i486.ad.  See that file for comments.
3323   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3324   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3325 
3326 
  // Emits the inline monitor-enter sequence.  One of three code shapes is
  // generated, selected by the EmitSync flag bits tested below:
  //   EmitSync & 1 : store the unused mark into the box and compare rsp
  //                  with NULL_WORD; no locking attempt is emitted here
  //   EmitSync & 2 : stack-lock attempt via CAS plus the recursive-lock
  //                  check
  //   otherwise    : the default shape, which additionally handles an
  //                  inflated monitor inline
  // When _counters is non-NULL, lock statistics counters are updated too.
  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
  %{
    Register objReg = as_Register((int)$obj$$reg);
    Register boxReg = as_Register((int)$box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);
    MacroAssembler masm(&cbuf);

    // Verify uniqueness of register assignments -- necessary but not sufficient
    assert (objReg != boxReg && objReg != tmpReg &&
            objReg != scrReg && tmpReg != scrReg, "invariant") ;

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
        // NOTE(review): presumably this compare exists only to leave the
        // flags in a "not equal" state for the code that follows -- confirm.
        masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }
        // tmpReg = (word at [obj+0]) | 1; that value is stored into the box
        // and then used as the compare value of the CAS below.
        // QQQ was movl...
        masm.movptr(tmpReg, 0x1);
        masm.orptr(tmpReg, Address(objReg, 0));
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (os::is_MP()) {
          masm.lock();
        }
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);

        // Recursive locking
        // box = (tmpReg - rsp) & (7 - page_size)
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);

        masm.bind(DONE_LABEL);
        masm.nop(); // avoid branch to branch
    } else {
        // NOTE(review): Egress is declared but never bound or referenced
        // in this block.
        Label DONE_LABEL, IsInflated, Egress;

        masm.movptr(tmpReg, Address(objReg, 0)) ;
        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
        masm.jcc   (Assembler::notZero, IsInflated) ;

        // it's stack-locked, biased or neutral
        // TODO: optimize markword triage order to reduce the number of
        // conditional branches in the most common cases.
        // Beware -- there's a subtle invariant that fetch of the markword
        // at [FETCH], below, will never observe a biased encoding (*101b).
        // If this invariant is not held we'll suffer exclusion (safety) failure.

        if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
        }

        // was q will it destroy high?
        masm.orl   (tmpReg, 1) ;
        masm.movptr(Address(boxReg, 0), tmpReg) ;
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jcc   (Assembler::equal, DONE_LABEL);

        // Recursive locking
        // box = (tmpReg - rsp) & (7 - page_size)
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jmp   (DONE_LABEL) ;

        masm.bind  (IsInflated) ;
        // It's inflated

        // TODO: someday avoid the ST-before-CAS penalty by
        // relocating (deferring) the following ST.
        // We should also think about trying a CAS without having
        // fetched _owner.  If the CAS is successful we may
        // avoid an RTO->RTS upgrade on the $line.
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;

        // tmpReg still holds the word loaded from [obj+0], which has the
        // 0x02 bit set on this path.  NOTE(review): the "-2" on the
        // ObjectMonitor offsets presumably cancels that bit -- confirm.
        masm.mov    (boxReg, tmpReg) ;
        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
        masm.testptr(tmpReg, tmpReg) ;
        masm.jcc    (Assembler::notZero, DONE_LABEL) ;

        // It's inflated and appears unlocked
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
        // Intentional fall-through into DONE_LABEL ...

        masm.bind  (DONE_LABEL) ;
        masm.nop   () ;                 // avoid jmp to jmp
    }
  %}
3435 
3436   // Fast_Unlock: emits the inline fast path of monitor exit.
3437   // obj: object to unlock
3438   // box: box address (displaced header location), killed
       //      (declared rax_RegP: the cmpxchgptr calls below implicitly use RAX)
       // tmp: killed scratch register; cannot be obj nor box
       //      (the original note named it "RBX"; the signature only says rRegP)
       // The outcome is reported through ZF: ZF=1 means success, ZF=0 means
       // the caller must take the slow path (see LGoSlowPath / LSuccess below).
3439   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3440   %{
3441
3442     Register objReg = as_Register($obj$$reg);
3443     Register boxReg = as_Register($box$$reg);
3444     Register tmpReg = as_Register($tmp$$reg);
3445     MacroAssembler masm(&cbuf);
3446
3447     if (EmitSync & 4) {
            // Diagnostic variant: the only instruction emitted is this compare.
            // NOTE(review): presumably rsp != 0 leaves ZF=0 so every unlock
            // goes to the slow path -- confirm.
3448        masm.cmpptr(rsp, 0) ;
3449     } else
3450     if (EmitSync & 8) {
            // Simplified variant: handles only biased and stack locks.
3451        Label DONE_LABEL;
3452        if (UseBiasedLocking) {
3453          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3454        }
3455
3456        // Check whether the displaced header is 0
3457        //(=> recursive unlock)
3458        masm.movptr(tmpReg, Address(boxReg, 0));
3459        masm.testptr(tmpReg, tmpReg);
3460        masm.jcc(Assembler::zero, DONE_LABEL);
3461
3462        // If not recursive lock, reset the header to displaced header
3463        if (os::is_MP()) {
3464          masm.lock();
3465        }
3466        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3467        masm.bind(DONE_LABEL);
3468        masm.nop(); // avoid branch to branch
3469     } else {
            // Default variant: handles recursive, stack-locked and inflated cases.
3470        Label DONE_LABEL, Stacked, CheckSucc ;
3471
3472        if (UseBiasedLocking && !UseOptoBiasInlining) {
3473          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3474        }
3475
3476        masm.movptr(tmpReg, Address(objReg, 0)) ;                  // tmp = object header word
3477        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;      // displaced header == 0 ?
3478        masm.jcc   (Assembler::zero, DONE_LABEL) ;                 // yes: recursive unlock, done with ZF=1
3479        masm.testl (tmpReg, 0x02) ;                                // bit 0x02 of the header set ?
3480        masm.jcc   (Assembler::zero, Stacked) ;                    // no: take the stack-lock path
3481
3482        // It's inflated
            // NOTE(review): the "-2" on every ObjectMonitor offset presumably cancels
            // the 0x02 bit still present in tmpReg (tested above) -- confirm.
            // (owner ^ current thread) | recursions != 0  =>  not the simple
            // owned-once case; exit with ZF=0.
3483        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3484        masm.xorptr(boxReg, r15_thread) ;
3485        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3486        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
            // cxq | EntryList != 0  =>  go to CheckSucc; otherwise just clear
            // the owner field (ZF is still 1 from the orptr) and finish.
3487        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3488        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3489        masm.jcc   (Assembler::notZero, CheckSucc) ;
3490        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3491        masm.jmp   (DONE_LABEL) ;
3492
            // When bit 65536 of EmitSync is set this sequence is omitted and
            // CheckSucc is bound right before DONE_LABEL instead (see below).
3493        if ((EmitSync & 65536) == 0) {
3494          Label LSuccess, LGoSlowPath ;
3495          masm.bind  (CheckSucc) ;
              // succ field is NULL: go to the slow path.
3496          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3497          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3498
3499          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3500          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3501          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3502          // are all faster when the write buffer is populated.
              // Clear the owner field, then (on MP) issue a locked add of 0 to
              // the top of stack as the MEMBAR mentioned above.
3503          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3504          if (os::is_MP()) {
3505             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3506          }
              // Re-check succ after the store: still non-NULL means success.
3507          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3508          masm.jcc   (Assembler::notZero, LSuccess) ;
3509
              // succ became NULL: try to swing owner from NULL (RAX) back to this
              // thread.  If the exchange fails the unlock is treated as success;
              // if it succeeds, fall into the slow path.
3510          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3511          if (os::is_MP()) { masm.lock(); }
3512          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3513          masm.jcc   (Assembler::notEqual, LSuccess) ;
3514          // Intentional fall-through into slow-path
3515
3516          masm.bind  (LGoSlowPath) ;
3517          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3518          masm.jmp   (DONE_LABEL) ;
3519
3520          masm.bind  (LSuccess) ;
3521          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3522          masm.jmp   (DONE_LABEL) ;
3523        }
3524
            // Stack-locked: reload the displaced header from the box and try to
            // CAS it back into the object header; ZF carries the CAS outcome.
3525        masm.bind  (Stacked) ;
3526        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3527        if (os::is_MP()) { masm.lock(); }
3528        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3529
3530        if (EmitSync & 65536) {
3531           masm.bind (CheckSucc) ;
3532        }
3533        masm.bind(DONE_LABEL);
3534        if (EmitSync & 32768) {
3535           masm.nop();                      // avoid branch to branch
3536        }
3537     }
3538   %}
3539 
3540 
3541   enc_class enc_rethrow()  // Emits a jmp rel32 to OptoRuntime::rethrow_stub().
3542   %{
3543     cbuf.set_insts_mark();
3544     emit_opcode(cbuf, 0xE9); // jmp entry
         // Displacement = target - (current end of code + 4 bytes for the
         // displacement itself); tagged with a runtime-call relocation.
3545     emit_d32_reloc(cbuf,
3546                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3547                    runtime_call_Relocation::spec(),
3548                    RELOC_DISP32);
3549   %}
3550 
3551   enc_class absF_encoding(regF dst)  // Float abs: ANDs dst with the constant at float_sign_mask().
3552   %{
3553     int dstenc = $dst$$reg;
3554     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3555
3556     cbuf.set_insts_mark();
         // Registers 8..15 need a REX.R prefix; only the low 3 bits go in ModRM.
3557     if (dstenc >= 8) {
3558       emit_opcode(cbuf, Assembler::REX_R);
3559       dstenc -= 8;
3560     }
3561     // XXX reg_mem doesn't support RIP-relative addressing yet
         // Hand-encoded 0F 54 /r (ANDPS) with mod=00, r/m=101 followed by a
         // relocated 32-bit displacement to the mask.
3562     emit_opcode(cbuf, 0x0F);
3563     emit_opcode(cbuf, 0x54);
3564     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3565     emit_d32_reloc(cbuf, signmask_address);
3566   %}
3567 
3568   enc_class absD_encoding(regD dst)  // Double abs: ANDs dst with the constant at double_sign_mask().
3569   %{
3570     int dstenc = $dst$$reg;
3571     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3572
3573     cbuf.set_insts_mark();
3574     emit_opcode(cbuf, 0x66);             // operand-size prefix: selects the packed-double form (ANDPD)
         // Registers 8..15 need a REX.R prefix (emitted after 0x66); only the
         // low 3 bits go in ModRM.
3575     if (dstenc >= 8) {
3576       emit_opcode(cbuf, Assembler::REX_R);
3577       dstenc -= 8;
3578     }
3579     // XXX reg_mem doesn't support RIP-relative addressing yet
         // Hand-encoded 0F 54 /r with mod=00, r/m=101 followed by a relocated
         // 32-bit displacement to the mask.
3580     emit_opcode(cbuf, 0x0F);
3581     emit_opcode(cbuf, 0x54);
3582     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3583     emit_d32_reloc(cbuf, signmask_address);
3584   %}
3585 
3586   enc_class negF_encoding(regF dst)  // Float negate: XORs dst with the constant at float_sign_flip().
3587   %{
3588     int dstenc = $dst$$reg;
3589     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3590
3591     cbuf.set_insts_mark();
         // Registers 8..15 need a REX.R prefix; only the low 3 bits go in ModRM.
3592     if (dstenc >= 8) {
3593       emit_opcode(cbuf, Assembler::REX_R);
3594       dstenc -= 8;
3595     }
3596     // XXX reg_mem doesn't support RIP-relative addressing yet
         // Hand-encoded 0F 57 /r (XORPS) with mod=00, r/m=101 followed by a
         // relocated 32-bit displacement to the flip constant.
3597     emit_opcode(cbuf, 0x0F);
3598     emit_opcode(cbuf, 0x57);
3599     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3600     emit_d32_reloc(cbuf, signflip_address);
3601   %}
3602 
3603   enc_class negD_encoding(regD dst)  // Double negate: XORs dst with the constant at double_sign_flip().
3604   %{
3605     int dstenc = $dst$$reg;
3606     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3607
3608     cbuf.set_insts_mark();
3609     emit_opcode(cbuf, 0x66);             // operand-size prefix: selects the packed-double form (XORPD)
         // Registers 8..15 need a REX.R prefix (emitted after 0x66); only the
         // low 3 bits go in ModRM.
3610     if (dstenc >= 8) {
3611       emit_opcode(cbuf, Assembler::REX_R);
3612       dstenc -= 8;
3613     }
3614     // XXX reg_mem doesn't support RIP-relative addressing yet
         // Hand-encoded 0F 57 /r with mod=00, r/m=101 followed by a relocated
         // 32-bit displacement to the flip constant.
3615     emit_opcode(cbuf, 0x0F);
3616     emit_opcode(cbuf, 0x57);
3617     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3618     emit_d32_reloc(cbuf, signflip_address);
3619   %}
3620 
3621   enc_class f2i_fixup(rRegI dst, regF src)  // If dst == 0x80000000, recompute it via the f2i_fixup stub.
3622   %{
         // NOTE(review): 0x80000000 is presumably the sentinel left in dst by the
         // preceding float->int conversion instruction -- confirm at the use site.
         // Emitted sequence: cmp / jne done / sub rsp,8 / movss [rsp],src /
         // call stub / pop dst.
3623     int dstenc = $dst$$reg;
3624     int srcenc = $src$$reg;
3625
3626     // cmpl $dst, #0x80000000
3627     if (dstenc >= 8) {
3628       emit_opcode(cbuf, Assembler::REX_B);
3629     }
3630     emit_opcode(cbuf, 0x81);
3631     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3632     emit_d32(cbuf, 0x80000000);
3633
3634     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one
         // byte for each REX prefix needed by src (movss) and dst (popq).
3635     emit_opcode(cbuf, 0x75);
3636     if (srcenc < 8 && dstenc < 8) {
3637       emit_d8(cbuf, 0xF);
3638     } else if (srcenc >= 8 && dstenc >= 8) {
3639       emit_d8(cbuf, 0x11);
3640     } else {
3641       emit_d8(cbuf, 0x10);
3642     }
3643
3644     // subq rsp, #8
3645     emit_opcode(cbuf, Assembler::REX_W);
3646     emit_opcode(cbuf, 0x83);
3647     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3648     emit_d8(cbuf, 8);
3649
3650     // movss [rsp], $src
3651     emit_opcode(cbuf, 0xF3);
3652     if (srcenc >= 8) {
3653       emit_opcode(cbuf, Assembler::REX_R);
3654     }
3655     emit_opcode(cbuf, 0x0F);
3656     emit_opcode(cbuf, 0x11);
3657     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3658
3659     // call f2i_fixup
         // rel32 is measured from the end of the 4-byte displacement field.
3660     cbuf.set_insts_mark();
3661     emit_opcode(cbuf, 0xE8);
3662     emit_d32_reloc(cbuf,
3663                    (int)
3664                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3665                    runtime_call_Relocation::spec(),
3666                    RELOC_DISP32);
3667
3668     // popq $dst
         // Pops the 8-byte slot reserved above into dst.
3669     if (dstenc >= 8) {
3670       emit_opcode(cbuf, Assembler::REX_B);
3671     }
3672     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3673
3674     // done:
3675   %}
3676 
3677   enc_class f2l_fixup(rRegL dst, regF src)  // If dst equals the 64-bit sentinel, recompute it via the f2l_fixup stub.
3678   %{
         // Emitted sequence: cmp with memory constant / jne done / sub rsp,8 /
         // movss [rsp],src / call stub / pop dst.
3679     int dstenc = $dst$$reg;
3680     int srcenc = $src$$reg;
         // The 64-bit compare constant is read from the double_sign_flip() location.
3681     address const_address = (address) StubRoutines::x86::double_sign_flip();
3682
3683     // cmpq $dst, [0x8000000000000000]
         // Opcode 0x39 with a RIP-relative memory operand (mod=00, r/m=101);
         // dst goes in the reg field, so dst >= 8 selects REX_WR.
3684     cbuf.set_insts_mark();
3685     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3686     emit_opcode(cbuf, 0x39);
3687     // XXX reg_mem doesn't support RIP-relative addressing yet
3688     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3689     emit_d32_reloc(cbuf, const_address);
3690
3691
3692     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one
         // byte for each REX prefix needed by src (movss) and dst (popq).
3693     emit_opcode(cbuf, 0x75);
3694     if (srcenc < 8 && dstenc < 8) {
3695       emit_d8(cbuf, 0xF);
3696     } else if (srcenc >= 8 && dstenc >= 8) {
3697       emit_d8(cbuf, 0x11);
3698     } else {
3699       emit_d8(cbuf, 0x10);
3700     }
3701
3702     // subq rsp, #8
3703     emit_opcode(cbuf, Assembler::REX_W);
3704     emit_opcode(cbuf, 0x83);
3705     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3706     emit_d8(cbuf, 8);
3707
3708     // movss [rsp], $src
3709     emit_opcode(cbuf, 0xF3);
3710     if (srcenc >= 8) {
3711       emit_opcode(cbuf, Assembler::REX_R);
3712     }
3713     emit_opcode(cbuf, 0x0F);
3714     emit_opcode(cbuf, 0x11);
3715     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3716
3717     // call f2l_fixup
         // rel32 is measured from the end of the 4-byte displacement field.
3718     cbuf.set_insts_mark();
3719     emit_opcode(cbuf, 0xE8);
3720     emit_d32_reloc(cbuf,
3721                    (int)
3722                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3723                    runtime_call_Relocation::spec(),
3724                    RELOC_DISP32);
3725
3726     // popq $dst
         // Pops the 8-byte slot reserved above into dst.
3727     if (dstenc >= 8) {
3728       emit_opcode(cbuf, Assembler::REX_B);
3729     }
3730     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3731
3732     // done:
3733   %}
3734 
3735   enc_class d2i_fixup(rRegI dst, regD src)  // If dst == 0x80000000, recompute it via the d2i_fixup stub.
3736   %{
         // NOTE(review): 0x80000000 is presumably the sentinel left in dst by the
         // preceding double->int conversion instruction -- confirm at the use site.
         // Emitted sequence: cmp / jne done / sub rsp,8 / movsd [rsp],src /
         // call stub / pop dst.
3737     int dstenc = $dst$$reg;
3738     int srcenc = $src$$reg;
3739
3740     // cmpl $dst, #0x80000000
3741     if (dstenc >= 8) {
3742       emit_opcode(cbuf, Assembler::REX_B);
3743     }
3744     emit_opcode(cbuf, 0x81);
3745     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3746     emit_d32(cbuf, 0x80000000);
3747
3748     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one
         // byte for each REX prefix needed by src (movsd) and dst (popq).
3749     emit_opcode(cbuf, 0x75);
3750     if (srcenc < 8 && dstenc < 8) {
3751       emit_d8(cbuf, 0xF);
3752     } else if (srcenc >= 8 && dstenc >= 8) {
3753       emit_d8(cbuf, 0x11);
3754     } else {
3755       emit_d8(cbuf, 0x10);
3756     }
3757
3758     // subq rsp, #8
3759     emit_opcode(cbuf, Assembler::REX_W);
3760     emit_opcode(cbuf, 0x83);
3761     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3762     emit_d8(cbuf, 8);
3763
3764     // movsd [rsp], $src
3765     emit_opcode(cbuf, 0xF2);
3766     if (srcenc >= 8) {
3767       emit_opcode(cbuf, Assembler::REX_R);
3768     }
3769     emit_opcode(cbuf, 0x0F);
3770     emit_opcode(cbuf, 0x11);
3771     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3772
3773     // call d2i_fixup
         // rel32 is measured from the end of the 4-byte displacement field.
3774     cbuf.set_insts_mark();
3775     emit_opcode(cbuf, 0xE8);
3776     emit_d32_reloc(cbuf,
3777                    (int)
3778                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3779                    runtime_call_Relocation::spec(),
3780                    RELOC_DISP32);
3781
3782     // popq $dst
         // Pops the 8-byte slot reserved above into dst.
3783     if (dstenc >= 8) {
3784       emit_opcode(cbuf, Assembler::REX_B);
3785     }
3786     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3787
3788     // done:
3789   %}
3790 
3791   enc_class d2l_fixup(rRegL dst, regD src)  // If dst equals the 64-bit sentinel, recompute it via the d2l_fixup stub.
3792   %{
         // Emitted sequence: cmp with memory constant / jne done / sub rsp,8 /
         // movsd [rsp],src / call stub / pop dst.
3793     int dstenc = $dst$$reg;
3794     int srcenc = $src$$reg;
         // The 64-bit compare constant is read from the double_sign_flip() location.
3795     address const_address = (address) StubRoutines::x86::double_sign_flip();
3796
3797     // cmpq $dst, [0x8000000000000000]
         // Opcode 0x39 with a RIP-relative memory operand (mod=00, r/m=101);
         // dst goes in the reg field, so dst >= 8 selects REX_WR.
3798     cbuf.set_insts_mark();
3799     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3800     emit_opcode(cbuf, 0x39);
3801     // XXX reg_mem doesn't support RIP-relative addressing yet
3802     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3803     emit_d32_reloc(cbuf, const_address);
3804
3805
3806     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one
         // byte for each REX prefix needed by src (movsd) and dst (popq).
3807     emit_opcode(cbuf, 0x75);
3808     if (srcenc < 8 && dstenc < 8) {
3809       emit_d8(cbuf, 0xF);
3810     } else if (srcenc >= 8 && dstenc >= 8) {
3811       emit_d8(cbuf, 0x11);
3812     } else {
3813       emit_d8(cbuf, 0x10);
3814     }
3815
3816     // subq rsp, #8
3817     emit_opcode(cbuf, Assembler::REX_W);
3818     emit_opcode(cbuf, 0x83);
3819     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3820     emit_d8(cbuf, 8);
3821
3822     // movsd [rsp], $src
3823     emit_opcode(cbuf, 0xF2);
3824     if (srcenc >= 8) {
3825       emit_opcode(cbuf, Assembler::REX_R);
3826     }
3827     emit_opcode(cbuf, 0x0F);
3828     emit_opcode(cbuf, 0x11);
3829     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3830
3831     // call d2l_fixup
         // rel32 is measured from the end of the 4-byte displacement field.
3832     cbuf.set_insts_mark();
3833     emit_opcode(cbuf, 0xE8);
3834     emit_d32_reloc(cbuf,
3835                    (int)
3836                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3837                    runtime_call_Relocation::spec(),
3838                    RELOC_DISP32);
3839
3840     // popq $dst
         // Pops the 8-byte slot reserved above into dst.
3841     if (dstenc >= 8) {
3842       emit_opcode(cbuf, Assembler::REX_B);
3843     }
3844     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3845
3846     // done:
3847   %}
3848 %}
3849 
3850 
3851 
3852 //----------FRAME--------------------------------------------------------------
3853 // Definition of frame structure and management information.
3854 //
3855 //  S T A C K   L A Y O U T    Allocators stack-slot number
3856 //                             |   (to get allocators register number
3857 //  G  Owned by    |        |  v    add OptoReg::stack0())
3858 //  r   CALLER     |        |
3859 //  o     |        +--------+      pad to even-align allocators stack-slot
3860 //  w     V        |  pad0  |        numbers; owned by CALLER
3861 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3862 //  h     ^        |   in   |  5
3863 //        |        |  args  |  4   Holes in incoming args owned by SELF
3864 //  |     |        |        |  3
3865 //  |     |        +--------+
3866 //  V     |        | old out|      Empty on Intel, window on Sparc
3867 //        |    old |preserve|      Must be even aligned.
3868 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3869 //        |        |   in   |  3   area for Intel ret address
3870 //     Owned by    |preserve|      Empty on Sparc.
3871 //       SELF      +--------+
3872 //        |        |  pad2  |  2   pad to align old SP
3873 //        |        +--------+  1
3874 //        |        | locks  |  0
3875 //        |        +--------+----> OptoReg::stack0(), even aligned
3876 //        |        |  pad1  | 11   pad to align new SP
3877 //        |        +--------+
3878 //        |        |        | 10
3879 //        |        | spills |  9   spills
3880 //        V        |        |  8   (pad0 slot for callee)
3881 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3882 //        ^        |  out   |  7
3883 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3884 //     Owned by    +--------+
3885 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3886 //        |    new |preserve|      Must be even-aligned.
3887 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3888 //        |        |        |
3889 //
3890 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3891 //         known from SELF's arguments and the Java calling convention.
3892 //         Region 6-7 is determined per call site.
3893 // Note 2: If the calling convention leaves holes in the incoming argument
3894 //         area, those holes are owned by SELF.  Holes in the outgoing area
3895 //         are owned by the CALLEE.  Holes should not be necessary in the
3896 //         incoming area, as the Java calling convention is completely under
3897 //         the control of the AD file.  Doubles can be sorted and packed to
3898 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3899 //         varargs C calling conventions.
3900 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3901 //         even aligned with pad0 as needed.
3902 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3903 //         region 6-11 is even aligned; it may be padded out more so that
3904 //         the region from SP to FP meets the minimum stack alignment.
3905 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3906 //         alignment.  Region 11, pad1, may be dynamically extended so that
3907 //         SP meets the minimum alignment.
3908 
3909 frame  // Frame layout and calling-convention parameters for compiled code.
3910 %{
3911   // What direction does stack grow in (assumed to be same for C & Java)
3912   stack_direction(TOWARDS_LOW);
3913
3914   // These three registers define part of the calling convention
3915   // between compiled code and the interpreter.
3916   inline_cache_reg(RAX);                // Inline Cache Register
3917   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3918                                         // calling interpreter
3919
3920   // Optional: name the operand used by cisc-spilling to access
3921   // [stack_pointer + offset]
3922   cisc_spilling_operand_name(indOffset32);
3923
3924   // Number of stack slots consumed by locking an object
3925   sync_stack_slots(2);
3926
3927   // Compiled code's Frame Pointer
3928   frame_pointer(RSP);
3929
3930   // Interpreter stores its frame pointer in a register which is
3931   // stored to the stack by I2CAdaptors.
3932   // I2CAdaptors convert from interpreted java to compiled java.
3933   interpreter_frame_pointer(RBP);
3934
3935   // Stack alignment requirement
3936   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3937
3938   // Number of stack slots between incoming argument block and the start of
3939   // a new frame.  The PROLOG must add this many slots to the stack.  The
3940   // EPILOG must remove this many slots.  amd64 needs two slots for
3941   // return address.
       // NOTE(review): the value below is 4 (plus 2 when VerifyStackAtCalls is
       // set), not 2 -- confirm what the two slots beyond the return address hold.
3942   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3943
3944   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3945   // for calls to C.  Supports the var-args backing area for register parms.
3946   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3947
3948   // The after-PROLOG location of the return address.  Location of
3949   // return address specifies a type (REG or STACK) and a number
3950   // representing the register number (i.e. - use a register name) or
3951   // stack slot.
3952   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3953   // Otherwise, it is above the locks and verification slot and alignment word
3954   return_addr(STACK - 2 +
3955               round_to(2 + 2 * VerifyStackAtCalls +
3956                        Compile::current()->fixed_slots(),
3957                        WordsPerLong * 2));
3958
3959   // Body of function which returns an integer array locating
3960   // arguments either in registers or in stack slots.  Passed an array
3961   // of ideal registers called "sig" and a "length" count.  Stack-slot
3962   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3963   // arguments for a CALLEE.  Incoming stack arguments are
3964   // automatically biased by the preserve_stack_slots field above.
3965
3966   calling_convention
3967   %{
3968     // No difference between ingoing/outgoing just pass false
3969     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3970   %}
3971
3972   c_calling_convention
3973   %{
3974     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
3975     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3976   %}
3977
3978   // Location of compiled Java return values.  Same as C for now.
       // Returns the (hi, lo) register pair for ideal_reg by indexing the two
       // tables below; integer/pointer/long values use RAX, float/double XMM0.
3979   return_value
3980   %{
         // NOTE(review): the tables carry an Op_RegN row, but this assert starts
         // its accepted range at Op_RegI -- confirm whether Op_RegN can reach here.
3981     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3982            "only return normal values");
3983
         // Low half of the return location, indexed by ideal register opcode.
3984     static const int lo[Op_RegL + 1] = {
3985       0,
3986       0,
3987       RAX_num,  // Op_RegN
3988       RAX_num,  // Op_RegI
3989       RAX_num,  // Op_RegP
3990       XMM0_num, // Op_RegF
3991       XMM0_num, // Op_RegD
3992       RAX_num   // Op_RegL
3993     };
         // High half; OptoReg::Bad marks the types that have no high half here.
3994     static const int hi[Op_RegL + 1] = {
3995       0,
3996       0,
3997       OptoReg::Bad, // Op_RegN
3998       OptoReg::Bad, // Op_RegI
3999       RAX_H_num,    // Op_RegP
4000       OptoReg::Bad, // Op_RegF
4001       XMM0_H_num,   // Op_RegD
4002       RAX_H_num     // Op_RegL
4003     };
         // Guards against a new ideal register type being added without a table row.
4004     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4005     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4006   %}
4007 %}
4008 
4009 //----------ATTRIBUTES---------------------------------------------------------
     // Declares the operand and instruction attributes and the default value
     // each one takes when a definition does not set it explicitly.
4010 //----------Operand Attributes-------------------------------------------------
4011 op_attrib op_cost(0);        // Required cost attribute
4012
4013 //----------Instruction Attributes---------------------------------------------
4014 ins_attrib ins_cost(100);       // Required cost attribute
4015 ins_attrib ins_size(8);         // Required size attribute (in bits)
4016 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4017 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4018                                 // a non-matching short branch variant
4019                                 // of some long branch?
4020 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4021                                 // be a power of 2) specifies the
4022                                 // alignment that some part of the
4023                                 // instruction (not necessarily the
4024                                 // start) requires.  If > 1, a
4025                                 // compute_padding() function must be
4026                                 // provided for the instruction
4027 
4028 //----------OPERANDS-----------------------------------------------------------
4029 // Operand definitions must precede instruction definitions for correct parsing
4030 // in the ADLC because operands constitute user defined types which are used in
4031 // instruction definitions.
4032 
4033 //----------Simple Operands----------------------------------------------------
4034 // Immediate Operands
4035 // Integer Immediate: matches any ConI node (no predicate), cost 10.
4036 operand immI()
4037 %{
4038   match(ConI);
4039
4040   op_cost(10);
4041   format %{ %}
4042   interface(CONST_INTER);
4043 %}
4044 
4045 // Constant for test vs zero: ConI whose value is exactly 0, cost 0.
4046 operand immI0()
4047 %{
4048   predicate(n->get_int() == 0);
4049   match(ConI);
4050
4051   op_cost(0);
4052   format %{ %}
4053   interface(CONST_INTER);
4054 %}
4055 
4056 // Constant for increment: ConI whose value is exactly 1, cost 0.
4057 operand immI1()
4058 %{
4059   predicate(n->get_int() == 1);
4060   match(ConI);
4061
4062   op_cost(0);
4063   format %{ %}
4064   interface(CONST_INTER);
4065 %}
4066 
4067 // Constant for decrement: ConI whose value is exactly -1, cost 0.
4068 operand immI_M1()
4069 %{
4070   predicate(n->get_int() == -1);
4071   match(ConI);
4072
4073   op_cost(0);
4074   format %{ %}
4075   interface(CONST_INTER);
4076 %}
4077 
4078 // Valid scale values for addressing modes: ConI in the range 0..3 inclusive.
4079 operand immI2()
4080 %{
4081   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4082   match(ConI);
4083
4084   format %{ %}
4085   interface(CONST_INTER);
4086 %}
4087 
4088 operand immI8()  // ConI in [-0x80, 0x80), i.e. representable as a signed 8-bit value; cost 5.
4089 %{
4090   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4091   match(ConI);
4092
4093   op_cost(5);
4094   format %{ %}
4095   interface(CONST_INTER);
4096 %}
4097 
4098 operand immI16()  // ConI in [-32768, 32767], i.e. representable as a signed 16-bit value; cost 10.
4099 %{
4100   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4101   match(ConI);
4102
4103   op_cost(10);
4104   format %{ %}
4105   interface(CONST_INTER);
4106 %}
4107 
4108 // Constant for long shifts: ConI whose value is exactly 32, cost 0.
4109 operand immI_32()
4110 %{
4111   predicate( n->get_int() == 32 );
4112   match(ConI);
4113
4114   op_cost(0);
4115   format %{ %}
4116   interface(CONST_INTER);
4117 %}
4118 
4119 // Constant for long shifts: ConI whose value is exactly 64, cost 0.
4120 operand immI_64()
4121 %{
4122   predicate( n->get_int() == 64 );
4123   match(ConI);
4124
4125   op_cost(0);
4126   format %{ %}
4127   interface(CONST_INTER);
4128 %}
4129 
4130 // Pointer Immediate: matches any ConP node (no predicate), cost 10.
4131 operand immP()
4132 %{
4133   match(ConP);
4134
4135   op_cost(10);
4136   format %{ %}
4137   interface(CONST_INTER);
4138 %}
4139 
4140 // NULL Pointer Immediate: ConP whose pointer value is 0, cost 5.
4141 operand immP0()
4142 %{
4143   predicate(n->get_ptr() == 0);
4144   match(ConP);
4145
4146   op_cost(5);
4147   format %{ %}
4148   interface(CONST_INTER);
4149 %}
4150 
4151 operand immP_poll() %{  // Non-null ConP equal to the address returned by os::get_polling_page().
4152   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
4153   match(ConP);
4154
4155   // formats are generated automatically for constants and base registers
4156   format %{ %}
4157   interface(CONST_INTER);
4158 %}
4159 
4160 // Narrow Pointer Immediate: matches any ConN node (no predicate), cost 10.
4161 operand immN() %{
4162   match(ConN);
4163
4164   op_cost(10);
4165   format %{ %}
4166   interface(CONST_INTER);
4167 %}
4168 
4169 // NULL Narrow Pointer Immediate: ConN whose narrow constant is 0, cost 5.
4170 operand immN0() %{
4171   predicate(n->get_narrowcon() == 0);
4172   match(ConN);
4173
4174   op_cost(5);
4175   format %{ %}
4176   interface(CONST_INTER);
4177 %}
4178 
4179 operand immP31()  // ConP that is not an oop pointer and has no bits set at or above bit 31; cost 5.
4180 %{
4181   predicate(!n->as_Type()->type()->isa_oopptr()
4182             && (n->get_ptr() >> 31) == 0);
4183   match(ConP);
4184
4185   op_cost(5);
4186   format %{ %}
4187   interface(CONST_INTER);
4188 %}
4189 
4190 
4191 // Long Immediate: matches any ConL node (no predicate), cost 20.
4192 operand immL()
4193 %{
4194   match(ConL);
4195
4196   op_cost(20);
4197   format %{ %}
4198   interface(CONST_INTER);
4199 %}
4200 
4201 // Long Immediate 8-bit: ConL in [-0x80, 0x80), cost 5.
4202 operand immL8()
4203 %{
4204   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4205   match(ConL);
4206
4207   op_cost(5);
4208   format %{ %}
4209   interface(CONST_INTER);
4210 %}
4211 
4212 // Long Immediate 32-bit unsigned: ConL unchanged by a round trip through unsigned int; cost 10.
4213 operand immUL32()
4214 %{
4215   predicate(n->get_long() == (unsigned int) (n->get_long()));
4216   match(ConL);
4217
4218   op_cost(10);
4219   format %{ %}
4220   interface(CONST_INTER);
4221 %}
4222 
4223 // Long Immediate 32-bit signed: ConL unchanged by a round trip through int; cost 15.
4224 operand immL32()
4225 %{
4226   predicate(n->get_long() == (int) (n->get_long()));
4227   match(ConL);
4228
4229   op_cost(15);
4230   format %{ %}
4231   interface(CONST_INTER);
4232 %}
4233 
4234 // Long Immediate zero: ConL whose value is exactly 0, cost 10.
4235 operand immL0()
4236 %{
4237   predicate(n->get_long() == 0L);
4238   match(ConL);
4239
4240   op_cost(10);
4241   format %{ %}
4242   interface(CONST_INTER);
4243 %}
4244 
4245 // Constant for increment: ConL whose value is exactly 1 (no explicit op_cost).
4246 operand immL1()
4247 %{
4248   predicate(n->get_long() == 1);
4249   match(ConL);
4250
4251   format %{ %}
4252   interface(CONST_INTER);
4253 %}
4254 
4255 // Constant for decrement: ConL whose value is exactly -1 (no explicit op_cost).
4256 operand immL_M1()
4257 %{
4258   predicate(n->get_long() == -1);
4259   match(ConL);
4260
4261   format %{ %}
4262   interface(CONST_INTER);
4263 %}
4264 
4265 // Long Immediate: the value 10 (ConL, no explicit op_cost).
4266 operand immL10()
4267 %{
4268   predicate(n->get_long() == 10);
4269   match(ConL);
4270
4271   format %{ %}
4272   interface(CONST_INTER);
4273 %}
4274 
4275 // Long immediate from 0 to 127 inclusive (ConL, cost 10).
4276 // Used for a shorter form of long mul by 10.
4277 operand immL_127()
4278 %{
4279   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4280   match(ConL);
4281
4282   op_cost(10);
4283   format %{ %}
4284   interface(CONST_INTER);
4285 %}
4286 
4287 // Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF; cost 20.
4288 operand immL_32bits()
4289 %{
4290   predicate(n->get_long() == 0xFFFFFFFFL);
4291   match(ConL);
4292   op_cost(20);
4293
4294   format %{ %}
4295   interface(CONST_INTER);
4296 %}
4297 
4298 // Float Immediate zero: ConF for which jint_cast of the value is 0; cost 5.
     // NOTE(review): presumably jint_cast reinterprets the bits, so only +0.0f
     // (not -0.0f) matches -- confirm against jint_cast's definition.
4299 operand immF0()
4300 %{
4301   predicate(jint_cast(n->getf()) == 0);
4302   match(ConF);
4303
4304   op_cost(5);
4305   format %{ %}
4306   interface(CONST_INTER);
4307 %}
4308 
4309 // Float Immediate: matches any ConF node (no predicate), cost 15.
4310 operand immF()
4311 %{
4312   match(ConF);
4313
4314   op_cost(15);
4315   format %{ %}
4316   interface(CONST_INTER);
4317 %}
4318 
4319 // Double Immediate zero: ConD for which jlong_cast of the value is 0; cost 5.
     // NOTE(review): presumably jlong_cast reinterprets the bits, so only +0.0
     // (not -0.0) matches -- confirm against jlong_cast's definition.
4320 operand immD0()
4321 %{
4322   predicate(jlong_cast(n->getd()) == 0);
4323   match(ConD);
4324
4325   op_cost(5);
4326   format %{ %}
4327   interface(CONST_INTER);
4328 %}
4329 
4330 // Double Immediate: matches any ConD node (no predicate), cost 15.
4331 operand immD()
4332 %{
4333   match(ConD);
4334
4335   op_cost(15);
4336   format %{ %}
4337   interface(CONST_INTER);
4338 %}
4339 
4340 // Immediates for special shifts (sign extend)
4341 
4342 // Integer constant 16 (one of the special-shift immediates): ConI equal to 16.
4343 operand immI_16()
4344 %{
4345   predicate(n->get_int() == 16);
4346   match(ConI);
4347
4348   format %{ %}
4349   interface(CONST_INTER);
4350 %}
4351 
4352 operand immI_24()  // ConI whose value is exactly 24 (no explicit op_cost).
4353 %{
4354   predicate(n->get_int() == 24);
4355   match(ConI);
4356
4357   format %{ %}
4358   interface(CONST_INTER);
4359 %}
4360 
4361 // Constant for byte-wide masking: ConI equal to 255.
4362 operand immI_255()
4363 %{
4364   predicate(n->get_int() == 255);
4365   match(ConI);
4366
4367   format %{ %}
4368   interface(CONST_INTER);
4369 %}
4370 
4371 // Constant for short-wide masking: ConI equal to 65535.
4372 operand immI_65535()
4373 %{
4374   predicate(n->get_int() == 65535);
4375   match(ConI);
4376
4377   format %{ %}
4378   interface(CONST_INTER);
4379 %}
4380 
4381 // Constant for byte-wide masking of longs: ConL equal to 255.
4382 operand immL_255()
4383 %{
4384   predicate(n->get_long() == 255);
4385   match(ConL);
4386
4387   format %{ %}
4388   interface(CONST_INTER);
4389 %}
4390 
4391 // Constant for short-wide masking of longs: ConL equal to 65535.
4392 operand immL_65535()
4393 %{
4394   predicate(n->get_long() == 65535);
4395   match(ConL);
4396
4397   format %{ %}
4398   interface(CONST_INTER);
4399 %}
4400 
4401 // Register Operands
4402 // Integer Register: allocated from register class int_reg.
4403 operand rRegI()
4404 %{
4405   constraint(ALLOC_IN_RC(int_reg));
4406   match(RegI);
4407
       // The fixed-register integer operands are also accepted as an rRegI.
4408   match(rax_RegI);
4409   match(rbx_RegI);
4410   match(rcx_RegI);
4411   match(rdx_RegI);
4412   match(rdi_RegI);
4413
4414   format %{ %}
4415   interface(REG_INTER);
4416 %}
4417 
4418 // Special Registers: integer operand allocated from class int_rax_reg, printed as "RAX".
4419 operand rax_RegI()
4420 %{
4421   constraint(ALLOC_IN_RC(int_rax_reg));
4422   match(RegI);
4423   match(rRegI);
4424
4425   format %{ "RAX" %}
4426   interface(REG_INTER);
4427 %}
4428 
4429 // Special Registers: integer operand allocated from class int_rbx_reg, printed as "RBX".
4430 operand rbx_RegI()
4431 %{
4432   constraint(ALLOC_IN_RC(int_rbx_reg));
4433   match(RegI);
4434   match(rRegI);
4435
4436   format %{ "RBX" %}
4437   interface(REG_INTER);
4438 %}
4439 
4440 operand rcx_RegI()  // Integer operand allocated from class int_rcx_reg, printed as "RCX".
4441 %{
4442   constraint(ALLOC_IN_RC(int_rcx_reg));
4443   match(RegI);
4444   match(rRegI);
4445
4446   format %{ "RCX" %}
4447   interface(REG_INTER);
4448 %}
4449 
4450 operand rdx_RegI()  // Integer operand allocated from class int_rdx_reg, printed as "RDX".
4451 %{
4452   constraint(ALLOC_IN_RC(int_rdx_reg));
4453   match(RegI);
4454   match(rRegI);
4455
4456   format %{ "RDX" %}
4457   interface(REG_INTER);
4458 %}
4459 
4460 operand rdi_RegI()  // Integer operand allocated from class int_rdi_reg, printed as "RDI".
4461 %{
4462   constraint(ALLOC_IN_RC(int_rdi_reg));
4463   match(RegI);
4464   match(rRegI);
4465
4466   format %{ "RDI" %}
4467   interface(REG_INTER);
4468 %}
4469 
4470 operand no_rcx_RegI()  // Integer operand allocated from class int_no_rcx_reg.
4471 %{
4472   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4473   match(RegI);
       // Accepts the fixed-register operands other than rcx_RegI.
4474   match(rax_RegI);
4475   match(rbx_RegI);
4476   match(rdx_RegI);
4477   match(rdi_RegI);
4478
4479   format %{ %}
4480   interface(REG_INTER);
4481 %}
4482 
4483 operand no_rax_rdx_RegI()  // Integer operand allocated from class int_no_rax_rdx_reg.
4484 %{
4485   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4486   match(RegI);
       // Accepts the fixed-register operands other than rax_RegI and rdx_RegI.
4487   match(rbx_RegI);
4488   match(rcx_RegI);
4489   match(rdi_RegI);
4490
4491   format %{ %}
4492   interface(REG_INTER);
4493 %}
4494 
4495 // Pointer Register: allocated from class any_reg; also accepts every pointer operand listed below.
4496 operand any_RegP()
4497 %{
4498   constraint(ALLOC_IN_RC(any_reg));
4499   match(RegP);
4500   match(rax_RegP);
4501   match(rbx_RegP);
4502   match(rdi_RegP);
4503   match(rsi_RegP);
4504   match(rbp_RegP);
4505   match(r15_RegP);
4506   match(rRegP);
4507
4508   format %{ %}
4509   interface(REG_INTER);
4510 %}
4511 
4512 operand rRegP()  // General pointer register operand, allocated from class ptr_reg.
4513 %{
4514   constraint(ALLOC_IN_RC(ptr_reg));
4515   match(RegP);
4516   match(rax_RegP);
4517   match(rbx_RegP);
4518   match(rdi_RegP);
4519   match(rsi_RegP);
4520   match(rbp_RegP);
4521   match(r15_RegP);  // See Q&A below about r15_RegP.
4522
4523   format %{ %}
4524   interface(REG_INTER);
4525 %}
4526 
4527 operand rRegN() %{
4528   constraint(ALLOC_IN_RC(int_reg));  // RegN operand; note it is allocated from the int_reg class
4529   match(RegN);
4530 
4531   format %{ %}
4532   interface(REG_INTER);
4533 %}
4534 
4535 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4536 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4537 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4538 // The output of an instruction is controlled by the allocator, which respects
4539 // register class masks, not match rules.  Unless an instruction mentions
4540 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4541 // by the allocator as an output.
4542 
4543 operand no_rax_RegP()
4544 %{
4545   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // pointer operand drawn from register class ptr_no_rax_reg
4546   match(RegP);
4547   match(rbx_RegP);  // fixed-register pointer operands accepted: rbx, rsi, rdi
4548   match(rsi_RegP);
4549   match(rdi_RegP);
4550 
4551   format %{ %}
4552   interface(REG_INTER);
4553 %}
4554 
4555 operand no_rbp_RegP()
4556 %{
4557   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // pointer operand drawn from register class ptr_no_rbp_reg
4558   match(RegP);
4559   match(rbx_RegP);  // NOTE(review): rax_RegP is not listed here although only rbp is excluded by name - confirm intended
4560   match(rsi_RegP);
4561   match(rdi_RegP);
4562 
4563   format %{ %}
4564   interface(REG_INTER);
4565 %}
4566 
4567 operand no_rax_rbx_RegP()
4568 %{
4569   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // pointer operand drawn from register class ptr_no_rax_rbx_reg
4570   match(RegP);
4571   match(rsi_RegP);  // fixed-register pointer operands accepted: rsi, rdi
4572   match(rdi_RegP);
4573 
4574   format %{ %}
4575   interface(REG_INTER);
4576 %}
4577 
4578 // Special Registers
4579 // Return a pointer value
4580 operand rax_RegP()
4581 %{
4582   constraint(ALLOC_IN_RC(ptr_rax_reg));  // pointer operand restricted to register class ptr_rax_reg
4583   match(RegP);
4584   match(rRegP);  // a general rRegP value is also accepted for this operand
4585 
4586   format %{ %}
4587   interface(REG_INTER);
4588 %}
4589 
4590 // Special Registers
4591 // Return a compressed pointer value
4592 operand rax_RegN()
4593 %{
4594   constraint(ALLOC_IN_RC(int_rax_reg));  // RegN operand restricted to the int_rax_reg class (same class as rax_RegI)
4595   match(RegN);
4596   match(rRegN);  // a general rRegN value is also accepted for this operand
4597 
4598   format %{ %}
4599   interface(REG_INTER);
4600 %}
4601 
4602 // Used in AtomicAdd
4603 operand rbx_RegP()
4604 %{
4605   constraint(ALLOC_IN_RC(ptr_rbx_reg));  // pointer operand restricted to register class ptr_rbx_reg
4606   match(RegP);
4607   match(rRegP);  // a general rRegP value is also accepted for this operand
4608 
4609   format %{ %}
4610   interface(REG_INTER);
4611 %}
4612 
4613 operand rsi_RegP()
4614 %{
4615   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand restricted to register class ptr_rsi_reg
4616   match(RegP);
4617   match(rRegP);  // a general rRegP value is also accepted for this operand
4618 
4619   format %{ %}
4620   interface(REG_INTER);
4621 %}
4622 
4623 // Used in rep stosq
4624 operand rdi_RegP()
4625 %{
4626   constraint(ALLOC_IN_RC(ptr_rdi_reg));  // pointer operand restricted to register class ptr_rdi_reg
4627   match(RegP);
4628   match(rRegP);  // a general rRegP value is also accepted for this operand
4629 
4630   format %{ %}
4631   interface(REG_INTER);
4632 %}
4633 
4634 operand rbp_RegP()
4635 %{
4636   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand restricted to register class ptr_rbp_reg
4637   match(RegP);
4638   match(rRegP);  // a general rRegP value is also accepted for this operand
4639 
4640   format %{ %}
4641   interface(REG_INTER);
4642 %}
4643 
4644 operand r15_RegP()
4645 %{
4646   constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand restricted to register class ptr_r15_reg (the Q&A above calls r15 the read-only TLS register)
4647   match(RegP);
4648   match(rRegP);  // a general rRegP value is also accepted for this operand
4649 
4650   format %{ %}
4651   interface(REG_INTER);
4652 %}
4653 
4654 operand rRegL()
4655 %{
4656   constraint(ALLOC_IN_RC(long_reg));  // general long operand drawn from register class long_reg
4657   match(RegL);
4658   match(rax_RegL);  // fixed-register long operands accepted here: rax and rdx (rcx_RegL is not listed)
4659   match(rdx_RegL);
4660 
4661   format %{ %}
4662   interface(REG_INTER);
4663 %}
4664 
4665 // Special Registers
4666 operand no_rax_rdx_RegL()
4667 %{
4668   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // long operand drawn from register class long_no_rax_rdx_reg
4669   match(RegL);
4670   match(rRegL);  // a general rRegL value is also accepted for this operand
4671 
4672   format %{ %}
4673   interface(REG_INTER);
4674 %}
4675 
4676 operand no_rax_RegL()
4677 %{
4678   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): uses the class that also excludes RDX, the same one as no_rax_rdx_RegL - confirm intended
4679   match(RegL);
4680   match(rRegL);
4681   match(rdx_RegL);  // rdx_RegL inputs are accepted even though the allocation class above is the no-rax-rdx one
4682 
4683   format %{ %}
4684   interface(REG_INTER);
4685 %}
4686 
4687 operand no_rcx_RegL()
4688 %{
4689   constraint(ALLOC_IN_RC(long_no_rcx_reg));  // long operand drawn from register class long_no_rcx_reg
4690   match(RegL);
4691   match(rRegL);  // a general rRegL value is also accepted for this operand
4692 
4693   format %{ %}
4694   interface(REG_INTER);
4695 %}
4696 
4697 operand rax_RegL()
4698 %{
4699   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand restricted to register class long_rax_reg
4700   match(RegL);
4701   match(rRegL);  // a general rRegL value is also accepted for this operand
4702 
4703   format %{ "RAX" %}
4704   interface(REG_INTER);
4705 %}
4706 
4707 operand rcx_RegL()
4708 %{
4709   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand restricted to register class long_rcx_reg
4710   match(RegL);
4711   match(rRegL);  // a general rRegL value is also accepted for this operand
4712 
4713   format %{ %}
4714   interface(REG_INTER);
4715 %}
4716 
4717 operand rdx_RegL()
4718 %{
4719   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand restricted to register class long_rdx_reg
4720   match(RegL);
4721   match(rRegL);  // a general rRegL value is also accepted for this operand
4722 
4723   format %{ %}
4724   interface(REG_INTER);
4725 %}
4726 
4727 // Flags register, used as output of compare instructions
4728 operand rFlagsReg()
4729 %{
4730   constraint(ALLOC_IN_RC(int_flags));  // flags operand drawn from register class int_flags
4731   match(RegFlags);
4732 
4733   format %{ "RFLAGS" %}
4734   interface(REG_INTER);
4735 %}
4736 
4737 // Flags register, used as output of FLOATING POINT compare instructions
4738 operand rFlagsRegU()
4739 %{
4740   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsReg; a distinct operand type over the same register class
4741   match(RegFlags);
4742 
4743   format %{ "RFLAGS_U" %}
4744   interface(REG_INTER);
4745 %}
4746 
4747 operand rFlagsRegUCF() %{
4748   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsReg and rFlagsRegU
4749   match(RegFlags);
4750   predicate(false);  // always-false predicate; presumably this operand is only named explicitly by instructions - confirm
4751 
4752   format %{ "RFLAGS_U_CF" %}
4753   interface(REG_INTER);
4754 %}
4755 
4756 // Float register operands
4757 operand regF()
4758 %{
4759   constraint(ALLOC_IN_RC(float_reg));  // float operand drawn from register class float_reg
4760   match(RegF);
4761 
4762   format %{ %}
4763   interface(REG_INTER);
4764 %}
4765 
4766 // Double register operands
4767 operand regD()
4768 %{
4769   constraint(ALLOC_IN_RC(double_reg));  // double operand drawn from register class double_reg
4770   match(RegD);
4771 
4772   format %{ %}
4773   interface(REG_INTER);
4774 %}
4775 
4776 
4777 //----------Memory Operands----------------------------------------------------
4778 // Direct Memory Operand
4779 // operand direct(immP addr)
4780 // %{
4781 //   match(addr);
4782 
4783 //   format %{ "[$addr]" %}
4784 //   interface(MEMORY_INTER) %{
4785 //     base(0xFFFFFFFF);
4786 //     index(0x4);
4787 //     scale(0x0);
4788 //     disp($addr);
4789 //   %}
4790 // %}
4791 
4792 // Indirect Memory Operand
4793 operand indirect(any_RegP reg)
4794 %{
4795   constraint(ALLOC_IN_RC(ptr_reg));
4796   match(reg);  // the address is the pointer register itself
4797 
4798   format %{ "[$reg]" %}
4799   interface(MEMORY_INTER) %{
4800     base($reg);
4801     index(0x4);  // no index (same 0x4 value the stack-slot operands label "No Index")
4802     scale(0x0);  // no scale
4803     disp(0x0);   // zero displacement
4804   %}
4805 %}
4806 
4807 // Indirect Memory Plus Short Offset Operand
4808 operand indOffset8(any_RegP reg, immL8 off)
4809 %{
4810   constraint(ALLOC_IN_RC(ptr_reg));
4811   match(AddP reg off);  // base register plus an immL8 constant
4812 
4813   format %{ "[$reg + $off (8-bit)]" %}
4814   interface(MEMORY_INTER) %{
4815     base($reg);
4816     index(0x4);  // no index
4817     scale(0x0);  // no scale
4818     disp($off);  // displacement is the constant offset
4819   %}
4820 %}
4821 
4822 // Indirect Memory Plus Long Offset Operand
4823 operand indOffset32(any_RegP reg, immL32 off)
4824 %{
4825   constraint(ALLOC_IN_RC(ptr_reg));
4826   match(AddP reg off);  // base register plus an immL32 constant
4827 
4828   format %{ "[$reg + $off (32-bit)]" %}
4829   interface(MEMORY_INTER) %{
4830     base($reg);
4831     index(0x4);  // no index
4832     scale(0x0);  // no scale
4833     disp($off);  // displacement is the constant offset
4834   %}
4835 %}
4836 
4837 // Indirect Memory Plus Index Register Plus Offset Operand
4838 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4839 %{
4840   constraint(ALLOC_IN_RC(ptr_reg));
4841   match(AddP (AddP reg lreg) off);  // (base + long index register) + constant
4842 
4843   op_cost(10);
4844   format %{"[$reg + $off + $lreg]" %}
4845   interface(MEMORY_INTER) %{
4846     base($reg);
4847     index($lreg);
4848     scale(0x0);  // index is not scaled
4849     disp($off);
4850   %}
4851 %}
4852 
4853 // Indirect Memory Plus Index Register Operand (no offset)
4854 operand indIndex(any_RegP reg, rRegL lreg)
4855 %{
4856   constraint(ALLOC_IN_RC(ptr_reg));
4857   match(AddP reg lreg);  // base + long index register
4858 
4859   op_cost(10);
4860   format %{"[$reg + $lreg]" %}
4861   interface(MEMORY_INTER) %{
4862     base($reg);
4863     index($lreg);
4864     scale(0x0);  // index is not scaled
4865     disp(0x0);   // zero displacement
4866   %}
4867 %}
4868 
4869 // Indirect Memory Times Scale Plus Index Register
4870 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4871 %{
4872   constraint(ALLOC_IN_RC(ptr_reg));
4873   match(AddP reg (LShiftL lreg scale));  // base + (index << scale)
4874 
4875   op_cost(10);
4876   format %{"[$reg + $lreg << $scale]" %}
4877   interface(MEMORY_INTER) %{
4878     base($reg);
4879     index($lreg);
4880     scale($scale);  // shift amount taken from the immI2 constant
4881     disp(0x0);      // zero displacement
4882   %}
4883 %}
4884 
4885 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4886 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4887 %{
4888   constraint(ALLOC_IN_RC(ptr_reg));
4889   match(AddP (AddP reg (LShiftL lreg scale)) off);  // (base + (index << scale)) + constant
4890 
4891   op_cost(10);
4892   format %{"[$reg + $off + $lreg << $scale]" %}
4893   interface(MEMORY_INTER) %{
4894     base($reg);
4895     index($lreg);
4896     scale($scale);  // shift amount taken from the immI2 constant
4897     disp($off);
4898   %}
4899 %}
4900 
4901 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
4902 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4903 %{
4904   constraint(ALLOC_IN_RC(ptr_reg));
4905   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type reached via in(2)->in(3)->in(1) must have a lower bound >= 0 (presumably the ConvI2L of idx - confirm)
4906   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);  // (base + (ConvI2L(idx) << scale)) + constant
4907 
4908   op_cost(10);
4909   format %{"[$reg + $off + $idx << $scale]" %}
4910   interface(MEMORY_INTER) %{
4911     base($reg);
4912     index($idx);  // the int register itself is used as the index
4913     scale($scale);
4914     disp($off);
4915   %}
4916 %}
4917 
4918 // Indirect Narrow Oop Plus Offset Operand
4919 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
4920 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4921 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4922   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only with compressed oops and a narrow-oop shift equal to Address::times_8
4923   constraint(ALLOC_IN_RC(ptr_reg));
4924   match(AddP (DecodeN reg) off);  // the DecodeN is folded into the addressing mode
4925 
4926   op_cost(10);
4927   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4928   interface(MEMORY_INTER) %{
4929     base(0xc); // R12
4930     index($reg);  // the narrow oop register is the index
4931     scale(0x3);   // matches the "<< 3" shown in the format string
4932     disp($off);
4933   %}
4934 %}
4935 
4936 // Indirect Memory Operand (narrow oop base; only when the narrow-oop shift is 0)
4937 operand indirectNarrow(rRegN reg)
4938 %{
4939   predicate(Universe::narrow_oop_shift() == 0);
4940   constraint(ALLOC_IN_RC(ptr_reg));
4941   match(DecodeN reg);  // the DecodeN is folded away; the narrow register is used directly as base
4942 
4943   format %{ "[$reg]" %}
4944   interface(MEMORY_INTER) %{
4945     base($reg);
4946     index(0x4);  // no index
4947     scale(0x0);  // no scale
4948     disp(0x0);   // zero displacement
4949   %}
4950 %}
4951 
4952 // Indirect Memory Plus Short Offset Operand (narrow oop base; only when the narrow-oop shift is 0)
4953 operand indOffset8Narrow(rRegN reg, immL8 off)
4954 %{
4955   predicate(Universe::narrow_oop_shift() == 0);
4956   constraint(ALLOC_IN_RC(ptr_reg));
4957   match(AddP (DecodeN reg) off);  // decoded narrow base plus an immL8 constant
4958 
4959   format %{ "[$reg + $off (8-bit)]" %}
4960   interface(MEMORY_INTER) %{
4961     base($reg);
4962     index(0x4);  // no index
4963     scale(0x0);  // no scale
4964     disp($off);
4965   %}
4966 %}
4967 
4968 // Indirect Memory Plus Long Offset Operand (narrow oop base; only when the narrow-oop shift is 0)
4969 operand indOffset32Narrow(rRegN reg, immL32 off)
4970 %{
4971   predicate(Universe::narrow_oop_shift() == 0);
4972   constraint(ALLOC_IN_RC(ptr_reg));
4973   match(AddP (DecodeN reg) off);  // decoded narrow base plus an immL32 constant
4974 
4975   format %{ "[$reg + $off (32-bit)]" %}
4976   interface(MEMORY_INTER) %{
4977     base($reg);
4978     index(0x4);  // no index
4979     scale(0x0);  // no scale
4980     disp($off);
4981   %}
4982 %}
4983 
4984 // Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base; only when the narrow-oop shift is 0)
4985 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4986 %{
4987   predicate(Universe::narrow_oop_shift() == 0);
4988   constraint(ALLOC_IN_RC(ptr_reg));
4989   match(AddP (AddP (DecodeN reg) lreg) off);  // (decoded narrow base + long index) + constant
4990 
4991   op_cost(10);
4992   format %{"[$reg + $off + $lreg]" %}
4993   interface(MEMORY_INTER) %{
4994     base($reg);
4995     index($lreg);
4996     scale(0x0);  // index is not scaled
4997     disp($off);
4998   %}
4999 %}
5000 
5001 // Indirect Memory Plus Index Register Operand (no offset; narrow oop base; only when the narrow-oop shift is 0)
5002 operand indIndexNarrow(rRegN reg, rRegL lreg)
5003 %{
5004   predicate(Universe::narrow_oop_shift() == 0);
5005   constraint(ALLOC_IN_RC(ptr_reg));
5006   match(AddP (DecodeN reg) lreg);  // decoded narrow base + long index
5007 
5008   op_cost(10);
5009   format %{"[$reg + $lreg]" %}
5010   interface(MEMORY_INTER) %{
5011     base($reg);
5012     index($lreg);
5013     scale(0x0);  // index is not scaled
5014     disp(0x0);   // zero displacement
5015   %}
5016 %}
5017 
5018 // Indirect Memory Times Scale Plus Index Register (narrow oop base; only when the narrow-oop shift is 0)
5019 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5020 %{
5021   predicate(Universe::narrow_oop_shift() == 0);
5022   constraint(ALLOC_IN_RC(ptr_reg));
5023   match(AddP (DecodeN reg) (LShiftL lreg scale));  // decoded narrow base + (index << scale)
5024 
5025   op_cost(10);
5026   format %{"[$reg + $lreg << $scale]" %}
5027   interface(MEMORY_INTER) %{
5028     base($reg);
5029     index($lreg);
5030     scale($scale);
5031     disp(0x0);  // zero displacement
5032   %}
5033 %}
5034 
5035 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow oop base; only when the narrow-oop shift is 0)
5036 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5037 %{
5038   predicate(Universe::narrow_oop_shift() == 0);
5039   constraint(ALLOC_IN_RC(ptr_reg));
5040   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);  // (decoded narrow base + (index << scale)) + constant
5041 
5042   op_cost(10);
5043   format %{"[$reg + $off + $lreg << $scale]" %}
5044   interface(MEMORY_INTER) %{
5045     base($reg);
5046     index($lreg);
5047     scale($scale);
5048     disp($off);
5049   %}
5050 %}
5051 
5052 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow oop base; only when the narrow-oop shift is 0)
5053 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5054 %{
5055   constraint(ALLOC_IN_RC(ptr_reg));
5056   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // shift must be 0 and the long type reached via in(2)->in(3)->in(1) must have a lower bound >= 0 (presumably the ConvI2L of idx - confirm)
5057   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5058 
5059   op_cost(10);
5060   format %{"[$reg + $off + $idx << $scale]" %}
5061   interface(MEMORY_INTER) %{
5062     base($reg);
5063     index($idx);  // the int register itself is used as the index
5064     scale($scale);
5065     disp($off);
5066   %}
5067 %}
5068 
5069 
5070 //----------Special Memory Operands--------------------------------------------
5071 // Stack Slot Operand - This operand is used for loading and storing temporary
5072 //                      values on the stack where a match requires a value to
5073 //                      flow through memory.
5074 operand stackSlotP(sRegP reg)
5075 %{
5076   constraint(ALLOC_IN_RC(stack_slots));  // pointer stack slot, allocated from the stack_slots class
5077   // No match rule because this operand is only generated in matching
5078 
5079   format %{ "[$reg]" %}
5080   interface(MEMORY_INTER) %{
5081     base(0x4);   // RSP
5082     index(0x4);  // No Index
5083     scale(0x0);  // No Scale
5084     disp($reg);  // Stack Offset
5085   %}
5086 %}
5087 
5088 operand stackSlotI(sRegI reg)
5089 %{
5090   constraint(ALLOC_IN_RC(stack_slots));  // int stack slot, allocated from the stack_slots class
5091   // No match rule because this operand is only generated in matching
5092 
5093   format %{ "[$reg]" %}
5094   interface(MEMORY_INTER) %{
5095     base(0x4);   // RSP
5096     index(0x4);  // No Index
5097     scale(0x0);  // No Scale
5098     disp($reg);  // Stack Offset
5099   %}
5100 %}
5101 
5102 operand stackSlotF(sRegF reg)
5103 %{
5104   constraint(ALLOC_IN_RC(stack_slots));  // float stack slot, allocated from the stack_slots class
5105   // No match rule because this operand is only generated in matching
5106 
5107   format %{ "[$reg]" %}
5108   interface(MEMORY_INTER) %{
5109     base(0x4);   // RSP
5110     index(0x4);  // No Index
5111     scale(0x0);  // No Scale
5112     disp($reg);  // Stack Offset
5113   %}
5114 %}
5115 
5116 operand stackSlotD(sRegD reg)
5117 %{
5118   constraint(ALLOC_IN_RC(stack_slots));  // double stack slot, allocated from the stack_slots class
5119   // No match rule because this operand is only generated in matching
5120 
5121   format %{ "[$reg]" %}
5122   interface(MEMORY_INTER) %{
5123     base(0x4);   // RSP
5124     index(0x4);  // No Index
5125     scale(0x0);  // No Scale
5126     disp($reg);  // Stack Offset
5127   %}
5128 %}
5129 operand stackSlotL(sRegL reg)
5130 %{
5131   constraint(ALLOC_IN_RC(stack_slots));  // long stack slot, allocated from the stack_slots class
5132   // No match rule because this operand is only generated in matching
5133 
5134   format %{ "[$reg]" %}
5135   interface(MEMORY_INTER) %{
5136     base(0x4);   // RSP
5137     index(0x4);  // No Index
5138     scale(0x0);  // No Scale
5139     disp($reg);  // Stack Offset
5140   %}
5141 %}
5142 
5143 //----------Conditional Branch Operands----------------------------------------
5144 // Comparison Op  - This is the operation of the comparison, and is limited to
5145 //                  the following set of codes:
5146 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5147 //
5148 // Other attributes of the comparison, such as unsignedness, are specified
5149 // by the comparison instruction that sets a condition code flags register.
5150 // That result is represented by a flags operand whose subtype is appropriate
5151 // to the unsignedness (etc.) of the comparison.
5152 //
5153 // Later, the instruction which matches both the Comparison Op (a Bool) and
5154 // the flags (produced by the Cmp) specifies the coding of the comparison op
5155 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5156 
5157 // Comparison Code (signed compare)
5158 operand cmpOp()
5159 %{
5160   match(Bool);
5161 
5162   format %{ "" %}
5163   interface(COND_INTER) %{
5164     equal(0x4, "e");  // each entry is (encoding, mnemonic suffix); values are the x86 condition-code field
5165     not_equal(0x5, "ne");
5166     less(0xC, "l");  // signed conditions: l / ge / le / g
5167     greater_equal(0xD, "ge");
5168     less_equal(0xE, "le");
5169     greater(0xF, "g");
5170   %}
5171 %}
5172 
5173 // Comparison Code, unsigned compare.  Used by FP also, with
5174 // C2 (unordered) turned into GT or LT already.  The other bits
5175 // C0 and C3 are turned into Carry & Zero flags.
5176 operand cmpOpU()
5177 %{
5178   match(Bool);
5179 
5180   format %{ "" %}
5181   interface(COND_INTER) %{
5182     equal(0x4, "e");  // each entry is (encoding, mnemonic suffix)
5183     not_equal(0x5, "ne");
5184     less(0x2, "b");  // unsigned conditions: b / nb / be / nbe
5185     greater_equal(0x3, "nb");
5186     less_equal(0x6, "be");
5187     greater(0x7, "nbe");
5188   %}
5189 %}
5190 
5191 
5192 // Floating comparisons that don't require any fixup for the unordered case
5193 operand cmpOpUCF() %{
5194   match(Bool);
5195   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5196             n->as_Bool()->_test._test == BoolTest::ge ||
5197             n->as_Bool()->_test._test == BoolTest::le ||
5198             n->as_Bool()->_test._test == BoolTest::gt);  // only lt/ge/le/gt tests; eq/ne are covered by cmpOpUCF2
5199   format %{ "" %}
5200   interface(COND_INTER) %{
5201     equal(0x4, "e");  // same encoding table as cmpOpU
5202     not_equal(0x5, "ne");
5203     less(0x2, "b");
5204     greater_equal(0x3, "nb");
5205     less_equal(0x6, "be");
5206     greater(0x7, "nbe");
5207   %}
5208 %}
5209 
5210 
5211 // Floating comparisons that can be fixed up with extra conditional jumps
5212 operand cmpOpUCF2() %{
5213   match(Bool);
5214   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5215             n->as_Bool()->_test._test == BoolTest::eq);  // only ne/eq tests; the ordering tests are covered by cmpOpUCF
5216   format %{ "" %}
5217   interface(COND_INTER) %{
5218     equal(0x4, "e");  // same encoding table as cmpOpU
5219     not_equal(0x5, "ne");
5220     less(0x2, "b");
5221     greater_equal(0x3, "nb");
5222     less_equal(0x6, "be");
5223     greater(0x7, "nbe");
5224   %}
5225 %}
5226 
5227 
5228 //----------OPERAND CLASSES----------------------------------------------------
5229 // Operand Classes are groups of operands that are used to simplify
5230 // instruction definitions by not requiring the AD writer to specify separate
5231 // instructions for every form of operand when the instruction accepts
5232 // multiple operand types with the same basic encoding and format.  The classic
5233 // case of this is memory operands.
5234 
5235 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5236                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5237                indCompressedOopOffset,
5238                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5239                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5240                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // groups the register-based addressing operands defined above; the stackSlot* operands are not included
5241 
5242 //----------PIPELINE-----------------------------------------------------------
5243 // Rules which define the behavior of the target architecture's pipeline.
5244 pipeline %{
5245 
5246 //----------ATTRIBUTES---------------------------------------------------------
5247 attributes %{
5248   variable_size_instructions;        // Variable size instructions
5249   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5250   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5251   instruction_fetch_unit_size = 16;  // The processor fetches one line
5252   instruction_fetch_units = 1;       // of 16 bytes
5253 
5254   // List of nop instructions
5255   nops( MachNop );
5256 %}
5257 
5258 //----------RESOURCES----------------------------------------------------------
5259 // Resources are the functional units available to the machine
5260 
5261 // Generic P2/P3 pipeline
5262 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5263 // 3 instructions decoded per cycle.
5264 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5265 // 3 ALU op, only ALU0 handles mul instructions.
5266 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5267            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5268            BR, FPU,
5269            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // DECODE/MEM/ALU each name "any one of" their three units; NOTE(review): three MS units are declared while the comment above says 2 load/store ops per cycle - confirm
5270 
5271 //----------PIPELINE DESCRIPTION-----------------------------------------------
5272 // Pipeline Description specifies the stages in the machine's pipeline
5273 
5274 // Generic P2/P3 pipeline
5275 pipe_desc(S0, S1, S2, S3, S4, S5);  // six pipeline stages, S0 through S5, referenced by the pipe classes below
5276 
5277 //----------PIPELINE CLASSES---------------------------------------------------
5278 // Pipeline Classes describe the stages in which input and output are
5279 // referenced by the hardware pipeline.
5280 
5281 // Naming convention: ialu or fpu
5282 // Then: _reg
5283 // Then: _reg if there is a 2nd register
5284 // Then: _long if it's a pair of instructions implementing a long
5285 // Then: _fat if it requires the big decoder
5286 //   Or: _mem if it requires the big decoder and a memory unit.
5287 
5288 // Integer ALU reg operation
5289 pipe_class ialu_reg(rRegI dst)
5290 %{
5291     single_instruction;
5292     dst    : S4(write);  // result written in stage S4
5293     dst    : S3(read);   // dst is also a source, read in stage S3
5294     DECODE : S0;        // any decoder
5295     ALU    : S3;        // any alu
5296 %}
5297 
5298 // Long ALU reg operation
5299 pipe_class ialu_reg_long(rRegL dst)
5300 %{
5301     instruction_count(2);  // modeled as 2 instructions
5302     dst    : S4(write);
5303     dst    : S3(read);
5304     DECODE : S0(2);     // any 2 decoders
5305     ALU    : S3(2);     // any 2 alus
5306 %}
5307 
5308 // Integer ALU reg operation using big decoder
5309 pipe_class ialu_reg_fat(rRegI dst)
5310 %{
5311     single_instruction;
5312     dst    : S4(write);  // result written in stage S4
5313     dst    : S3(read);   // dst is also a source, read in stage S3
5314     D0     : S0;        // big decoder only
5315     ALU    : S3;        // any alu
5316 %}
5317 
5318 // Long ALU reg operation using big decoder
5319 pipe_class ialu_reg_long_fat(rRegL dst)
5320 %{
5321     instruction_count(2);  // modeled as 2 instructions
5322     dst    : S4(write);
5323     dst    : S3(read);
5324     D0     : S0(2);     // big decoder only; twice
5325     ALU    : S3(2);     // any 2 alus
5326 %}
5327 
5328 // Integer ALU reg-reg operation
5329 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5330 %{
5331     single_instruction;
5332     dst    : S4(write);  // result written in stage S4
5333     src    : S3(read);   // source read in stage S3
5334     DECODE : S0;        // any decoder
5335     ALU    : S3;        // any alu
5336 %}
5337 
5338 // Long ALU reg-reg operation
5339 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5340 %{
5341     instruction_count(2);  // modeled as 2 instructions
5342     dst    : S4(write);
5343     src    : S3(read);
5344     DECODE : S0(2);     // any 2 decoders
5345     ALU    : S3(2);     // any 2 alus
5346 %}
5347 
5348 // Integer ALU operation using big decoder (despite the reg_reg name, src is declared as a memory operand)
5349 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5350 %{
5351     single_instruction;
5352     dst    : S4(write);
5353     src    : S3(read);
5354     D0     : S0;        // big decoder only
5355     ALU    : S3;        // any alu
5356 %}
5357 
5358 // Long ALU reg-reg operation using big decoder
5359 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5360 %{
5361     instruction_count(2);
5362     dst    : S4(write);
5363     src    : S3(read);
5364     D0     : S0(2);     // big decoder only; twice
5365     ALU    : S3(2);     // any 2 alus
5366 %}
5367 
5368 // Integer ALU reg-mem operation
5369 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5370 %{
5371     single_instruction;
5372     dst    : S5(write);  // result written one stage later (S5) than in the reg-reg classes (S4)
5373     mem    : S3(read);
5374     D0     : S0;        // big decoder only
5375     ALU    : S4;        // any alu
5376     MEM    : S3;        // any mem
5377 %}
5378 
5379 // Integer mem operation (prefetch)
5380 pipe_class ialu_mem(memory mem)
5381 %{
5382     single_instruction;
5383     mem    : S3(read);  // only the memory operand is referenced; no register result and no ALU use
5384     D0     : S0;        // big decoder only
5385     MEM    : S3;        // any mem
5386 %}
5387 
5388 // Integer Store to Memory
5389 pipe_class ialu_mem_reg(memory mem, rRegI src)
5390 %{
5391     single_instruction;
5392     mem    : S3(read);  // memory (address) operand read in stage S3
5393     src    : S5(read);  // value being stored is read in stage S5
5394     D0     : S0;        // big decoder only
5395     ALU    : S4;        // any alu
5396     MEM    : S3;        // any mem
5397 %}
5398 
5399 // // Long Store to Memory
5400 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5401 // %{
5402 //     instruction_count(2);
5403 //     mem    : S3(read);
5404 //     src    : S5(read);
5405 //     D0     : S0(2);          // big decoder only; twice
5406 //     ALU    : S4(2);     // any 2 alus
5407 //     MEM    : S3(2);  // Both mems
5408 // %}
5409 
5410 // Integer Store to Memory with no register source (the _imm suffix suggests an immediate value - confirm)
5411 pipe_class ialu_mem_imm(memory mem)
5412 %{
5413     single_instruction;
5414     mem    : S3(read);
5415     D0     : S0;        // big decoder only
5416     ALU    : S4;        // any alu
5417     MEM    : S3;        // any mem
5418 %}
5419 
5420 // Integer ALU0 reg-reg operation
5421 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5422 %{
5423     single_instruction;
5424     dst    : S4(write);  // result written in stage S4
5425     src    : S3(read);   // source read in stage S3
5426     D0     : S0;        // Big decoder only
5427     ALU0   : S3;        // only alu0
5428 %}
5429 
5430 // Integer ALU0 reg-mem operation
5431 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5432 %{
5433     single_instruction;
5434     dst    : S5(write);  // result written in stage S5
5435     mem    : S3(read);   // memory operand read in stage S3
5436     D0     : S0;        // big decoder only
5437     ALU0   : S4;        // ALU0 only
5438     MEM    : S3;        // any mem
5439 %}
5440 
5441 // Integer ALU reg-reg operation that writes the flags register
5442 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5443 %{
5444     single_instruction;
5445     cr     : S4(write);
5446     src1   : S3(read);
5447     src2   : S3(read);
5448     DECODE : S0;        // any decoder
5449     ALU    : S3;        // any alu
5450 %}
5451 
5452 // Integer ALU reg-imm operation that writes the flags register
5453 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5454 %{
5455     single_instruction;
5456     cr     : S4(write);
5457     src1   : S3(read);
5458     DECODE : S0;        // any decoder
5459     ALU    : S3;        // any alu
5460 %}
5461 
5462 // Integer ALU reg-mem operation that writes the flags register
5463 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5464 %{
5465     single_instruction;
5466     cr     : S4(write);
5467     src1   : S3(read);
5468     src2   : S3(read);
5469     D0     : S0;        // big decoder only
5470     ALU    : S4;        // any alu
5471     MEM    : S3;        // any mem
5472 %}
5473 
5474 // 4-instruction sequence reading p, q and y (the name suggests a compare-less-than idiom - confirm against its users)
5475 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5476 %{
5477     instruction_count(4);
5478     y      : S4(read);
5479     q      : S3(read);
5480     p      : S3(read);
5481     DECODE : S0(4);     // any decoder; count 4 matches instruction_count(4)
5482 %}
5483 
5484 // Conditional move reg-reg
5485 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5486 %{
5487     single_instruction;
5488     dst    : S4(write);
5489     src    : S3(read);
5490     cr     : S3(read);  // flags are an input, read in stage S3
5491     DECODE : S0;        // any decoder
5492 %}
5493 
5494 // Conditional move reg-mem
5495 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5496 %{
5497     single_instruction;
5498     dst    : S4(write);
5499     src    : S3(read);
5500     cr     : S3(read);  // flags are an input, read in stage S3
5501     DECODE : S0;        // any decoder
5502     MEM    : S3;        // any mem
5503 %}
5504 
5505 // Conditional move reg-reg long
5506 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5507 %{
5508     single_instruction;  // NOTE(review): declared as a single instruction although DECODE below uses a count of 2 - confirm intended
5509     dst    : S4(write);
5510     src    : S3(read);
5511     cr     : S3(read);
5512     DECODE : S0(2);     // any 2 decoders
5513 %}
5514 
5515 // XXX
5516 // // Conditional move double reg-reg
5517 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5518 // %{
5519 //     single_instruction;
5520 //     dst    : S4(write);
5521 //     src    : S3(read);
5522 //     cr     : S3(read);
5523 //     DECODE : S0;     // any decoder
5524 // %}
5525 
5526 // Float reg operation (single register operand, read only)
5527 pipe_class fpu_reg(regD dst)
5528 %{
5529     instruction_count(2);
5530     dst    : S3(read);
5531     DECODE : S0(2);     // any 2 decoders
5532     FPU    : S3;
5533 %}
5534 
5535 // Float reg-reg operation
5536 pipe_class fpu_reg_reg(regD dst, regD src)
5537 %{
5538     instruction_count(2);  // modeled as 2 instructions
5539     dst    : S4(write);
5540     src    : S3(read);
5541     DECODE : S0(2);     // any 2 decoders
5542     FPU    : S3;        // the single FPU resource, in stage S3
5543 %}
5544 
5545 // Float reg-reg-reg operation
5546 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5547 %{
5548     instruction_count(3);
5549     dst    : S4(write);
5550     src1   : S3(read);
5551     src2   : S3(read);
5552     DECODE : S0(3);     // any 3 decoders
5553     FPU    : S3(2);
5554 %}
5555 
5556 // Float reg-reg-reg-reg operation
5557 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5558 %{
5559     instruction_count(4);
5560     dst    : S4(write);
5561     src1   : S3(read);
5562     src2   : S3(read);
5563     src3   : S3(read);
5564     DECODE : S0(4);     // any decoder; count 4 matches instruction_count(4)
5565     FPU    : S3(2);
5566 %}
5567 
5568 // Float reg-mem-reg-reg operation
5569 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5570 %{
5571     instruction_count(4);
5572     dst    : S4(write);
5573     src1   : S3(read);
5574     src2   : S3(read);
5575     src3   : S3(read);
5576     DECODE : S1(3);     // any 3 decoders
5577     D0     : S0;        // Big decoder only
5578     FPU    : S3(2);
5579     MEM    : S3;        // any mem
5580 %}
5581 
5582 // Float reg-mem operation
5583 pipe_class fpu_reg_mem(regD dst, memory mem)
5584 %{
5585     instruction_count(2);  // modeled as 2 instructions
5586     dst    : S5(write);
5587     mem    : S3(read);
5588     D0     : S0;        // big decoder only
5589     DECODE : S1;        // any decoder for FPU POP
5590     FPU    : S4;
5591     MEM    : S3;        // any mem
5592 %}
5593 
5594 // Float reg-reg-mem operation
5595 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5596 %{
5597     instruction_count(3);
5598     dst    : S5(write);
5599     src1   : S3(read);
5600     mem    : S3(read);
5601     D0     : S0;        // big decoder only
5602     DECODE : S1(2);     // any decoder for FPU POP
5603     FPU    : S4;
5604     MEM    : S3;        // any mem
5605 %}
5606 
5607 // Float mem-reg operation
5608 pipe_class fpu_mem_reg(memory mem, regD src)
5609 %{
5610     instruction_count(2);  // modeled as 2 instructions
5611     src    : S5(read);
5612     mem    : S3(read);
5613     DECODE : S0;        // any decoder for FPU PUSH
5614     D0     : S1;        // big decoder only
5615     FPU    : S4;
5616     MEM    : S3;        // any mem
5617 %}
5618 
5619 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5620 %{
5621     instruction_count(3);  // modeled as 3 instructions; all three operands are only read
5622     src1   : S3(read);
5623     src2   : S3(read);
5624     mem    : S3(read);
5625     DECODE : S0(2);     // any decoder for FPU PUSH
5626     D0     : S1;        // big decoder only
5627     FPU    : S4;
5628     MEM    : S3;        // any mem
5629 %}
5630 
5631 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation: memory operand, one register source, one memory source
5632 %{
5633     instruction_count(3);
5634     src1   : S3(read);
5635     src2   : S3(read);
5636     mem    : S4(read);
5637     DECODE : S0;        // any decoder for FPU PUSH
5638     D0     : S0(2);     // big decoder only
5639     FPU    : S4;
5640     MEM    : S3(2);     // any mem
5641 %}
5642 
5643 pipe_class fpu_mem_mem(memory dst, memory src1)  // Float mem-mem operation; claims only the D0 and MEM resources (no FPU entry)
5644 %{
5645     instruction_count(2);
5646     src1   : S3(read);
5647     dst    : S4(read);
5648     D0     : S0(2);     // big decoder only
5649     MEM    : S3(2);     // any mem
5650 %}
5651 
5652 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float mem-mem-mem operation: three memory operands
5653 %{
5654     instruction_count(3);
5655     src1   : S3(read);
5656     src2   : S3(read);
5657     dst    : S4(read);
5658     D0     : S0(3);     // big decoder only
5659     FPU    : S4;
5660     MEM    : S3(3);     // any mem
5661 %}
5662 
5663 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float mem-reg operation; NOTE(review): the _con suffix suggests an extra constant load, which would explain MEM being claimed twice - confirm
5664 %{
5665     instruction_count(3);
5666     src1   : S4(read);
5667     mem    : S4(read);
5668     DECODE : S0;        // any decoder for FPU PUSH
5669     D0     : S0(2);     // big decoder only
5670     FPU    : S4;
5671     MEM    : S3(2);     // any mem
5672 %}
5673 
5674 // Float load constant into a register (the load itself claims D0 and MEM)
5675 pipe_class fpu_reg_con(regD dst)
5676 %{
5677     instruction_count(2);
5678     dst    : S5(write);
5679     D0     : S0;        // big decoder only for the load
5680     DECODE : S1;        // any decoder for FPU POP
5681     FPU    : S4;
5682     MEM    : S3;        // any mem
5683 %}
5684 
5685 // Float reg-reg operation combined with a constant load: register destination, one register source
5686 pipe_class fpu_reg_reg_con(regD dst, regD src)
5687 %{
5688     instruction_count(3);
5689     dst    : S5(write);
5690     src    : S3(read);
5691     D0     : S0;        // big decoder only for the load
5692     DECODE : S1(2);     // any decoder for FPU POP
5693     FPU    : S4;
5694     MEM    : S3;        // any mem
5695 %}
5696 
5697 // Unconditional branch: a single instruction that claims the BR resource in stage S3
5698 pipe_class pipe_jmp(label labl)
5699 %{
5700     single_instruction;
5701     BR   : S3;
5702 %}
5703 
5704 // Conditional branch: reads the flags operand cr in stage S1, claims BR in stage S3
5705 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5706 %{
5707     single_instruction;
5708     cr    : S1(read);
5709     BR    : S3;
5710 %}
5711 
5712 // Allocation idiom (compare-and-exchange): one serializing instruction with a fixed latency of 6
5713 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5714 %{
5715     instruction_count(1); force_serialization;
5716     fixed_latency(6);
5717     heap_ptr : S3(read);
5718     DECODE   : S0(3);
5719     D0       : S2;
5720     MEM      : S3;
5721     ALU      : S3(2);
5722     dst      : S5(write);
5723     BR       : S5;
5724 %}
5725 
5726 // Generic big/slow expanded idiom: modelled as 10 instructions over multiple bundles, serializing, fixed latency of 100
5727 pipe_class pipe_slow()
5728 %{
5729     instruction_count(10); multiple_bundles; force_serialization;
5730     fixed_latency(100);
5731     D0  : S0(2);
5732     MEM : S3(2);
5733 %}
5734 
5735 // The real do-nothing guy: zero instructions, no operands, no resources claimed
5736 pipe_class empty()
5737 %{
5738     instruction_count(0);
5739 %}
5740 
5741 // Define the class for the Nop node
5742 define
5743 %{
5744    MachNop = empty;  // Nop nodes are scheduled with the zero-instruction 'empty' pipe class
5745 %}
5746 
5747 %}
5748 
5749 //----------INSTRUCTIONS-------------------------------------------------------
5750 //
5751 // match      -- States which machine-independent subtree may be replaced
5752 //               by this instruction.
5753 // ins_cost   -- The estimated cost of this instruction is used by instruction
5754 //               selection to identify a minimum cost tree of machine
5755 //               instructions that matches a tree of machine-independent
5756 //               instructions.
5757 // format     -- A string providing the disassembly for this instruction.
5758 //               The value of an instruction's operand may be inserted
5759 //               by referring to it with a '$' prefix.
5760 // opcode     -- Three instruction opcodes may be provided.  These are referred
5761 //               to within an encode class as $primary, $secondary, and $tertiary
5762 //               respectively.  The primary opcode is commonly used to
5763 //               indicate the type of machine instruction, while secondary
5764 //               and tertiary are often used for prefix options or addressing
5765 //               modes.
5766 // ins_encode -- A list of encode classes with parameters. The encode class
5767 //               name must have been defined in an 'enc_class' specification
5768 //               in the encode section of the architecture description.
5769 
5770 
5771 //----------Load/Store/Move Instructions---------------------------------------
5772 //----------Load Instructions--------------------------------------------------
5773 
5774 // Load Byte (8 bit signed), sign-extended into a 32-bit int register
5775 instruct loadB(rRegI dst, memory mem)
5776 %{
5777   match(Set dst (LoadB mem));
5778 
5779   ins_cost(125);
5780   format %{ "movsbl  $dst, $mem\t# byte" %}
5781 
5782   ins_encode %{
5783     __ movsbl($dst$$Register, $mem$$Address);
5784   %}
5785 
5786   ins_pipe(ialu_reg_mem);
5787 %}
5788 
5789 // Load Byte (8 bit signed) into Long Register; the ConvI2L is folded into one sign-extending load
5790 instruct loadB2L(rRegL dst, memory mem)
5791 %{
5792   match(Set dst (ConvI2L (LoadB mem)));
5793 
5794   ins_cost(125);
5795   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5796 
5797   ins_encode %{
5798     __ movsbq($dst$$Register, $mem$$Address);
5799   %}
5800 
5801   ins_pipe(ialu_reg_mem);
5802 %}
5803 
5804 // Load Unsigned Byte (8 bit UNsigned), zero-extended into a 32-bit int register
5805 instruct loadUB(rRegI dst, memory mem)
5806 %{
5807   match(Set dst (LoadUB mem));
5808 
5809   ins_cost(125);
5810   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5811 
5812   ins_encode %{
5813     __ movzbl($dst$$Register, $mem$$Address);
5814   %}
5815 
5816   ins_pipe(ialu_reg_mem);
5817 %}
5818 
5819 // Load Unsigned Byte (8 bit UNsigned) into Long Register; the ConvI2L is folded into one zero-extending load
5820 instruct loadUB2L(rRegL dst, memory mem)
5821 %{
5822   match(Set dst (ConvI2L (LoadUB mem)));
5823 
5824   ins_cost(125);
5825   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5826 
5827   ins_encode %{
5828     __ movzbq($dst$$Register, $mem$$Address);
5829   %}
5830 
5831   ins_pipe(ialu_reg_mem);
5832 %}
5833 
5834 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register (load, then mask in place)
5835 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5836   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5837   effect(KILL cr);  // the andl below writes the flags
5838 
5839   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5840             "andl    $dst, $mask" %}
5841   ins_encode %{
5842     Register Rdst = $dst$$Register;
5843     __ movzbq(Rdst, $mem$$Address);   // zero-extended, so the value is never negative
5844     __ andl(Rdst, $mask$$constant);   // masking a non-negative value stays non-negative: zero upper bits are the correct long
5845   %}
5846   ins_pipe(ialu_reg_mem);
5847 %}
5848 
5849 // Load Short (16 bit signed), sign-extended into a 32-bit int register
5850 instruct loadS(rRegI dst, memory mem)
5851 %{
5852   match(Set dst (LoadS mem));
5853 
5854   ins_cost(125);
5855   format %{ "movswl $dst, $mem\t# short" %}
5856 
5857   ins_encode %{
5858     __ movswl($dst$$Register, $mem$$Address);
5859   %}
5860 
5861   ins_pipe(ialu_reg_mem);
5862 %}
5863 
5864 // Load Short (16 bit signed) to Byte (8 bit signed): the shift-left/shift-right pair keeps only the sign-extended low byte
5865 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5866   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5867 
5868   ins_cost(125);
5869   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5870   ins_encode %{
5871     __ movsbl($dst$$Register, $mem$$Address);  // one sign-extending byte load replaces load + two shifts
5872   %}
5873   ins_pipe(ialu_reg_mem);
5874 %}
5875 
5876 // Load Short (16 bit signed) into Long Register; the ConvI2L is folded into one sign-extending load
5877 instruct loadS2L(rRegL dst, memory mem)
5878 %{
5879   match(Set dst (ConvI2L (LoadS mem)));
5880 
5881   ins_cost(125);
5882   format %{ "movswq $dst, $mem\t# short -> long" %}
5883 
5884   ins_encode %{
5885     __ movswq($dst$$Register, $mem$$Address);
5886   %}
5887 
5888   ins_pipe(ialu_reg_mem);
5889 %}
5890 
5891 // Load Unsigned Short/Char (16 bit UNsigned), zero-extended into a 32-bit int register
5892 instruct loadUS(rRegI dst, memory mem)
5893 %{
5894   match(Set dst (LoadUS mem));
5895 
5896   ins_cost(125);
5897   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5898 
5899   ins_encode %{
5900     __ movzwl($dst$$Register, $mem$$Address);
5901   %}
5902 
5903   ins_pipe(ialu_reg_mem);
5904 %}
5905 
5906 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the shift pair keeps only the sign-extended low byte
5907 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5908   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5909 
5910   ins_cost(125);
5911   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5912   ins_encode %{
5913     __ movsbl($dst$$Register, $mem$$Address);  // one sign-extending byte load replaces load + two shifts
5914   %}
5915   ins_pipe(ialu_reg_mem);
5916 %}
5917 
5918 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register; the ConvI2L is folded into one zero-extending load
5919 instruct loadUS2L(rRegL dst, memory mem)
5920 %{
5921   match(Set dst (ConvI2L (LoadUS mem)));
5922 
5923   ins_cost(125);
5924   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5925 
5926   ins_encode %{
5927     __ movzwq($dst$$Register, $mem$$Address);
5928   %}
5929 
5930   ins_pipe(ialu_reg_mem);
5931 %}
5932 
5933 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: mask and conversion collapse into a byte load
5934 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5935   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5936 
5937   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5938   ins_encode %{
5939     __ movzbq($dst$$Register, $mem$$Address);  // the mask operand is not emitted; the zero-extending byte load does the masking
5940   %}
5941   ins_pipe(ialu_reg_mem);
5942 %}
5943 
5944 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register (load, then mask in place)
5945 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5946   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5947   effect(KILL cr);  // the andl below writes the flags
5948 
5949   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5950             "andl    $dst, $mask" %}
5951   ins_encode %{
5952     Register Rdst = $dst$$Register;
5953     __ movzwq(Rdst, $mem$$Address);   // zero-extended, so the value is never negative
5954     __ andl(Rdst, $mask$$constant);   // masking a non-negative value stays non-negative: zero upper bits are the correct long
5955   %}
5956   ins_pipe(ialu_reg_mem);
5957 %}
5958 
5959 // Load Integer (32 bit) into an int register
5960 instruct loadI(rRegI dst, memory mem)
5961 %{
5962   match(Set dst (LoadI mem));
5963 
5964   ins_cost(125);
5965   format %{ "movl    $dst, $mem\t# int" %}
5966 
5967   ins_encode %{
5968     __ movl($dst$$Register, $mem$$Address);
5969   %}
5970 
5971   ins_pipe(ialu_reg_mem);
5972 %}
5973 
5974 // Load Integer (32 bit signed) to Byte (8 bit signed): the shift pair keeps only the sign-extended low byte
5975 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5976   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5977 
5978   ins_cost(125);
5979   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5980   ins_encode %{
5981     __ movsbl($dst$$Register, $mem$$Address);  // one sign-extending byte load replaces load + two shifts
5982   %}
5983   ins_pipe(ialu_reg_mem);
5984 %}
5985 
5986 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): the AndI with the immI_255 mask collapses into a byte load
5987 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5988   match(Set dst (AndI (LoadI mem) mask));
5989 
5990   ins_cost(125);
5991   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5992   ins_encode %{
5993     __ movzbl($dst$$Register, $mem$$Address);  // the mask operand is not emitted; the zero-extending byte load does the masking
5994   %}
5995   ins_pipe(ialu_reg_mem);
5996 %}
5997 
5998 // Load Integer (32 bit signed) to Short (16 bit signed): the shift pair keeps only the sign-extended low 16 bits
5999 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6000   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6001 
6002   ins_cost(125);
6003   format %{ "movswl  $dst, $mem\t# int -> short" %}
6004   ins_encode %{
6005     __ movswl($dst$$Register, $mem$$Address);  // one sign-extending 16-bit load replaces load + two shifts
6006   %}
6007   ins_pipe(ialu_reg_mem);
6008 %}
6009 
6010 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): the AndI with the immI_65535 mask collapses into a 16-bit load
6011 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6012   match(Set dst (AndI (LoadI mem) mask));
6013 
6014   ins_cost(125);
6015   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6016   ins_encode %{
6017     __ movzwl($dst$$Register, $mem$$Address);  // the mask operand is not emitted; the zero-extending 16-bit load does the masking
6018   %}
6019   ins_pipe(ialu_reg_mem);
6020 %}
6021 
6022 // Load Integer into Long Register; the ConvI2L is folded into one sign-extending load
6023 instruct loadI2L(rRegL dst, memory mem)
6024 %{
6025   match(Set dst (ConvI2L (LoadI mem)));
6026 
6027   ins_cost(125);
6028   format %{ "movslq  $dst, $mem\t# int -> long" %}
6029 
6030   ins_encode %{
6031     __ movslq($dst$$Register, $mem$$Address);
6032   %}
6033 
6034   ins_pipe(ialu_reg_mem);
6035 %}
6036 
6037 // Load Integer with mask 0xFF into Long Register: mask and conversion collapse into a zero-extending byte load
6038 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6039   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6040 
6041   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6042   ins_encode %{
6043     __ movzbq($dst$$Register, $mem$$Address);  // the mask operand is not emitted; the byte load does the masking
6044   %}
6045   ins_pipe(ialu_reg_mem);
6046 %}
6047 
6048 // Load Integer with mask 0xFFFF into Long Register: mask and conversion collapse into a zero-extending 16-bit load
6049 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6050   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6051 
6052   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6053   ins_encode %{
6054     __ movzwq($dst$$Register, $mem$$Address);  // the mask operand is not emitted; the 16-bit load does the masking
6055   %}
6056   ins_pipe(ialu_reg_mem);
6057 %}
6058 
6059 // Load Integer with a 32-bit mask into Long Register.  ConvI2L sign-extends, so the long must be negative whenever bit 31 survives the mask.
6060 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6061   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6062   effect(KILL cr);  // the andq below writes the flags
6063 
6064   format %{ "movslq  $dst, $mem\t# int & 32-bit mask -> long\n\t"
6065             "andq    $dst, $mask" %}
6066   ins_encode %{
6067     Register Rdst = $dst$$Register;
6068     __ movslq(Rdst, $mem$$Address);   // sign-extend the loaded int to 64 bits (movl would zero the upper half)
6069     __ andq(Rdst, $mask$$constant);   // the imm32 is sign-extended as well, so the upper 32 bits equal bit 31 of (int & mask)
6070   %}
6071   ins_pipe(ialu_reg_mem);
6072 %}
6073 
6074 // Load Unsigned Integer into Long Register (matches the LoadUI2L node with a plain 32-bit load)
6075 instruct loadUI2L(rRegL dst, memory mem)
6076 %{
6077   match(Set dst (LoadUI2L mem));
6078 
6079   ins_cost(125);
6080   format %{ "movl    $dst, $mem\t# uint -> long" %}
6081 
6082   ins_encode %{
6083     __ movl($dst$$Register, $mem$$Address);  // a 32-bit register write zeroes the upper half on x86-64
6084   %}
6085 
6086   ins_pipe(ialu_reg_mem);
6087 %}
6088 
6089 // Load Long (64 bit) into a long register
6090 instruct loadL(rRegL dst, memory mem)
6091 %{
6092   match(Set dst (LoadL mem));
6093 
6094   ins_cost(125);
6095   format %{ "movq    $dst, $mem\t# long" %}
6096 
6097   ins_encode %{
6098     __ movq($dst$$Register, $mem$$Address);
6099   %}
6100 
6101   ins_pipe(ialu_reg_mem); // XXX
6102 %}
6103 
6104 // Load Range (matches LoadRange with a 32-bit load into an int register)
6105 instruct loadRange(rRegI dst, memory mem)
6106 %{
6107   match(Set dst (LoadRange mem));
6108 
6109   ins_cost(125); // XXX
6110   format %{ "movl    $dst, $mem\t# range" %}
6111   opcode(0x8B);  // MOV reg, r/m
6112   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6113   ins_pipe(ialu_reg_mem);
6114 %}
6115 
6116 // Load Pointer (64 bit) into a pointer register
6117 instruct loadP(rRegP dst, memory mem)
6118 %{
6119   match(Set dst (LoadP mem));
6120 
6121   ins_cost(125); // XXX
6122   format %{ "movq    $dst, $mem\t# ptr" %}
6123   opcode(0x8B);  // MOV reg, r/m
6124   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));  // the _wide REX encoder (defined elsewhere) presumably sets REX.W for the 64-bit form
6125   ins_pipe(ialu_reg_mem); // XXX
6126 %}
6127 
6128 // Load Compressed Pointer (32-bit load into a narrow-oop register)
6129 instruct loadN(rRegN dst, memory mem)
6130 %{
6131    match(Set dst (LoadN mem));
6132 
6133    ins_cost(125); // XXX
6134    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6135    ins_encode %{
6136      __ movl($dst$$Register, $mem$$Address);
6137    %}
6138    ins_pipe(ialu_reg_mem); // XXX
6139 %}
6140 
6141 
6142 // Load Klass Pointer (64 bit) into a pointer register
6143 instruct loadKlass(rRegP dst, memory mem)
6144 %{
6145   match(Set dst (LoadKlass mem));
6146 
6147   ins_cost(125); // XXX
6148   format %{ "movq    $dst, $mem\t# class" %}
6149   opcode(0x8B);  // MOV reg, r/m
6150   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));  // the _wide REX encoder (defined elsewhere) presumably sets REX.W for the 64-bit form
6151   ins_pipe(ialu_reg_mem); // XXX
6152 %}
6153 
6154 // Load narrow Klass Pointer (32-bit load into a narrow register)
6155 instruct loadNKlass(rRegN dst, memory mem)
6156 %{
6157   match(Set dst (LoadNKlass mem));
6158 
6159   ins_cost(125); // XXX
6160   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6161   ins_encode %{
6162     __ movl($dst$$Register, $mem$$Address);
6163   %}
6164   ins_pipe(ialu_reg_mem); // XXX
6165 %}
6166 
6167 // Load Float (single precision) into an XMM register
6168 instruct loadF(regF dst, memory mem)
6169 %{
6170   match(Set dst (LoadF mem));
6171 
6172   ins_cost(145); // XXX
6173   format %{ "movss   $dst, $mem\t# float" %}
6174   opcode(0xF3, 0x0F, 0x10);  // F3 0F 10 = MOVSS xmm, m32
6175   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));  // prefix byte first, then REX, then the two opcode bytes
6176   ins_pipe(pipe_slow); // XXX
6177 %}
6178 
6179 // Load Double with movlpd; selected only when UseXmmLoadAndClearUpper is off
6180 instruct loadD_partial(regD dst, memory mem)
6181 %{
6182   predicate(!UseXmmLoadAndClearUpper);
6183   match(Set dst (LoadD mem));
6184 
6185   ins_cost(145); // XXX
6186   format %{ "movlpd  $dst, $mem\t# double" %}
6187   opcode(0x66, 0x0F, 0x12);  // 66 0F 12 = MOVLPD xmm, m64
6188   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6189   ins_pipe(pipe_slow); // XXX
6190 %}
6191 
6192 instruct loadD(regD dst, memory mem)  // Load Double with movsd; selected only when UseXmmLoadAndClearUpper is on
6193 %{
6194   predicate(UseXmmLoadAndClearUpper);
6195   match(Set dst (LoadD mem));
6196 
6197   ins_cost(145); // XXX
6198   format %{ "movsd   $dst, $mem\t# double" %}
6199   opcode(0xF2, 0x0F, 0x10);  // F2 0F 10 = MOVSD xmm, m64
6200   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6201   ins_pipe(pipe_slow); // XXX
6202 %}
6203 
6204 // Load Aligned Packed Byte to XMM register (8 bytes via the movq_ld encoder, which is defined elsewhere)
6205 instruct loadA8B(regD dst, memory mem) %{
6206   match(Set dst (Load8B mem));
6207   ins_cost(125);
6208   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6209   ins_encode( movq_ld(dst, mem));
6210   ins_pipe( pipe_slow );
6211 %}
6212 
6213 // Load Aligned Packed Short to XMM register (4 shorts via the movq_ld encoder, which is defined elsewhere)
6214 instruct loadA4S(regD dst, memory mem) %{
6215   match(Set dst (Load4S mem));
6216   ins_cost(125);
6217   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6218   ins_encode( movq_ld(dst, mem));
6219   ins_pipe( pipe_slow );
6220 %}
6221 
6222 // Load Aligned Packed Char to XMM register (4 chars via the movq_ld encoder, which is defined elsewhere)
6223 instruct loadA4C(regD dst, memory mem) %{
6224   match(Set dst (Load4C mem));
6225   ins_cost(125);
6226   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6227   ins_encode( movq_ld(dst, mem));
6228   ins_pipe( pipe_slow );
6229 %}
6230 
6231 // Load Aligned Packed Integer to XMM register (2 ints via the movq_ld encoder, which is defined elsewhere)
6232 instruct load2IU(regD dst, memory mem) %{
6233   match(Set dst (Load2I mem));
6234   ins_cost(125);
6235   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6236   ins_encode( movq_ld(dst, mem));
6237   ins_pipe( pipe_slow );
6238 %}
6239 
6240 // Load Aligned Packed Single to XMM (2 floats via the movq_ld encoder, which is defined elsewhere)
6241 instruct loadA2F(regD dst, memory mem) %{
6242   match(Set dst (Load2F mem));
6243   ins_cost(145);  // costed like the scalar float/double loads (145) rather than the packed integer loads (125)
6244   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6245   ins_encode( movq_ld(dst, mem));
6246   ins_pipe( pipe_slow );
6247 %}
6248 
6249 // Load Effective Address: materialize an indOffset8 address operand into a pointer register
6250 instruct leaP8(rRegP dst, indOffset8 mem)
6251 %{
6252   match(Set dst mem);  // matches the address expression itself, not a load through it
6253 
6254   ins_cost(110); // XXX
6255   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6256   opcode(0x8D);  // LEA reg, m
6257   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6258   ins_pipe(ialu_reg_reg_fat);
6259 %}
6260 
6261 instruct leaP32(rRegP dst, indOffset32 mem)  // Load Effective Address of an indOffset32 address operand
6262 %{
6263   match(Set dst mem);  // matches the address expression itself, not a load through it
6264 
6265   ins_cost(110);
6266   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6267   opcode(0x8D);  // LEA reg, m
6268   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6269   ins_pipe(ialu_reg_reg_fat);
6270 %}
6271 
6272 // instruct leaPIdx(rRegP dst, indIndex mem)
6273 // %{
6274 //   match(Set dst mem);
6275 
6276 //   ins_cost(110);
6277 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6278 //   opcode(0x8D);
6279 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6280 //   ins_pipe(ialu_reg_reg_fat);
6281 // %}
6282 
6283 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)  // Load Effective Address of an indIndexOffset address operand
6284 %{
6285   match(Set dst mem);  // matches the address expression itself, not a load through it
6286 
6287   ins_cost(110);
6288   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6289   opcode(0x8D);  // LEA reg, m
6290   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6291   ins_pipe(ialu_reg_reg_fat);
6292 %}
6293 
6294 instruct leaPIdxScale(rRegP dst, indIndexScale mem)  // Load Effective Address of an indIndexScale address operand
6295 %{
6296   match(Set dst mem);  // matches the address expression itself, not a load through it
6297 
6298   ins_cost(110);
6299   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6300   opcode(0x8D);  // LEA reg, m
6301   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6302   ins_pipe(ialu_reg_reg_fat);
6303 %}
6304 
6305 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)  // Load Effective Address of an indIndexScaleOffset address operand
6306 %{
6307   match(Set dst mem);  // matches the address expression itself, not a load through it
6308 
6309   ins_cost(110);
6310   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6311   opcode(0x8D);  // LEA reg, m
6312   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6313   ins_pipe(ialu_reg_reg_fat);
6314 %}
6315 
6316 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)  // Load Effective Address of an indPosIndexScaleOffset address operand
6317 %{
6318   match(Set dst mem);  // matches the address expression itself, not a load through it
6319 
6320   ins_cost(110);
6321   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6322   opcode(0x8D);  // LEA reg, m
6323   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6324   ins_pipe(ialu_reg_reg_fat);
6325 %}
6326 
6327 // Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero narrow-oop shift
6328 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6329 %{
6330   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6331   match(Set dst mem);  // matches the address expression itself, not a load through it
6332 
6333   ins_cost(110);
6334   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6335   opcode(0x8D);  // LEA reg, m
6336   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6337   ins_pipe(ialu_reg_reg_fat);
6338 %}
6339 
6340 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)  // Load Effective Address of an indOffset8Narrow operand
6341 %{
6342   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6343   match(Set dst mem);
6344 
6345   ins_cost(110); // XXX
6346   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6347   opcode(0x8D);  // LEA reg, m
6348   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6349   ins_pipe(ialu_reg_reg_fat);
6350 %}
6351 
6352 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)  // Load Effective Address of an indOffset32Narrow operand
6353 %{
6354   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6355   match(Set dst mem);
6356 
6357   ins_cost(110);
6358   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6359   opcode(0x8D);  // LEA reg, m
6360   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6361   ins_pipe(ialu_reg_reg_fat);
6362 %}
6363 
6364 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)  // Load Effective Address of an indIndexOffsetNarrow operand
6365 %{
6366   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6367   match(Set dst mem);
6368 
6369   ins_cost(110);
6370   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6371   opcode(0x8D);  // LEA reg, m
6372   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6373   ins_pipe(ialu_reg_reg_fat);
6374 %}
6375 
6376 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)  // Load Effective Address of an indIndexScaleNarrow operand
6377 %{
6378   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6379   match(Set dst mem);
6380 
6381   ins_cost(110);
6382   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6383   opcode(0x8D);  // LEA reg, m
6384   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6385   ins_pipe(ialu_reg_reg_fat);
6386 %}
6387 
6388 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)  // Load Effective Address of an indIndexScaleOffsetNarrow operand
6389 %{
6390   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6391   match(Set dst mem);
6392 
6393   ins_cost(110);
6394   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6395   opcode(0x8D);  // LEA reg, m
6396   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6397   ins_pipe(ialu_reg_reg_fat);
6398 %}
6399 
6400 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)  // Load Effective Address of an indPosIndexScaleOffsetNarrow operand
6401 %{
6402   predicate(Universe::narrow_oop_shift() == 0);  // only selected when the narrow-oop shift is zero
6403   match(Set dst mem);
6404 
6405   ins_cost(110);
6406   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6407   opcode(0x8D);  // LEA reg, m
6408   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6409   ins_pipe(ialu_reg_reg_fat);
6410 %}
6411 
6412 instruct loadConI(rRegI dst, immI src)  // Load an int constant into a register (general case; no explicit ins_cost)
6413 %{
6414   match(Set dst src);
6415 
6416   format %{ "movl    $dst, $src\t# int" %}
6417   ins_encode(load_immI(dst, src));  // encoder class defined elsewhere in this file
6418   ins_pipe(ialu_reg_fat); // XXX
6419 %}
6420 
6421 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)  // Load int zero by xor-ing the register with itself
6422 %{
6423   match(Set dst src);
6424   effect(KILL cr);  // xor writes the flags
6425 
6426   ins_cost(50);
6427   format %{ "xorl    $dst, $dst\t# int" %}
6428   opcode(0x33); /* + rd */
6429   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // dst is both operands
6430   ins_pipe(ialu_reg);
6431 %}
6432 
6433 instruct loadConL(rRegL dst, immL src)  // Load an arbitrary long constant
6434 %{
6435   match(Set dst src);
6436 
6437   ins_cost(150);  // highest cost among the long-constant loads here (50/60/70), so the narrower forms win when their operand matches
6438   format %{ "movq    $dst, $src\t# long" %}
6439   ins_encode(load_immL(dst, src));  // encoder class defined elsewhere in this file
6440   ins_pipe(ialu_reg);
6441 %}
6442 
6443 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)  // Load long zero by xor-ing the register with itself
6444 %{
6445   match(Set dst src);
6446   effect(KILL cr);  // xor writes the flags
6447 
6448   ins_cost(50);
6449   format %{ "xorl    $dst, $dst\t# long" %}
6450   opcode(0x33); /* + rd */
6451   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // 32-bit xor suffices: a 32-bit register write zeroes the upper half on x86-64
6452   ins_pipe(ialu_reg); // XXX
6453 %}
6454 
6455 instruct loadConUL32(rRegL dst, immUL32 src)  // Load a long constant of operand class immUL32 with a 32-bit move
6456 %{
6457   match(Set dst src);
6458 
6459   ins_cost(60);  // cheaper than the general loadConL (150)
6460   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6461   ins_encode(load_immUL32(dst, src));  // encoder class defined elsewhere in this file
6462   ins_pipe(ialu_reg);
6463 %}
6464 
6465 instruct loadConL32(rRegL dst, immL32 src)  // Load a long constant of operand class immL32
6466 %{
6467   match(Set dst src);
6468 
6469   ins_cost(70);  // cheaper than the general loadConL (150), dearer than loadConUL32 (60)
6470   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6471   ins_encode(load_immL32(dst, src));  // encoder class defined elsewhere in this file
6472   ins_pipe(ialu_reg);
6473 %}
6474 
6475 instruct loadConP(rRegP dst, immP con) %{  // Load a pointer constant (general case; no explicit ins_cost)
6476   match(Set dst con);
6477 
6478   format %{ "movq    $dst, $con\t# ptr" %}
6479   ins_encode(load_immP(dst, con));  // encoder class defined elsewhere in this file
6480   ins_pipe(ialu_reg_fat); // XXX
6481 %}
6482 
6483 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)  // Load the null pointer by xor-ing the register with itself
6484 %{
6485   match(Set dst src);
6486   effect(KILL cr);  // xor writes the flags
6487 
6488   ins_cost(50);
6489   format %{ "xorl    $dst, $dst\t# ptr" %}
6490   opcode(0x33); /* + rd */
6491   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // 32-bit xor suffices: a 32-bit register write zeroes the upper half on x86-64
6492   ins_pipe(ialu_reg);
6493 %}
6494 
6495 instruct loadConP_poll(rRegP dst, immP_poll src) %{  // Load the polling-page address into a register
6496   match(Set dst src);
6497   format %{ "movq    $dst, $src\t!ptr" %}
6498   ins_encode %{
6499     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);  // address is tagged with a poll_type relocation
6500     __ lea($dst$$Register, polling_page);  // emitted through the lea macro, although the format prints movq
6501   %}
6502   ins_pipe(ialu_reg_fat);
6503 %}
6504 
6505 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)  // Load a pointer constant of operand class immP31 with a 32-bit move
6506 %{
6507   match(Set dst src);
6508   effect(KILL cr);  // NOTE(review): a plain movl does not write flags; confirm whether load_immP31 needs this kill
6509 
6510   ins_cost(60);
6511   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6512   ins_encode(load_immP31(dst, src));  // encoder class defined elsewhere in this file
6513   ins_pipe(ialu_reg);
6514 %}
6515 
6516 instruct loadConF(regF dst, immF con) %{  // Load a float constant from the constant table
6517   match(Set dst con);
6518   ins_cost(125);
6519   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6520   ins_encode %{
6521     __ movflt($dst$$XMMRegister, $constantaddress($con));
6522   %}
6523   ins_pipe(pipe_slow);
6524 %}
6525 
6526 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{  // Load the compressed NULL pointer by xor-ing the register with itself
6527   match(Set dst src);
6528   effect(KILL cr);  // xor writes the flags
6529   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6530   ins_encode %{
6531     __ xorq($dst$$Register, $dst$$Register);
6532   %}
6533   ins_pipe(ialu_reg);
6534 %}
6535 
6536 instruct loadConN(rRegN dst, immN src) %{  // Load a non-null compressed pointer constant
6537   match(Set dst src);
6538 
6539   ins_cost(125);
6540   format %{ "movl    $dst, $src\t# compressed ptr" %}
6541   ins_encode %{
6542     address con = (address)$src$$constant;
6543     if (con == NULL) {
6544       ShouldNotReachHere();  // a NULL constant is not expected to reach this rule
6545     } else {
6546       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6547     }
6548   %}
6549   ins_pipe(ialu_reg_fat); // XXX
6550 %}
6551 
6552 instruct loadConF0(regF dst, immF0 src)  // Load float 0.0 by xor-ing the XMM register with itself
6553 %{
6554   match(Set dst src);
6555   ins_cost(100);  // cheaper than the constant-table load in loadConF (125)
6556 
6557   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6558   opcode(0x0F, 0x57);  // 0F 57 = XORPS
6559   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is both operands
6560   ins_pipe(pipe_slow);
6561 %}
6562 
6563 // Load a double constant from the constant table.  Use the same format since predicate() can not be used here.
6564 instruct loadConD(regD dst, immD con) %{
6565   match(Set dst con);
6566   ins_cost(125);
6567   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6568   ins_encode %{
6569     __ movdbl($dst$$XMMRegister, $constantaddress($con));  // NOTE(review): movdbl presumably picks movsd or movlpd at emit time - confirm in the macro assembler
6570   %}
6571   ins_pipe(pipe_slow);
6572 %}
6573 
6574 instruct loadConD0(regD dst, immD0 src)  // Load double 0.0 by xor-ing the XMM register with itself
6575 %{
6576   match(Set dst src);
6577   ins_cost(100);  // cheaper than the constant-table load in loadConD (125)
6578 
6579   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6580   opcode(0x66, 0x0F, 0x57);  // 66 0F 57 = XORPD
6581   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));  // prefix byte first, then REX, then the two opcode bytes
6582   ins_pipe(pipe_slow);
6583 %}
6584 
6585 instruct loadSSI(rRegI dst, stackSlotI src)  // Load an int from a stack slot into a register
6586 %{
6587   match(Set dst src);
6588 
6589   ins_cost(125);
6590   format %{ "movl    $dst, $src\t# int stk" %}
6591   opcode(0x8B);  // MOV reg, r/m
6592   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6593   ins_pipe(ialu_reg_mem);
6594 %}
6595 
6596 instruct loadSSL(rRegL dst, stackSlotL src)  // Load a long from a stack slot into a register
6597 %{
6598   match(Set dst src);
6599 
6600   ins_cost(125);
6601   format %{ "movq    $dst, $src\t# long stk" %}
6602   opcode(0x8B);  // MOV reg, r/m
6603   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6604   ins_pipe(ialu_reg_mem);
6605 %}
6606 
6607 instruct loadSSP(rRegP dst, stackSlotP src)  // Load a pointer from a stack slot into a register
6608 %{
6609   match(Set dst src);
6610 
6611   ins_cost(125);
6612   format %{ "movq    $dst, $src\t# ptr stk" %}
6613   opcode(0x8B);  // MOV reg, r/m
6614   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6615   ins_pipe(ialu_reg_mem);
6616 %}
6617 
6618 instruct loadSSF(regF dst, stackSlotF src)  // Load a float from a stack slot into an XMM register
6619 %{
6620   match(Set dst src);
6621 
6622   ins_cost(125);
6623   format %{ "movss   $dst, $src\t# float stk" %}
6624   opcode(0xF3, 0x0F, 0x10);  // F3 0F 10 = MOVSS xmm, m32
6625   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6626   ins_pipe(pipe_slow); // XXX
6627 %}
6628 
6629 // Load a double from a stack slot into an XMM register.  Use the same format since predicate() can not be used here.
6630 instruct loadSSD(regD dst, stackSlotD src)
6631 %{
6632   match(Set dst src);
6633 
6634   ins_cost(125);
6635   format %{ "movsd   $dst, $src\t# double stk" %}
6636   ins_encode  %{
6637     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot is addressed as rsp + displacement; NOTE(review): movdbl presumably picks movsd or movlpd at emit time - confirm
6638   %}
6639   ins_pipe(pipe_slow); // XXX
6640 %}
6641 
6642 // Prefetch instructions.
6643 // Must be safe to execute with invalid address (cannot fault).
6644 
6645 instruct prefetchr( memory mem ) %{  // Read prefetch via PREFETCHR
6646   predicate(ReadPrefetchInstr==3);  // selected when ReadPrefetchInstr is 3
6647   match(PrefetchRead mem);
6648   ins_cost(125);
6649 
6650   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6651   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6652   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));  // 0x00 is the /0 opcode extension
6653   ins_pipe(ialu_mem);
6654 %}
6655 
6656 instruct prefetchrNTA( memory mem ) %{  // Read prefetch via PREFETCHNTA
6657   predicate(ReadPrefetchInstr==0);  // selected when ReadPrefetchInstr is 0
6658   match(PrefetchRead mem);
6659   ins_cost(125);
6660 
6661   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6662   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6663   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));  // 0x00 is the /0 opcode extension
6664   ins_pipe(ialu_mem);
6665 %}
6666 
6667 instruct prefetchrT0( memory mem ) %{  // Read prefetch via PREFETCHT0
6668   predicate(ReadPrefetchInstr==1);  // selected when ReadPrefetchInstr is 1
6669   match(PrefetchRead mem);
6670   ins_cost(125);
6671 
6672   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6673   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6674   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));  // 0x01 is the /1 opcode extension
6675   ins_pipe(ialu_mem);
6676 %}
6677 
6678 instruct prefetchrT2( memory mem ) %{  // Read prefetch via PREFETCHT2
6679   predicate(ReadPrefetchInstr==2);  // selected when ReadPrefetchInstr is 2
6680   match(PrefetchRead mem);
6681   ins_cost(125);
6682 
6683   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6684   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6685   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));  // 0x03 is the /3 opcode extension
6686   ins_pipe(ialu_mem);
6687 %}
6688 
6689 instruct prefetchw( memory mem ) %{  // Write prefetch via PREFETCHW
6690   predicate(AllocatePrefetchInstr==3);  // selected when AllocatePrefetchInstr is 3
6691   match(PrefetchWrite mem);
6692   ins_cost(125);
6693 
6694   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6695   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6696   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));  // 0x01 is the /1 opcode extension
6697   ins_pipe(ialu_mem);
6698 %}
6699 
6700 instruct prefetchwNTA( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr==0.
       // Same byte encoding as the read-side NTA prefetch: 0F 18 /0 on $mem.
6701   predicate(AllocatePrefetchInstr==0);
6702   match(PrefetchWrite mem);
6703   ins_cost(125);
6704 
6705   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6706   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6707   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6708   ins_pipe(ialu_mem);
6709 %}
6710 
6711 instruct prefetchwT0( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr==1.
       // Encodes opcode bytes 0F 18 with ModRM opcode extension /1 on $mem.
6712   predicate(AllocatePrefetchInstr==1);
6713   match(PrefetchWrite mem);
6714   ins_cost(125);
6715 
6716   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6717   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6718   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6719   ins_pipe(ialu_mem);
6720 %}
6721 
6722 instruct prefetchwT2( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr==2.
       // Encodes opcode bytes 0F 18 with ModRM opcode extension /3 on $mem.
6723   predicate(AllocatePrefetchInstr==2);
6724   match(PrefetchWrite mem);
6725   ins_cost(125);
6726 
6727   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6728   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6729   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6730   ins_pipe(ialu_mem);
6731 %}
6732 
6733 //----------Store Instructions-------------------------------------------------
6734 
6735 // Store Byte
6736 instruct storeB(memory mem, rRegI src)
6737 %{
       // Byte store from an int register (StoreB), opcode 0x88.
       // NOTE(review): REX_breg_mem is defined outside this chunk; presumably
       // the REX variant needed for byte-register operands - confirm there.
6738   match(Set mem (StoreB mem src));
6739 
6740   ins_cost(125); // XXX
6741   format %{ "movb    $mem, $src\t# byte" %}
6742   opcode(0x88);
6743   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6744   ins_pipe(ialu_mem_reg);
6745 %}
6746 
6747 // Store Char/Short
6748 instruct storeC(memory mem, rRegI src)
6749 %{
       // Char/short store from an int register (StoreC).
       // SizePrefix is emitted before the REX prefix and opcode 0x89; the
       // resulting instruction is formatted as movw.
6750   match(Set mem (StoreC mem src));
6751 
6752   ins_cost(125); // XXX
6753   format %{ "movw    $mem, $src\t# char/short" %}
6754   opcode(0x89);
6755   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6756   ins_pipe(ialu_mem_reg);
6757 %}
6758 
6759 // Store Integer
6760 instruct storeI(memory mem, rRegI src)
6761 %{
       // Int store from an int register (StoreI): non-wide REX prefix,
       // opcode 0x89, then the reg/mem operand bytes.
6762   match(Set mem (StoreI mem src));
6763 
6764   ins_cost(125); // XXX
6765   format %{ "movl    $mem, $src\t# int" %}
6766   opcode(0x89);
6767   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6768   ins_pipe(ialu_mem_reg);
6769 %}
6770 
6771 // Store Long
6772 instruct storeL(memory mem, rRegL src)
6773 %{
       // Long store from a long register (StoreL): same opcode 0x89 as the
       // int store but with the wide REX prefix (REX_reg_mem_wide).
6774   match(Set mem (StoreL mem src));
6775 
6776   ins_cost(125); // XXX
6777   format %{ "movq    $mem, $src\t# long" %}
6778   opcode(0x89);
6779   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6780   ins_pipe(ialu_mem_reg); // XXX
6781 %}
6782 
6783 // Store Pointer
6784 instruct storeP(memory mem, any_RegP src)
6785 %{
       // Pointer store from a pointer register of class any_RegP (StoreP):
       // wide REX prefix plus opcode 0x89, formatted as movq.
6786   match(Set mem (StoreP mem src));
6787 
6788   ins_cost(125); // XXX
6789   format %{ "movq    $mem, $src\t# ptr" %}
6790   opcode(0x89);
6791   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6792   ins_pipe(ialu_mem_reg);
6793 %}
6794 
6795 instruct storeImmP0(memory mem, immP0 zero)
6796 %{
       // Store of a NULL pointer constant (immP0) done as a register store.
       // Only selected with compressed oops and a NULL narrow-oop base; per
       // the format string R12 (heap base) is then 0, so r12 is stored.
6797   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6798   match(Set mem (StoreP mem zero));
6799 
6800   ins_cost(125); // XXX
6801   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6802   ins_encode %{
6803     __ movq($mem$$Address, r12);
6804   %}
6805   ins_pipe(ialu_mem_reg);
6806 %}
6807 
6808 // Store NULL Pointer, mark word, or other simple pointer constant.
6809 instruct storeImmP(memory mem, immP31 src)
6810 %{
       // Store a pointer constant accepted by the immP31 operand.
       // Encoded as C7 /0 with a wide REX prefix and a 32-bit immediate
       // (Con32); costed higher (150) than the register-store forms (125).
6811   match(Set mem (StoreP mem src));
6812 
6813   ins_cost(150); // XXX
6814   format %{ "movq    $mem, $src\t# ptr" %}
6815   opcode(0xC7); /* C7 /0 */
6816   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6817   ins_pipe(ialu_mem_imm);
6818 %}
6819 
6820 // Store Compressed Pointer
6821 instruct storeN(memory mem, rRegN src)
6822 %{
       // Compressed-pointer store from a narrow-oop register (StoreN),
       // emitted through the assembler as a 32-bit movl.
6823   match(Set mem (StoreN mem src));
6824 
6825   ins_cost(125); // XXX
6826   format %{ "movl    $mem, $src\t# compressed ptr" %}
6827   ins_encode %{
6828     __ movl($mem$$Address, $src$$Register);
6829   %}
6830   ins_pipe(ialu_mem_reg);
6831 %}
6832 
6833 instruct storeImmN0(memory mem, immN0 zero)
6834 %{
       // Store of a compressed NULL (immN0) done as a 32-bit store of r12.
       // Selected only when the narrow-oop base is NULL; per the format
       // string R12 (heap base) is then 0.
6835   predicate(Universe::narrow_oop_base() == NULL);
6836   match(Set mem (StoreN mem zero));
6837 
6838   ins_cost(125); // XXX
6839   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6840   ins_encode %{
6841     __ movl($mem$$Address, r12);
6842   %}
6843   ins_pipe(ialu_mem_reg);
6844 %}
6845 
6846 instruct storeImmN(memory mem, immN src)
6847 %{
       // Store a compressed-pointer constant (StoreN with an immN operand).
6848   match(Set mem (StoreN mem src));
6849 
6850   ins_cost(150); // XXX
6851   format %{ "movl    $mem, $src\t# compressed ptr" %}
6852   ins_encode %{
         // A NULL constant is written as a plain 32-bit zero; any other
         // constant goes through set_narrow_oop with the constant as jobject.
6853     address con = (address)$src$$constant;
6854     if (con == NULL) {
6855       __ movl($mem$$Address, (int32_t)0);
6856     } else {
6857       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6858     }
6859   %}
6860   ins_pipe(ialu_mem_imm);
6861 %}
6862 
6863 // Store Integer Immediate
6864 instruct storeImmI0(memory mem, immI0 zero)
6865 %{
       // Int zero store done as a 32-bit store of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
6866   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6867   match(Set mem (StoreI mem zero));
6868 
6869   ins_cost(125); // XXX
6870   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6871   ins_encode %{
6872     __ movl($mem$$Address, r12);
6873   %}
6874   ins_pipe(ialu_mem_reg);
6875 %}
6876 
6877 instruct storeImmI(memory mem, immI src)
6878 %{
       // Store an int immediate: opcode C7 /0 with a 32-bit immediate (Con32)
       // and a non-wide REX prefix.
6879   match(Set mem (StoreI mem src));
6880 
6881   ins_cost(150);
6882   format %{ "movl    $mem, $src\t# int" %}
6883   opcode(0xC7); /* C7 /0 */
6884   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6885   ins_pipe(ialu_mem_imm);
6886 %}
6887 
6888 // Store Long Immediate
6889 instruct storeImmL0(memory mem, immL0 zero)
6890 %{
       // Long zero store done as a 64-bit store of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
6891   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6892   match(Set mem (StoreL mem zero));
6893 
6894   ins_cost(125); // XXX
6895   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6896   ins_encode %{
6897     __ movq($mem$$Address, r12);
6898   %}
6899   ins_pipe(ialu_mem_reg);
6900 %}
6901 
6902 instruct storeImmL(memory mem, immL32 src)
6903 %{
       // Store a long constant accepted by immL32: opcode C7 /0 with a wide
       // REX prefix and a 32-bit immediate (Con32), formatted as movq.
6904   match(Set mem (StoreL mem src));
6905 
6906   ins_cost(150);
6907   format %{ "movq    $mem, $src\t# long" %}
6908   opcode(0xC7); /* C7 /0 */
6909   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6910   ins_pipe(ialu_mem_imm);
6911 %}
6912 
6913 // Store Short/Char Immediate
6914 instruct storeImmC0(memory mem, immI0 zero)
6915 %{
       // Short/char zero store done as a 16-bit store (movw) of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
6916   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6917   match(Set mem (StoreC mem zero));
6918 
6919   ins_cost(125); // XXX
6920   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6921   ins_encode %{
6922     __ movw($mem$$Address, r12);
6923   %}
6924   ins_pipe(ialu_mem_reg);
6925 %}
6926 
6927 instruct storeImmI16(memory mem, immI16 src)
6928 %{
       // Store a 16-bit immediate to a short/char location; only enabled
       // when UseStoreImmI16 is set. Uses the C7 /0 opcode of the 32-bit
       // immediate store, preceded by SizePrefix and followed by Con16.
6929   predicate(UseStoreImmI16);
6930   match(Set mem (StoreC mem src));
6931 
6932   ins_cost(150);
6933   format %{ "movw    $mem, $src\t# short/char" %}
6934   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6935   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6936   ins_pipe(ialu_mem_imm);
6937 %}
6938 
6939 // Store Byte Immediate
6940 instruct storeImmB0(memory mem, immI0 zero)
6941 %{
       // Byte zero store done as an 8-bit store (movb) of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
       // The format annotation now says "byte": this rule matches StoreB and
       // emits movb, and the old "short/char" text was copied from the
       // StoreC rule.
6942   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6943   match(Set mem (StoreB mem zero));
6944 
6945   ins_cost(125); // XXX
6946   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6947   ins_encode %{
6948     __ movb($mem$$Address, r12);
6949   %}
6950   ins_pipe(ialu_mem_reg);
6951 %}
6952 
6953 instruct storeImmB(memory mem, immI8 src)
6954 %{
       // Store a byte immediate (immI8 operand): opcode C6 /0 followed by
       // the immediate emitted through Con8or32.
6955   match(Set mem (StoreB mem src));
6956 
6957   ins_cost(150); // XXX
6958   format %{ "movb    $mem, $src\t# byte" %}
6959   opcode(0xC6); /* C6 /0 */
6960   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6961   ins_pipe(ialu_mem_imm);
6962 %}
6963 
6964 // Store Aligned Packed Byte XMM register to memory
6965 instruct storeA8B(memory mem, regD src) %{
       // Packed 8-byte vector store (Store8B) from a regD operand, encoded
       // with movq_st and formatted as MOVQ.
6966   match(Set mem (Store8B mem src));
6967   ins_cost(145);
6968   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6969   ins_encode( movq_st(mem, src));
6970   ins_pipe( pipe_slow );
6971 %}
6972 
6973 // Store Aligned Packed Char/Short XMM register to memory
6974 instruct storeA4C(memory mem, regD src) %{
       // Packed 4 x char/short vector store (Store4C) from a regD operand,
       // encoded with movq_st and formatted as MOVQ.
6975   match(Set mem (Store4C mem src));
6976   ins_cost(145);
6977   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6978   ins_encode( movq_st(mem, src));
6979   ins_pipe( pipe_slow );
6980 %}
6981 
6982 // Store Aligned Packed Integer XMM register to memory
6983 instruct storeA2I(memory mem, regD src) %{
       // Packed 2 x int vector store (Store2I) from a regD operand, encoded
       // with movq_st and formatted as MOVQ.
6984   match(Set mem (Store2I mem src));
6985   ins_cost(145);
6986   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6987   ins_encode( movq_st(mem, src));
6988   ins_pipe( pipe_slow );
6989 %}
6990 
6991 // Store CMS card-mark Immediate
6992 instruct storeImmCM0_reg(memory mem, immI0 zero)
6993 %{
       // Card-mark store of zero (StoreCM) done as a byte store of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
6994   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6995   match(Set mem (StoreCM mem zero));
6996 
6997   ins_cost(125); // XXX
6998   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6999   ins_encode %{
7000     __ movb($mem$$Address, r12);
7001   %}
7002   ins_pipe(ialu_mem_reg);
7003 %}
7004 
7005 instruct storeImmCM0(memory mem, immI0 src)
7006 %{
       // Card-mark store of zero (StoreCM) using an immediate byte store:
       // opcode C6 /0 with the immediate emitted through Con8or32.
       // Unpredicated and costed at 150, above the r12 form's 125.
7007   match(Set mem (StoreCM mem src));
7008 
7009   ins_cost(150); // XXX
7010   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7011   opcode(0xC6); /* C6 /0 */
7012   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7013   ins_pipe(ialu_mem_imm);
7014 %}
7015 
7016 // Store Aligned Packed Single Float XMM register to memory
7017 instruct storeA2F(memory mem, regD src) %{
       // Packed 2 x float vector store (Store2F) from a regD operand,
       // encoded with movq_st and formatted as MOVQ.
7018   match(Set mem (Store2F mem src));
7019   ins_cost(145);
7020   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7021   ins_encode( movq_st(mem, src));
7022   ins_pipe( pipe_slow );
7023 %}
7024 
7025 // Store Float
7026 instruct storeF(memory mem, regF src)
7027 %{
       // Float store from an XMM register (StoreF), formatted as movss.
       // The first opcode() byte (F3) is emitted via OpcP ahead of the REX
       // prefix; the remaining 0F 11 bytes follow via OpcS/OpcT.
7028   match(Set mem (StoreF mem src));
7029 
7030   ins_cost(95); // XXX
7031   format %{ "movss   $mem, $src\t# float" %}
7032   opcode(0xF3, 0x0F, 0x11);
7033   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7034   ins_pipe(pipe_slow); // XXX
7035 %}
7036 
7037 // Store immediate Float value (it is faster than store from XMM register)
7038 instruct storeF0(memory mem, immF0 zero)
7039 %{
       // Float zero store (immF0) done as a 32-bit integer store of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0. Cost 25 is the lowest of
       // the StoreF rules in this group.
7040   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7041   match(Set mem (StoreF mem zero));
7042 
7043   ins_cost(25); // XXX
7044   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7045   ins_encode %{
7046     __ movl($mem$$Address, r12);
7047   %}
7048   ins_pipe(ialu_mem_reg);
7049 %}
7050 
7051 instruct storeF_imm(memory mem, immF src)
7052 %{
       // Store a float constant through the integer path: opcode C7 /0 with
       // the immediate produced by Con32F_as_bits (the constant's bit
       // pattern, going by the enc_class name), formatted as movl.
7053   match(Set mem (StoreF mem src));
7054 
7055   ins_cost(50);
7056   format %{ "movl    $mem, $src\t# float" %}
7057   opcode(0xC7); /* C7 /0 */
7058   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7059   ins_pipe(ialu_mem_imm);
7060 %}
7061 
7062 // Store Double
7063 instruct storeD(memory mem, regD src)
7064 %{
       // Double store from an XMM register (StoreD), formatted as movsd.
       // The first opcode() byte (F2) is emitted via OpcP ahead of the REX
       // prefix; the remaining 0F 11 bytes follow via OpcS/OpcT.
7065   match(Set mem (StoreD mem src));
7066 
7067   ins_cost(95); // XXX
7068   format %{ "movsd   $mem, $src\t# double" %}
7069   opcode(0xF2, 0x0F, 0x11);
7070   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7071   ins_pipe(pipe_slow); // XXX
7072 %}
7073 
7074 // Store immediate double 0.0 (it is faster than store from XMM register)
7075 instruct storeD0_imm(memory mem, immD0 src)
7076 %{
       // Double 0.0 store using an immediate: C7 /0 with a wide REX prefix
       // and a 32-bit immediate, formatted as movq.
       // The predicate is the exact negation of storeD0's, so this form is
       // used whenever the r12-based store is not available.
7077   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7078   match(Set mem (StoreD mem src));
7079 
7080   ins_cost(50);
7081   format %{ "movq    $mem, $src\t# double 0." %}
7082   opcode(0xC7); /* C7 /0 */
7083   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7084   ins_pipe(ialu_mem_imm);
7085 %}
7086 
7087 instruct storeD0(memory mem, immD0 zero)
7088 %{
       // Double 0.0 store done as a 64-bit integer store of r12.
       // Requires compressed oops with a NULL narrow-oop base; per the
       // format string R12 (heap base) is then 0.
7089   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7090   match(Set mem (StoreD mem zero));
7091 
7092   ins_cost(25); // XXX
7093   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7094   ins_encode %{
7095     __ movq($mem$$Address, r12);
7096   %}
7097   ins_pipe(ialu_mem_reg);
7098 %}
7099 
7100 instruct storeSSI(stackSlotI dst, rRegI src)
7101 %{
       // Copy an int register into an int stack slot: opcode 0x89 with a
       // non-wide REX prefix, formatted as movl.
7102   match(Set dst src);
7103 
7104   ins_cost(100);
7105   format %{ "movl    $dst, $src\t# int stk" %}
7106   opcode(0x89);
7107   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7108   ins_pipe( ialu_mem_reg );
7109 %}
7110 
7111 instruct storeSSL(stackSlotL dst, rRegL src)
7112 %{
       // Copy a long register into a long stack slot: opcode 0x89 with the
       // wide REX prefix, formatted as movq.
7113   match(Set dst src);
7114 
7115   ins_cost(100);
7116   format %{ "movq    $dst, $src\t# long stk" %}
7117   opcode(0x89);
7118   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7119   ins_pipe(ialu_mem_reg);
7120 %}
7121 
7122 instruct storeSSP(stackSlotP dst, rRegP src)
7123 %{
       // Copy a pointer register into a pointer stack slot: opcode 0x89 with
       // the wide REX prefix, formatted as movq.
7124   match(Set dst src);
7125 
7126   ins_cost(100);
7127   format %{ "movq    $dst, $src\t# ptr stk" %}
7128   opcode(0x89);
7129   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7130   ins_pipe(ialu_mem_reg);
7131 %}
7132 
7133 instruct storeSSF(stackSlotF dst, regF src)
7134 %{
       // Copy a float XMM register into a float stack slot, using the same
       // F3 0F 11 byte sequence as the StoreF rule; formatted as movss.
7135   match(Set dst src);
7136 
7137   ins_cost(95); // XXX
7138   format %{ "movss   $dst, $src\t# float stk" %}
7139   opcode(0xF3, 0x0F, 0x11);
7140   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7141   ins_pipe(pipe_slow); // XXX
7142 %}
7143 
7144 instruct storeSSD(stackSlotD dst, regD src)
7145 %{
       // Copy a double XMM register into a double stack slot, using the same
       // F2 0F 11 byte sequence as the StoreD rule; formatted as movsd.
7146   match(Set dst src);
7147 
7148   ins_cost(95); // XXX
7149   format %{ "movsd   $dst, $src\t# double stk" %}
7150   opcode(0xF2, 0x0F, 0x11);
7151   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7152   ins_pipe(pipe_slow); // XXX
7153 %}
7154 
7155 //----------BSWAP Instructions-------------------------------------------------
7156 instruct bytes_reverse_int(rRegI dst) %{
       // In-place byte reversal of a 32-bit register (ReverseBytesI on dst),
       // formatted as bswapl. opc2_reg combines the second opcode byte
       // (C8) with the register; the enc_class is defined outside this chunk.
7157   match(Set dst (ReverseBytesI dst));
7158 
7159   format %{ "bswapl  $dst" %}
7160   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7161   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7162   ins_pipe( ialu_reg );
7163 %}
7164 
7165 instruct bytes_reverse_long(rRegL dst) %{
       // In-place byte reversal of a 64-bit register (ReverseBytesL on dst),
       // formatted as bswapq: same opcode bytes as the int form but with the
       // wide REX prefix (REX_reg_wide).
7166   match(Set dst (ReverseBytesL dst));
7167 
7168   format %{ "bswapq  $dst" %}
7169 
7170   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7171   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7172   ins_pipe( ialu_reg);
7173 %}
7174 
7175 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // Byte-reverse an unsigned 16-bit value held in a 32-bit register
       // (ReverseBytesUS): bswapl moves the two low bytes, swapped, into the
       // upper half, and the logical right shift by 16 brings them back down
       // with zero fill.
       // shrl writes the condition flags, so the flags register is declared
       // as killed; without this a compare result could be kept live across
       // this instruction.
7176   match(Set dst (ReverseBytesUS dst));
7177   effect(KILL cr);
7178   format %{ "bswapl  $dst\n\t"
7179             "shrl    $dst,16\n\t" %}
7180   ins_encode %{
7181     __ bswapl($dst$$Register);
7182     __ shrl($dst$$Register, 16);
7183   %}
7184   ins_pipe( ialu_reg );
7185 %}
7186 
7187 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // Byte-reverse a signed 16-bit value held in a 32-bit register
       // (ReverseBytesS): bswapl moves the two low bytes, swapped, into the
       // upper half, and the arithmetic right shift by 16 brings them back
       // down with sign extension.
       // sarl writes the condition flags, so the flags register is declared
       // as killed. The format text now names sarl, matching the emitted
       // instruction.
7188   match(Set dst (ReverseBytesS dst));
7189   effect(KILL cr);
7190   format %{ "bswapl  $dst\n\t"
7191             "sarl    $dst,16\n\t" %}
7192   ins_encode %{
7193     __ bswapl($dst$$Register);
7194     __ sarl($dst$$Register, 16);
7195   %}
7196   ins_pipe( ialu_reg );
7197 %}
7198 
7199 //---------- Zeros Count Instructions ------------------------------------------
7200 
7201 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Leading-zero count of an int via a single lzcntl; selected only
       // when UseCountLeadingZerosInstruction is set. Flags are declared
       // killed.
7202   predicate(UseCountLeadingZerosInstruction);
7203   match(Set dst (CountLeadingZerosI src));
7204   effect(KILL cr);
7205 
7206   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7207   ins_encode %{
7208     __ lzcntl($dst$$Register, $src$$Register);
7209   %}
7210   ins_pipe(ialu_reg);
7211 %}
7212 
7213 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback leading-zero count of an int built on bsrl, used when
       // UseCountLeadingZerosInstruction is off. Flags are declared killed.
7214   predicate(!UseCountLeadingZerosInstruction);
7215   match(Set dst (CountLeadingZerosI src));
7216   effect(KILL cr);
7217 
7218   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7219             "jnz     skip\n\t"
7220             "movl    $dst, -1\n"
7221       "skip:\n\t"
7222             "negl    $dst\n\t"
7223             "addl    $dst, 31" %}
7224   ins_encode %{
7225     Register Rdst = $dst$$Register;
7226     Register Rsrc = $src$$Register;
7227     Label skip;
         // bsrl yields the index of the highest set bit. When the notZero
         // branch is not taken (zero source), -1 is substituted so the
         // arithmetic below produces BitsPerInt.
7228     __ bsrl(Rdst, Rsrc);
7229     __ jccb(Assembler::notZero, skip);
7230     __ movl(Rdst, -1);
7231     __ bind(skip);
         // result = (BitsPerInt - 1) - index
7232     __ negl(Rdst);
7233     __ addl(Rdst, BitsPerInt - 1);
7234   %}
7235   ins_pipe(ialu_reg);
7236 %}
7237 
7238 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Leading-zero count of a long via a single lzcntq; the result
       // operand is an int register. Selected only when
       // UseCountLeadingZerosInstruction is set. Flags are declared killed.
7239   predicate(UseCountLeadingZerosInstruction);
7240   match(Set dst (CountLeadingZerosL src));
7241   effect(KILL cr);
7242 
7243   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7244   ins_encode %{
7245     __ lzcntq($dst$$Register, $src$$Register);
7246   %}
7247   ins_pipe(ialu_reg);
7248 %}
7249 
7250 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback leading-zero count of a long built on bsrq, used when
       // UseCountLeadingZerosInstruction is off. Flags are declared killed.
7251   predicate(!UseCountLeadingZerosInstruction);
7252   match(Set dst (CountLeadingZerosL src));
7253   effect(KILL cr);
7254 
7255   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7256             "jnz     skip\n\t"
7257             "movl    $dst, -1\n"
7258       "skip:\n\t"
7259             "negl    $dst\n\t"
7260             "addl    $dst, 63" %}
7261   ins_encode %{
7262     Register Rdst = $dst$$Register;
7263     Register Rsrc = $src$$Register;
7264     Label skip;
         // bsrq yields the index of the highest set bit. When the notZero
         // branch is not taken (zero source), -1 is substituted so the
         // arithmetic below produces BitsPerLong.
7265     __ bsrq(Rdst, Rsrc);
7266     __ jccb(Assembler::notZero, skip);
7267     __ movl(Rdst, -1);
7268     __ bind(skip);
         // result = (BitsPerLong - 1) - index, done with 32-bit arithmetic
7269     __ negl(Rdst);
7270     __ addl(Rdst, BitsPerLong - 1);
7271   %}
7272   ins_pipe(ialu_reg);
7273 %}
7274 
7275 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Trailing-zero count of an int built on bsfl; unpredicated.
       // Flags are declared killed.
7276   match(Set dst (CountTrailingZerosI src));
7277   effect(KILL cr);
7278 
7279   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7280             "jnz     done\n\t"
7281             "movl    $dst, 32\n"
7282       "done:" %}
7283   ins_encode %{
7284     Register Rdst = $dst$$Register;
7285     Label done;
         // bsfl yields the index of the lowest set bit, which is the answer.
         // When the notZero branch is not taken (zero source) the result is
         // forced to BitsPerInt.
7286     __ bsfl(Rdst, $src$$Register);
7287     __ jccb(Assembler::notZero, done);
7288     __ movl(Rdst, BitsPerInt);
7289     __ bind(done);
7290   %}
7291   ins_pipe(ialu_reg);
7292 %}
7293 
7294 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Trailing-zero count of a long built on bsfq; the result operand is
       // an int register. Flags are declared killed.
7295   match(Set dst (CountTrailingZerosL src));
7296   effect(KILL cr);
7297 
7298   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7299             "jnz     done\n\t"
7300             "movl    $dst, 64\n"
7301       "done:" %}
7302   ins_encode %{
7303     Register Rdst = $dst$$Register;
7304     Label done;
         // bsfq yields the index of the lowest set bit, which is the answer.
         // When the notZero branch is not taken (zero source) the result is
         // forced to BitsPerLong.
7305     __ bsfq(Rdst, $src$$Register);
7306     __ jccb(Assembler::notZero, done);
7307     __ movl(Rdst, BitsPerLong);
7308     __ bind(done);
7309   %}
7310   ins_pipe(ialu_reg);
7311 %}
7312 
7313 
7314 //---------- Population Count Instructions -------------------------------------
7315 
7316 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Population count of an int register via popcntl; selected only when
       // UsePopCountInstruction is set.
       // POPCNT writes the condition flags, so the flags register is
       // declared as killed, as the zero-count rules in this file already
       // do; without this a compare result could be kept live across it.
7317   predicate(UsePopCountInstruction);
7318   match(Set dst (PopCountI src));
7319   effect(KILL cr);
7320   format %{ "popcnt  $dst, $src" %}
7321   ins_encode %{
7322     __ popcntl($dst$$Register, $src$$Register);
7323   %}
7324   ins_pipe(ialu_reg);
7325 %}
7326 
7327 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Population count of an int loaded directly from memory (folds the
       // LoadI into popcntl); selected only when UsePopCountInstruction is
       // set.
       // POPCNT writes the condition flags, so the flags register is
       // declared as killed.
7328   predicate(UsePopCountInstruction);
7329   match(Set dst (PopCountI (LoadI mem)));
7330   effect(KILL cr);
7331   format %{ "popcnt  $dst, $mem" %}
7332   ins_encode %{
7333     __ popcntl($dst$$Register, $mem$$Address);
7334   %}
7335   ins_pipe(ialu_reg);
7336 %}
7337 
7338 // Note: Long.bitCount(long) returns an int.
7339 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Population count of a long register via popcntq; the result operand
       // is an int register. Selected only when UsePopCountInstruction is
       // set.
       // POPCNT writes the condition flags, so the flags register is
       // declared as killed.
7340   predicate(UsePopCountInstruction);
7341   match(Set dst (PopCountL src));
7342   effect(KILL cr);
7343   format %{ "popcnt  $dst, $src" %}
7344   ins_encode %{
7345     __ popcntq($dst$$Register, $src$$Register);
7346   %}
7347   ins_pipe(ialu_reg);
7348 %}
7349 
7350 // Note: Long.bitCount(long) returns an int.
7351 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Population count of a long loaded directly from memory (folds the
       // LoadL into popcntq); the result operand is an int register.
       // Selected only when UsePopCountInstruction is set.
       // POPCNT writes the condition flags, so the flags register is
       // declared as killed.
7352   predicate(UsePopCountInstruction);
7353   match(Set dst (PopCountL (LoadL mem)));
7354   effect(KILL cr);
7355   format %{ "popcnt  $dst, $mem" %}
7356   ins_encode %{
7357     __ popcntq($dst$$Register, $mem$$Address);
7358   %}
7359   ins_pipe(ialu_reg);
7360 %}
7361 
7362 
7363 //----------MemBar Instructions-----------------------------------------------
7364 // Memory barrier flavors
7365 
7366 instruct membar_acquire()
7367 %{
       // Acquire barrier: matched but emits no machine code (size 0, empty
       // ins_encode, zero cost).
7368   match(MemBarAcquire);
7369   ins_cost(0);
7370 
7371   size(0);
7372   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7373   ins_encode();
7374   ins_pipe(empty);
7375 %}
7376 
7377 instruct membar_acquire_lock()
7378 %{
       // Acquire barrier for nodes where Matcher::prior_fast_lock(n) holds;
       // emits no machine code. Per the format text, the CMPXCHG in the
       // preceding FastLock makes a separate barrier unnecessary.
7379   match(MemBarAcquire);
7380   predicate(Matcher::prior_fast_lock(n));
7381   ins_cost(0);
7382 
7383   size(0);
7384   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7385   ins_encode();
7386   ins_pipe(empty);
7387 %}
7388 
7389 instruct membar_release()
7390 %{
       // Release barrier: matched but emits no machine code (size 0, empty
       // ins_encode, zero cost).
7391   match(MemBarRelease);
7392   ins_cost(0);
7393 
7394   size(0);
7395   format %{ "MEMBAR-release ! (empty encoding)" %}
7396   ins_encode();
7397   ins_pipe(empty);
7398 %}
7399 
7400 instruct membar_release_lock()
7401 %{
       // Release barrier for nodes where Matcher::post_fast_unlock(n) holds;
       // emits no machine code. Per the format text, a FastUnlock follows.
7402   match(MemBarRelease);
7403   predicate(Matcher::post_fast_unlock(n));
7404   ins_cost(0);
7405 
7406   size(0);
7407   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7408   ins_encode();
7409   ins_pipe(empty);
7410 %}
7411 
7412 instruct membar_volatile(rFlagsReg cr) %{
       // Volatile (StoreLoad) barrier, emitted through the macro assembler's
       // membar(Assembler::StoreLoad). Flags are declared killed. The high
       // cost (400) makes the zero-cost unnecessary_membar_volatile rule
       // cheaper whenever its predicate holds.
7413   match(MemBarVolatile);
7414   effect(KILL cr);
7415   ins_cost(400);
7416 
       // The format template prints a locked add on multiprocessor systems
       // and an empty encoding otherwise.
7417   format %{
7418     $$template
7419     if (os::is_MP()) {
7420       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7421     } else {
7422       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7423     }
7424   %}
7425   ins_encode %{
7426     __ membar(Assembler::StoreLoad);
7427   %}
7428   ins_pipe(pipe_slow);
7429 %}
7430 
7431 instruct unnecessary_membar_volatile()
7432 %{
       // Volatile barrier for nodes where
       // Matcher::post_store_load_barrier(n) holds; emits no machine code
       // (size 0, empty ins_encode, zero cost).
7433   match(MemBarVolatile);
7434   predicate(Matcher::post_store_load_barrier(n));
7435   ins_cost(0);
7436 
7437   size(0);
7438   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7439   ins_encode();
7440   ins_pipe(empty);
7441 %}
7442 
7443 //----------Move Instructions--------------------------------------------------
7444 
7445 instruct castX2P(rRegP dst, rRegL src)
7446 %{
       // Reinterpret a long as a pointer (CastX2P): a register copy encoded
       // with enc_copy_wide, formatted as movq.
7447   match(Set dst (CastX2P src));
7448 
7449   format %{ "movq    $dst, $src\t# long->ptr" %}
7450   ins_encode(enc_copy_wide(dst, src));
7451   ins_pipe(ialu_reg_reg); // XXX
7452 %}
7453 
7454 instruct castP2X(rRegL dst, rRegP src)
7455 %{
       // Reinterpret a pointer as a long (CastP2X): a register copy encoded
       // with enc_copy_wide, formatted as movq.
7456   match(Set dst (CastP2X src));
7457 
7458   format %{ "movq    $dst, $src\t# ptr -> long" %}
7459   ins_encode(enc_copy_wide(dst, src));
7460   ins_pipe(ialu_reg_reg); // XXX
7461 %}
7462 
7463 
7464 // Convert oop pointer into compressed form
7465 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compress an oop pointer (EncodeP) whose type is not known to be
       // non-null. Flags are declared killed.
7466   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7467   match(Set dst (EncodeP src));
7468   effect(KILL cr);
7469   format %{ "encode_heap_oop $dst,$src" %}
7470   ins_encode %{
7471     Register s = $src$$Register;
7472     Register d = $dst$$Register;
         // encode_heap_oop works in place on one register, so the source is
         // first copied into the destination when the two differ.
7473     if (s != d) {
7474       __ movq(d, s);
7475     }
7476     __ encode_heap_oop(d);
7477   %}
7478   ins_pipe(ialu_reg_long);
7479 %}
7480 
7481 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compress an oop pointer (EncodeP) whose type is NotNull, using the
       // two-register encode_heap_oop_not_null form. Flags are declared
       // killed.
7482   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7483   match(Set dst (EncodeP src));
7484   effect(KILL cr);
7485   format %{ "encode_heap_oop_not_null $dst,$src" %}
7486   ins_encode %{
7487     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7488   %}
7489   ins_pipe(ialu_reg_long);
7490 %}
7491 
7492 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompress a narrow oop (DecodeN) whose type is neither NotNull nor
       // Constant. Flags are declared killed.
7493   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7494             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7495   match(Set dst (DecodeN src));
7496   effect(KILL cr);
7497   format %{ "decode_heap_oop $dst,$src" %}
7498   ins_encode %{
7499     Register s = $src$$Register;
7500     Register d = $dst$$Register;
         // decode_heap_oop works in place on one register, so the source is
         // first copied into the destination when the two differ.
7501     if (s != d) {
7502       __ movq(d, s);
7503     }
7504     __ decode_heap_oop(d);
7505   %}
7506   ins_pipe(ialu_reg_long);
7507 %}
7508 
7509 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompress a narrow oop (DecodeN) whose type is NotNull or
       // Constant; the predicate is the complement of decodeHeapOop's.
       // Flags are declared killed.
7510   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7511             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7512   match(Set dst (DecodeN src));
7513   effect(KILL cr);
7514   format %{ "decode_heap_oop_not_null $dst,$src" %}
7515   ins_encode %{
7516     Register s = $src$$Register;
7517     Register d = $dst$$Register;
         // Use the two-register form when source and destination differ,
         // otherwise the in-place single-register form.
7518     if (s != d) {
7519       __ decode_heap_oop_not_null(d, s);
7520     } else {
7521       __ decode_heap_oop_not_null(d);
7522     }
7523   %}
7524   ins_pipe(ialu_reg_long);
7525 %}
7526 
7527 
7528 //----------Conditional Move---------------------------------------------------
7529 // Jump
7530 // dummy instruction for generating temp registers
7531 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Table jump on a shifted index: Jump (switch_val << shift).
       // predicate(false) means this rule is never selected by the matcher.
       // dest is a TEMP register that receives the table address.
7532   match(Jump (LShiftL switch_val shift));
7533   ins_cost(350);
7534   predicate(false);
7535   effect(TEMP dest);
7536 
7537   format %{ "leaq    $dest, [$constantaddress]\n\t"
7538             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7539   ins_encode %{
7540     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7541     // to do that and the compiler is using that register as one it can allocate.
7542     // So we build it all by hand.
7543     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7544     // ArrayAddress dispatch(table, index);
         // dispatch = [dest + switch_val * 2^shift]; dest is loaded with
         // $constantaddress by the lea before the indirect jmp.
7545     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7546     __ lea($dest$$Register, $constantaddress);
7547     __ jmp(dispatch);
7548   %}
7549   ins_pipe(pipe_jmp);
7550   ins_pc_relative(1);
7551 %}
7552 
7553 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Table jump on a shifted index plus a constant offset:
       // Jump ((switch_val << shift) + offset). dest is a TEMP register that
       // receives the table address.
7554   match(Jump (AddL (LShiftL switch_val shift) offset));
7555   ins_cost(350);
7556   effect(TEMP dest);
7557 
7558   format %{ "leaq    $dest, [$constantaddress]\n\t"
7559             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7560   ins_encode %{
7561     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7562     // to do that and the compiler is using that register as one it can allocate.
7563     // So we build it all by hand.
7564     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7565     // ArrayAddress dispatch(table, index);
         // dispatch = [dest + switch_val * 2^shift + offset]; the offset
         // constant is narrowed to int for the displacement. dest is loaded
         // with $constantaddress by the lea before the indirect jmp.
7566     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7567     __ lea($dest$$Register, $constantaddress);
7568     __ jmp(dispatch);
7569   %}
7570   ins_pipe(pipe_jmp);
7571   ins_pc_relative(1);
7572 %}
7573 
7574 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Table jump on an unscaled index: Jump switch_val. dest is a TEMP
       // register that receives the table address.
7575   match(Jump switch_val);
7576   ins_cost(350);
7577   effect(TEMP dest);
7578 
7579   format %{ "leaq    $dest, [$constantaddress]\n\t"
7580             "jmp     [$dest + $switch_val]\n\t" %}
7581   ins_encode %{
7582     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7583     // to do that and the compiler is using that register as one it can allocate.
7584     // So we build it all by hand.
7585     // Address index(noreg, switch_reg, Address::times_1);
7586     // ArrayAddress dispatch(table, index);
         // dispatch = [dest + switch_val]; dest is loaded with
         // $constantaddress by the lea before the indirect jmp.
7587     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7588     __ lea($dest$$Register, $constantaddress);
7589     __ jmp(dispatch);
7590   %}
7591   ins_pipe(pipe_jmp);
7592   ins_pc_relative(1);
7593 %}
7594 
7595 // Conditional move
7596 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7597 %{
       // Int conditional move, register source, signed condition (cmpOp on
       // rFlagsReg). dst is both an input and the result of the CMoveI.
       // enc_cmov(cop) emits the opcode bytes for the condition.
7598   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7599 
7600   ins_cost(200); // XXX
7601   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7602   opcode(0x0F, 0x40);
7603   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7604   ins_pipe(pipe_cmov_reg);
7605 %}
7606 
7607 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Int conditional move, register source, unsigned condition (cmpOpU
       // on rFlagsRegU). Same encoding as the signed form.
7608   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7609 
7610   ins_cost(200); // XXX
7611   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7612   opcode(0x0F, 0x40);
7613   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7614   ins_pipe(pipe_cmov_reg);
7615 %}
7616 
7617 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Int conditional move for the cmpOpUCF / rFlagsRegUCF operand pair;
       // has no encoding of its own and expands to cmovI_regU with the same
       // operands.
7618   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7619   ins_cost(200);
7620   expand %{
7621     cmovI_regU(cop, cr, dst, src);
7622   %}
7623 %}
7624 
7625 // Conditional move
7626 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // Int conditional move with the source loaded from memory (folds the
       // LoadI), signed condition. Costed at 250, above the register form's
       // 200.
7627   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7628 
7629   ins_cost(250); // XXX
7630   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7631   opcode(0x0F, 0x40);
7632   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7633   ins_pipe(pipe_cmov_mem);
7634 %}
7635 
7636 // Conditional move
7637 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7638 %{
       // Int conditional move with the source loaded from memory (folds the
       // LoadI), unsigned condition. Same encoding as the signed memory form.
7639   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7640 
7641   ins_cost(250); // XXX
7642   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7643   opcode(0x0F, 0x40);
7644   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7645   ins_pipe(pipe_cmov_mem);
7646 %}
7647 
7648 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Memory-source int conditional move for the cmpOpUCF / rFlagsRegUCF
       // operand pair; expands to cmovI_memU with the same operands.
7649   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7650   ins_cost(250);
7651   expand %{
7652     cmovI_memU(cop, cr, dst, src);
7653   %}
7654 %}
7655 
7656 // Conditional move
7657 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7658 %{
       // Compressed-pointer conditional move, register source, signed
       // condition. Uses the non-wide REX prefix and is formatted as cmovl.
7659   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7660 
7661   ins_cost(200); // XXX
7662   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7663   opcode(0x0F, 0x40);
7664   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7665   ins_pipe(pipe_cmov_reg);
7666 %}
7667 
7668 // Conditional move
7669 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7670 %{
       // Compressed-pointer conditional move, register source, unsigned
       // condition. Same encoding as the signed form.
7671   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7672 
7673   ins_cost(200); // XXX
7674   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7675   opcode(0x0F, 0x40);
7676   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7677   ins_pipe(pipe_cmov_reg);
7678 %}
7679 
7680 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Compressed-pointer conditional move for the cmpOpUCF /
       // rFlagsRegUCF operand pair; expands to cmovN_regU with the same
       // operands.
7681   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7682   ins_cost(200);
7683   expand %{
7684     cmovN_regU(cop, cr, dst, src);
7685   %}
7686 %}
7687 
7688 // Conditional move
7689 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7690 %{
       // Pointer conditional move, register source, signed condition. Uses
       // the wide REX prefix (REX_reg_reg_wide) and is formatted as cmovq.
7691   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7692 
7693   ins_cost(200); // XXX
7694   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7695   opcode(0x0F, 0x40);
7696   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7697   ins_pipe(pipe_cmov_reg);  // XXX
7698 %}
7699 
7700 // Conditional move
// Register-to-register conditional move of a pointer (rRegP) for the
// unsigned compare operands (cmpOpU / rFlagsRegU).  Same 64-bit encoding as
// cmovP_reg.
7701 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7702 %{
7703   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7704
7705   ins_cost(200); // XXX
7706   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7707   opcode(0x0F, 0x40);
7708   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7709   ins_pipe(pipe_cmov_reg); // XXX
7710 %}
7711 
// CMoveP for the cmpOpUCF / rFlagsRegUCF operand pair.  No encoding of its
// own: it expands to cmovP_regU.
7712 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7713   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7714   ins_cost(200);
7715   expand %{
7716     cmovP_regU(cop, cr, dst, src);
7717   %}
7718 %}
7719 
7720 // DISABLED: Requires the ADLC to emit a bottom_type call that
7721 // correctly meets the two pointer arguments; one is an incoming
7722 // register but the other is a memory operand.  ALSO appears to
7723 // be buggy with implicit null checks.
7724 //
7725 //// Conditional move
7726 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7727 //%{
7728 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7729 //  ins_cost(250);
7730 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7731 //  opcode(0x0F,0x40);
7732 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7733 //  ins_pipe( pipe_cmov_mem );
7734 //%}
7735 //
7736 //// Conditional move
7737 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7738 //%{
7739 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7740 //  ins_cost(250);
7741 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7742 //  opcode(0x0F,0x40);
7743 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7744 //  ins_pipe( pipe_cmov_mem );
7745 //%}
7746 
// Register-to-register conditional move of a long (rRegL) for the signed
// compare operands.  64-bit form: "cmovq" with REX_reg_reg_wide.
7747 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7748 %{
7749   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7750
7751   ins_cost(200); // XXX
7752   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7753   opcode(0x0F, 0x40);
7754   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7755   ins_pipe(pipe_cmov_reg);  // XXX
7756 %}
7757 
// Conditional move of a long from memory for the signed compare operands.
// Matches a CMoveL whose second data input is a LoadL, folding the load into
// the cmovq.
// NOTE(review): cost is 200, the same as the register form, whereas
// cmovI_memU uses 250 for its memory form -- confirm this is intended.
7758 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7759 %{
7760   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7761
7762   ins_cost(200); // XXX
7763   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7764   opcode(0x0F, 0x40);
7765   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7766   ins_pipe(pipe_cmov_mem);  // XXX
7767 %}
7768 
// Register-to-register conditional move of a long (rRegL) for the unsigned
// compare operands (cmpOpU / rFlagsRegU).  Same 64-bit encoding as cmovL_reg.
7769 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7770 %{
7771   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7772
7773   ins_cost(200); // XXX
7774   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7775   opcode(0x0F, 0x40);
7776   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7777   ins_pipe(pipe_cmov_reg); // XXX
7778 %}
7779 
// CMoveL (register source) for the cmpOpUCF / rFlagsRegUCF operand pair.
// No encoding of its own: it expands to cmovL_regU.
7780 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7781   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7782   ins_cost(200);
7783   expand %{
7784     cmovL_regU(cop, cr, dst, src);
7785   %}
7786 %}
7787 
// Conditional move of a long from memory for the unsigned compare operands
// (cmpOpU / rFlagsRegU).  The LoadL in the match rule is folded into the
// cmovq memory operand.
7788 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7789 %{
7790   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7791
7792   ins_cost(200); // XXX
7793   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7794   opcode(0x0F, 0x40);
7795   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7796   ins_pipe(pipe_cmov_mem); // XXX
7797 %}
7798 
// CMoveL (memory source) for the cmpOpUCF / rFlagsRegUCF operand pair.
// No encoding of its own: it expands to cmovL_memU.
7799 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7800   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7801   ins_cost(200);
7802   expand %{
7803     cmovL_memU(cop, cr, dst, src);
7804   %}
7805 %}
7806 
// Conditional move of a float (regF) for the signed compare operands.
// As the format shows, this is not a single cmov: it is a conditional jump
// ("jn$cop skip") around a movss, emitted by enc_cmovf_branch, and is
// scheduled on pipe_slow.
7807 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7808 %{
7809   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7810
7811   ins_cost(200); // XXX
7812   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7813             "movss     $dst, $src\n"
7814     "skip:" %}
7815   ins_encode(enc_cmovf_branch(cop, dst, src));
7816   ins_pipe(pipe_slow);
7817 %}
7818 
7819 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7820 // %{
7821 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7822 
7823 //   ins_cost(200); // XXX
7824 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7825 //             "movss     $dst, $src\n"
7826 //     "skip:" %}
7827 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7828 //   ins_pipe(pipe_slow);
7829 // %}
7830 
// Conditional move of a float (regF) for the unsigned compare operands
// (cmpOpU / rFlagsRegU).  Same branch-around-movss sequence and encoding
// class (enc_cmovf_branch) as cmovF_reg.
7831 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7832 %{
7833   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7834
7835   ins_cost(200); // XXX
7836   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7837             "movss     $dst, $src\n"
7838     "skip:" %}
7839   ins_encode(enc_cmovf_branch(cop, dst, src));
7840   ins_pipe(pipe_slow);
7841 %}
7842 
// CMoveF for the cmpOpUCF / rFlagsRegUCF operand pair.  No encoding of its
// own: it expands to cmovF_regU.
7843 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7844   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7845   ins_cost(200);
7846   expand %{
7847     cmovF_regU(cop, cr, dst, src);
7848   %}
7849 %}
7850 
// Conditional move of a double (regD) for the signed compare operands.
// Implemented as a conditional jump around a movsd (see format), emitted by
// enc_cmovd_branch, and scheduled on pipe_slow.
7851 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7852 %{
7853   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7854
7855   ins_cost(200); // XXX
7856   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7857             "movsd     $dst, $src\n"
7858     "skip:" %}
7859   ins_encode(enc_cmovd_branch(cop, dst, src));
7860   ins_pipe(pipe_slow);
7861 %}
7862 
// Conditional move of a double (regD) for the unsigned compare operands
// (cmpOpU / rFlagsRegU).  Same branch-around-movsd sequence and encoding
// class (enc_cmovd_branch) as cmovD_reg.
7863 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7864 %{
7865   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7866
7867   ins_cost(200); // XXX
7868   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7869             "movsd     $dst, $src\n"
7870     "skip:" %}
7871   ins_encode(enc_cmovd_branch(cop, dst, src));
7872   ins_pipe(pipe_slow);
7873 %}
7874 
// CMoveD for the cmpOpUCF / rFlagsRegUCF operand pair.  No encoding of its
// own: it expands to cmovD_regU.
7875 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7876   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7877   ins_cost(200);
7878   expand %{
7879     cmovD_regU(cop, cr, dst, src);
7880   %}
7881 %}
7882 
7883 //----------Arithmetic Instructions--------------------------------------------
7884 //----------Addition Instructions----------------------------------------------
7885 
// Int add, register += register ("addl $dst, $src", opcode 0x03).
// Two-address form: dst is both an input and the result.  The flags register
// is declared killed.
7886 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7887 %{
7888   match(Set dst (AddI dst src));
7889   effect(KILL cr);
7890
7891   format %{ "addl    $dst, $src\t# int" %}
7892   opcode(0x03);
7893   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7894   ins_pipe(ialu_reg_reg);
7895 %}
7896 
// Int add, register += immediate (opcode 0x81 /0).  The immediate is emitted
// by Con8or32 -- presumably as 8 or 32 bits depending on its value, with
// OpcSErm choosing the matching opcode form; confirm in the enc_class
// definitions.  Flags are declared killed.
7897 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7898 %{
7899   match(Set dst (AddI dst src));
7900   effect(KILL cr);
7901
7902   format %{ "addl    $dst, $src\t# int" %}
7903   opcode(0x81, 0x00); /* /0 id */
7904   ins_encode(OpcSErm(dst, src), Con8or32(src));
7905   ins_pipe( ialu_reg );
7906 %}
7907 
// Int add, register += memory.  The LoadI in the match rule is folded into
// the addl's memory operand (opcode 0x03).  Flags are declared killed.
7908 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7909 %{
7910   match(Set dst (AddI dst (LoadI src)));
7911   effect(KILL cr);
7912
7913   ins_cost(125); // XXX
7914   format %{ "addl    $dst, $src\t# int" %}
7915   opcode(0x03);
7916   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7917   ins_pipe(ialu_reg_mem);
7918 %}
7919 
// Int read-modify-write add, memory += register.  Matches a StoreI back to
// the same address that the inner LoadI reads and emits a single
// "addl mem, reg" (opcode 0x01 /r).  Note the encoder arguments are
// (src, dst): the register goes in the reg field and the memory operand in
// r/m.  Flags are declared killed.
7920 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7921 %{
7922   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7923   effect(KILL cr);
7924
7925   ins_cost(150); // XXX
7926   format %{ "addl    $dst, $src\t# int" %}
7927   opcode(0x01); /* Opcode 01 /r */
7928   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7929   ins_pipe(ialu_mem_reg);
7930 %}
7931 
// Int read-modify-write add, memory += immediate (opcode 0x81 /0).
// RM_opc_mem(0x00, dst) supplies the /0 opcode extension with the memory
// operand; Con8or32 emits the immediate.  Flags are declared killed.
7932 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7933 %{
7934   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7935   effect(KILL cr);
7936
7937   ins_cost(125); // XXX
7938   format %{ "addl    $dst, $src\t# int" %}
7939   opcode(0x81); /* Opcode 81 /0 id */
7940   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7941   ins_pipe(ialu_mem_imm);
7942 %}
7943 
// Int increment of a register ("incl $dst", opcode FF /0).  Only eligible
// when the UseIncDec flag is set.  Matches an AddI whose second input is an
// immI1 operand (presumably the constant 1 -- see the operand definition);
// src is not referenced by the encoding.  Flags are declared killed.
7944 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7945 %{
7946   predicate(UseIncDec);
7947   match(Set dst (AddI dst src));
7948   effect(KILL cr);
7949
7950   format %{ "incl    $dst\t# int" %}
7951   opcode(0xFF, 0x00); // FF /0
7952   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7953   ins_pipe(ialu_reg);
7954 %}
7955 
// Int increment of a memory location ("incl mem", opcode FF /0).  Only
// eligible when UseIncDec is set.  Matches the load/add/store-back pattern on
// a single address with an immI1 addend.  Flags are declared killed.
7956 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7957 %{
7958   predicate(UseIncDec);
7959   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7960   effect(KILL cr);
7961
7962   ins_cost(125); // XXX
7963   format %{ "incl    $dst\t# int" %}
7964   opcode(0xFF); /* Opcode FF /0 */
7965   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7966   ins_pipe(ialu_mem_imm);
7967 %}
7968 
7969 // XXX why does that use AddI
// Int decrement of a register ("decl $dst", opcode FF /1).  Only eligible
// when UseIncDec is set.  Expressed as an AddI with an immI_M1 operand
// (presumably the constant -1 -- see the operand definition) rather than as
// a SubI.  Flags are declared killed.
7970 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7971 %{
7972   predicate(UseIncDec);
7973   match(Set dst (AddI dst src));
7974   effect(KILL cr);
7975
7976   format %{ "decl    $dst\t# int" %}
7977   opcode(0xFF, 0x01); // FF /1
7978   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7979   ins_pipe(ialu_reg);
7980 %}
7981 
7982 // XXX why does that use AddI
// Int decrement of a memory location ("decl mem", opcode FF /1).  Only
// eligible when UseIncDec is set.  Matches the load/add/store-back pattern
// with an immI_M1 addend.  Flags are declared killed.
7983 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7984 %{
7985   predicate(UseIncDec);
7986   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7987   effect(KILL cr);
7988
7989   ins_cost(125); // XXX
7990   format %{ "decl    $dst\t# int" %}
7991   opcode(0xFF); /* Opcode FF /1 */
7992   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7993   ins_pipe(ialu_mem_imm);
7994 %}
7995 
// Three-operand int add via LEA: dst = src0 + src1, with dst distinct from
// src0.  Unlike the addl forms, no flags operand is declared, so no flags
// kill is recorded.  The encoding emits byte 0x67 first, which the format
// prints as the "addr32" prefix, followed by the REX prefix and opcode 0x8D.
7996 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7997 %{
7998   match(Set dst (AddI src0 src1));
7999
8000   ins_cost(110);
8001   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8002   opcode(0x8D); /* 0x8D /r */
8003   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8004   ins_pipe(ialu_reg_reg);
8005 %}
8006 
// Long add, register += register ("addq $dst, $src", opcode 0x03 with the
// wide REX encoding).  Flags are declared killed.
8007 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8008 %{
8009   match(Set dst (AddL dst src));
8010   effect(KILL cr);
8011
8012   format %{ "addq    $dst, $src\t# long" %}
8013   opcode(0x03);
8014   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8015   ins_pipe(ialu_reg_reg);
8016 %}
8017 
// Long add, register += immediate (opcode 0x81 /0, wide form).  The addend
// is restricted to the immL32 operand class; Con8or32 emits it.  Flags are
// declared killed.
8018 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8019 %{
8020   match(Set dst (AddL dst src));
8021   effect(KILL cr);
8022
8023   format %{ "addq    $dst, $src\t# long" %}
8024   opcode(0x81, 0x00); /* /0 id */
8025   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8026   ins_pipe( ialu_reg );
8027 %}
8028 
// Long add, register += memory.  The LoadL in the match rule is folded into
// the addq's memory operand (opcode 0x03, wide REX).  Flags are declared
// killed.
8029 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8030 %{
8031   match(Set dst (AddL dst (LoadL src)));
8032   effect(KILL cr);
8033
8034   ins_cost(125); // XXX
8035   format %{ "addq    $dst, $src\t# long" %}
8036   opcode(0x03);
8037   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8038   ins_pipe(ialu_reg_mem);
8039 %}
8040 
// Long read-modify-write add, memory += register ("addq mem, reg", opcode
// 0x01 /r).  Encoder arguments are (src, dst): register in the reg field,
// memory operand in r/m.  Flags are declared killed.
8041 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8042 %{
8043   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8044   effect(KILL cr);
8045
8046   ins_cost(150); // XXX
8047   format %{ "addq    $dst, $src\t# long" %}
8048   opcode(0x01); /* Opcode 01 /r */
8049   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8050   ins_pipe(ialu_mem_reg);
8051 %}
8052 
// Long read-modify-write add, memory += immediate (opcode 0x81 /0, wide
// REX).  The addend is restricted to the immL32 operand class.  Flags are
// declared killed.
8053 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8054 %{
8055   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8056   effect(KILL cr);
8057
8058   ins_cost(125); // XXX
8059   format %{ "addq    $dst, $src\t# long" %}
8060   opcode(0x81); /* Opcode 81 /0 id */
8061   ins_encode(REX_mem_wide(dst),
8062              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8063   ins_pipe(ialu_mem_imm);
8064 %}
8065 
// Long increment of a register ("incq $dst", opcode FF /0, wide REX).  Only
// eligible when UseIncDec is set.  Matches an AddL whose second input is an
// immL1 operand; src is not referenced by the encoding.  Flags are declared
// killed.
//
// dst is a long register (rRegL): the rule matches AddL and encodes a 64-bit
// incq, consistent with decL_rReg and the other long add forms.  It was
// previously declared rRegI, which does not agree with the AddL result type.
8066 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8067 %{
8068   predicate(UseIncDec);
8069   match(Set dst (AddL dst src));
8070   effect(KILL cr);
8071
8072   format %{ "incq    $dst\t# long" %}
8073   opcode(0xFF, 0x00); // FF /0
8074   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8075   ins_pipe(ialu_reg);
8076 %}
8077 
// Long increment of a memory location ("incq mem", opcode FF /0, wide REX).
// Only eligible when UseIncDec is set.  Matches the load/add/store-back
// pattern with an immL1 addend.  Flags are declared killed.
8078 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8079 %{
8080   predicate(UseIncDec);
8081   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8082   effect(KILL cr);
8083
8084   ins_cost(125); // XXX
8085   format %{ "incq    $dst\t# long" %}
8086   opcode(0xFF); /* Opcode FF /0 */
8087   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8088   ins_pipe(ialu_mem_imm);
8089 %}
8090 
8091 // XXX why does that use AddL
// Long decrement of a register ("decq $dst", opcode FF /1, wide REX).  Only
// eligible when UseIncDec is set.  Expressed as an AddL with an immL_M1
// operand (presumably the constant -1 -- see the operand definition).
// Flags are declared killed.
8092 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8093 %{
8094   predicate(UseIncDec);
8095   match(Set dst (AddL dst src));
8096   effect(KILL cr);
8097
8098   format %{ "decq    $dst\t# long" %}
8099   opcode(0xFF, 0x01); // FF /1
8100   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8101   ins_pipe(ialu_reg);
8102 %}
8103 
8104 // XXX why does that use AddL
// Long decrement of a memory location ("decq mem", opcode FF /1, wide REX).
// Only eligible when UseIncDec is set.  Matches the load/add/store-back
// pattern with an immL_M1 addend.  Flags are declared killed.
8105 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8106 %{
8107   predicate(UseIncDec);
8108   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8109   effect(KILL cr);
8110
8111   ins_cost(125); // XXX
8112   format %{ "decq    $dst\t# long" %}
8113   opcode(0xFF); /* Opcode FF /1 */
8114   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8115   ins_pipe(ialu_mem_imm);
8116 %}
8117 
// Three-operand long add via LEA: dst = src0 + src1 with an immL32 addend
// and dst distinct from src0 ("leaq", opcode 0x8D, wide REX).  No flags
// operand is declared, so no flags kill is recorded.
8118 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8119 %{
8120   match(Set dst (AddL src0 src1));
8121
8122   ins_cost(110);
8123   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8124   opcode(0x8D); /* 0x8D /r */
8125   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8126   ins_pipe(ialu_reg_reg);
8127 %}
8128 
// Pointer add, pointer register += long register ("addq", opcode 0x03, wide
// REX).  Matches AddP with dst as both input and result.  Flags are declared
// killed.
8129 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8130 %{
8131   match(Set dst (AddP dst src));
8132   effect(KILL cr);
8133
8134   format %{ "addq    $dst, $src\t# ptr" %}
8135   opcode(0x03);
8136   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8137   ins_pipe(ialu_reg_reg);
8138 %}
8139 
// Pointer add, pointer register += immediate (opcode 0x81 /0, wide form).
// The offset is restricted to the immL32 operand class.  Flags are declared
// killed.
8140 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8141 %{
8142   match(Set dst (AddP dst src));
8143   effect(KILL cr);
8144
8145   format %{ "addq    $dst, $src\t# ptr" %}
8146   opcode(0x81, 0x00); /* /0 id */
8147   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8148   ins_pipe( ialu_reg );
8149 %}
8150 
8151 // XXX addP mem ops ????
8152 
// Three-operand pointer add via LEA: dst = src0 + src1 with an immL32 offset
// and dst distinct from src0 ("leaq", opcode 0x8D, wide REX).  No flags
// operand is declared, so no flags kill is recorded.
8153 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8154 %{
8155   match(Set dst (AddP src0 src1));
8156
8157   ins_cost(110);
8158   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8159   opcode(0x8D); /* 0x8D /r */
8160   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8161   ins_pipe(ialu_reg_reg);
8162 %}
8163 
// CheckCastPP on a pointer register.  Emits no machine code: size(0) and an
// empty encoding; the value stays in dst.
8164 instruct checkCastPP(rRegP dst)
8165 %{
8166   match(Set dst (CheckCastPP dst));
8167
8168   size(0);
8169   format %{ "# checkcastPP of $dst" %}
8170   ins_encode(/* empty encoding */);
8171   ins_pipe(empty);
8172 %}
8173 
// CastPP on a pointer register.  Emits no machine code: size(0) and an empty
// encoding; the value stays in dst.
8174 instruct castPP(rRegP dst)
8175 %{
8176   match(Set dst (CastPP dst));
8177
8178   size(0);
8179   format %{ "# castPP of $dst" %}
8180   ins_encode(/* empty encoding */);
8181   ins_pipe(empty);
8182 %}
8183 
// CastII on an int register.  Emits no machine code: size(0) and an empty
// encoding.  Unlike castPP / checkCastPP it also sets an explicit
// ins_cost(0).
8184 instruct castII(rRegI dst)
8185 %{
8186   match(Set dst (CastII dst));
8187
8188   size(0);
8189   format %{ "# castII of $dst" %}
8190   ins_encode(/* empty encoding */);
8191   ins_cost(0);
8192   ins_pipe(empty);
8193 %}
8194 
8195 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked: encoded as an ordinary 64-bit load ("movq $dst, $mem", opcode
// 0x8B, wide REX).  No lock prefix or other special encoding is emitted
// here.
8196 instruct loadPLocked(rRegP dst, memory mem)
8197 %{
8198   match(Set dst (LoadPLocked mem));
8199
8200   ins_cost(125); // XXX
8201   format %{ "movq    $dst, $mem\t# ptr locked" %}
8202   opcode(0x8B);
8203   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8204   ins_pipe(ialu_reg_mem); // XXX
8205 %}
8206 
8207 // LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked: encoded as an ordinary 64-bit load ("movq $dst, $mem", opcode
// 0x8B, wide REX).  No lock prefix or other special encoding is emitted
// here.
8208 instruct loadLLocked(rRegL dst, memory mem)
8209 %{
8210   match(Set dst (LoadLLocked mem));
8211
8212   ins_cost(125); // XXX
8213   format %{ "movq    $dst, $mem\t# long locked" %}
8214   opcode(0x8B);
8215   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8216   ins_pipe(ialu_reg_mem); // XXX
8217 %}
8218 
8219 // Conditional-store of the updated heap-top.
8220 // Used during allocation of the shared heap.
8221 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8222 
// StorePConditional: the result of the match is the flags register (Set cr).
// Emits a lock-prefixed 64-bit cmpxchg (opcode 0F B1, wide REX) of newval
// against heap_top_ptr; oldval is constrained to the rax_RegP class, which is
// the implicit comparand named in the format string.
// NOTE(review): unlike storeIConditional / storeLConditional there is no
// effect(KILL oldval) here -- confirm that this is intentional.
8223 instruct storePConditional(memory heap_top_ptr,
8224                            rax_RegP oldval, rRegP newval,
8225                            rFlagsReg cr)
8226 %{
8227   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8228
8229   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8230             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8231   opcode(0x0F, 0xB1);
8232   ins_encode(lock_prefix,
8233              REX_reg_mem_wide(newval, heap_top_ptr),
8234              OpcP, OpcS,
8235              reg_mem(newval, heap_top_ptr));
8236   ins_pipe(pipe_cmpxchg);
8237 %}
8238 
8239 // Conditional-store of an int value.
8240 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: produces the flags register (Set cr).  Emits a
// lock-prefixed 32-bit cmpxchg (opcode 0F B1, non-wide REX) of newval against
// mem.  oldval is constrained to rax_RegI and is declared killed.
8241 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8242 %{
8243   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8244   effect(KILL oldval);
8245
8246   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8247   opcode(0x0F, 0xB1);
8248   ins_encode(lock_prefix,
8249              REX_reg_mem(newval, mem),
8250              OpcP, OpcS,
8251              reg_mem(newval, mem));
8252   ins_pipe(pipe_cmpxchg);
8253 %}
8254 
8255 // Conditional-store of a long value.
8256 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: produces the flags register (Set cr).  Emits a
// lock-prefixed 64-bit cmpxchg (opcode 0F B1, wide REX) of newval against
// mem.  oldval is constrained to rax_RegL and is declared killed.
8257 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8258 %{
8259   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8260   effect(KILL oldval);
8261
8262   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8263   opcode(0x0F, 0xB1);
8264   ins_encode(lock_prefix,
8265              REX_reg_mem_wide(newval, mem),
8266              OpcP, OpcS,
8267              reg_mem(newval, mem));
8268   ins_pipe(pipe_cmpxchg);
8269 %}
8270 
8271 
8272 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Three instructions are
// emitted back to back:
//   1. lock-prefixed 64-bit cmpxchg (0F B1, wide REX) of newval with mem_ptr;
//   2. sete res   (0F 94) -- captures the outcome from the flags;
//   3. movzbl res, res (0F B6) -- widens the byte to a full int.
// oldval is constrained to rax_RegP; both oldval and the flags are declared
// killed.
8273 instruct compareAndSwapP(rRegI res,
8274                          memory mem_ptr,
8275                          rax_RegP oldval, rRegP newval,
8276                          rFlagsReg cr)
8277 %{
8278   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8279   effect(KILL cr, KILL oldval);
8280
8281   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8282             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8283             "sete    $res\n\t"
8284             "movzbl  $res, $res" %}
8285   opcode(0x0F, 0xB1);
8286   ins_encode(lock_prefix,
8287              REX_reg_mem_wide(newval, mem_ptr),
8288              OpcP, OpcS,
8289              reg_mem(newval, mem_ptr),
8290              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8291              REX_reg_breg(res, res), // movzbl
8292              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8293   ins_pipe( pipe_cmpxchg );
8294 %}
8295 
// CompareAndSwapL producing an int result in res.  Emits a lock-prefixed
// 64-bit cmpxchg (0F B1, wide REX), then sete res and movzbl res, res to
// turn the flag outcome into an int.  oldval is constrained to rax_RegL;
// both oldval and the flags are declared killed.
8296 instruct compareAndSwapL(rRegI res,
8297                          memory mem_ptr,
8298                          rax_RegL oldval, rRegL newval,
8299                          rFlagsReg cr)
8300 %{
8301   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8302   effect(KILL cr, KILL oldval);
8303
8304   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8305             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8306             "sete    $res\n\t"
8307             "movzbl  $res, $res" %}
8308   opcode(0x0F, 0xB1);
8309   ins_encode(lock_prefix,
8310              REX_reg_mem_wide(newval, mem_ptr),
8311              OpcP, OpcS,
8312              reg_mem(newval, mem_ptr),
8313              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8314              REX_reg_breg(res, res), // movzbl
8315              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8316   ins_pipe( pipe_cmpxchg );
8317 %}
8318 
// CompareAndSwapI producing an int result in res.  Emits a lock-prefixed
// 32-bit cmpxchg (0F B1, non-wide REX), then sete res and movzbl res, res to
// turn the flag outcome into an int.  oldval is constrained to rax_RegI;
// both oldval and the flags are declared killed.
8319 instruct compareAndSwapI(rRegI res,
8320                          memory mem_ptr,
8321                          rax_RegI oldval, rRegI newval,
8322                          rFlagsReg cr)
8323 %{
8324   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8325   effect(KILL cr, KILL oldval);
8326
8327   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8328             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8329             "sete    $res\n\t"
8330             "movzbl  $res, $res" %}
8331   opcode(0x0F, 0xB1);
8332   ins_encode(lock_prefix,
8333              REX_reg_mem(newval, mem_ptr),
8334              OpcP, OpcS,
8335              reg_mem(newval, mem_ptr),
8336              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8337              REX_reg_breg(res, res), // movzbl
8338              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8339   ins_pipe( pipe_cmpxchg );
8340 %}
8341 
8342 
// CompareAndSwapN (compressed-pointer operands, rRegN) producing an int
// result in res.  Emits a lock-prefixed 32-bit cmpxchg (0F B1, non-wide
// REX), then sete res and movzbl res, res.  oldval is constrained to
// rax_RegN; both oldval and the flags are declared killed.
8343 instruct compareAndSwapN(rRegI res,
8344                           memory mem_ptr,
8345                           rax_RegN oldval, rRegN newval,
8346                           rFlagsReg cr) %{
8347   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8348   effect(KILL cr, KILL oldval);
8349
8350   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8351             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8352             "sete    $res\n\t"
8353             "movzbl  $res, $res" %}
8354   opcode(0x0F, 0xB1);
8355   ins_encode(lock_prefix,
8356              REX_reg_mem(newval, mem_ptr),
8357              OpcP, OpcS,
8358              reg_mem(newval, mem_ptr),
8359              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8360              REX_reg_breg(res, res), // movzbl
8361              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8362   ins_pipe( pipe_cmpxchg );
8363 %}
8364 
8365 //----------Subtraction Instructions-------------------------------------------
8366 
8367 // Integer Subtraction Instructions
// Int subtract, register -= register ("subl $dst, $src", opcode 0x2B).
// Two-address form: dst is both the minuend and the result.  Flags are
// declared killed.
8368 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8369 %{
8370   match(Set dst (SubI dst src));
8371   effect(KILL cr);
8372
8373   format %{ "subl    $dst, $src\t# int" %}
8374   opcode(0x2B);
8375   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8376   ins_pipe(ialu_reg_reg);
8377 %}
8378 
// Int subtract, register -= immediate (opcode 0x81 /5).  Con8or32 emits the
// immediate.  Flags are declared killed.
8379 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8380 %{
8381   match(Set dst (SubI dst src));
8382   effect(KILL cr);
8383
8384   format %{ "subl    $dst, $src\t# int" %}
8385   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8386   ins_encode(OpcSErm(dst, src), Con8or32(src));
8387   ins_pipe(ialu_reg);
8388 %}
8389 
// Int subtract, register -= memory.  The LoadI in the match rule is folded
// into the subl's memory operand (opcode 0x2B).  Flags are declared killed.
8390 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8391 %{
8392   match(Set dst (SubI dst (LoadI src)));
8393   effect(KILL cr);
8394
8395   ins_cost(125);
8396   format %{ "subl    $dst, $src\t# int" %}
8397   opcode(0x2B);
8398   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8399   ins_pipe(ialu_reg_mem);
8400 %}
8401 
// Int read-modify-write subtract, memory -= register ("subl mem, reg",
// opcode 0x29 /r).  Encoder arguments are (src, dst): register in the reg
// field, memory operand in r/m.  Flags are declared killed.
8402 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8403 %{
8404   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8405   effect(KILL cr);
8406
8407   ins_cost(150);
8408   format %{ "subl    $dst, $src\t# int" %}
8409   opcode(0x29); /* Opcode 29 /r */
8410   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8411   ins_pipe(ialu_mem_reg);
8412 %}
8413 
// Int read-modify-write subtract, memory -= immediate (opcode 0x81 /5).
// RM_opc_mem(0x05, dst) supplies the /5 opcode extension with the memory
// operand; Con8or32 emits the immediate.  Flags are declared killed.
8414 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8415 %{
8416   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8417   effect(KILL cr);
8418
8419   ins_cost(125); // XXX
8420   format %{ "subl    $dst, $src\t# int" %}
8421   opcode(0x81); /* Opcode 81 /5 id */
8422   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8423   ins_pipe(ialu_mem_imm);
8424 %}
8425 
// Long subtract, register -= register ("subq $dst, $src", opcode 0x2B, wide
// REX).  Flags are declared killed.
8426 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8427 %{
8428   match(Set dst (SubL dst src));
8429   effect(KILL cr);
8430
8431   format %{ "subq    $dst, $src\t# long" %}
8432   opcode(0x2B);
8433   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8434   ins_pipe(ialu_reg_reg);
8435 %}
8436 
// Long subtract, register -= immediate (opcode 0x81 /5, wide form).  The
// subtrahend is restricted to the immL32 operand class; Con8or32 emits it.
// Flags are declared killed.
//
// dst is a long register (rRegL): the rule matches SubL and encodes a 64-bit
// subq through OpcSErm_wide, consistent with subL_rReg and addL_rReg_imm.
// It was previously declared rRegI, which does not agree with the SubL
// result type.
8437 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8438 %{
8439   match(Set dst (SubL dst src));
8440   effect(KILL cr);
8441
8442   format %{ "subq    $dst, $src\t# long" %}
8443   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8444   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8445   ins_pipe(ialu_reg);
8446 %}
8447 
// Long subtract, register -= memory.  The LoadL in the match rule is folded
// into the subq's memory operand (opcode 0x2B, wide REX).  Flags are
// declared killed.
8448 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8449 %{
8450   match(Set dst (SubL dst (LoadL src)));
8451   effect(KILL cr);
8452
8453   ins_cost(125);
8454   format %{ "subq    $dst, $src\t# long" %}
8455   opcode(0x2B);
8456   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8457   ins_pipe(ialu_reg_mem);
8458 %}
8459 
// Long read-modify-write subtract, memory -= register ("subq mem, reg",
// opcode 0x29 /r, wide REX).  Encoder arguments are (src, dst): register in
// the reg field, memory operand in r/m.  Flags are declared killed.
8460 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8461 %{
8462   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8463   effect(KILL cr);
8464
8465   ins_cost(150);
8466   format %{ "subq    $dst, $src\t# long" %}
8467   opcode(0x29); /* Opcode 29 /r */
8468   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8469   ins_pipe(ialu_mem_reg);
8470 %}
8471 
// Long read-modify-write subtract, memory -= immediate (opcode 0x81 /5, wide
// REX).  The subtrahend is restricted to the immL32 operand class.  Flags
// are declared killed.
8472 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8473 %{
8474   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8475   effect(KILL cr);
8476
8477   ins_cost(125); // XXX
8478   format %{ "subq    $dst, $src\t# long" %}
8479   opcode(0x81); /* Opcode 81 /5 id */
8480   ins_encode(REX_mem_wide(dst),
8481              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8482   ins_pipe(ialu_mem_imm);
8483 %}
8484 
8485 // Subtract from a pointer
8486 // XXX hmpf???
// Pointer minus int: matches AddP of dst with (0 - src) and emits a single
// "subq $dst, $src" (opcode 0x2B, wide REX).  Flags are declared killed.
// NOTE(review): src is an int register (rRegI) but is used as the source of
// a 64-bit subq; whether its upper 32 bits are guaranteed to hold a correct
// extension is not visible here -- confirm, as the XXX marker preceding this
// definition also suggests.
8487 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8488 %{
8489   match(Set dst (AddP dst (SubI zero src)));
8490   effect(KILL cr);
8491
8492   format %{ "subq    $dst, $src\t# ptr - int" %}
8493   opcode(0x2B);
8494   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8495   ins_pipe(ialu_reg_reg);
8496 %}
8497 
// Int negate of a register: matches (0 - dst) via the immI0 operand and
// emits "negl $dst" (opcode F7 /3).  Flags are declared killed.
8498 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8499 %{
8500   match(Set dst (SubI zero dst));
8501   effect(KILL cr);
8502
8503   format %{ "negl    $dst\t# int" %}
8504   opcode(0xF7, 0x03);  // Opcode F7 /3
8505   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8506   ins_pipe(ialu_reg);
8507 %}
8508 
// Int negate of a memory location: matches a StoreI of (0 - LoadI) on a
// single address and emits "negl mem" (opcode F7 /3).  The /3 extension is
// passed to RM_opc_mem as the secondary opcode.  Flags are declared killed.
// NOTE(review): uses ins_pipe(ialu_reg) although the operand is memory; the
// other memory read-modify-write forms in this file use ialu_mem_imm or
// ialu_mem_reg -- confirm the pipeline class.
8509 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8510 %{
8511   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8512   effect(KILL cr);
8513
8514   format %{ "negl    $dst\t# int" %}
8515   opcode(0xF7, 0x03);  // Opcode F7 /3
8516   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8517   ins_pipe(ialu_reg);
8518 %}
8519 
// Long negate of a register: matches (0 - dst) via the immL0 operand and
// emits "negq $dst" (opcode F7 /3, wide REX).  Flags are declared killed.
8520 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8521 %{
8522   match(Set dst (SubL zero dst));
8523   effect(KILL cr);
8524
8525   format %{ "negq    $dst\t# long" %}
8526   opcode(0xF7, 0x03);  // Opcode F7 /3
8527   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8528   ins_pipe(ialu_reg);
8529 %}
8530 
// Long negate of a memory location: matches a StoreL of (0 - LoadL) on a
// single address and emits "negq mem" (opcode F7 /3, wide REX).  The /3
// extension is passed to RM_opc_mem as the secondary opcode.  Flags are
// declared killed.
// NOTE(review): uses ins_pipe(ialu_reg) although the operand is memory --
// confirm the pipeline class.
8531 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8532 %{
8533   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8534   effect(KILL cr);
8535
8536   format %{ "negq    $dst\t# long" %}
8537   opcode(0xF7, 0x03);  // Opcode F7 /3
8538   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8539   ins_pipe(ialu_reg);
8540 %}
8541 
8542 
8543 //----------Multiplication/Division Instructions-------------------------------
8544 // Integer Multiplication Instructions
8545 // Multiply Register
8546 
// Int multiply, register *= register (two-operand "imull", opcode 0F AF).
// Flags are declared killed.
8547 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8548 %{
8549   match(Set dst (MulI dst src));
8550   effect(KILL cr);
8551
8552   ins_cost(300);
8553   format %{ "imull   $dst, $src\t# int" %}
8554   opcode(0x0F, 0xAF);
8555   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8556   ins_pipe(ialu_reg_reg_alu0);
8557 %}
8558 
// Int multiply by an immediate, three-operand form: dst = src * imm
// ("imull $dst, $src, $imm", opcode 0x69).  dst is not an input of the match
// rule.  Flags are declared killed.
8559 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8560 %{
8561   match(Set dst (MulI src imm));
8562   effect(KILL cr);
8563
8564   ins_cost(300);
8565   format %{ "imull   $dst, $src, $imm\t# int" %}
8566   opcode(0x69); /* 69 /r id */
8567   ins_encode(REX_reg_reg(dst, src),
8568              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8569   ins_pipe(ialu_reg_reg_alu0);
8570 %}
8571 
// Int multiply, register *= memory.  The LoadI in the match rule is folded
// into the imull's memory operand (opcode 0F AF).  Flags are declared
// killed.
8572 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8573 %{
8574   match(Set dst (MulI dst (LoadI src)));
8575   effect(KILL cr);
8576
8577   ins_cost(350);
8578   format %{ "imull   $dst, $src\t# int" %}
8579   opcode(0x0F, 0xAF);
8580   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8581   ins_pipe(ialu_reg_mem_alu0);
8582 %}
8583 
// Int multiply of a memory operand by an immediate, three-operand form:
// dst = [src] * imm (opcode 0x69).  Flags are declared killed.
// NOTE(review): cost is 300, lower than mulI_mem's 350 and equal to the
// register forms -- confirm this is intended.
8584 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8585 %{
8586   match(Set dst (MulI (LoadI src) imm));
8587   effect(KILL cr);
8588
8589   ins_cost(300);
8590   format %{ "imull   $dst, $src, $imm\t# int" %}
8591   opcode(0x69); /* 69 /r id */
8592   ins_encode(REX_reg_mem(dst, src),
8593              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8594   ins_pipe(ialu_reg_mem_alu0);
8595 %}
8596 
// Long multiply, register *= register (two-operand "imulq", opcode 0F AF,
// wide REX).  Flags are declared killed.
8597 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8598 %{
8599   match(Set dst (MulL dst src));
8600   effect(KILL cr);
8601
8602   ins_cost(300);
8603   format %{ "imulq   $dst, $src\t# long" %}
8604   opcode(0x0F, 0xAF);
8605   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8606   ins_pipe(ialu_reg_reg_alu0);
8607 %}
8608 
// Long multiply by an immediate, three-operand form: dst = src * imm
// ("imulq $dst, $src, $imm", opcode 0x69, wide REX).  The multiplier is
// restricted to the immL32 operand class.  Flags are declared killed.
8609 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8610 %{
8611   match(Set dst (MulL src imm));
8612   effect(KILL cr);
8613
8614   ins_cost(300);
8615   format %{ "imulq   $dst, $src, $imm\t# long" %}
8616   opcode(0x69); /* 69 /r id */
8617   ins_encode(REX_reg_reg_wide(dst, src),
8618              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8619   ins_pipe(ialu_reg_reg_alu0);
8620 %}
8621 
// Long multiply, register *= memory.  The LoadL in the match rule is folded
// into the imulq's memory operand (opcode 0F AF, wide REX).  Flags are
// declared killed.
8622 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8623 %{
8624   match(Set dst (MulL dst (LoadL src)));
8625   effect(KILL cr);
8626
8627   ins_cost(350);
8628   format %{ "imulq   $dst, $src\t# long" %}
8629   opcode(0x0F, 0xAF);
8630   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8631   ins_pipe(ialu_reg_mem_alu0);
8632 %}
8633 
// Long multiply of a memory operand by an immediate, three-operand form:
// dst = [src] * imm (opcode 0x69, wide REX).  The multiplier is restricted
// to the immL32 operand class.  Flags are declared killed.
8634 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8635 %{
8636   match(Set dst (MulL (LoadL src) imm));
8637   effect(KILL cr);
8638
8639   ins_cost(300);
8640   format %{ "imulq   $dst, $src, $imm\t# long" %}
8641   opcode(0x69); /* 69 /r id */
8642   ins_encode(REX_reg_mem_wide(dst, src),
8643              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8644   ins_pipe(ialu_reg_mem_alu0);
8645 %}
8646 
// MulHiL: uses the one-operand imulq (opcode F7 /5, wide REX), which per the
// format string writes RDX:RAX.  The result operand dst is pinned to the
// rdx_RegL class; the rax operand is both used and killed (USE_KILL); src
// comes from the no_rax_RegL class.  Flags are declared killed.
8647 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8648 %{
8649   match(Set dst (MulHiL src rax));
8650   effect(USE_KILL rax, KILL cr);
8651
8652   ins_cost(300);
8653   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8654   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8655   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8656   ins_pipe(ialu_reg_reg_alu0);
8657 %}
8658 
// Int division: matches DivI with the dividend and the quotient both in rax.
// rdx is declared killed. As the format text shows, the emitted sequence
// special-cases rax == 0x80000000 with $div == -1: it zeroes rdx and skips
// the idivl instead of executing it.
8659 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8660                    rFlagsReg cr)
8661 %{
8662   match(Set rax (DivI rax div));
8663   effect(KILL rdx, KILL cr);
8664 
8665   ins_cost(30*100+10*100); // XXX
8666   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8667             "jne,s   normal\n\t"
8668             "xorl    rdx, rdx\n\t"
8669             "cmpl    $div, -1\n\t"
8670             "je,s    done\n"
8671     "normal: cdql\n\t"
8672             "idivl   $div\n"
8673     "done:"        %}
8674   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8675   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // NOTE(review): cdql_enc (defined elsewhere) presumably emits the guard + cdql shown in the format - confirm
8676   ins_pipe(ialu_reg_reg_alu0);
8677 %}
8678 
// Long division: matches DivL with the dividend and the quotient both in rax.
// rdx is declared killed. As the format text shows, the emitted sequence
// special-cases rax == 0x8000000000000000 with $div == -1: it zeroes rdx and
// skips the idivq instead of executing it.
8679 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8680                    rFlagsReg cr)
8681 %{
8682   match(Set rax (DivL rax div));
8683   effect(KILL rdx, KILL cr);
8684 
8685   ins_cost(30*100+10*100); // XXX
8686   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8687             "cmpq    rax, rdx\n\t"
8688             "jne,s   normal\n\t"
8689             "xorl    rdx, rdx\n\t"
8690             "cmpq    $div, -1\n\t"
8691             "je,s    done\n"
8692     "normal: cdqq\n\t"
8693             "idivq   $div\n"
8694     "done:"        %}
8695   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8696   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // NOTE(review): cdqq_enc (defined elsewhere) presumably emits the guard + cdqq shown in the format - confirm
8697   ins_pipe(ialu_reg_reg_alu0);
8698 %}
8699 
8700 // Integer DIVMOD with Register, both quotient and mod results.
// Matches the DivModI node itself (no Set): unlike divI_rReg, rdx is not
// declared killed here - presumably because it carries the remainder result.
// The format text is the same guarded idivl sequence as divI_rReg.
8701 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8702                              rFlagsReg cr)
8703 %{
8704   match(DivModI rax div);
8705   effect(KILL cr);
8706 
8707   ins_cost(30*100+10*100); // XXX
8708   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8709             "jne,s   normal\n\t"
8710             "xorl    rdx, rdx\n\t"
8711             "cmpl    $div, -1\n\t"
8712             "je,s    done\n"
8713     "normal: cdql\n\t"
8714             "idivl   $div\n"
8715     "done:"        %}
8716   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8717   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8718   ins_pipe(pipe_slow);
8719 %}
8720 
8721 // Long DIVMOD with Register, both quotient and mod results.
// Matches the DivModL node itself (no Set): unlike divL_rReg, rdx is not
// declared killed here - presumably because it carries the remainder result.
// The format text is the same guarded idivq sequence as divL_rReg.
8722 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8723                              rFlagsReg cr)
8724 %{
8725   match(DivModL rax div);
8726   effect(KILL cr);
8727 
8728   ins_cost(30*100+10*100); // XXX
8729   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8730             "cmpq    rax, rdx\n\t"
8731             "jne,s   normal\n\t"
8732             "xorl    rdx, rdx\n\t"
8733             "cmpq    $div, -1\n\t"
8734             "je,s    done\n"
8735     "normal: cdqq\n\t"
8736             "idivq   $div\n"
8737     "done:"        %}
8738   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8739   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8740   ins_pipe(pipe_slow);
8741 %}
8742 
8743 //----------- DivL-By-Constant-Expansions--------------------------------------
8744 // DivI cases are handled by the compiler
8745 
8746 // Magic constant, reciprocal of 10: 0x6666666666666667 is 2^66 / 10 rounded up.
// Expand-only helper (no match rule); divL_10 uses it to load the multiplier.
// The format text now prints the same 16-digit constant that is encoded
// (it previously dropped one '6').
8747 instruct loadConL_0x6666666666666667(rRegL dst)
8748 %{
8749   effect(DEF dst);
8750 
8751   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8752   ins_encode(load_immL(dst, 0x6666666666666667));
8753   ins_pipe(ialu_reg);
8754 %}
8755 
// Expand-only helper (no match rule) used by divL_10: widening signed
// multiply of rax by src (F7 /5). dst is defined in the rdx_RegL class;
// rax is consumed and then clobbered.
8756 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8757 %{
8758   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8759 
8760   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8761   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8762   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8763   ins_pipe(ialu_reg_reg_alu0);
8764 %}
8765 
// Expand-only helper (no match rule) used by divL_10: arithmetic right shift
// of a long register, in place, by the fixed count 63.
8766 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8767 %{
8768   effect(USE_DEF dst, KILL cr);
8769 
8770   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8771   opcode(0xC1, 0x7); /* C1 /7 ib */
8772   ins_encode(reg_opc_imm_wide(dst, 0x3F)); // 0x3F == 63
8773   ins_pipe(ialu_reg);
8774 %}
8775 
// Expand-only helper (no match rule) used by divL_10: arithmetic right shift
// of a long register, in place, by the fixed count 2.
8776 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8777 %{
8778   effect(USE_DEF dst, KILL cr);
8779 
8780   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8781   opcode(0xC1, 0x7); /* C1 /7 ib */
8782   ins_encode(reg_opc_imm_wide(dst, 0x2));
8783   ins_pipe(ialu_reg);
8784 %}
8785 
// Long division by the constant 10 (immL10) without an idiv. The expansion
// computes  dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
// using the expand-only helpers defined above.
8786 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8787 %{
8788   match(Set dst (DivL src div));
8789 
8790   ins_cost((5+8)*100);
8791   expand %{
8792     rax_RegL rax;                     // Killed temp
8793     rFlagsReg cr;                     // Killed
8794     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8795     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8796     sarL_rReg_63(src, cr);            // sarq  src, 63  (src becomes 0 or -1)
8797     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8798     subL_rReg(dst, src, cr);          // subq  rdx, src  (long subtract)
8799   %}
8800 %}
8801 
8802 //-----------------------------------------------------------------------------
8803 
// Int remainder: matches ModI with the dividend in rax and the result in rdx.
// rax is declared killed. The format text is the same guarded idivl sequence
// as divI_rReg; on the guarded path rdx has been zeroed.
8804 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8805                    rFlagsReg cr)
8806 %{
8807   match(Set rdx (ModI rax div));
8808   effect(KILL rax, KILL cr);
8809 
8810   ins_cost(300); // XXX
8811   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8812             "jne,s   normal\n\t"
8813             "xorl    rdx, rdx\n\t"
8814             "cmpl    $div, -1\n\t"
8815             "je,s    done\n"
8816     "normal: cdql\n\t"
8817             "idivl   $div\n"
8818     "done:"        %}
8819   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8820   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8821   ins_pipe(ialu_reg_reg_alu0);
8822 %}
8823 
// Long remainder: matches ModL with the dividend in rax and the result in rdx.
// rax is declared killed. The format text is the same guarded idivq sequence
// as divL_rReg; on the guarded path rdx has been zeroed.
8824 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8825                    rFlagsReg cr)
8826 %{
8827   match(Set rdx (ModL rax div));
8828   effect(KILL rax, KILL cr);
8829 
8830   ins_cost(300); // XXX
8831   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8832             "cmpq    rax, rdx\n\t"
8833             "jne,s   normal\n\t"
8834             "xorl    rdx, rdx\n\t"
8835             "cmpq    $div, -1\n\t"
8836             "je,s    done\n"
8837     "normal: cdqq\n\t"
8838             "idivq   $div\n"
8839     "done:"        %}
8840   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8841   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8842   ins_pipe(ialu_reg_reg_alu0);
8843 %}
8844 
8845 // Integer Shift Instructions
8846 // Shift Left by one: int register shifted in place (LShiftI with an immI1 count).
8847 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8848 %{
8849   match(Set dst (LShiftI dst shift));
8850   effect(KILL cr);
8851 
8852   format %{ "sall    $dst, $shift" %}
8853   opcode(0xD1, 0x4); /* D1 /4 */
8854   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // no immediate byte is emitted for the count
8855   ins_pipe(ialu_reg);
8856 %}
8857 
8858 // Shift Left by one: int in memory, read-modify-write (LoadI / LShiftI / StoreI on the same address).
8859 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8860 %{
8861   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8862   effect(KILL cr);
8863 
8864   format %{ "sall    $dst, $shift\t" %}
8865   opcode(0xD1, 0x4); /* D1 /4 */
8866   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
8867   ins_pipe(ialu_mem_imm);
8868 %}
8869 
8870 // Shift Left by 8-bit immediate: int register shifted in place (LShiftI with an immI8 count).
8871 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8872 %{
8873   match(Set dst (LShiftI dst shift));
8874   effect(KILL cr);
8875 
8876   format %{ "sall    $dst, $shift" %}
8877   opcode(0xC1, 0x4); /* C1 /4 ib */
8878   ins_encode(reg_opc_imm(dst, shift));
8879   ins_pipe(ialu_reg);
8880 %}
8881 
8882 // Shift Left by 8-bit immediate: int in memory, read-modify-write on the same address.
8883 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8884 %{
8885   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8886   effect(KILL cr);
8887 
8888   format %{ "sall    $dst, $shift" %}
8889   opcode(0xC1, 0x4); /* C1 /4 ib */
8890   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
8891   ins_pipe(ialu_mem_imm);
8892 %}
8893 
8894 // Shift Left by variable: int register shifted in place; the count operand is constrained to rcx_RegI.
8895 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8896 %{
8897   match(Set dst (LShiftI dst shift));
8898   effect(KILL cr);
8899 
8900   format %{ "sall    $dst, $shift" %}
8901   opcode(0xD3, 0x4); /* D3 /4 */
8902   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
8903   ins_pipe(ialu_reg_reg);
8904 %}
8905 
8906 // Shift Left by variable: int in memory, read-modify-write; the count operand is constrained to rcx_RegI.
8907 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8908 %{
8909   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8910   effect(KILL cr);
8911 
8912   format %{ "sall    $dst, $shift" %}
8913   opcode(0xD3, 0x4); /* D3 /4 */
8914   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
8915   ins_pipe(ialu_mem_reg);
8916 %}
8917 
8918 // Arithmetic shift right by one: int register shifted in place (RShiftI with an immI1 count).
8919 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8920 %{
8921   match(Set dst (RShiftI dst shift));
8922   effect(KILL cr);
8923 
8924   format %{ "sarl    $dst, $shift" %}
8925   opcode(0xD1, 0x7); /* D1 /7 */
8926   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // no immediate byte is emitted for the count
8927   ins_pipe(ialu_reg);
8928 %}
8929 
8930 // Arithmetic shift right by one: int in memory, read-modify-write on the same address.
8931 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8932 %{
8933   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8934   effect(KILL cr);
8935 
8936   format %{ "sarl    $dst, $shift" %}
8937   opcode(0xD1, 0x7); /* D1 /7 */
8938   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
8939   ins_pipe(ialu_mem_imm);
8940 %}
8941 
8942 // Arithmetic Shift Right by 8-bit immediate: int register shifted in place (RShiftI with an immI8 count).
// Uses the register pipeline class ialu_reg, matching the sibling register
// forms salI_rReg_imm and shrI_rReg_imm (this block previously named the
// memory class ialu_mem_imm although it has no memory operand).
8943 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8944 %{
8945   match(Set dst (RShiftI dst shift));
8946   effect(KILL cr);
8947 
8948   format %{ "sarl    $dst, $shift" %}
8949   opcode(0xC1, 0x7); /* C1 /7 ib */
8950   ins_encode(reg_opc_imm(dst, shift));
8951   ins_pipe(ialu_reg);
8952 %}
8953 
8954 // Arithmetic Shift Right by 8-bit immediate: int in memory, read-modify-write on the same address.
8955 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8956 %{
8957   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8958   effect(KILL cr);
8959 
8960   format %{ "sarl    $dst, $shift" %}
8961   opcode(0xC1, 0x7); /* C1 /7 ib */
8962   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
8963   ins_pipe(ialu_mem_imm);
8964 %}
8965 
8966 // Arithmetic Shift Right by variable: int register shifted in place; the count operand is constrained to rcx_RegI.
8967 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8968 %{
8969   match(Set dst (RShiftI dst shift));
8970   effect(KILL cr);
8971 
8972   format %{ "sarl    $dst, $shift" %}
8973   opcode(0xD3, 0x7); /* D3 /7 */
8974   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
8975   ins_pipe(ialu_reg_reg);
8976 %}
8977 
8978 // Arithmetic Shift Right by variable: int in memory, read-modify-write; the count operand is constrained to rcx_RegI.
8979 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8980 %{
8981   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8982   effect(KILL cr);
8983 
8984   format %{ "sarl    $dst, $shift" %}
8985   opcode(0xD3, 0x7); /* D3 /7 */
8986   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
8987   ins_pipe(ialu_mem_reg);
8988 %}
8989 
8990 // Logical shift right by one: int register shifted in place (URShiftI with an immI1 count).
8991 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8992 %{
8993   match(Set dst (URShiftI dst shift));
8994   effect(KILL cr);
8995 
8996   format %{ "shrl    $dst, $shift" %}
8997   opcode(0xD1, 0x5); /* D1 /5 */
8998   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // no immediate byte is emitted for the count
8999   ins_pipe(ialu_reg);
9000 %}
9001 
9002 // Logical shift right by one: int in memory, read-modify-write on the same address.
9003 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9004 %{
9005   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9006   effect(KILL cr);
9007 
9008   format %{ "shrl    $dst, $shift" %}
9009   opcode(0xD1, 0x5); /* D1 /5 */
9010   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
9011   ins_pipe(ialu_mem_imm);
9012 %}
9013 
9014 // Logical Shift Right by 8-bit immediate: int register shifted in place (URShiftI with an immI8 count).
9015 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9016 %{
9017   match(Set dst (URShiftI dst shift));
9018   effect(KILL cr);
9019 
9020   format %{ "shrl    $dst, $shift" %}
9021   opcode(0xC1, 0x5); /* C1 /5 ib */
9022   ins_encode(reg_opc_imm(dst, shift));
9023   ins_pipe(ialu_reg);
9024 %}
9025 
9026 // Logical Shift Right by 8-bit immediate: int in memory, read-modify-write on the same address.
9027 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9028 %{
9029   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9030   effect(KILL cr);
9031 
9032   format %{ "shrl    $dst, $shift" %}
9033   opcode(0xC1, 0x5); /* C1 /5 ib */
9034   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
9035   ins_pipe(ialu_mem_imm);
9036 %}
9037 
9038 // Logical Shift Right by variable: int register shifted in place; the count operand is constrained to rcx_RegI.
9039 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9040 %{
9041   match(Set dst (URShiftI dst shift));
9042   effect(KILL cr);
9043 
9044   format %{ "shrl    $dst, $shift" %}
9045   opcode(0xD3, 0x5); /* D3 /5 */
9046   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
9047   ins_pipe(ialu_reg_reg);
9048 %}
9049 
9050 // Logical Shift Right by variable: int in memory, read-modify-write; the count operand is constrained to rcx_RegI.
9051 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9052 %{
9053   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9054   effect(KILL cr);
9055 
9056   format %{ "shrl    $dst, $shift" %}
9057   opcode(0xD3, 0x5); /* D3 /5 */
9058   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
9059   ins_pipe(ialu_mem_reg);
9060 %}
9061 
9062 // Long Shift Instructions
9063 // Shift Left by one: long register shifted in place (LShiftL with an immI1 count).
9064 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9065 %{
9066   match(Set dst (LShiftL dst shift));
9067   effect(KILL cr);
9068 
9069   format %{ "salq    $dst, $shift" %}
9070   opcode(0xD1, 0x4); /* D1 /4 */
9071   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // no immediate byte is emitted for the count
9072   ins_pipe(ialu_reg);
9073 %}
9074 
9075 // Shift Left by one: long in memory, read-modify-write (LoadL / LShiftL / StoreL on the same address).
9076 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9077 %{
9078   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9079   effect(KILL cr);
9080 
9081   format %{ "salq    $dst, $shift" %}
9082   opcode(0xD1, 0x4); /* D1 /4 */
9083   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
9084   ins_pipe(ialu_mem_imm);
9085 %}
9086 
9087 // Shift Left by 8-bit immediate: long register shifted in place (LShiftL with an immI8 count).
9088 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9089 %{
9090   match(Set dst (LShiftL dst shift));
9091   effect(KILL cr);
9092 
9093   format %{ "salq    $dst, $shift" %}
9094   opcode(0xC1, 0x4); /* C1 /4 ib */
9095   ins_encode(reg_opc_imm_wide(dst, shift));
9096   ins_pipe(ialu_reg);
9097 %}
9098 
9099 // Shift Left by 8-bit immediate: long in memory, read-modify-write on the same address.
9100 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9101 %{
9102   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9103   effect(KILL cr);
9104 
9105   format %{ "salq    $dst, $shift" %}
9106   opcode(0xC1, 0x4); /* C1 /4 ib */
9107   ins_encode(REX_mem_wide(dst), OpcP,
9108              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
9109   ins_pipe(ialu_mem_imm);
9110 %}
9111 
9112 // Shift Left by variable: long register shifted in place; the int count operand is constrained to rcx_RegI.
9113 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9114 %{
9115   match(Set dst (LShiftL dst shift));
9116   effect(KILL cr);
9117 
9118   format %{ "salq    $dst, $shift" %}
9119   opcode(0xD3, 0x4); /* D3 /4 */
9120   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
9121   ins_pipe(ialu_reg_reg);
9122 %}
9123 
9124 // Shift Left by variable: long in memory, read-modify-write; the int count operand is constrained to rcx_RegI.
9125 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9126 %{
9127   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9128   effect(KILL cr);
9129 
9130   format %{ "salq    $dst, $shift" %}
9131   opcode(0xD3, 0x4); /* D3 /4 */
9132   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
9133   ins_pipe(ialu_mem_reg);
9134 %}
9135 
9136 // Arithmetic shift right by one: long register shifted in place (RShiftL with an immI1 count).
9137 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9138 %{
9139   match(Set dst (RShiftL dst shift));
9140   effect(KILL cr);
9141 
9142   format %{ "sarq    $dst, $shift" %}
9143   opcode(0xD1, 0x7); /* D1 /7 */
9144   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // no immediate byte is emitted for the count
9145   ins_pipe(ialu_reg);
9146 %}
9147 
9148 // Arithmetic shift right by one: long in memory, read-modify-write on the same address.
9149 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9150 %{
9151   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9152   effect(KILL cr);
9153 
9154   format %{ "sarq    $dst, $shift" %}
9155   opcode(0xD1, 0x7); /* D1 /7 */
9156   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
9157   ins_pipe(ialu_mem_imm);
9158 %}
9159 
9160 // Arithmetic Shift Right by 8-bit immediate: long register shifted in place (RShiftL with an immI8 count).
// Uses the register pipeline class ialu_reg, matching the sibling register
// forms salL_rReg_imm and shrL_rReg_imm (this block previously named the
// memory class ialu_mem_imm although it has no memory operand).
9161 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9162 %{
9163   match(Set dst (RShiftL dst shift));
9164   effect(KILL cr);
9165 
9166   format %{ "sarq    $dst, $shift" %}
9167   opcode(0xC1, 0x7); /* C1 /7 ib */
9168   ins_encode(reg_opc_imm_wide(dst, shift));
9169   ins_pipe(ialu_reg);
9170 %}
9171 
9172 // Arithmetic Shift Right by 8-bit immediate: long in memory, read-modify-write on the same address.
9173 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9174 %{
9175   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9176   effect(KILL cr);
9177 
9178   format %{ "sarq    $dst, $shift" %}
9179   opcode(0xC1, 0x7); /* C1 /7 ib */
9180   ins_encode(REX_mem_wide(dst), OpcP,
9181              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
9182   ins_pipe(ialu_mem_imm);
9183 %}
9184 
9185 // Arithmetic Shift Right by variable: long register shifted in place; the int count operand is constrained to rcx_RegI.
9186 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9187 %{
9188   match(Set dst (RShiftL dst shift));
9189   effect(KILL cr);
9190 
9191   format %{ "sarq    $dst, $shift" %}
9192   opcode(0xD3, 0x7); /* D3 /7 */
9193   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
9194   ins_pipe(ialu_reg_reg);
9195 %}
9196 
9197 // Arithmetic Shift Right by variable: long in memory, read-modify-write; the int count operand is constrained to rcx_RegI.
9198 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9199 %{
9200   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9201   effect(KILL cr);
9202 
9203   format %{ "sarq    $dst, $shift" %}
9204   opcode(0xD3, 0x7); /* D3 /7 */
9205   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
9206   ins_pipe(ialu_mem_reg);
9207 %}
9208 
9209 // Logical shift right by one: long register shifted in place (URShiftL with an immI1 count).
9210 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9211 %{
9212   match(Set dst (URShiftL dst shift));
9213   effect(KILL cr);
9214 
9215   format %{ "shrq    $dst, $shift" %}
9216   opcode(0xD1, 0x5); /* D1 /5 */
9217   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst )); // no immediate byte is emitted for the count
9218   ins_pipe(ialu_reg);
9219 %}
9220 
9221 // Logical shift right by one: long in memory, read-modify-write on the same address.
9222 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9223 %{
9224   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9225   effect(KILL cr);
9226 
9227   format %{ "shrq    $dst, $shift" %}
9228   opcode(0xD1, 0x5); /* D1 /5 */
9229   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // no immediate byte is emitted for the count
9230   ins_pipe(ialu_mem_imm);
9231 %}
9232 
9233 // Logical Shift Right by 8-bit immediate: long register shifted in place (URShiftL with an immI8 count).
9234 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9235 %{
9236   match(Set dst (URShiftL dst shift));
9237   effect(KILL cr);
9238 
9239   format %{ "shrq    $dst, $shift" %}
9240   opcode(0xC1, 0x5); /* C1 /5 ib */
9241   ins_encode(reg_opc_imm_wide(dst, shift));
9242   ins_pipe(ialu_reg);
9243 %}
9244 
9245 
9246 // Logical Shift Right by 8-bit immediate: long in memory, read-modify-write on the same address.
9247 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9248 %{
9249   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9250   effect(KILL cr);
9251 
9252   format %{ "shrq    $dst, $shift" %}
9253   opcode(0xC1, 0x5); /* C1 /5 ib */
9254   ins_encode(REX_mem_wide(dst), OpcP,
9255              RM_opc_mem(secondary, dst), Con8or32(shift)); // the count is appended as an immediate
9256   ins_pipe(ialu_mem_imm);
9257 %}
9258 
9259 // Logical Shift Right by variable: long register shifted in place; the int count operand is constrained to rcx_RegI.
9260 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9261 %{
9262   match(Set dst (URShiftL dst shift));
9263   effect(KILL cr);
9264 
9265   format %{ "shrq    $dst, $shift" %}
9266   opcode(0xD3, 0x5); /* D3 /5 */
9267   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the count register is not encoded explicitly
9268   ins_pipe(ialu_reg_reg);
9269 %}
9270 
9271 // Logical Shift Right by variable: long in memory, read-modify-write; the int count operand is constrained to rcx_RegI.
9272 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9273 %{
9274   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9275   effect(KILL cr);
9276 
9277   format %{ "shrq    $dst, $shift" %}
9278   opcode(0xD3, 0x5); /* D3 /5 */
9279   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the count register is not encoded explicitly
9280   ins_pipe(ialu_mem_reg);
9281 %}
9282 
9283 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9284 // This idiom is used by the compiler for the i2b bytecode.
// The two shifts are replaced by the single instruction shown in the format
// (movsbl); no flags operand is declared.
9285 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9286 %{
9287   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9288 
9289   format %{ "movsbl  $dst, $src\t# i2b" %}
9290   opcode(0x0F, 0xBE); /* two opcode bytes: 0F BE */
9291   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9292   ins_pipe(ialu_reg_reg);
9293 %}
9294 
9295 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9296 // This idiom is used by the compiler for the i2s bytecode.
// The two shifts are replaced by the single instruction shown in the format
// (movswl); no flags operand is declared.
9297 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9298 %{
9299   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9300 
9301   format %{ "movswl  $dst, $src\t# i2s" %}
9302   opcode(0x0F, 0xBF); /* two opcode bytes: 0F BF */
9303   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9304   ins_pipe(ialu_reg_reg);
9305 %}
9306 
9307 // ROL/ROR instructions
9308 
9309 // ROL expand: helper with no match rule, instantiated from rolI_rReg_i1. Rotates an int register left in place (D1 /0, no count byte).
9310 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9311   effect(KILL cr, USE_DEF dst);
9312 
9313   format %{ "roll    $dst" %}
9314   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9315   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9316   ins_pipe(ialu_reg);
9317 %}
9318 
// ROL expand helper (no match rule), instantiated from rolI_rReg_i8:
// rotates an int register left in place by an 8-bit immediate count.
9319 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9320   effect(USE_DEF dst, USE shift, KILL cr);
9321 
9322   format %{ "roll    $dst, $shift" %}
9323   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9324   ins_encode( reg_opc_imm(dst, shift) );
9325   ins_pipe(ialu_reg);
9326 %}
9327 
// ROL expand helper (no match rule), instantiated from the rolI_rReg_Var_*
// rules: rotates an int register left in place by a variable count. The
// count uses the rcx_RegI class and dst the no_rcx_RegI class.
9328 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9329 %{
9330   effect(USE_DEF dst, USE shift, KILL cr);
9331 
9332   format %{ "roll    $dst, $shift" %}
9333   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9334   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9335   ins_pipe(ialu_reg_reg);
9336 %}
9337 // end of ROL expand
9338 
9339 // Rotate Left by one: recognizes (dst << 1) | (dst >>> -1) and expands to rolI_rReg_imm1.
// NOTE(review): the -1 right-shift count (immI_M1) is presumably equivalent
// to 31 because int shift counts are masked to 5 bits - confirm.
9340 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9341 %{
9342   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9343 
9344   expand %{
9345     rolI_rReg_imm1(dst, cr);
9346   %}
9347 %}
9348 
9349 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift) and expands to rolI_rReg_imm8 using lshift.
9350 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9351 %{
9352   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two constant counts sum to a multiple of 32
9353   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9354 
9355   expand %{
9356     rolI_rReg_imm8(dst, lshift, cr);
9357   %}
9358 %}
9359 
9360 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (0 - shift)) and expands to rolI_rReg_CL.
9361 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9362 %{
9363   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9364 
9365   expand %{
9366     rolI_rReg_CL(dst, shift, cr);
9367   %}
9368 %}
9369 
9370 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (32 - shift)) and expands to rolI_rReg_CL.
9371 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9372 %{
9373   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9374 
9375   expand %{
9376     rolI_rReg_CL(dst, shift, cr);
9377   %}
9378 %}
9379 
9380 // ROR expand: helper with no match rule, instantiated from rorI_rReg_i1. Rotates an int register right in place (D1 /1, no count byte).
9381 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9382 %{
9383   effect(USE_DEF dst, KILL cr);
9384 
9385   format %{ "rorl    $dst" %}
9386   opcode(0xD1, 0x1); /* D1 /1 */
9387   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9388   ins_pipe(ialu_reg);
9389 %}
9390 
// ROR expand helper (no match rule), instantiated from rorI_rReg_i8:
// rotates an int register right in place by an 8-bit immediate count.
9391 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9392 %{
9393   effect(USE_DEF dst, USE shift, KILL cr);
9394 
9395   format %{ "rorl    $dst, $shift" %}
9396   opcode(0xC1, 0x1); /* C1 /1 ib */
9397   ins_encode(reg_opc_imm(dst, shift));
9398   ins_pipe(ialu_reg);
9399 %}
9400 
// ROR expand helper (no match rule), instantiated from the rorI_rReg_Var_*
// rules: rotates an int register right in place by a variable count. The
// count uses the rcx_RegI class and dst the no_rcx_RegI class.
9401 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9402 %{
9403   effect(USE_DEF dst, USE shift, KILL cr);
9404 
9405   format %{ "rorl    $dst, $shift" %}
9406   opcode(0xD3, 0x1); /* D3 /1 */
9407   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9408   ins_pipe(ialu_reg_reg);
9409 %}
9410 // end of ROR expand
9411 
9412 // Rotate Right by one: recognizes (dst >>> 1) | (dst << -1) and expands to rorI_rReg_imm1.
// NOTE(review): the -1 left-shift count (immI_M1) is presumably equivalent
// to 31 because int shift counts are masked to 5 bits - confirm.
9413 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9414 %{
9415   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9416 
9417   expand %{
9418     rorI_rReg_imm1(dst, cr);
9419   %}
9420 %}
9421 
9422 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift) and expands to rorI_rReg_imm8 using rshift.
9423 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9424 %{
9425   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two constant counts sum to a multiple of 32
9426   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9427 
9428   expand %{
9429     rorI_rReg_imm8(dst, rshift, cr);
9430   %}
9431 %}
9432 
9433 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (0 - shift)) and expands to rorI_rReg_CL.
9434 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9435 %{
9436   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9437 
9438   expand %{
9439     rorI_rReg_CL(dst, shift, cr);
9440   %}
9441 %}
9442 
9443 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (32 - shift)) and expands to rorI_rReg_CL.
9444 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9445 %{
9446   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9447 
9448   expand %{
9449     rorI_rReg_CL(dst, shift, cr);
9450   %}
9451 %}
9452 
9453 // for long rotate
9454 // ROL expand: helper with no match rule, instantiated from rolL_rReg_i1. Rotates a long register left in place (D1 /0, no count byte).
9455 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9456   effect(USE_DEF dst, KILL cr);
9457 
9458   format %{ "rolq    $dst" %}
9459   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9460   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9461   ins_pipe(ialu_reg);
9462 %}
9463 
// ROL expand helper (no match rule), instantiated from rolL_rReg_i8:
// rotates a long register left in place by an 8-bit immediate count.
9464 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9465   effect(USE_DEF dst, USE shift, KILL cr);
9466 
9467   format %{ "rolq    $dst, $shift" %}
9468   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9469   ins_encode( reg_opc_imm_wide(dst, shift) );
9470   ins_pipe(ialu_reg);
9471 %}
9472 
// ROL expand helper (no match rule), instantiated from the rolL_rReg_Var_*
// rules: rotates a long register left in place by a variable count. The
// count uses the rcx_RegI class and dst the no_rcx_RegL class.
9473 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9474 %{
9475   effect(USE_DEF dst, USE shift, KILL cr);
9476 
9477   format %{ "rolq    $dst, $shift" %}
9478   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9479   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9480   ins_pipe(ialu_reg_reg);
9481 %}
9482 // end of ROL expand
9483 
9484 // Rotate Left by one: recognizes (dst << 1) | (dst >>> -1) on longs and expands to rolL_rReg_imm1.
// NOTE(review): the -1 right-shift count (immI_M1) is presumably equivalent
// to 63 because long shift counts are masked to 6 bits - confirm.
9485 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9486 %{
9487   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9488 
9489   expand %{
9490     rolL_rReg_imm1(dst, cr);
9491   %}
9492 %}
9493 
9494 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift) on longs and expands to rolL_rReg_imm8 using lshift.
9495 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9496 %{
9497   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // only when the two constant counts sum to a multiple of 64
9498   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9499 
9500   expand %{
9501     rolL_rReg_imm8(dst, lshift, cr);
9502   %}
9503 %}
9504 
9505 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (0 - shift)) on longs and expands to rolL_rReg_CL.
9506 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9507 %{
9508   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9509 
9510   expand %{
9511     rolL_rReg_CL(dst, shift, cr);
9512   %}
9513 %}
9514 
9515 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (64 - shift)) on longs and expands to rolL_rReg_CL.
9516 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9517 %{
9518   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9519 
9520   expand %{
9521     rolL_rReg_CL(dst, shift, cr);
9522   %}
9523 %}
9524 
9525 // ROR expand: helper with no match rule, instantiated from rorL_rReg_i1. Rotates a long register right in place (D1 /1, no count byte).
9526 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9527 %{
9528   effect(USE_DEF dst, KILL cr);
9529 
9530   format %{ "rorq    $dst" %}
9531   opcode(0xD1, 0x1); /* D1 /1 */
9532   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9533   ins_pipe(ialu_reg);
9534 %}
9535 
// ROR expand helper (no match rule), instantiated from rorL_rReg_i8:
// rotates a long register right in place by an 8-bit immediate count.
9536 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9537 %{
9538   effect(USE_DEF dst, USE shift, KILL cr);
9539 
9540   format %{ "rorq    $dst, $shift" %}
9541   opcode(0xC1, 0x1); /* C1 /1 ib */
9542   ins_encode(reg_opc_imm_wide(dst, shift));
9543   ins_pipe(ialu_reg);
9544 %}
9545 
// ROR expand helper (no match rule), instantiated from the rorL_rReg_Var_*
// rules: rotates a long register right in place by a variable count. The
// count uses the rcx_RegI class and dst the no_rcx_RegL class.
9546 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9547 %{
9548   effect(USE_DEF dst, USE shift, KILL cr);
9549 
9550   format %{ "rorq    $dst, $shift" %}
9551   opcode(0xD3, 0x1); /* D3 /1 */
9552   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9553   ins_pipe(ialu_reg_reg);
9554 %}
9555 // end of ROR expand
9556 
9557 // Rotate Right by one: recognizes (dst >>> 1) | (dst << -1) on longs and expands to rorL_rReg_imm1.
// NOTE(review): the -1 left-shift count (immI_M1) is presumably equivalent
// to 63 because long shift counts are masked to 6 bits - confirm.
9558 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9559 %{
9560   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9561 
9562   expand %{
9563     rorL_rReg_imm1(dst, cr);
9564   %}
9565 %}
9566 
9567 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift) on longs and expands to rorL_rReg_imm8 using rshift.
9568 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9569 %{
9570   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // only when the two constant counts sum to a multiple of 64
9571   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9572 
9573   expand %{
9574     rorL_rReg_imm8(dst, rshift, cr);
9575   %}
9576 %}
9577 
9578 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (0 - shift)) on longs and expands to rorL_rReg_CL.
9579 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9580 %{
9581   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9582 
9583   expand %{
9584     rorL_rReg_CL(dst, shift, cr);
9585   %}
9586 %}
9587 
9588 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (64 - shift)) on longs and expands to rorL_rReg_CL.
9589 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9590 %{
9591   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9592 
9593   expand %{
9594     rorL_rReg_CL(dst, shift, cr);
9595   %}
9596 %}
9597 
9598 // Logical Instructions
9599 
9600 // Integer Logical Instructions
9601 
9602 // And Instructions
9603 // And Register with Register: int dst = dst & src, computed in place; flags are declared killed.
9604 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9605 %{
9606   match(Set dst (AndI dst src));
9607   effect(KILL cr);
9608 
9609   format %{ "andl    $dst, $src\t# int" %}
9610   opcode(0x23);
9611   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9612   ins_pipe(ialu_reg_reg);
9613 %}
9614 
9615 // And Register with Immediate 255: the mask is implemented as the byte zero-extension shown in the format (movzbl dst, dst); no flags operand is declared.
9616 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9617 %{
9618   match(Set dst (AndI dst src));
9619 
9620   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9621   opcode(0x0F, 0xB6); /* two opcode bytes: 0F B6 */
9622   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // dst is both source and destination
9623   ins_pipe(ialu_reg);
9624 %}
9625 
9626 // And Register with Immediate 255 and promote to long: folds ConvI2L(AndI src 255) into the single movzbl shown in the format.
9627 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9628 %{
9629   match(Set dst (ConvI2L (AndI src mask)));
9630 
9631   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9632   opcode(0x0F, 0xB6); /* two opcode bytes: 0F B6 */
9633   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9634   ins_pipe(ialu_reg);
9635 %}
9636 
9637 // And Register with Immediate 65535: the mask is implemented as the 16-bit zero-extension shown in the format (movzwl dst, dst); no flags operand is declared.
9638 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9639 %{
9640   match(Set dst (AndI dst src));
9641 
9642   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9643   opcode(0x0F, 0xB7); /* two opcode bytes: 0F B7 */
9644   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // dst is both source and destination
9645   ins_pipe(ialu_reg);
9646 %}
9647 
9648 // And Register with Immediate 65535 and promote to long: one instruct covers ConvI2L(AndI src mask)
9649 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9650 %{
9651   match(Set dst (ConvI2L (AndI src mask)));
9652   // dst (rRegL) and src (rRegI) are distinct operands here; formatted as movzwl $dst, $src.
9653   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9654   opcode(0x0F, 0xB7);
9655   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9656   ins_pipe(ialu_reg);
9657 %}
9658 
9659 // And Register with Immediate: general immI form, dst = dst & src, formatted as andl
9660 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9661 %{
9662   match(Set dst (AndI dst src));
9663   effect(KILL cr);
9664   // Primary opcode 0x81 with secondary 0x04 (the "/4" of the opcode comment); src is also handed to Con8or32.
9665   format %{ "andl    $dst, $src\t# int" %}
9666   opcode(0x81, 0x04); /* Opcode 81 /4 */
9667   ins_encode(OpcSErm(dst, src), Con8or32(src));
9668   ins_pipe(ialu_reg);
9669 %}
9670 
9671 // And Register with Memory: the LoadI of src is matched together with the AndI (dst = dst & [src])
9672 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9673 %{
9674   match(Set dst (AndI dst (LoadI src)));
9675   effect(KILL cr);
9676   // Explicit ins_cost(125); the register-register form andI_rReg declares no ins_cost.
9677   ins_cost(125);
9678   format %{ "andl    $dst, $src\t# int" %}
9679   opcode(0x23);
9680   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9681   ins_pipe(ialu_reg_mem);
9682 %}
9683 
9684 // And Memory with Register: load, and, store on the same memory operand (StoreI dst (AndI (LoadI dst) src))
9685 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9686 %{
9687   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9688   effect(KILL cr);
9689   // Note the (src, dst) argument order passed to REX_reg_mem/reg_mem below: register first, memory second.
9690   ins_cost(150);
9691   format %{ "andl    $dst, $src\t# int" %}
9692   opcode(0x21); /* Opcode 21 /r */
9693   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9694   ins_pipe(ialu_mem_reg);
9695 %}
9696 
9697 // And Memory with Immediate: load, and with immI, store on the same memory operand
9698 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9699 %{
9700   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9701   effect(KILL cr);
9702   // The secondary opcode (0x4) is passed to RM_opc_mem together with the memory operand.
9703   ins_cost(125);
9704   format %{ "andl    $dst, $src\t# int" %}
9705   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9706   ins_encode(REX_mem(dst), OpcSE(src),
9707              RM_opc_mem(secondary, dst), Con8or32(src));
9708   ins_pipe(ialu_mem_imm);
9709 %}
9710 
9711 // Or Instructions
9712 // Or Register with Register: in-place OrI, dst = dst | src
9713 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9714 %{
9715   match(Set dst (OrI dst src));
9716   effect(KILL cr);
9717   // cr appears only as a KILL effect. Formatted as orl, opcode 0x0B.
9718   format %{ "orl     $dst, $src\t# int" %}
9719   opcode(0x0B);
9720   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9721   ins_pipe(ialu_reg_reg);
9722 %}
9723 
9724 // Or Register with Immediate: dst = dst | src for an immI operand, formatted as orl
9725 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9726 %{
9727   match(Set dst (OrI dst src));
9728   effect(KILL cr);
9729   // Same encode classes as andI_rReg_imm; only the secondary opcode differs (0x01 here).
9730   format %{ "orl     $dst, $src\t# int" %}
9731   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9732   ins_encode(OpcSErm(dst, src), Con8or32(src));
9733   ins_pipe(ialu_reg);
9734 %}
9735 
9736 // Or Register with Memory: the LoadI of src is matched together with the OrI (dst = dst | [src])
9737 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9738 %{
9739   match(Set dst (OrI dst (LoadI src)));
9740   effect(KILL cr);
9741   // Explicit ins_cost(125); the register-register form orI_rReg declares no ins_cost.
9742   ins_cost(125);
9743   format %{ "orl     $dst, $src\t# int" %}
9744   opcode(0x0B);
9745   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9746   ins_pipe(ialu_reg_mem);
9747 %}
9748 
9749 // Or Memory with Register: load, or, store on the same memory operand (StoreI dst (OrI (LoadI dst) src))
9750 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9751 %{
9752   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9753   effect(KILL cr);
9754   // Note the (src, dst) argument order passed to REX_reg_mem/reg_mem below: register first, memory second.
9755   ins_cost(150);
9756   format %{ "orl     $dst, $src\t# int" %}
9757   opcode(0x09); /* Opcode 09 /r */
9758   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9759   ins_pipe(ialu_mem_reg);
9760 %}
9761 
9762 // Or Memory with Immediate: load, or with immI, store on the same memory operand
9763 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9764 %{
9765   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9766   effect(KILL cr);
9767   // The secondary opcode (0x1) is passed to RM_opc_mem together with the memory operand.
9768   ins_cost(125);
9769   format %{ "orl     $dst, $src\t# int" %}
9770   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9771   ins_encode(REX_mem(dst), OpcSE(src),
9772              RM_opc_mem(secondary, dst), Con8or32(src));
9773   ins_pipe(ialu_mem_imm);
9774 %}
9775 
9776 // Xor Instructions
9777 // Xor Register with Register: in-place XorI, dst = dst ^ src
9778 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9779 %{
9780   match(Set dst (XorI dst src));
9781   effect(KILL cr);
9782   // cr appears only as a KILL effect. Formatted as xorl, opcode 0x33.
9783   format %{ "xorl    $dst, $src\t# int" %}
9784   opcode(0x33);
9785   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9786   ins_pipe(ialu_reg_reg);
9787 %}
9788 
9789 // Xor Register with Immediate -1: XorI with an immI_M1 operand is emitted as a single notl
9790 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9791   match(Set dst (XorI dst imm));
9792   // No flags operand or KILL effect is declared here, unlike the general xorI_rReg_imm form.
9793   format %{ "notl    $dst" %}
9794   ins_encode %{
9795      __ notl($dst$$Register);
9796   %}
9797   ins_pipe(ialu_reg);
9798 %}
9799 
9800 // Xor Register with Immediate: dst = dst ^ src for an immI operand, formatted as xorl
9801 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9802 %{
9803   match(Set dst (XorI dst src));
9804   effect(KILL cr);
9805   // Same encode classes as andI_rReg_imm; only the secondary opcode differs (0x06 here).
9806   format %{ "xorl    $dst, $src\t# int" %}
9807   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9808   ins_encode(OpcSErm(dst, src), Con8or32(src));
9809   ins_pipe(ialu_reg);
9810 %}
9811 
9812 // Xor Register with Memory: the LoadI of src is matched together with the XorI (dst = dst ^ [src])
9813 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9814 %{
9815   match(Set dst (XorI dst (LoadI src)));
9816   effect(KILL cr);
9817   // Explicit ins_cost(125); the register-register form xorI_rReg declares no ins_cost.
9818   ins_cost(125);
9819   format %{ "xorl    $dst, $src\t# int" %}
9820   opcode(0x33);
9821   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9822   ins_pipe(ialu_reg_mem);
9823 %}
9824 
9825 // Xor Memory with Register: load, xor, store on the same memory operand (StoreI dst (XorI (LoadI dst) src))
9826 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9827 %{
9828   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9829   effect(KILL cr);
9830   // Note the (src, dst) argument order passed to REX_reg_mem/reg_mem below: register first, memory second.
9831   ins_cost(150);
9832   format %{ "xorl    $dst, $src\t# int" %}
9833   opcode(0x31); /* Opcode 31 /r */
9834   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9835   ins_pipe(ialu_mem_reg);
9836 %}
9837 
9838 // Xor Memory with Immediate: load, xor with immI, store on the same memory operand
9839 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9840 %{
9841   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9842   effect(KILL cr);
9843   // The secondary opcode (0x6) is passed to RM_opc_mem together with the memory operand.
9844   ins_cost(125);
9845   format %{ "xorl    $dst, $src\t# int" %}
9846   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9847   ins_encode(REX_mem(dst), OpcSE(src),
9848              RM_opc_mem(secondary, dst), Con8or32(src));
9849   ins_pipe(ialu_mem_imm);
9850 %}
9851 
9852 
9853 // Long Logical Instructions
9854 
9855 // And Instructions
9856 // And Register with Register: in-place AndL, dst = dst & src
9857 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9858 %{
9859   match(Set dst (AndL dst src));
9860   effect(KILL cr);
9861   // Same opcode (0x23) as the int form andI_rReg; the difference is the REX_reg_reg_wide encode class.
9862   format %{ "andq    $dst, $src\t# long" %}
9863   opcode(0x23);
9864   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9865   ins_pipe(ialu_reg_reg);
9866 %}
9867 
9868 // And Register with Immediate 255: AndL with an immL_255 operand, formatted as movzbq on dst itself
9869 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9870 %{
9871   match(Set dst (AndL dst src));
9872   // No flags operand or KILL effect is declared; uses REX_reg_reg_wide where the int form uses REX_reg_breg.
9873   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9874   opcode(0x0F, 0xB6);
9875   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9876   ins_pipe(ialu_reg);
9877 %}
9878 
9879 // And Register with Immediate 65535: AndL with an immL_65535 operand, formatted as movzwq on dst itself
9880 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9881 %{
9882   match(Set dst (AndL dst src));
9883   // No flags operand or KILL effect is declared for this form.
9884   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9885   opcode(0x0F, 0xB7);
9886   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9887   ins_pipe(ialu_reg);
9888 %}
9889 
9890 // And Register with Immediate: dst = dst & src; the immediate operand class is immL32, not a full immL
9891 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9892 %{
9893   match(Set dst (AndL dst src));
9894   effect(KILL cr);
9895   // Long counterpart of andI_rReg_imm: OpcSErm_wide replaces OpcSErm, opcode bytes are the same (0x81, 0x04).
9896   format %{ "andq    $dst, $src\t# long" %}
9897   opcode(0x81, 0x04); /* Opcode 81 /4 */
9898   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9899   ins_pipe(ialu_reg);
9900 %}
9901 
9902 // And Register with Memory: the LoadL of src is matched together with the AndL (dst = dst & [src])
9903 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9904 %{
9905   match(Set dst (AndL dst (LoadL src)));
9906   effect(KILL cr);
9907   // Explicit ins_cost(125); encoded with REX_reg_mem_wide.
9908   ins_cost(125);
9909   format %{ "andq    $dst, $src\t# long" %}
9910   opcode(0x23);
9911   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9912   ins_pipe(ialu_reg_mem);
9913 %}
9914 
9915 // And Memory with Register: load, and, store on the same memory operand (StoreL dst (AndL (LoadL dst) src))
9916 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9917 %{
9918   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9919   effect(KILL cr);
9920   // Note the (src, dst) argument order passed to REX_reg_mem_wide/reg_mem below: register first, memory second.
9921   ins_cost(150);
9922   format %{ "andq    $dst, $src\t# long" %}
9923   opcode(0x21); /* Opcode 21 /r */
9924   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9925   ins_pipe(ialu_mem_reg);
9926 %}
9927 
9928 // And Memory with Immediate: load, and with immL32, store on the same memory operand
9929 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9930 %{
9931   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9932   effect(KILL cr);
9933   // Long counterpart of andI_mem_imm: REX_mem_wide replaces REX_mem; the remaining encode classes are the same.
9934   ins_cost(125);
9935   format %{ "andq    $dst, $src\t# long" %}
9936   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9937   ins_encode(REX_mem_wide(dst), OpcSE(src),
9938              RM_opc_mem(secondary, dst), Con8or32(src));
9939   ins_pipe(ialu_mem_imm);
9940 %}
9941 
9942 // Or Instructions
9943 // Or Register with Register: in-place OrL, dst = dst | src
9944 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9945 %{
9946   match(Set dst (OrL dst src));
9947   effect(KILL cr);
9948   // Same opcode (0x0B) as the int form orI_rReg; the difference is the REX_reg_reg_wide encode class.
9949   format %{ "orq     $dst, $src\t# long" %}
9950   opcode(0x0B);
9951   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9952   ins_pipe(ialu_reg_reg);
9953 %}
9954 
9955 // Use any_RegP to match R15 (TLS register) without spilling.
9956 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9957   match(Set dst (OrL dst (CastP2X src)));
9958   effect(KILL cr);
9959   // The CastP2X of the pointer operand is folded into the match; format, opcode and encoding equal orL_rReg.
9960   format %{ "orq     $dst, $src\t# long" %}
9961   opcode(0x0B);
9962   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9963   ins_pipe(ialu_reg_reg);
9964 %}
9965 
9966 
9967 // Or Register with Immediate: dst = dst | src; the immediate operand class is immL32, not a full immL
9968 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9969 %{
9970   match(Set dst (OrL dst src));
9971   effect(KILL cr);
9972   // Long counterpart of orI_rReg_imm: OpcSErm_wide replaces OpcSErm, opcode bytes are the same (0x81, 0x01).
9973   format %{ "orq     $dst, $src\t# long" %}
9974   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9975   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9976   ins_pipe(ialu_reg);
9977 %}
9978 
9979 // Or Register with Memory: the LoadL of src is matched together with the OrL (dst = dst | [src])
9980 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9981 %{
9982   match(Set dst (OrL dst (LoadL src)));
9983   effect(KILL cr);
9984   // Explicit ins_cost(125); encoded with REX_reg_mem_wide.
9985   ins_cost(125);
9986   format %{ "orq     $dst, $src\t# long" %}
9987   opcode(0x0B);
9988   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9989   ins_pipe(ialu_reg_mem);
9990 %}
9991 
9992 // Or Memory with Register: load, or, store on the same memory operand (StoreL dst (OrL (LoadL dst) src))
9993 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9994 %{
9995   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9996   effect(KILL cr);
9997   // Note the (src, dst) argument order passed to REX_reg_mem_wide/reg_mem below: register first, memory second.
9998   ins_cost(150);
9999   format %{ "orq     $dst, $src\t# long" %}
10000   opcode(0x09); /* Opcode 09 /r */
10001   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10002   ins_pipe(ialu_mem_reg);
10003 %}
10004 
10005 // Or Memory with Immediate: load, or with immL32, store on the same memory operand
10006 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10007 %{
10008   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10009   effect(KILL cr);
10010   // Long counterpart of orI_mem_imm: REX_mem_wide replaces REX_mem; the remaining encode classes are the same.
10011   ins_cost(125);
10012   format %{ "orq     $dst, $src\t# long" %}
10013   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10014   ins_encode(REX_mem_wide(dst), OpcSE(src),
10015              RM_opc_mem(secondary, dst), Con8or32(src));
10016   ins_pipe(ialu_mem_imm);
10017 %}
10018 
10019 // Xor Instructions
10020 // Xor Register with Register: in-place XorL, dst = dst ^ src
10021 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10022 %{
10023   match(Set dst (XorL dst src));
10024   effect(KILL cr);
10025   // Same opcode (0x33) as the int form xorI_rReg; the difference is the REX_reg_reg_wide encode class.
10026   format %{ "xorq    $dst, $src\t# long" %}
10027   opcode(0x33);
10028   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10029   ins_pipe(ialu_reg_reg);
10030 %}
10031 
10032 // Xor Register with Immediate -1: XorL with an immL_M1 operand is emitted as a single notq
10033 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10034   match(Set dst (XorL dst imm));
10035   // No flags operand or KILL effect is declared here, unlike the general xorL_rReg_imm form.
10036   format %{ "notq   $dst" %}
10037   ins_encode %{
10038      __ notq($dst$$Register);
10039   %}
10040   ins_pipe(ialu_reg);
10041 %}
10042 
10043 // Xor Register with Immediate: dst = dst ^ src; the immediate operand class is immL32, not a full immL
10044 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10045 %{
10046   match(Set dst (XorL dst src));
10047   effect(KILL cr);
10048   // Long counterpart of xorI_rReg_imm: OpcSErm_wide replaces OpcSErm, opcode bytes are the same (0x81, 0x06).
10049   format %{ "xorq    $dst, $src\t# long" %}
10050   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10051   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10052   ins_pipe(ialu_reg);
10053 %}
10054 
10055 // Xor Register with Memory: the LoadL of src is matched together with the XorL (dst = dst ^ [src])
10056 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10057 %{
10058   match(Set dst (XorL dst (LoadL src)));
10059   effect(KILL cr);
10060   // Explicit ins_cost(125); encoded with REX_reg_mem_wide.
10061   ins_cost(125);
10062   format %{ "xorq    $dst, $src\t# long" %}
10063   opcode(0x33);
10064   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10065   ins_pipe(ialu_reg_mem);
10066 %}
10067 
10068 // Xor Memory with Register: load, xor, store on the same memory operand (StoreL dst (XorL (LoadL dst) src))
10069 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10070 %{
10071   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10072   effect(KILL cr);
10073   // Note the (src, dst) argument order passed to REX_reg_mem_wide/reg_mem below: register first, memory second.
10074   ins_cost(150);
10075   format %{ "xorq    $dst, $src\t# long" %}
10076   opcode(0x31); /* Opcode 31 /r */
10077   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10078   ins_pipe(ialu_mem_reg);
10079 %}
10080 
10081 // Xor Memory with Immediate: load, xor with immL32, store on the same memory operand
10082 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10083 %{
10084   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10085   effect(KILL cr);
10086   // Long counterpart of xorI_mem_imm: REX_mem_wide replaces REX_mem; the remaining encode classes are the same.
10087   ins_cost(125);
10088   format %{ "xorq    $dst, $src\t# long" %}
10089   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10090   ins_encode(REX_mem_wide(dst), OpcSE(src),
10091              RM_opc_mem(secondary, dst), Con8or32(src));
10092   ins_pipe(ialu_mem_imm);
10093 %}
10094 
10095 // Convert Int to Boolean: Conv2B of an rRegI source into an rRegI destination
10096 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10097 %{
10098   match(Set dst (Conv2B src));
10099   effect(KILL cr);
10100   // Three emitted pieces, in ins_encode order: testl src,src; setnz dst; movzbl dst,dst. No opcode() is declared.
10101   format %{ "testl   $src, $src\t# ci2b\n\t"
10102             "setnz   $dst\n\t"
10103             "movzbl  $dst, $dst" %}
10104   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10105              setNZ_reg(dst),
10106              REX_reg_breg(dst, dst), // movzbl
10107              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10108   ins_pipe(pipe_slow); // XXX
10109 %}
10110 
10111 // Convert Pointer to Boolean: Conv2B of an rRegP source into an rRegI destination
10112 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10113 %{
10114   match(Set dst (Conv2B src));
10115   effect(KILL cr);
10116   // Same sequence as convI2B except that the test uses REX_reg_reg_wide (testq) on the pointer register.
10117   format %{ "testq   $src, $src\t# cp2b\n\t"
10118             "setnz   $dst\n\t"
10119             "movzbl  $dst, $dst" %}
10120   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10121              setNZ_reg(dst),
10122              REX_reg_breg(dst, dst), // movzbl
10123              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10124   ins_pipe(pipe_slow); // XXX
10125 %}
10126 
10127 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10128 %{
10129   match(Set dst (CmpLTMask p q));
10130   effect(KILL cr);
10131   // General CmpLTMask of two registers; the format lists four steps: cmpl p,q; setlt dst; movzbl dst,dst; negl dst.
10132   ins_cost(400); // XXX
10133   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10134             "setlt   $dst\n\t"
10135             "movzbl  $dst, $dst\n\t"
10136             "negl    $dst" %}
10137   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10138              setLT_reg(dst),
10139              REX_reg_breg(dst, dst), // movzbl
10140              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10141              neg_reg(dst));
10142   ins_pipe(pipe_slow);
10143 %}
10144 
10145 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10146 %{
10147   match(Set dst (CmpLTMask dst zero));
10148   effect(KILL cr);
10149   // Compare-against-zero form, in place on dst: one sarl by 0x1F instead of the four-step cmpLTMask sequence.
10150   ins_cost(100); // XXX
10151   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10152   opcode(0xC1, 0x7);  /* C1 /7 ib */
10153   ins_encode(reg_opc_imm(dst, 0x1F));
10154   ins_pipe(ialu_reg);
10155 %}
10156 
10157 
10158 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr)
10159 %{
10160   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10161   effect(TEMP tmp, KILL cr);
10162   // Fuses the whole tree (CmpLTMask(p,q) & y) + (p - q) into four instructions; tmp is a TEMP scratch register.
10163   ins_cost(400); // XXX
10164   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10165             "sbbl    $tmp, $tmp\n\t"
10166             "andl    $tmp, $y\n\t"
10167             "addl    $p, $tmp" %}
10168   ins_encode %{
10169     Register Rp = $p$$Register;
10170     Register Rq = $q$$Register;
10171     Register Ry = $y$$Register;
10172     Register Rt = $tmp$$Register;
10173     __ subl(Rp, Rq); // p - q, computed in place in p
10174     __ sbbl(Rt, Rt); // tmp minus itself with borrow: presumably the mask derived from the subl's carry
10175     __ andl(Rt, Ry); // keep y only where the mask is set
10176     __ addl(Rp, Rt); // add the masked y to the difference
10177   %}
10178   ins_pipe(pipe_cmplt);
10179 %}
10180 
10181 //---------- FP Instructions------------------------------------------------
10182 
10183 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10184 %{
10185   match(Set cr (CmpF src1 src2));
10186   // Float compare into rFlagsRegU: ucomiss followed by cmpfp_fixup; the format text shows the flag rewrite after a NaN.
10187   ins_cost(145);
10188   format %{ "ucomiss $src1, $src2\n\t"
10189             "jnp,s   exit\n\t"
10190             "pushfq\t# saw NaN, set CF\n\t"
10191             "andq    [rsp], #0xffffff2b\n\t"
10192             "popfq\n"
10193     "exit:   nop\t# avoid branch to branch" %}
10194   opcode(0x0F, 0x2E);
10195   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10196              cmpfp_fixup);
10197   ins_pipe(pipe_slow);
10198 %}
10199 
10200 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10201   match(Set cr (CmpF src1 src2));
10202   // Float compare into rFlagsRegUCF: bare ucomiss, no NaN fixup. Cost 100 like the other *CF compares (cmpD_cc_reg_CF, cmpF_cc_memCF).
10203   ins_cost(100);
10204   format %{ "ucomiss $src1, $src2" %}
10205   ins_encode %{
10206     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10207   %}
10208   ins_pipe(pipe_slow);
10209 %}
10210 
10211 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10212 %{
10213   match(Set cr (CmpF src1 (LoadF src2)));
10214   // Memory-operand variant of cmpF_cc_reg: the LoadF is folded into ucomiss, then cmpfp_fixup is applied.
10215   ins_cost(145);
10216   format %{ "ucomiss $src1, $src2\n\t"
10217             "jnp,s   exit\n\t"
10218             "pushfq\t# saw NaN, set CF\n\t"
10219             "andq    [rsp], #0xffffff2b\n\t"
10220             "popfq\n"
10221     "exit:   nop\t# avoid branch to branch" %}
10222   opcode(0x0F, 0x2E);
10223   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10224              cmpfp_fixup);
10225   ins_pipe(pipe_slow);
10226 %}
10227 
10228 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10229   match(Set cr (CmpF src1 (LoadF src2)));
10230   // rFlagsRegUCF variant of cmpF_cc_mem: same ucomiss encoding, but no cmpfp_fixup and a lower cost (100).
10231   ins_cost(100);
10232   format %{ "ucomiss $src1, $src2" %}
10233   opcode(0x0F, 0x2E);
10234   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10235   ins_pipe(pipe_slow);
10236 %}
10237 
10238 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
10239   match(Set cr (CmpF src con));
10240   // Constant variant of cmpF_cc_reg: the immF is addressed through $constantaddress, then emit_cmpfp_fixup runs.
10241   ins_cost(145);
10242   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10243             "jnp,s   exit\n\t"
10244             "pushfq\t# saw NaN, set CF\n\t"
10245             "andq    [rsp], #0xffffff2b\n\t"
10246             "popfq\n"
10247     "exit:   nop\t# avoid branch to branch" %}
10248   ins_encode %{
10249     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10250     emit_cmpfp_fixup(_masm);
10251   %}
10252   ins_pipe(pipe_slow);
10253 %}
10254 
10255 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10256   match(Set cr (CmpF src con)); // rFlagsRegUCF variant of cmpF_cc_imm: ucomiss against the constant, no fixup
10257   ins_cost(100);
10258   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10259   ins_encode %{
10260     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10261   %}
10262   ins_pipe(pipe_slow);
10263 %}
10264 
10265 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10266 %{
10267   match(Set cr (CmpD src1 src2));
10268   // Double compare into rFlagsRegU: ucomisd followed by cmpfp_fixup. OpcP (0x66) is listed before the REX encode class.
10269   ins_cost(145);
10270   format %{ "ucomisd $src1, $src2\n\t"
10271             "jnp,s   exit\n\t"
10272             "pushfq\t# saw NaN, set CF\n\t"
10273             "andq    [rsp], #0xffffff2b\n\t"
10274             "popfq\n"
10275     "exit:   nop\t# avoid branch to branch" %}
10276   opcode(0x66, 0x0F, 0x2E);
10277   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10278              cmpfp_fixup);
10279   ins_pipe(pipe_slow);
10280 %}
10281 
10282 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10283   match(Set cr (CmpD src1 src2));
10284   // Double compare into rFlagsRegUCF: bare ucomisd with no NaN fixup, hence the lower cost than cmpD_cc_reg.
10285   ins_cost(100);
10286   format %{ "ucomisd $src1, $src2" %}
10287   ins_encode %{
10288     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10289   %}
10290   ins_pipe(pipe_slow);
10291 %}
10292 
10293 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10294 %{
10295   match(Set cr (CmpD src1 (LoadD src2)));
10296   // Memory-operand variant of cmpD_cc_reg: the LoadD is folded into ucomisd, then cmpfp_fixup is applied.
10297   ins_cost(145);
10298   format %{ "ucomisd $src1, $src2\n\t"
10299             "jnp,s   exit\n\t"
10300             "pushfq\t# saw NaN, set CF\n\t"
10301             "andq    [rsp], #0xffffff2b\n\t"
10302             "popfq\n"
10303     "exit:   nop\t# avoid branch to branch" %}
10304   opcode(0x66, 0x0F, 0x2E);
10305   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10306              cmpfp_fixup);
10307   ins_pipe(pipe_slow);
10308 %}
10309 
10310 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10311   match(Set cr (CmpD src1 (LoadD src2)));
10312   // rFlagsRegUCF variant of cmpD_cc_mem: same ucomisd encoding, but no cmpfp_fixup and a lower cost (100).
10313   ins_cost(100);
10314   format %{ "ucomisd $src1, $src2" %}
10315   opcode(0x66, 0x0F, 0x2E);
10316   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10317   ins_pipe(pipe_slow);
10318 %}
10319 
10320 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10321   match(Set cr (CmpD src con));
10322   // Constant variant of cmpD_cc_reg: the immD is addressed through $constantaddress, then emit_cmpfp_fixup runs.
10323   ins_cost(145);
10324   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10325             "jnp,s   exit\n\t"
10326             "pushfq\t# saw NaN, set CF\n\t"
10327             "andq    [rsp], #0xffffff2b\n\t"
10328             "popfq\n"
10329     "exit:   nop\t# avoid branch to branch" %}
10330   ins_encode %{
10331     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10332     emit_cmpfp_fixup(_masm);
10333   %}
10334   ins_pipe(pipe_slow);
10335 %}
10336 
10337 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10338   match(Set cr (CmpD src con)); // rFlagsRegUCF variant of cmpD_cc_imm: ucomisd against the constant, no fixup
10339   ins_cost(100);
10340   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10341   ins_encode %{
10342     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10343   %}
10344   ins_pipe(pipe_slow);
10345 %}
10346 
10347 // Compare into -1,0,1: CmpF3 of two float registers, integer result in dst
10348 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10349 %{
10350   match(Set dst (CmpF3 src1 src2));
10351   effect(KILL cr);
10352   // Per the format: dst is preset to -1 and kept on parity or below; otherwise setne/movzbl produce 0 or 1.
10353   ins_cost(275);
10354   format %{ "ucomiss $src1, $src2\n\t"
10355             "movl    $dst, #-1\n\t"
10356             "jp,s    done\n\t"
10357             "jb,s    done\n\t"
10358             "setne   $dst\n\t"
10359             "movzbl  $dst, $dst\n"
10360     "done:" %}
10361   // The ucomiss itself is encoded from opcode(); everything after it comes from the cmpfp3 encode class.
10362   opcode(0x0F, 0x2E);
10363   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10364              cmpfp3(dst));
10365   ins_pipe(pipe_slow);
10366 %}
10367 
10368 // Compare into -1,0,1: CmpF3 of a float register against a folded LoadF, integer result in dst
10369 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10370 %{
10371   match(Set dst (CmpF3 src1 (LoadF src2)));
10372   effect(KILL cr);
10373   // Per the format: dst is preset to -1 and kept on parity or below; otherwise setne/movzbl produce 0 or 1.
10374   ins_cost(275);
10375   format %{ "ucomiss $src1, $src2\n\t"
10376             "movl    $dst, #-1\n\t"
10377             "jp,s    done\n\t"
10378             "jb,s    done\n\t"
10379             "setne   $dst\n\t"
10380             "movzbl  $dst, $dst\n"
10381     "done:" %}
10382   // The ucomiss itself is encoded from opcode(); everything after it comes from the cmpfp3 encode class.
10383   opcode(0x0F, 0x2E);
10384   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10385              cmpfp3(dst));
10386   ins_pipe(pipe_slow);
10387 %}
10388 
10389 // Compare into -1,0,1: CmpF3 of a float register against a constant-table float, integer result in dst
10390 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10391   match(Set dst (CmpF3 src con));
10392   effect(KILL cr);
10393   // Unlike cmpF_reg/cmpF_mem, the whole sequence is written out with the assembler instead of using cmpfp3.
10394   ins_cost(275);
10395   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10396             "movl    $dst, #-1\n\t"
10397             "jp,s    done\n\t"
10398             "jb,s    done\n\t"
10399             "setne   $dst\n\t"
10400             "movzbl  $dst, $dst\n"
10401     "done:" %}
10402   ins_encode %{
10403     Label L_done;
10404     Register Rdst = $dst$$Register;
10405     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10406     __ movl(Rdst, -1); // preset result; it is kept if either branch below is taken
10407     __ jcc(Assembler::parity, L_done); // parity set: keep -1
10408     __ jcc(Assembler::below, L_done); // below: keep -1
10409     __ setb(Assembler::notEqual, Rdst); // otherwise set the low byte from the not-equal condition
10410     __ movzbl(Rdst, Rdst); // widen that byte to the full int result
10411     __ bind(L_done);
10412   %}
10413   ins_pipe(pipe_slow);
10414 %}
10415 
10416 // Compare into -1,0,1: CmpD3 of two double registers, integer result in dst
10417 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10418 %{
10419   match(Set dst (CmpD3 src1 src2));
10420   effect(KILL cr);
10421   // Per the format: dst is preset to -1 and kept on parity or below; otherwise setne/movzbl produce 0 or 1.
10422   ins_cost(275);
10423   format %{ "ucomisd $src1, $src2\n\t"
10424             "movl    $dst, #-1\n\t"
10425             "jp,s    done\n\t"
10426             "jb,s    done\n\t"
10427             "setne   $dst\n\t"
10428             "movzbl  $dst, $dst\n"
10429     "done:" %}
10430   // The ucomisd itself is encoded from opcode(); everything after it comes from the cmpfp3 encode class.
10431   opcode(0x66, 0x0F, 0x2E);
10432   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10433              cmpfp3(dst));
10434   ins_pipe(pipe_slow);
10435 %}
10436 
10437 // Compare into -1,0,1: CmpD3 of a double register against a folded LoadD, integer result in dst
10438 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10439 %{
10440   match(Set dst (CmpD3 src1 (LoadD src2)));
10441   effect(KILL cr);
10442   // Per the format: dst is preset to -1 and kept on parity or below; otherwise setne/movzbl produce 0 or 1.
10443   ins_cost(275);
10444   format %{ "ucomisd $src1, $src2\n\t"
10445             "movl    $dst, #-1\n\t"
10446             "jp,s    done\n\t"
10447             "jb,s    done\n\t"
10448             "setne   $dst\n\t"
10449             "movzbl  $dst, $dst\n"
10450     "done:" %}
10451   // The ucomisd itself is encoded from opcode(); everything after it comes from the cmpfp3 encode class.
10452   opcode(0x66, 0x0F, 0x2E);
10453   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10454              cmpfp3(dst));
10455   ins_pipe(pipe_slow);
10456 %}
10457 
10458 // Compare into -1,0,1: CmpD3 of a double register against a constant-table double, integer result in dst
10459 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10460   match(Set dst (CmpD3 src con));
10461   effect(KILL cr);
10462   // Unlike cmpD_reg/cmpD_mem, the whole sequence is written out with the assembler instead of using cmpfp3.
10463   ins_cost(275);
10464   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10465             "movl    $dst, #-1\n\t"
10466             "jp,s    done\n\t"
10467             "jb,s    done\n\t"
10468             "setne   $dst\n\t"
10469             "movzbl  $dst, $dst\n"
10470     "done:" %}
10471   ins_encode %{
10472     Register Rdst = $dst$$Register;
10473     Label L_done;
10474     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10475     __ movl(Rdst, -1); // preset result; it is kept if either branch below is taken
10476     __ jcc(Assembler::parity, L_done); // parity set: keep -1
10477     __ jcc(Assembler::below, L_done); // below: keep -1
10478     __ setb(Assembler::notEqual, Rdst); // otherwise set the low byte from the not-equal condition
10479     __ movzbl(Rdst, Rdst); // widen that byte to the full int result
10480     __ bind(L_done);
10481   %}
10482   ins_pipe(pipe_slow);
10483 %}
10484 
10485 instruct addF_reg(regF dst, regF src)
10486 %{
10487   match(Set dst (AddF dst src));
10488   // Float add, register form: dst = dst + src, formatted as addss (opcode bytes F3 0F 58). No flags effect declared.
10489   format %{ "addss   $dst, $src" %}
10490   ins_cost(150); // XXX
10491   opcode(0xF3, 0x0F, 0x58);
10492   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10493   ins_pipe(pipe_slow);
10494 %}
10495 
10496 instruct addF_mem(regF dst, memory src)
10497 %{
10498   match(Set dst (AddF dst (LoadF src)));
10499   // Float add, memory form: the LoadF of src is folded into the addss; same opcode bytes as addF_reg.
10500   format %{ "addss   $dst, $src" %}
10501   ins_cost(150); // XXX
10502   opcode(0xF3, 0x0F, 0x58);
10503   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10504   ins_pipe(pipe_slow);
10505 %}
10506 
10507 instruct addF_imm(regF dst, immF con) %{
10508   match(Set dst (AddF dst con)); // float add of a constant: the immF is addressed through $constantaddress
10509   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10510   ins_cost(150); // XXX
10511   ins_encode %{
10512     __ addss($dst$$XMMRegister, $constantaddress($con));
10513   %}
10514   ins_pipe(pipe_slow);
10515 %}
10516 
10517 instruct addD_reg(regD dst, regD src)
10518 %{
10519   match(Set dst (AddD dst src));
10520   // Double add, register form: dst = dst + src, formatted as addsd (opcode bytes F2 0F 58). No flags effect declared.
10521   format %{ "addsd   $dst, $src" %}
10522   ins_cost(150); // XXX
10523   opcode(0xF2, 0x0F, 0x58);
10524   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10525   ins_pipe(pipe_slow);
10526 %}
10527 
10528 instruct addD_mem(regD dst, memory src)
10529 %{
10530   match(Set dst (AddD dst (LoadD src)));
10531   // Double add, memory form: the LoadD of src is folded into the addsd; same opcode bytes as addD_reg.
10532   format %{ "addsd   $dst, $src" %}
10533   ins_cost(150); // XXX
10534   opcode(0xF2, 0x0F, 0x58);
10535   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10536   ins_pipe(pipe_slow);
10537 %}
10538 
10539 instruct addD_imm(regD dst, immD con) %{
10540   match(Set dst (AddD dst con)); // double add of a constant: the immD is addressed through $constantaddress
10541   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10542   ins_cost(150); // XXX
10543   ins_encode %{
10544     __ addsd($dst$$XMMRegister, $constantaddress($con));
10545   %}
10546   ins_pipe(pipe_slow);
10547 %}
10548 
10549 instruct subF_reg(regF dst, regF src)
10550 %{
10551   match(Set dst (SubF dst src));
10552   // Float subtract, register form: dst = dst - src, formatted as subss (opcode bytes F3 0F 5C). No flags effect declared.
10553   format %{ "subss   $dst, $src" %}
10554   ins_cost(150); // XXX
10555   opcode(0xF3, 0x0F, 0x5C);
10556   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10557   ins_pipe(pipe_slow);
10558 %}
10559 
10560 instruct subF_mem(regF dst, memory src)
10561 %{
10562   match(Set dst (SubF dst (LoadF src)));
10563   // Float subtract, memory form: the LoadF of src is folded into the subss; same opcode bytes as subF_reg.
10564   format %{ "subss   $dst, $src" %}
10565   ins_cost(150); // XXX
10566   opcode(0xF3, 0x0F, 0x5C);
10567   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10568   ins_pipe(pipe_slow);
10569 %}
10570 
10571 instruct subF_imm(regF dst, immF con) %{
10572   match(Set dst (SubF dst con)); // float subtract of a constant: the immF is addressed through $constantaddress
10573   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10574   ins_cost(150); // XXX
10575   ins_encode %{
10576     __ subss($dst$$XMMRegister, $constantaddress($con));
10577   %}
10578   ins_pipe(pipe_slow);
10579 %}
10580 
10581 instruct subD_reg(regD dst, regD src)
10582 %{
10583   match(Set dst (SubD dst src));
10584   // Double subtract, register form: dst = dst - src, formatted as subsd (opcode bytes F2 0F 5C). No flags effect declared.
10585   format %{ "subsd   $dst, $src" %}
10586   ins_cost(150); // XXX
10587   opcode(0xF2, 0x0F, 0x5C);
10588   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10589   ins_pipe(pipe_slow);
10590 %}
10591 
10592 instruct subD_mem(regD dst, memory src)
10593 %{
10594   match(Set dst (SubD dst (LoadD src)));
10595   // Double subtract, memory form: the LoadD of src is folded into the subsd; same opcode bytes as subD_reg.
10596   format %{ "subsd   $dst, $src" %}
10597   ins_cost(150); // XXX
10598   opcode(0xF2, 0x0F, 0x5C);
10599   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10600   ins_pipe(pipe_slow);
10601 %}
10602 
10603 instruct subD_imm(regD dst, immD con) %{
10604   match(Set dst (SubD dst con)); // double subtract of a constant: the immD is addressed through $constantaddress
10605   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10606   ins_cost(150); // XXX
10607   ins_encode %{
10608     __ subsd($dst$$XMMRegister, $constantaddress($con));
10609   %}
10610   ins_pipe(pipe_slow);
10611 %}
10612 
10613 instruct mulF_reg(regF dst, regF src)
10614 %{
10615   match(Set dst (MulF dst src));
10616   // Float multiply, register form: dst = dst * src, formatted as mulss (opcode bytes F3 0F 59). No flags effect declared.
10617   format %{ "mulss   $dst, $src" %}
10618   ins_cost(150); // XXX
10619   opcode(0xF3, 0x0F, 0x59);
10620   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10621   ins_pipe(pipe_slow);
10622 %}
10623 
10624 instruct mulF_mem(regF dst, memory src)
10625 %{
10626   match(Set dst (MulF dst (LoadF src)));
10627   // Float multiply, memory form: the LoadF of src is folded into the mulss; same opcode bytes as mulF_reg.
10628   format %{ "mulss   $dst, $src" %}
10629   ins_cost(150); // XXX
10630   opcode(0xF3, 0x0F, 0x59);
10631   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10632   ins_pipe(pipe_slow);
10633 %}
10634 
// Float multiply by an immediate: dst = dst * con.
// The constant is addressed through $constantaddress($con), i.e. it is read
// from the constant table (see the format string).
10635 instruct mulF_imm(regF dst, immF con) %{
10636   match(Set dst (MulF dst con));
10637   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10638   ins_cost(150); // XXX
10639   ins_encode %{
10640     __ mulss($dst$$XMMRegister, $constantaddress($con));
10641   %}
10642   ins_pipe(pipe_slow);
10643 %}
10644 
// Double multiply, register-register: dst = dst * src.
// Emitted as mulsd with opcode bytes F2 0F 59.
10645 instruct mulD_reg(regD dst, regD src)
10646 %{
10647   match(Set dst (MulD dst src));
10648 
10649   format %{ "mulsd   $dst, $src" %}
10650   ins_cost(150); // XXX
10651   opcode(0xF2, 0x0F, 0x59);
10652   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10653   ins_pipe(pipe_slow);
10654 %}
10655 
// Double multiply with a memory operand: dst = dst * [src].
// The match rule folds the LoadD into the mulsd instruction (F2 0F 59).
10656 instruct mulD_mem(regD dst, memory src)
10657 %{
10658   match(Set dst (MulD dst (LoadD src)));
10659 
10660   format %{ "mulsd   $dst, $src" %}
10661   ins_cost(150); // XXX
10662   opcode(0xF2, 0x0F, 0x59);
10663   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10664   ins_pipe(pipe_slow);
10665 %}
10666 
// Double multiply by an immediate: dst = dst * con.
// The constant is addressed through $constantaddress($con), i.e. it is read
// from the constant table (see the format string).
10667 instruct mulD_imm(regD dst, immD con) %{
10668   match(Set dst (MulD dst con));
10669   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10670   ins_cost(150); // XXX
10671   ins_encode %{
10672     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10673   %}
10674   ins_pipe(pipe_slow);
10675 %}
10676 
// Float divide, register-register: dst = dst / src.
// Emitted as divss with opcode bytes F3 0F 5E.
10677 instruct divF_reg(regF dst, regF src)
10678 %{
10679   match(Set dst (DivF dst src));
10680 
10681   format %{ "divss   $dst, $src" %}
10682   ins_cost(150); // XXX
10683   opcode(0xF3, 0x0F, 0x5E);
10684   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10685   ins_pipe(pipe_slow);
10686 %}
10687 
// Float divide with a memory operand: dst = dst / [src].
// The match rule folds the LoadF into the divss instruction (F3 0F 5E).
10688 instruct divF_mem(regF dst, memory src)
10689 %{
10690   match(Set dst (DivF dst (LoadF src)));
10691 
10692   format %{ "divss   $dst, $src" %}
10693   ins_cost(150); // XXX
10694   opcode(0xF3, 0x0F, 0x5E);
10695   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10696   ins_pipe(pipe_slow);
10697 %}
10698 
// Float divide by an immediate: dst = dst / con.
// The constant is addressed through $constantaddress($con), i.e. it is read
// from the constant table (see the format string).
10699 instruct divF_imm(regF dst, immF con) %{
10700   match(Set dst (DivF dst con));
10701   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10702   ins_cost(150); // XXX
10703   ins_encode %{
10704     __ divss($dst$$XMMRegister, $constantaddress($con));
10705   %}
10706   ins_pipe(pipe_slow);
10707 %}
10708 
// Double divide, register-register: dst = dst / src.
// Emitted as divsd with opcode bytes F2 0F 5E.
10709 instruct divD_reg(regD dst, regD src)
10710 %{
10711   match(Set dst (DivD dst src));
10712 
10713   format %{ "divsd   $dst, $src" %}
10714   ins_cost(150); // XXX
10715   opcode(0xF2, 0x0F, 0x5E);
10716   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10717   ins_pipe(pipe_slow);
10718 %}
10719 
// Double divide with a memory operand: dst = dst / [src].
// The match rule folds the LoadD into the divsd instruction (F2 0F 5E).
10720 instruct divD_mem(regD dst, memory src)
10721 %{
10722   match(Set dst (DivD dst (LoadD src)));
10723 
10724   format %{ "divsd   $dst, $src" %}
10725   ins_cost(150); // XXX
10726   opcode(0xF2, 0x0F, 0x5E);
10727   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10728   ins_pipe(pipe_slow);
10729 %}
10730 
// Double divide by an immediate: dst = dst / con.
// The constant is addressed through $constantaddress($con), i.e. it is read
// from the constant table (see the format string).
10731 instruct divD_imm(regD dst, immD con) %{
10732   match(Set dst (DivD dst con));
10733   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10734   ins_cost(150); // XXX
10735   ins_encode %{
10736     __ divsd($dst$$XMMRegister, $constantaddress($con));
10737   %}
10738   ins_pipe(pipe_slow);
10739 %}
10740 
// Float square root, register-register.
// There is no direct float-sqrt node in the match rule: the pattern is the
// float->double->sqrt->float chain ConvD2F(SqrtD(ConvF2D src)), which is
// collapsed into a single sqrtss (F3 0F 51).
10741 instruct sqrtF_reg(regF dst, regF src)
10742 %{
10743   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10744 
10745   format %{ "sqrtss  $dst, $src" %}
10746   ins_cost(150); // XXX
10747   opcode(0xF3, 0x0F, 0x51);
10748   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10749   ins_pipe(pipe_slow);
10750 %}
10751 
// Float square root of a memory operand.
// Matches ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits one sqrtss (F3 0F 51)
// that reads its source directly from memory.
10752 instruct sqrtF_mem(regF dst, memory src)
10753 %{
10754   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10755 
10756   format %{ "sqrtss  $dst, $src" %}
10757   ins_cost(150); // XXX
10758   opcode(0xF3, 0x0F, 0x51);
10759   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10760   ins_pipe(pipe_slow);
10761 %}
10762 
// Float square root of an immediate.
// Matches ConvD2F(SqrtD(ConvF2D con)); the constant is read from the constant
// table via $constantaddress($con).
10763 instruct sqrtF_imm(regF dst, immF con) %{
10764   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10765   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10766   ins_cost(150); // XXX
10767   ins_encode %{
10768     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10769   %}
10770   ins_pipe(pipe_slow);
10771 %}
10772 
// Double square root, register-register: dst = sqrt(src).
// Emitted as sqrtsd with opcode bytes F2 0F 51.
10773 instruct sqrtD_reg(regD dst, regD src)
10774 %{
10775   match(Set dst (SqrtD src));
10776 
10777   format %{ "sqrtsd  $dst, $src" %}
10778   ins_cost(150); // XXX
10779   opcode(0xF2, 0x0F, 0x51);
10780   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10781   ins_pipe(pipe_slow);
10782 %}
10783 
// Double square root of a memory operand: dst = sqrt([src]).
// The match rule folds the LoadD into the sqrtsd instruction (F2 0F 51).
10784 instruct sqrtD_mem(regD dst, memory src)
10785 %{
10786   match(Set dst (SqrtD (LoadD src)));
10787 
10788   format %{ "sqrtsd  $dst, $src" %}
10789   ins_cost(150); // XXX
10790   opcode(0xF2, 0x0F, 0x51);
10791   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10792   ins_pipe(pipe_slow);
10793 %}
10794 
// Double square root of an immediate: dst = sqrt(con).
// The constant is read from the constant table via $constantaddress($con).
10795 instruct sqrtD_imm(regD dst, immD con) %{
10796   match(Set dst (SqrtD con));
10797   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10798   ins_cost(150); // XXX
10799   ins_encode %{
10800     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10801   %}
10802   ins_pipe(pipe_slow);
10803 %}
10804 
// Float absolute value, in place on dst.
// Per the format string this is an andps against a 0x7fffffff mask (clears the
// sign bit); the bytes are produced by the absF_encoding enc_class, which is
// defined outside this section.
10805 instruct absF_reg(regF dst)
10806 %{
10807   match(Set dst (AbsF dst));
10808 
10809   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10810   ins_encode(absF_encoding(dst));
10811   ins_pipe(pipe_slow);
10812 %}
10813 
// Double absolute value, in place on dst.
// Per the format string this is an andpd against a 0x7fffffffffffffff mask
// (clears the sign bit); the bytes are produced by the absD_encoding
// enc_class, which is defined outside this section.
10814 instruct absD_reg(regD dst)
10815 %{
10816   match(Set dst (AbsD dst));
10817 
10818   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10819             "# abs double by sign masking" %}
10820   ins_encode(absD_encoding(dst));
10821   ins_pipe(pipe_slow);
10822 %}
10823 
// Float negate, in place on dst.
// Per the format string this is an xorps against a 0x80000000 mask (flips the
// sign bit); the bytes are produced by the negF_encoding enc_class, which is
// defined outside this section.
10824 instruct negF_reg(regF dst)
10825 %{
10826   match(Set dst (NegF dst));
10827 
10828   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10829   ins_encode(negF_encoding(dst));
10830   ins_pipe(pipe_slow);
10831 %}
10832 
// Double negate, in place on dst.
// Per the format string this is an xorpd against a 0x8000000000000000 mask
// (flips the sign bit); the bytes are produced by the negD_encoding
// enc_class, which is defined outside this section.
10833 instruct negD_reg(regD dst)
10834 %{
10835   match(Set dst (NegD dst));
10836 
10837   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10838             "# neg double by sign flipping" %}
10839   ins_encode(negD_encoding(dst));
10840   ins_pipe(pipe_slow);
10841 %}
10842 
10843 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place on dst.
// The encoding brackets the two opcode bytes D9 FF (x87 FCOS) with
// Push_SrcXD(dst) / Push_ResultXD(dst); presumably those enc_classes move the
// XMM value onto the x87 stack and back -- confirm against their definitions.
10844 instruct cosD_reg(regD dst) %{
10845   match(Set dst (CosD dst));
10846 
10847   format %{ "dcos   $dst\n\t" %}
10848   opcode(0xD9, 0xFF);
10849   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10850   ins_pipe( pipe_slow );
10851 %}
10852 
// Double sine, in place on dst.
// The encoding brackets the two opcode bytes D9 FE (x87 FSIN) with
// Push_SrcXD(dst) / Push_ResultXD(dst); presumably those enc_classes move the
// XMM value onto the x87 stack and back -- confirm against their definitions.
10853 instruct sinD_reg(regD dst) %{
10854   match(Set dst (SinD dst));
10855 
10856   format %{ "dsin   $dst\n\t" %}
10857   opcode(0xD9, 0xFE);
10858   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10859   ins_pipe( pipe_slow );
10860 %}
10861 
// Double tangent, in place on dst.
// Emits fptan followed by "fstp st" between Push_SrcXD(dst) and
// Push_ResultXD(dst). NOTE(review): the fstp presumably discards the extra
// value fptan leaves on the x87 stack -- confirm against the ISA manual.
10862 instruct tanD_reg(regD dst) %{
10863   match(Set dst (TanD dst));
10864 
10865   format %{ "dtan   $dst\n\t" %}
10866   ins_encode( Push_SrcXD(dst),
10867               Opcode(0xD9), Opcode(0xF2),   //fptan
10868               Opcode(0xDD), Opcode(0xD8),   //fstp st
10869               Push_ResultXD(dst) );
10870   ins_pipe( pipe_slow );
10871 %}
10872 
// Base-10 logarithm of a double, in place on dst.
// The constant log_10(2) is pushed first (fldlg2), then the source operand,
// so that fyl2x can form log_10(2) * log_2(x).
10873 instruct log10D_reg(regD dst) %{
10874   // The source and result Double operands in XMM registers
10875   match(Set dst (Log10D dst));
10876   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10877   // fyl2x        ; compute log_10(2) * log_2(x)
10878   format %{ "fldlg2\t\t\t#Log10\n\t"
10879             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10880          %}
10881    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10882               Push_SrcXD(dst),
10883               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10884               Push_ResultXD(dst));
10885 
10886   ins_pipe( pipe_slow );
10887 %}
10888 
// Natural logarithm of a double, in place on dst.
// The constant log_e(2) is pushed first (fldln2), then the source operand,
// so that fyl2x can form log_e(2) * log_2(x).
10889 instruct logD_reg(regD dst) %{
10890   // The source and result Double operands in XMM registers
10891   match(Set dst (LogD dst));
10892   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10893   // fyl2x        ; compute log_e(2) * log_2(x)
10894   format %{ "fldln2\t\t\t#Log_e\n\t"
10895             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10896          %}
10897   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10898               Push_SrcXD(dst),
10899               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10900               Push_ResultXD(dst));
10901   ins_pipe( pipe_slow );
10902 %}
10903 
10904 
10905 
10906 //----------Arithmetic Conversion Instructions---------------------------------
10907 
// RoundFloat is a no-op here: zero cost, empty encoding, empty pipeline
// class, so matching this node emits no machine code.
10908 instruct roundFloat_nop(regF dst)
10909 %{
10910   match(Set dst (RoundFloat dst));
10911 
10912   ins_cost(0);
10913   ins_encode();
10914   ins_pipe(empty);
10915 %}
10916 
// RoundDouble is a no-op here: zero cost, empty encoding, empty pipeline
// class, so matching this node emits no machine code.
10917 instruct roundDouble_nop(regD dst)
10918 %{
10919   match(Set dst (RoundDouble dst));
10920 
10921   ins_cost(0);
10922   ins_encode();
10923   ins_pipe(empty);
10924 %}
10925 
// Float -> double conversion, register-register.
// Emitted as cvtss2sd with opcode bytes F3 0F 5A.
10926 instruct convF2D_reg_reg(regD dst, regF src)
10927 %{
10928   match(Set dst (ConvF2D src));
10929 
10930   format %{ "cvtss2sd $dst, $src" %}
10931   opcode(0xF3, 0x0F, 0x5A);
10932   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10933   ins_pipe(pipe_slow); // XXX
10934 %}
10935 
// Float -> double conversion with the float read from memory.
// The match rule folds the LoadF into cvtss2sd (F3 0F 5A).
10936 instruct convF2D_reg_mem(regD dst, memory src)
10937 %{
10938   match(Set dst (ConvF2D (LoadF src)));
10939 
10940   format %{ "cvtss2sd $dst, $src" %}
10941   opcode(0xF3, 0x0F, 0x5A);
10942   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10943   ins_pipe(pipe_slow); // XXX
10944 %}
10945 
// Double -> float conversion, register-register.
// Emitted as cvtsd2ss with opcode bytes F2 0F 5A.
10946 instruct convD2F_reg_reg(regF dst, regD src)
10947 %{
10948   match(Set dst (ConvD2F src));
10949 
10950   format %{ "cvtsd2ss $dst, $src" %}
10951   opcode(0xF2, 0x0F, 0x5A);
10952   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10953   ins_pipe(pipe_slow); // XXX
10954 %}
10955 
// Double -> float conversion with the double read from memory.
// The match rule folds the LoadD into cvtsd2ss (F2 0F 5A).
10956 instruct convD2F_reg_mem(regF dst, memory src)
10957 %{
10958   match(Set dst (ConvD2F (LoadD src)));
10959 
10960   format %{ "cvtsd2ss $dst, $src" %}
10961   opcode(0xF2, 0x0F, 0x5A);
10962   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10963   ins_pipe(pipe_slow); // XXX
10964 %}
10965 
10966 // XXX do mem variants
// Float -> int conversion.
// As laid out in the format string: a truncating cvttss2si is issued first;
// if the result equals 0x80000000 the source is spilled to the stack and the
// f2i_fixup routine is called, with the final value popped into dst.
// The compare clobbers the flags, hence KILL cr.
10967 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10968 %{
10969   match(Set dst (ConvF2I src));
10970   effect(KILL cr);
10971 
10972   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10973             "cmpl    $dst, #0x80000000\n\t"
10974             "jne,s   done\n\t"
10975             "subq    rsp, #8\n\t"
10976             "movss   [rsp], $src\n\t"
10977             "call    f2i_fixup\n\t"
10978             "popq    $dst\n"
10979     "done:   "%}
10980   opcode(0xF3, 0x0F, 0x2C);
10981   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10982              f2i_fixup(dst, src));
10983   ins_pipe(pipe_slow);
10984 %}
10985 
// Float -> long conversion.
// As laid out in the format string: a truncating cvttss2siq is issued first;
// if the result equals 0x8000000000000000 the source is spilled to the stack
// and the f2l_fixup routine is called, with the final value popped into dst.
// Uses the _wide REX encoding class for the 64-bit destination; KILLs flags.
10986 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10987 %{
10988   match(Set dst (ConvF2L src));
10989   effect(KILL cr);
10990 
10991   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10992             "cmpq    $dst, [0x8000000000000000]\n\t"
10993             "jne,s   done\n\t"
10994             "subq    rsp, #8\n\t"
10995             "movss   [rsp], $src\n\t"
10996             "call    f2l_fixup\n\t"
10997             "popq    $dst\n"
10998     "done:   "%}
10999   opcode(0xF3, 0x0F, 0x2C);
11000   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11001              f2l_fixup(dst, src));
11002   ins_pipe(pipe_slow);
11003 %}
11004 
// Double -> int conversion.
// As laid out in the format string: a truncating cvttsd2si is issued first;
// if the result equals 0x80000000 the source is spilled to the stack and the
// d2i_fixup routine is called, with the final value popped into dst.
// The compare clobbers the flags, hence KILL cr.
11005 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11006 %{
11007   match(Set dst (ConvD2I src));
11008   effect(KILL cr);
11009 
11010   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11011             "cmpl    $dst, #0x80000000\n\t"
11012             "jne,s   done\n\t"
11013             "subq    rsp, #8\n\t"
11014             "movsd   [rsp], $src\n\t"
11015             "call    d2i_fixup\n\t"
11016             "popq    $dst\n"
11017     "done:   "%}
11018   opcode(0xF2, 0x0F, 0x2C);
11019   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11020              d2i_fixup(dst, src));
11021   ins_pipe(pipe_slow);
11022 %}
11023 
// Double -> long conversion.
// As laid out in the format string: a truncating cvttsd2siq is issued first;
// if the result equals 0x8000000000000000 the source is spilled to the stack
// and the d2l_fixup routine is called, with the final value popped into dst.
// Uses the _wide REX encoding class for the 64-bit destination; KILLs flags.
11024 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11025 %{
11026   match(Set dst (ConvD2L src));
11027   effect(KILL cr);
11028 
11029   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11030             "cmpq    $dst, [0x8000000000000000]\n\t"
11031             "jne,s   done\n\t"
11032             "subq    rsp, #8\n\t"
11033             "movsd   [rsp], $src\n\t"
11034             "call    d2l_fixup\n\t"
11035             "popq    $dst\n"
11036     "done:   "%}
11037   opcode(0xF2, 0x0F, 0x2C);
11038   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11039              d2l_fixup(dst, src));
11040   ins_pipe(pipe_slow);
11041 %}
11042 
// Int -> float conversion from a general register via cvtsi2ss (F3 0F 2A).
// Only selected when UseXmmI2F is off; convXI2F_reg covers the other case.
11043 instruct convI2F_reg_reg(regF dst, rRegI src)
11044 %{
11045   predicate(!UseXmmI2F);
11046   match(Set dst (ConvI2F src));
11047 
11048   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11049   opcode(0xF3, 0x0F, 0x2A);
11050   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11051   ins_pipe(pipe_slow); // XXX
11052 %}
11053 
// Int -> float conversion with the int read from memory (cvtsi2ss, F3 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2F predicate.
11054 instruct convI2F_reg_mem(regF dst, memory src)
11055 %{
11056   match(Set dst (ConvI2F (LoadI src)));
11057 
11058   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11059   opcode(0xF3, 0x0F, 0x2A);
11060   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11061   ins_pipe(pipe_slow); // XXX
11062 %}
11063 
// Int -> double conversion from a general register via cvtsi2sd (F2 0F 2A).
// Only selected when UseXmmI2D is off; convXI2D_reg covers the other case.
11064 instruct convI2D_reg_reg(regD dst, rRegI src)
11065 %{
11066   predicate(!UseXmmI2D);
11067   match(Set dst (ConvI2D src));
11068 
11069   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11070   opcode(0xF2, 0x0F, 0x2A);
11071   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11072   ins_pipe(pipe_slow); // XXX
11073 %}
11074 
// Int -> double conversion with the int read from memory (cvtsi2sd, F2 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2D predicate.
11075 instruct convI2D_reg_mem(regD dst, memory src)
11076 %{
11077   match(Set dst (ConvI2D (LoadI src)));
11078 
11079   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11080   opcode(0xF2, 0x0F, 0x2A);
11081   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11082   ins_pipe(pipe_slow); // XXX
11083 %}
11084 
// Int -> float conversion used when UseXmmI2F is on.
// Two steps: movdl copies the int into the XMM destination, then cvtdq2ps
// converts it in place (source and destination are both dst).
11085 instruct convXI2F_reg(regF dst, rRegI src)
11086 %{
11087   predicate(UseXmmI2F);
11088   match(Set dst (ConvI2F src));
11089 
11090   format %{ "movdl $dst, $src\n\t"
11091             "cvtdq2psl $dst, $dst\t# i2f" %}
11092   ins_encode %{
11093     __ movdl($dst$$XMMRegister, $src$$Register);
11094     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11095   %}
11096   ins_pipe(pipe_slow); // XXX
11097 %}
11098 
// Int -> double conversion used when UseXmmI2D is on.
// Two steps: movdl copies the int into the XMM destination, then cvtdq2pd
// converts it in place (source and destination are both dst).
11099 instruct convXI2D_reg(regD dst, rRegI src)
11100 %{
11101   predicate(UseXmmI2D);
11102   match(Set dst (ConvI2D src));
11103 
11104   format %{ "movdl $dst, $src\n\t"
11105             "cvtdq2pdl $dst, $dst\t# i2d" %}
11106   ins_encode %{
11107     __ movdl($dst$$XMMRegister, $src$$Register);
11108     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11109   %}
11110   ins_pipe(pipe_slow); // XXX
11111 %}
11112 
// Long -> float conversion from a general register.
// Same opcode bytes as the int form (F3 0F 2A) but encoded with the _wide REX
// class; the format string shows the 64-bit cvtsi2ssq form.
11113 instruct convL2F_reg_reg(regF dst, rRegL src)
11114 %{
11115   match(Set dst (ConvL2F src));
11116 
11117   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11118   opcode(0xF3, 0x0F, 0x2A);
11119   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11120   ins_pipe(pipe_slow); // XXX
11121 %}
11122 
// Long -> float conversion with the long read from memory.
// The match rule folds the LoadL into cvtsi2ssq (F3 0F 2A, _wide REX class).
11123 instruct convL2F_reg_mem(regF dst, memory src)
11124 %{
11125   match(Set dst (ConvL2F (LoadL src)));
11126 
11127   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11128   opcode(0xF3, 0x0F, 0x2A);
11129   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11130   ins_pipe(pipe_slow); // XXX
11131 %}
11132 
// Long -> double conversion from a general register.
// Same opcode bytes as the int form (F2 0F 2A) but encoded with the _wide REX
// class; the format string shows the 64-bit cvtsi2sdq form.
11133 instruct convL2D_reg_reg(regD dst, rRegL src)
11134 %{
11135   match(Set dst (ConvL2D src));
11136 
11137   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11138   opcode(0xF2, 0x0F, 0x2A);
11139   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11140   ins_pipe(pipe_slow); // XXX
11141 %}
11142 
// Long -> double conversion with the long read from memory.
// The match rule folds the LoadL into cvtsi2sdq (F2 0F 2A, _wide REX class).
11143 instruct convL2D_reg_mem(regD dst, memory src)
11144 %{
11145   match(Set dst (ConvL2D (LoadL src)));
11146 
11147   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11148   opcode(0xF2, 0x0F, 0x2A);
11149   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11150   ins_pipe(pipe_slow); // XXX
11151 %}
11152 
// Int -> long conversion via movslq (sign-extending move, per the mnemonic).
// Zero-extending variants are handled by the *_zex rules below.
11153 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11154 %{
11155   match(Set dst (ConvI2L src));
11156 
11157   ins_cost(125);
11158   format %{ "movslq  $dst, $src\t# i2l" %}
11159   ins_encode %{
11160     __ movslq($dst$$Register, $src$$Register);
11161   %}
11162   ins_pipe(ialu_reg_reg);
11163 %}
11164 
11165 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11166 // %{
11167 //   match(Set dst (ConvI2L src));
11168 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11169 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11170 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11171 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11172 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11173 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11174 
11175 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11176 //   ins_encode(enc_copy(dst, src));
11177 // //   opcode(0x63); // needs REX.W
11178 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11179 //   ins_pipe(ialu_reg_reg);
11180 // %}
11181 
11182 // Zero-extend convert int to long
// Matches (ConvI2L src) AND-ed with an immL_32bits mask and replaces the pair
// with a single 32-bit movl emitted by enc_copy.
// NOTE(review): immL_32bits is presumably the constant 0xFFFFFFFF -- confirm
// against the operand definition.
11183 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11184 %{
11185   match(Set dst (AndL (ConvI2L src) mask));
11186 
11187   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11188   ins_encode(enc_copy(dst, src));
11189   ins_pipe(ialu_reg_reg);
11190 %}
11191 
11192 // Zero-extend convert int to long
// Memory form: matches (ConvI2L (LoadI src)) AND-ed with an immL_32bits mask
// and emits one 32-bit load (opcode 8B, movl) without the _wide REX class.
11193 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11194 %{
11195   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11196 
11197   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11198   opcode(0x8B);
11199   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11200   ins_pipe(ialu_reg_mem);
11201 %}
11202 
// Masking a long with an immL_32bits constant, done as a 32-bit movl.
// Uses enc_copy_always rather than enc_copy.
// NOTE(review): presumably so the move is emitted even when dst and src are
// the same register -- confirm against the enc_class definitions.
11203 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11204 %{
11205   match(Set dst (AndL src mask));
11206 
11207   format %{ "movl    $dst, $src\t# zero-extend long" %}
11208   ins_encode(enc_copy_always(dst, src));
11209   ins_pipe(ialu_reg_reg);
11210 %}
11211 
// Long -> int conversion, done as a 32-bit movl via enc_copy_always.
11212 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11213 %{
11214   match(Set dst (ConvL2I src));
11215 
11216   format %{ "movl    $dst, $src\t# l2i" %}
11217   ins_encode(enc_copy_always(dst, src));
11218   ins_pipe(ialu_reg_reg);
11219 %}
11220 
11221 
// MoveF2I, stack slot -> general register: loads the float stack slot with a
// plain 32-bit movl (opcode 8B) into an int register.
11222 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11223   match(Set dst (MoveF2I src));
11224   effect(DEF dst, USE src);
11225 
11226   ins_cost(125);
11227   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11228   opcode(0x8B);
11229   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11230   ins_pipe(ialu_reg_mem);
11231 %}
11232 
// MoveI2F, stack slot -> XMM register: loads the int stack slot with movss
// (F3 0F 10) into a float register.
11233 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11234   match(Set dst (MoveI2F src));
11235   effect(DEF dst, USE src);
11236 
11237   ins_cost(125);
11238   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11239   opcode(0xF3, 0x0F, 0x10);
11240   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11241   ins_pipe(pipe_slow);
11242 %}
11243 
// MoveD2L, stack slot -> general register: loads the double stack slot with
// a 64-bit movq (opcode 8B, _wide REX class) into a long register.
11244 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11245   match(Set dst (MoveD2L src));
11246   effect(DEF dst, USE src);
11247 
11248   ins_cost(125);
11249   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11250   opcode(0x8B);
11251   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11252   ins_pipe(ialu_reg_mem);
11253 %}
11254 
// MoveL2D, stack slot -> XMM register, variant selected when
// UseXmmLoadAndClearUpper is off: uses movlpd (66 0F 12).
// MoveL2D_stack_reg is the counterpart for when the flag is on.
11255 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11256   predicate(!UseXmmLoadAndClearUpper);
11257   match(Set dst (MoveL2D src));
11258   effect(DEF dst, USE src);
11259 
11260   ins_cost(125);
11261   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11262   opcode(0x66, 0x0F, 0x12);
11263   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11264   ins_pipe(pipe_slow);
11265 %}
11266 
// MoveL2D, stack slot -> XMM register, variant selected when
// UseXmmLoadAndClearUpper is on: uses movsd (F2 0F 10).
// MoveL2D_stack_reg_partial is the counterpart for when the flag is off.
11267 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11268   predicate(UseXmmLoadAndClearUpper);
11269   match(Set dst (MoveL2D src));
11270   effect(DEF dst, USE src);
11271 
11272   ins_cost(125);
11273   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11274   opcode(0xF2, 0x0F, 0x10);
11275   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11276   ins_pipe(pipe_slow);
11277 %}
11278 
11279 
// MoveF2I, XMM register -> stack slot: stores the float with movss
// (F3 0F 11). Note the encoding classes take (src, dst): the register is the
// reg field and the stack slot is the memory operand.
11280 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11281   match(Set dst (MoveF2I src));
11282   effect(DEF dst, USE src);
11283 
11284   ins_cost(95); // XXX
11285   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11286   opcode(0xF3, 0x0F, 0x11);
11287   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11288   ins_pipe(pipe_slow);
11289 %}
11290 
// MoveI2F, general register -> stack slot: stores the int with a 32-bit movl
// (opcode 89). The encoding classes take (src, dst): the register is the reg
// field and the stack slot is the memory operand.
11291 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11292   match(Set dst (MoveI2F src));
11293   effect(DEF dst, USE src);
11294 
11295   ins_cost(100);
11296   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11297   opcode(0x89);
11298   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11299   ins_pipe( ialu_mem_reg );
11300 %}
11301 
// MoveD2L, XMM register -> stack slot: stores the double with movsd
// (F2 0F 11). The encoding classes take (src, dst): the register is the reg
// field and the stack slot is the memory operand.
// The format tag names this rule (MoveD2L_reg_stack) so that debug listings
// are not confused with the MoveL2D_reg_stack rule that follows.
11302 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11303   match(Set dst (MoveD2L src));
11304   effect(DEF dst, USE src);
11305 
11306   ins_cost(95); // XXX
11307   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11308   opcode(0xF2, 0x0F, 0x11);
11309   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11310   ins_pipe(pipe_slow);
11311 %}
11312 
// MoveL2D, general register -> stack slot: stores the long with a 64-bit
// movq (opcode 89, _wide REX class). The encoding classes take (src, dst):
// the register is the reg field and the stack slot is the memory operand.
11313 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11314   match(Set dst (MoveL2D src));
11315   effect(DEF dst, USE src);
11316 
11317   ins_cost(100);
11318   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11319   opcode(0x89);
11320   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11321   ins_pipe(ialu_mem_reg);
11322 %}
11323 
// MoveF2I, XMM register -> general register, done directly with movdl.
// Cost 85 is lower than the stack-based MoveF2I forms (95 / 125).
11324 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11325   match(Set dst (MoveF2I src));
11326   effect(DEF dst, USE src);
11327   ins_cost(85);
11328   format %{ "movd    $dst,$src\t# MoveF2I" %}
11329   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11330   ins_pipe( pipe_slow );
11331 %}
11332 
// MoveD2L, XMM register -> general register, done directly with movdq
// (the format string prints it as "movd").
// Cost 85 is lower than the stack-based MoveD2L forms (95 / 125).
11333 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11334   match(Set dst (MoveD2L src));
11335   effect(DEF dst, USE src);
11336   ins_cost(85);
11337   format %{ "movd    $dst,$src\t# MoveD2L" %}
11338   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11339   ins_pipe( pipe_slow );
11340 %}
11341 
11342 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F, general register -> XMM register, done directly with movdl.
// Cost 300 makes this more expensive than the stack-based MoveI2F forms
// (100 / 125).
11343 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11344   match(Set dst (MoveI2F src));
11345   effect(DEF dst, USE src);
11346   ins_cost(300);
11347   format %{ "movd    $dst,$src\t# MoveI2F" %}
11348   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
// MoveL2D, general register -> XMM register, done directly with movdq
// (the format string prints it as "movd").
// Cost 300 makes this more expensive than the stack-based MoveL2D forms
// (100 / 125).
11352 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11353   match(Set dst (MoveL2D src));
11354   effect(DEF dst, USE src);
11355   ins_cost(300);
11356   format %{ "movd    $dst,$src\t# MoveL2D" %}
11357   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11358   ins_pipe( pipe_slow );
11359 %}
11360 
11361 // Replicate scalar to packed byte (1 byte) values in xmm
// Source is already in an XMM register. The format string lists the intended
// sequence (MOVDQA, PUNPCKLBW, PSHUFLW); the bytes come from the pshufd_8x8
// enc_class, which is defined outside this section.
11362 instruct Repl8B_reg(regD dst, regD src) %{
11363   match(Set dst (Replicate8B src));
11364   format %{ "MOVDQA  $dst,$src\n\t"
11365             "PUNPCKLBW $dst,$dst\n\t"
11366             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11367   ins_encode( pshufd_8x8(dst, src));
11368   ins_pipe( pipe_slow );
11369 %}
11370 
11371 // Replicate scalar to packed byte (1 byte) values in xmm
// Source is in a general register: mov_i2x first copies it into dst, then
// pshufd_8x8 replicates in place (dst is both source and destination).
11372 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11373   match(Set dst (Replicate8B src));
11374   format %{ "MOVD    $dst,$src\n\t"
11375             "PUNPCKLBW $dst,$dst\n\t"
11376             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11377   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11378   ins_pipe( pipe_slow );
11379 %}
11380 
11381 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Replicating zero needs no shuffle: dst is simply cleared with PXOR dst,dst.
11382 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11383   match(Set dst (Replicate8B zero));
11384   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11385   ins_encode( pxor(dst, dst));
11386   ins_pipe( fpu_reg_reg );
11387 %}
11388 
11389 // Replicate scalar to packed short (2 byte) values in xmm
// Source is already in an XMM register; a single PSHUFLW with selector 0x00
// (per the format string) is emitted through the pshufd_4x16 enc_class.
11390 instruct Repl4S_reg(regD dst, regD src) %{
11391   match(Set dst (Replicate4S src));
11392   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11393   ins_encode( pshufd_4x16(dst, src));
11394   ins_pipe( fpu_reg_reg );
11395 %}
11396 
11397 // Replicate scalar to packed short (2 byte) values in xmm
// Source is in a general register: mov_i2x first copies it into dst, then
// pshufd_4x16 replicates in place (dst is both source and destination).
11398 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11399   match(Set dst (Replicate4S src));
11400   format %{ "MOVD    $dst,$src\n\t"
11401             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11402   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11403   ins_pipe( fpu_reg_reg );
11404 %}
11405 
11406 // Replicate scalar zero to packed short (2 byte) values in xmm
// Replicating zero needs no shuffle: dst is simply cleared with PXOR dst,dst.
11407 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11408   match(Set dst (Replicate4S zero));
11409   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11410   ins_encode( pxor(dst, dst));
11411   ins_pipe( fpu_reg_reg );
11412 %}
11413 
11414 // Replicate scalar to packed char (2 byte) values in xmm
// Source is already in an XMM register; uses the same pshufd_4x16 encoding
// as the short (Replicate4S) rule.
11415 instruct Repl4C_reg(regD dst, regD src) %{
11416   match(Set dst (Replicate4C src));
11417   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11418   ins_encode( pshufd_4x16(dst, src));
11419   ins_pipe( fpu_reg_reg );
11420 %}
11421 
11422 // Replicate scalar to packed char (2 byte) values in xmm
// Source is in a general register: mov_i2x first copies it into dst, then
// pshufd_4x16 replicates in place (dst is both source and destination).
11423 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11424   match(Set dst (Replicate4C src));
11425   format %{ "MOVD    $dst,$src\n\t"
11426             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11427   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11428   ins_pipe( fpu_reg_reg );
11429 %}
11430 
11431 // Replicate scalar zero to packed char (2 byte) values in xmm
// Replicating zero needs no shuffle: dst is simply cleared with PXOR dst,dst.
11432 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11433   match(Set dst (Replicate4C zero));
11434   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11435   ins_encode( pxor(dst, dst));
11436   ins_pipe( fpu_reg_reg );
11437 %}
11438 
11439 // Replicate scalar to packed integer (4 byte) values in xmm
// Source is already in an XMM register; a single PSHUFD with selector 0x00
// is emitted.
11440 instruct Repl2I_reg(regD dst, regD src) %{
11441   match(Set dst (Replicate2I src));
11442   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11443   ins_encode( pshufd(dst, src, 0x00));
11444   ins_pipe( fpu_reg_reg );
11445 %}
11446 
11447 // Replicate scalar to packed integer (4 byte) values in xmm
// Source is in a general register: mov_i2x first copies it into dst, then
// PSHUFD with selector 0x00 replicates in place.
11448 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11449   match(Set dst (Replicate2I src));
11450   format %{ "MOVD   $dst,$src\n\t"
11451             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11452   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11453   ins_pipe( fpu_reg_reg );
11454 %}
11455 
11456 // Replicate scalar zero to packed integer (4 byte) values in xmm
// Replicating zero needs no shuffle: dst is simply cleared with PXOR dst,dst.
11457 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11458   match(Set dst (Replicate2I zero));
11459   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11460   ins_encode( pxor(dst, dst));
11461   ins_pipe( fpu_reg_reg );
11462 %}
11463 
11464 // Replicate scalar to packed single precision floating point values in xmm
// Source operand is typed regD. Unlike the integer rules, the PSHUFD selector
// here is 0xe0 rather than 0x00.
11465 instruct Repl2F_reg(regD dst, regD src) %{
11466   match(Set dst (Replicate2F src));
11467   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11468   ins_encode( pshufd(dst, src, 0xe0));
11469   ins_pipe( fpu_reg_reg );
11470 %}
11471 
11472 // Replicate scalar to packed single precision floating point values in xmm
// Same encoding as Repl2F_reg (PSHUFD with selector 0xe0); the only
// difference is that the source operand is typed regF.
11473 instruct Repl2F_regF(regD dst, regF src) %{
11474   match(Set dst (Replicate2F src));
11475   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11476   ins_encode( pshufd(dst, src, 0xe0));
11477   ins_pipe( fpu_reg_reg );
11478 %}
11479 
11480 // Replicate scalar zero to packed single precision floating point values in xmm
// Replicating the immF0 constant needs no shuffle: dst is simply cleared
// with PXOR dst,dst.
11481 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11482   match(Set dst (Replicate2F zero));
11483   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11484   ins_encode( pxor(dst, dst));
11485   ins_pipe( fpu_reg_reg );
11486 %}
11487 
11488 
11489 // =======================================================================
11490 // fast clearing of an array
// Operands are pinned to fixed registers by their operand classes: count in
// rcx, base in rdi, and rax as the zero source. cnt and base are consumed and
// destroyed (USE_KILL); rax and the flags are clobbered (KILL).
// The encoding is "xorl eax,eax" followed by the bytes F3 48 AB
// (rep prefix, REX.W, stos), i.e. rep stosq as shown in the format string.
11491 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11492                   rFlagsReg cr)
11493 %{
11494   match(Set dummy (ClearArray cnt base));
11495   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11496 
11497   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11498             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11499   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11500              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11501   ins_pipe(pipe_slow);
11502 %}
11503 
// String comparison intrinsic (StrComp node).
// Inputs are pinned to fixed registers (str1=rdi, cnt1=rcx, str2=rsi,
// cnt2=rdx) and the result is produced in rax. All four inputs are destroyed
// (USE_KILL), one XMM temporary is required, and the flags are clobbered.
// The code itself is generated by the macro assembler's string_compare.
11504 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11505                         rax_RegI result, regD tmp1, rFlagsReg cr)
11506 %{
11507   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11508   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11509 
11510   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11511   ins_encode %{
11512     __ string_compare($str1$$Register, $str2$$Register,
11513                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11514                       $tmp1$$XMMRegister);
11515   %}
11516   ins_pipe( pipe_slow );
11517 %}
11518 
11519 // Substring search (StrIndexOf) where the pattern length is a compile-time constant.
11520 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11521                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11522 %{
11523   predicate(UseSSE42Intrinsics);
11524   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11525   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11526 
11527   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11528   ins_encode %{
11529     const int pattern_len = (int)$int_cnt2$$constant;
11530     if (pattern_len < 8) {
11531       // Short patterns go to the general routine, which loads them through
11532       // the stack if they cross a page boundary.
11533       __ string_indexof($str1$$Register, $str2$$Register,
11534                         $cnt1$$Register, $cnt2$$Register,
11535                         pattern_len, $result$$Register,
11536                         $vec$$XMMRegister, $tmp$$Register);
11537     } else {
11538       // Constant patterns of 8 or more elements never need the stack copy.
11539       __ string_indexofC8($str1$$Register, $str2$$Register,
11540                           $cnt1$$Register, $cnt2$$Register,
11541                           pattern_len, $result$$Register,
11542                           $vec$$XMMRegister, $tmp$$Register);
11543     }
11544   %}
11545   ins_pipe( pipe_slow );
11546 %}
11547 
// StrIndexOf intrinsic for a substring whose length is only known at run
// time (cnt2 is a register input).  Only selected when UseSSE42Intrinsics
// is set.  All four inputs are USE_KILL, tmp and the flags are killed and
// vec is an XMM temporary.
11548 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11549                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11550 %{
11551   predicate(UseSSE42Intrinsics);
11552   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11553   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11554
11555   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11556   ins_encode %{
        // (-1) is passed in the slot where string_indexof_con passes the constant
        // substring length; presumably it means "length not constant" -- confirm
        // against MacroAssembler::string_indexof.
11557     __ string_indexof($str1$$Register, $str2$$Register,
11558                       $cnt1$$Register, $cnt2$$Register,
11559                       (-1), $result$$Register,
11560                       $vec$$XMMRegister, $tmp$$Register);
11561   %}
11562   ins_pipe( pipe_slow );
11563 %}
11564 
11565 // fast string equals
// StrEquals intrinsic: matches StrEquals over (str1, str2) with a shared
// element count cnt and defines result.
// str1, str2 and cnt are USE_KILL; tmp1/tmp2 are XMM temporaries; tmp3 and
// the flags are killed.
11566 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11567                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11568 %{
11569   match(Set result (StrEquals (Binary str1 str2) cnt));
11570   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11571
11572   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11573   ins_encode %{
        // Shares char_arrays_equals() with array_equals; the leading 'false'
        // is the only literal difference (array_equals passes 'true').
11574     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11575                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11576                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11577   %}
11578   ins_pipe( pipe_slow );
11579 %}
11580 
11581 // fast array equals
// AryEq intrinsic: matches AryEq over (ary1, ary2) and defines result.
// There is no count input: tmp3 is a killed scratch register passed in the
// slot where string_equals passes its count.  ary1/ary2 are USE_KILL,
// tmp1/tmp2 are XMM temporaries, tmp3/tmp4 and the flags are killed.
11582 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11583                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11584 %{
11585   match(Set result (AryEq ary1 ary2));
11586   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11587   //ins_cost(300);
11588
11589   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11590   ins_encode %{
        // Leading 'true' selects the array flavour of the shared routine
        // (string_equals passes 'false').
11591     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11592                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11593                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11594   %}
11595   ins_pipe( pipe_slow );
11596 %}
11597 
11598 //----------Control Flow Instructions------------------------------------------
11599 // Signed compare Instructions
11600 
11601 // XXX more variants!!
// Signed int compare, register with register: matches CmpI op1 op2 and
// defines the flags operand cr.  Encoded with opcode 0x3B via the
// REX_reg_reg / OpcP / reg_reg encoding classes (formatted as "cmpl").
// Also instantiated directly by the minI_rReg / maxI_rReg expand rules.
11602 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11603 %{
11604   match(Set cr (CmpI op1 op2));
11605   effect(DEF cr, USE op1, USE op2);
11606
11607   format %{ "cmpl    $op1, $op2" %}
11608   opcode(0x3B);  /* Opcode 3B /r */
11609   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11610   ins_pipe(ialu_cr_reg_reg);
11611 %}
11612 
// Signed int compare, register with immediate: matches CmpI op1 op2 where
// op2 is an immI constant.  Opcode 0x81 with secondary 0x07 (/7); the
// immediate is emitted by Con8or32, which by its name chooses between an
// 8-bit and a 32-bit form.
11613 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11614 %{
11615   match(Set cr (CmpI op1 op2));
11616
11617   format %{ "cmpl    $op1, $op2" %}
11618   opcode(0x81, 0x07); /* Opcode 81 /7 */
11619   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11620   ins_pipe(ialu_cr_reg_imm);
11621 %}
11622 
// Signed int compare, register with memory: folds the LoadI of op2 into
// the compare (CmpI op1 (LoadI op2)).  Same opcode 0x3B as compI_rReg but
// with a memory ModRM; carries an explicit cost of 500.
11623 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11624 %{
11625   match(Set cr (CmpI op1 (LoadI op2)));
11626
11627   ins_cost(500); // XXX
11628   format %{ "cmpl    $op1, $op2" %}
11629   opcode(0x3B); /* Opcode 3B /r */
11630   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11631   ins_pipe(ialu_cr_reg_mem);
11632 %}
11633 
// Int compare against the constant zero (immI0): emitted as a test of src
// with itself (opcode 0x85, "testl $src, $src").  The zero operand only
// takes part in matching; it is not referenced by the encoding.
11634 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11635 %{
11636   match(Set cr (CmpI src zero));
11637
11638   format %{ "testl   $src, $src" %}
11639   opcode(0x85);
11640   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11641   ins_pipe(ialu_cr_reg_imm);
11642 %}
11643 
// Folds "(src & con) compared with zero" into one test instruction:
// matches CmpI (AndI src con) zero.  Opcode 0xF7 with secondary 0x00; the
// mask is always emitted as a 32-bit immediate (Con32).
11644 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11645 %{
11646   match(Set cr (CmpI (AndI src con) zero));
11647
11648   format %{ "testl   $src, $con" %}
11649   opcode(0xF7, 0x00);
11650   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11651   ins_pipe(ialu_cr_reg_imm);
11652 %}
11653 
// Folds "(src & *mem) compared with zero" into one test instruction with a
// memory operand: matches CmpI (AndI src (LoadI mem)) zero.  Opcode 0x85.
11654 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11655 %{
11656   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11657
11658   format %{ "testl   $src, $mem" %}
11659   opcode(0x85);
11660   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11661   ins_pipe(ialu_cr_reg_mem);
11662 %}
11663 
11664 // Unsigned compare Instructions; really, same as signed except they
11665 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register with register: matches CmpU and defines
// an rFlagsRegU.  The encoding is the same as compI_rReg (opcode 0x3B);
// only the flags operand class differs.
11666 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11667 %{
11668   match(Set cr (CmpU op1 op2));
11669
11670   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11671   opcode(0x3B); /* Opcode 3B /r */
11672   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11673   ins_pipe(ialu_cr_reg_reg);
11674 %}
11675 
// Unsigned int compare, register with immediate: matches CmpU op1 op2 with
// an immI op2.  Same encoding as compI_rReg_imm (opcode 0x81 /7); the
// result is an rFlagsRegU.
11676 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11677 %{
11678   match(Set cr (CmpU op1 op2));
11679
11680   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11681   opcode(0x81,0x07); /* Opcode 81 /7 */
11682   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11683   ins_pipe(ialu_cr_reg_imm);
11684 %}
11685 
// Unsigned int compare, register with memory: folds the LoadI of op2 into
// the compare (CmpU op1 (LoadI op2)).  Same encoding as compI_rReg_mem;
// explicit cost of 500; the result is an rFlagsRegU.
11686 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11687 %{
11688   match(Set cr (CmpU op1 (LoadI op2)));
11689
11690   ins_cost(500); // XXX
11691   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11692   opcode(0x3B); /* Opcode 3B /r */
11693   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11694   ins_pipe(ialu_cr_reg_mem);
11695 %}
11696 
11697 // // // Cisc-spilled version of cmpU_rReg
11698 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11699 // //%{
11700 // //  match(Set cr (CmpU (LoadI op1) op2));
11701 // //
11702 // //  format %{ "CMPu   $op1,$op2" %}
11703 // //  ins_cost(500);
11704 // //  opcode(0x39);  /* Opcode 39 /r */
11705 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11706 // //%}
11707 
// Unsigned compare against the constant zero (immI0): emitted as a test of
// src with itself (opcode 0x85).  The zero operand is used for matching
// only; the result is an rFlagsRegU.
11708 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11709 %{
11710   match(Set cr (CmpU src zero));
11711
11712   format %{ "testl  $src, $src\t# unsigned" %}
11713   opcode(0x85);
11714   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11715   ins_pipe(ialu_cr_reg_imm);
11716 %}
11717 
// Pointer compare, register with register: matches CmpP and defines an
// rFlagsRegU (pointer compares are unsigned).  Opcode 0x3B with the *_wide
// REX encoding class, formatted as the 64-bit "cmpq".
11718 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11719 %{
11720   match(Set cr (CmpP op1 op2));
11721
11722   format %{ "cmpq    $op1, $op2\t# ptr" %}
11723   opcode(0x3B); /* Opcode 3B /r */
11724   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11725   ins_pipe(ialu_cr_reg_reg);
11726 %}
11727 
// Pointer compare, register with memory: folds the LoadP of op2 into the
// compare (CmpP op1 (LoadP op2)).  Explicit cost of 500.  compP_mem_rReg
// has the same match rule but adds a predicate and no explicit cost.
11728 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11729 %{
11730   match(Set cr (CmpP op1 (LoadP op2)));
11731
11732   ins_cost(500); // XXX
11733   format %{ "cmpq    $op1, $op2\t# ptr" %}
11734   opcode(0x3B); /* Opcode 3B /r */
11735   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11736   ins_pipe(ialu_cr_reg_mem);
11737 %}
11738 
11739 // // // Cisc-spilled version of cmpP_rReg
11740 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11741 // //%{
11742 // //  match(Set cr (CmpP (LoadP op1) op2));
11743 // //
11744 // //  format %{ "CMPu   $op1,$op2" %}
11745 // //  ins_cost(500);
11746 // //  opcode(0x39);  /* Opcode 39 /r */
11747 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11748 // //%}
11749 
11750 // XXX this is generalized by compP_rReg_mem???
11751 // Compare raw pointer (used in out-of-heap check).
11752 // Only works because non-oop pointers must be raw pointers
11753 // and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but restricted by the
// predicate to loads whose address (n->in(2)->in(2)) is not an oop pointer,
// and without the explicit ins_cost(500) of the general version.
11754 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11755 %{
11756   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11757   match(Set cr (CmpP op1 (LoadP op2)));
11758
11759   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11760   opcode(0x3B); /* Opcode 3B /r */
11761   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11762   ins_pipe(ialu_cr_reg_mem);
11763 %}
11764 
11765 // This will generate a signed flags result. This should be OK since
11766 // any compare to a zero should be eq/neq.
// Pointer compare against the null constant (immP0): emitted as a 64-bit
// test of src with itself (opcode 0x85, wide REX).  Unlike the other CmpP
// rules this one defines a signed rFlagsReg.
11767 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11768 %{
11769   match(Set cr (CmpP src zero));
11770
11771   format %{ "testq   $src, $src\t# ptr" %}
11772   opcode(0x85);
11773   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11774   ins_pipe(ialu_cr_reg_imm);
11775 %}
11776 
11777 // This will generate a signed flags result. This should be OK since
11778 // any compare to a zero should be eq/neq.
// Null test of a pointer in memory: matches CmpP (LoadP op) zero.
// Used when compressed oops are off or the narrow-oop base is non-NULL;
// testP_mem_reg0 covers the complementary case.
// Encoded as opcode 0xF7 /0 with a 32-bit immediate of 0xFFFFFFFF; the
// format string shows the mask as 64 bits of ones (presumably the imm32 is
// sign-extended by the 64-bit test -- confirm against the ISA reference).
11779 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11780 %{
11781   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11782   match(Set cr (CmpP (LoadP op) zero));
11783
11784   ins_cost(500); // XXX
11785   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11786   opcode(0xF7); /* Opcode F7 /0 */
11787   ins_encode(REX_mem_wide(op),
11788              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11789   ins_pipe(ialu_cr_reg_imm);
11790 %}
11791 
// Null test of a pointer in memory for the case where compressed oops are
// enabled and the narrow-oop base is NULL.  Per the format string, R12
// holds the heap base and is therefore zero here, so the memory operand is
// compared directly against r12 instead of using an immediate.
11792 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11793 %{
11794   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11795   match(Set cr (CmpP (LoadP mem) zero));
11796
11797   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11798   ins_encode %{
11799     __ cmpq(r12, $mem$$Address);
11800   %}
11801   ins_pipe(ialu_cr_reg_mem);
11802 %}
11803 
// Compressed-pointer compare, register with register: matches CmpN and
// emits a 32-bit cmpl through the macro assembler.  Result is an rFlagsRegU.
11804 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11805 %{
11806   match(Set cr (CmpN op1 op2));
11807
11808   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11809   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11810   ins_pipe(ialu_cr_reg_reg);
11811 %}
11812 
// Compressed-pointer compare, register with memory: folds the LoadN of mem
// into the compare (CmpN src (LoadN mem)) and emits cmpl src, [mem].
11813 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11814 %{
11815   match(Set cr (CmpN src (LoadN mem)));
11816
11817   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11818   ins_encode %{
11819     __ cmpl($src$$Register, $mem$$Address);
11820   %}
11821   ins_pipe(ialu_cr_reg_mem);
11822 %}
11823 
// Compressed-pointer compare, register with a narrow-oop constant (immN).
// The constant is handed to cmp_narrow_oop() as a jobject.
11824 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11825   match(Set cr (CmpN op1 op2));
11826
11827   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11828   ins_encode %{
11829     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11830   %}
11831   ins_pipe(ialu_cr_reg_imm);
11832 %}
11833 
// Compressed-pointer compare of a narrow-oop constant with a value in
// memory: matches CmpN src (LoadN mem).
// NOTE(review): the emitted compare takes its operands in the opposite
// order to the match rule (memory first, constant second), as the format
// string also shows.  That is only equivalent for eq/ne consumers --
// confirm that CmpN results are never used for ordering.
11834 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11835 %{
11836   match(Set cr (CmpN src (LoadN mem)));
11837
11838   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11839   ins_encode %{
11840     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11841   %}
11842   ins_pipe(ialu_cr_reg_mem);
11843 %}
11844 
// Compressed-pointer compare against the narrow null constant (immN0):
// emitted as a 32-bit test of src with itself.  Defines a signed rFlagsReg.
11845 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11846   match(Set cr (CmpN src zero));
11847
11848   format %{ "testl   $src, $src\t# compressed ptr" %}
11849   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11850   ins_pipe(ialu_cr_reg_imm);
11851 %}
11852 
// Null test of a compressed pointer in memory: matches
// CmpN (LoadN mem) zero when the narrow-oop base is non-NULL
// (testN_mem_reg0 covers the base == NULL case).
//
// Emits the 32-bit "test [mem], 0xFFFFFFFF" (opcode F7 /0 with an imm32),
// which sets ZF exactly when the loaded value is zero.  This is the 32-bit
// counterpart of testP_mem: same opcode and enc classes, but REX_mem
// instead of REX_mem_wide (REX_mem is assumed to be the non-wide sibling
// defined earlier in this file).
// The previous encoding emitted "cmpl [mem], 0xFFFFFFFF", which sets ZF
// when the value is all-ones rather than zero and did not agree with the
// format string below.
11853 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11854 %{
11855   predicate(Universe::narrow_oop_base() != NULL);
11856   match(Set cr (CmpN (LoadN mem) zero));
11857
11858   ins_cost(500); // XXX
11859   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11860   opcode(0xF7); /* Opcode F7 /0 */
11861   ins_encode(REX_mem(mem),
11862              OpcP, RM_opc_mem(0x00, mem), Con_d32(0xFFFFFFFF));
11863   ins_pipe(ialu_cr_reg_mem);
11864 %}
11865 
// Null test of a compressed pointer in memory for the case where the
// narrow-oop base is NULL.  Per the format string, R12 holds the heap base
// and is therefore zero here, so the memory operand is compared directly
// against r12 with a 32-bit cmpl.
11866 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11867 %{
11868   predicate(Universe::narrow_oop_base() == NULL);
11869   match(Set cr (CmpN (LoadN mem) zero));
11870
11871   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11872   ins_encode %{
11873     __ cmpl(r12, $mem$$Address);
11874   %}
11875   ins_pipe(ialu_cr_reg_mem);
11876 %}
11877 
11878 // Yanked all unsigned pointer compare operations.
11879 // Pointer compares are done with CmpP which is already unsigned.
11880 
// Signed long compare, register with register: matches CmpL.  Opcode 0x3B
// with the wide REX encoding class, formatted as "cmpq".
11881 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11882 %{
11883   match(Set cr (CmpL op1 op2));
11884
11885   format %{ "cmpq    $op1, $op2" %}
11886   opcode(0x3B);  /* Opcode 3B /r */
11887   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11888   ins_pipe(ialu_cr_reg_reg);
11889 %}
11890 
// Signed long compare, register with immediate.  The constant operand is
// restricted to the immL32 class; opcode 0x81 /7 with the wide variant of
// OpcSErm, and the immediate is emitted by Con8or32.
11891 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11892 %{
11893   match(Set cr (CmpL op1 op2));
11894
11895   format %{ "cmpq    $op1, $op2" %}
11896   opcode(0x81, 0x07); /* Opcode 81 /7 */
11897   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11898   ins_pipe(ialu_cr_reg_imm);
11899 %}
11900 
// Signed long compare, register with memory: folds the LoadL of op2 into
// the compare (CmpL op1 (LoadL op2)).
// NOTE(review): unlike compI_rReg_mem / compU_rReg_mem / compP_rReg_mem
// this rule has no ins_cost(500); confirm whether that is intentional.
11901 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11902 %{
11903   match(Set cr (CmpL op1 (LoadL op2)));
11904
11905   format %{ "cmpq    $op1, $op2" %}
11906   opcode(0x3B); /* Opcode 3B /r */
11907   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11908   ins_pipe(ialu_cr_reg_mem);
11909 %}
11910 
// Long compare against the constant zero (immL0): emitted as a 64-bit test
// of src with itself (opcode 0x85, wide REX).  The zero operand is used
// for matching only.
11911 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11912 %{
11913   match(Set cr (CmpL src zero));
11914
11915   format %{ "testq   $src, $src" %}
11916   opcode(0x85);
11917   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11918   ins_pipe(ialu_cr_reg_imm);
11919 %}
11920 
// Folds "(src & con) compared with zero" for longs into one test:
// matches CmpL (AndL src con) zero with con restricted to immL32.
// Opcode 0xF7 with secondary 0x00, wide REX, 32-bit immediate (Con32).
11921 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11922 %{
11923   match(Set cr (CmpL (AndL src con) zero));
11924
11925   format %{ "testq   $src, $con\t# long" %}
11926   opcode(0xF7, 0x00);
11927   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11928   ins_pipe(ialu_cr_reg_imm);
11929 %}
11930 
// Folds "(src & *mem) compared with zero" for longs into one test with a
// memory operand: matches CmpL (AndL src (LoadL mem)) zero.  Opcode 0x85
// with the wide REX encoding class.
11931 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11932 %{
11933   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11934
11935   format %{ "testq   $src, $mem" %}
11936   opcode(0x85);
11937   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11938   ins_pipe(ialu_cr_reg_mem);
11939 %}
11940 
11941 // Manifest a CmpL result in an integer register.  Very painful.
11942 // This is the test to avoid.
// Three-way long compare materialised in an int register: matches
// CmpL3 src1 src2 and defines dst; the flags are clobbered (KILL flags).
// The format string documents the emitted sequence: compare, preload -1,
// short jump over the rest when "less", otherwise setne + zero-extend.
// The bytes themselves come from the cmpl3_flag encoding class.
11943 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11944 %{
11945   match(Set dst (CmpL3 src1 src2));
11946   effect(KILL flags);
11947
11948   ins_cost(275); // XXX
11949   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11950             "movl    $dst, -1\n\t"
11951             "jl,s    done\n\t"
11952             "setne   $dst\n\t"
11953             "movzbl  $dst, $dst\n\t"
11954     "done:" %}
11955   ins_encode(cmpl3_flag(src1, src2, dst));
11956   ins_pipe(pipe_slow);
11957 %}
11958 
11959 //----------Max and Min--------------------------------------------------------
11960 // Min Instructions
11961 
// Helper with no match rule: a conditional move of src into dst on
// "greater" (opcode 0x0F 0x4F), consuming flags produced elsewhere.
// It is only instantiated through the expand rule of minI_rReg.
11962 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11963 %{
11964   effect(USE_DEF dst, USE src, USE cr);
11965
11966   format %{ "cmovlgt $dst, $src\t# min" %}
11967   opcode(0x0F, 0x4F);
11968   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11969   ins_pipe(pipe_cmov_reg);
11970 %}
11971 
11972 
// Integer minimum, dst = min(dst, src): matches MinI and expands into a
// compare followed by a conditional move, using a temporary flags operand.
11973 instruct minI_rReg(rRegI dst, rRegI src)
11974 %{
11975   match(Set dst (MinI dst src));
11976
11977   ins_cost(200);
11978   expand %{
11979     rFlagsReg cr;
        // Compare dst with src, then overwrite dst with src if dst was greater.
11980     compI_rReg(cr, dst, src);
11981     cmovI_reg_g(dst, src, cr);
11982   %}
11983 %}
11984 
// Helper with no match rule: a conditional move of src into dst on "less"
// (opcode 0x0F 0x4C), consuming flags produced elsewhere.
// It is only instantiated through the expand rule of maxI_rReg.
11985 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11986 %{
11987   effect(USE_DEF dst, USE src, USE cr);
11988
11989   format %{ "cmovllt $dst, $src\t# max" %}
11990   opcode(0x0F, 0x4C);
11991   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11992   ins_pipe(pipe_cmov_reg);
11993 %}
11994 
11995 
// Integer maximum, dst = max(dst, src): matches MaxI and expands into a
// compare followed by a conditional move, using a temporary flags operand.
11996 instruct maxI_rReg(rRegI dst, rRegI src)
11997 %{
11998   match(Set dst (MaxI dst src));
11999
12000   ins_cost(200);
12001   expand %{
12002     rFlagsReg cr;
        // Compare dst with src, then overwrite dst with src if dst was less.
12003     compI_rReg(cr, dst, src);
12004     cmovI_reg_l(dst, src, cr);
12005   %}
12006 %}
12007 
12008 // ============================================================================
12009 // Branch Instructions
12010 
12011 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional long jump: matches Goto.  Fixed size of 5 bytes: opcode
// 0xE9 followed by the label displacement emitted by Lbl.  Marked
// pc-relative.  jmpDir_short is the 2-byte replacement.
12012 instruct jmpDir(label labl)
12013 %{
12014   match(Goto);
12015   effect(USE labl);
12016
12017   ins_cost(300);
12018   format %{ "jmp     $labl" %}
12019   size(5);
12020   opcode(0xE9);
12021   ins_encode(OpcP, Lbl(labl));
12022   ins_pipe(pipe_jmp);
12023   ins_pc_relative(1);
12024 %}
12025 
12026 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump on signed flags: matches If cop cr.  Fixed size of
// 6 bytes; opcode pair 0x0F 0x80, with the Jcc encoding class combining the
// condition from cop and the label displacement.  jmpCon_short is the
// 2-byte replacement.
12027 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12028 %{
12029   match(If cop cr);
12030   effect(USE labl);
12031
12032   ins_cost(300);
12033   format %{ "j$cop     $labl" %}
12034   size(6);
12035   opcode(0x0F, 0x80);
12036   ins_encode(Jcc(cop, labl));
12037   ins_pipe(pipe_jcc);
12038   ins_pc_relative(1);
12039 %}
12040 
12041 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump closing a counted loop: matches
// CountedLoopEnd cop cr on signed flags.  Same size and encoding as jmpCon.
12042 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12043 %{
12044   match(CountedLoopEnd cop cr);
12045   effect(USE labl);
12046
12047   ins_cost(300);
12048   format %{ "j$cop     $labl\t# loop end" %}
12049   size(6);
12050   opcode(0x0F, 0x80);
12051   ins_encode(Jcc(cop, labl));
12052   ins_pipe(pipe_jcc);
12053   ins_pc_relative(1);
12054 %}
12055 
12056 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop-end jump on unsigned flags: matches CountedLoopEnd with a
// cmpOpU condition and an rFlagsRegU.  Same 6-byte encoding as jmpLoopEnd.
12057 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12058   match(CountedLoopEnd cop cmp);
12059   effect(USE labl);
12060
12061   ins_cost(300);
12062   format %{ "j$cop,u   $labl\t# loop end" %}
12063   size(6);
12064   opcode(0x0F, 0x80);
12065   ins_encode(Jcc(cop, labl));
12066   ins_pipe(pipe_jcc);
12067   ins_pc_relative(1);
12068 %}
12069 
// Counted-loop-end jump for the cmpOpUCF / rFlagsRegUCF operand classes.
// Same 6-byte encoding as jmpLoopEndU but with a lower cost (200 vs 300).
12070 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12071   match(CountedLoopEnd cop cmp);
12072   effect(USE labl);
12073
12074   ins_cost(200);
12075   format %{ "j$cop,u   $labl\t# loop end" %}
12076   size(6);
12077   opcode(0x0F, 0x80);
12078   ins_encode(Jcc(cop, labl));
12079   ins_pipe(pipe_jcc);
12080   ins_pc_relative(1);
12081 %}
12082 
12083 // Jump Direct Conditional - using unsigned comparison
// Conditional long jump on unsigned flags: matches If with a cmpOpU
// condition and an rFlagsRegU.  Same 6-byte encoding as jmpCon.
12084 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12085   match(If cop cmp);
12086   effect(USE labl);
12087
12088   ins_cost(300);
12089   format %{ "j$cop,u  $labl" %}
12090   size(6);
12091   opcode(0x0F, 0x80);
12092   ins_encode(Jcc(cop, labl));
12093   ins_pipe(pipe_jcc);
12094   ins_pc_relative(1);
12095 %}
12096 
// Conditional long jump for the cmpOpUCF / rFlagsRegUCF operand classes.
// Same 6-byte encoding as jmpConU but with a lower cost (200 vs 300).
12097 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12098   match(If cop cmp);
12099   effect(USE labl);
12100
12101   ins_cost(200);
12102   format %{ "j$cop,u  $labl" %}
12103   size(6);
12104   opcode(0x0F, 0x80);
12105   ins_encode(Jcc(cop, labl));
12106   ins_pipe(pipe_jcc);
12107   ins_pc_relative(1);
12108 %}
12109 
// Two-jump conditional branch for the cmpOpUCF2 condition class, which
// this rule accepts only as equal or notEqual.  Emits a parity jump
// followed by the conditional jump proper, each 6 bytes long (size 12):
//   notEqual: both jumps go to the label;
//   equal:    the parity jump skips over the conditional jump ("done").
12110 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12111   match(If cop cmp);
12112   effect(USE labl);
12113
12114   ins_cost(200);
12115   format %{ $$template
12116     if ($cop$$cmpcode == Assembler::notEqual) {
12117       $$emit$$"jp,u   $labl\n\t"
12118       $$emit$$"j$cop,u   $labl"
12119     } else {
12120       $$emit$$"jp,u   done\n\t"
12121       $$emit$$"j$cop,u   $labl\n\t"
12122       $$emit$$"done:"
12123     }
12124   %}
12125   size(12);
12126   opcode(0x0F, 0x80);
12127   ins_encode %{
12128     Label* target = $labl$$label;
12129     int parity_disp = -1;
12130     $$$emit8$primary;
12131     emit_cc(cbuf, $secondary, Assembler::parity);
12132     switch ($cop$$cmpcode) {
12133     case Assembler::notEqual:
12134       // parity jump also targets the label; rel32 counts from the end of this jump
12135       parity_disp = target->loc_pos() - (cbuf.insts_size() + 4);
12136       break;
12137     case Assembler::equal: parity_disp = 6; break;  // hop over the next 6-byte jump
12138     default: ShouldNotReachHere();
12139     }
12140     emit_d32(cbuf, parity_disp);
12141     $$$emit8$primary;
12142     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12143     const int cond_disp = target->loc_pos() - (cbuf.insts_size() + 4);
12144     emit_d32(cbuf, cond_disp);
12145   %}
12146   ins_pipe(pipe_jcc);
12147   ins_pc_relative(1);
12148 %}
12149 
12150 // ============================================================================
12151 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12152 // superklass array for an instance of the superklass.  Set a hidden
12153 // internal cache on a hit (cache is checked with exposed code in
12154 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12155 // encoding ALSO sets flags.
12156 
// Slow half of a subtype check producing a pointer-sized result: matches
// PartialSubtypeCheck sub super and defines result.  rcx and the flags are
// killed.  The format string below documents the intended sequence; the
// bytes are emitted by enc_PartialSubtypeCheck, which is shared with
// partialSubtypeCheck_vs_Zero and is steered by the opcode value
// (0x1 here: also XOR the result register on a hit).
// Format-string fixes only: "arrayOopDex" corrected to "arrayOopDesc" and
// the missing '#' comment marker added on the xorq line.
12157 instruct partialSubtypeCheck(rdi_RegP result,
12158                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12159                              rFlagsReg cr)
12160 %{
12161   match(Set result (PartialSubtypeCheck sub super));
12162   effect(KILL rcx, KILL cr);
12163
12164   ins_cost(1100);  // slightly larger than the next version
12165   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12166             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12167             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12168             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12169             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12170             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12171             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12172     "miss:\t" %}
12173
12174   opcode(0x1); // Force a XOR of RDI
12175   ins_encode(enc_PartialSubtypeCheck());
12176   ins_pipe(pipe_slow);
12177 %}
12178 
// Slow half of a subtype check whose result is only compared with null:
// matches CmpP (PartialSubtypeCheck sub super) zero and defines the flags
// directly.  rcx and the result register are killed.  Shares
// enc_PartialSubtypeCheck with partialSubtypeCheck, but with opcode 0x0 so
// the result register is not XORed; slightly cheaper (cost 1000 vs 1100).
// Format-string fix only: "arrayOopDex" corrected to "arrayOopDesc".
12179 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12180                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12181                                      immP0 zero,
12182                                      rdi_RegP result)
12183 %{
12184   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12185   effect(KILL rcx, KILL result);
12186
12187   ins_cost(1000);
12188   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12189             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12190             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12191             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12192             "jne,s   miss\t\t# Missed: flags nz\n\t"
12193             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12194     "miss:\t" %}
12195
12196   opcode(0x0); // No need to XOR RDI
12197   ins_encode(enc_PartialSubtypeCheck());
12198   ins_pipe(pipe_slow);
12199 %}
12200 
12201 // ============================================================================
12202 // Branch Instructions -- short offset versions
12203 //
12204 // These instructions are used to replace jumps of a long offset (the default
12205 // match) with jumps of a shorter offset.  These instructions are all tagged
12206 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12207 // match rules in general matching.  Instead, the ADLC generates a conversion
12208 // method in the MachNode which can be used to do in-place replacement of the
12209 // long variant with the shorter variant.  The compiler will determine if a
12210 // branch can be taken by the is_short_branch_offset() predicate in the machine
12211 // specific code section of the file.
12212 
12213 // Jump Direct - Label defines a relative address from JMP+1
// Short form of jmpDir: 2 bytes, opcode 0xEB plus the displacement emitted
// by LblShort.  Tagged ins_short_branch, so it is used as an in-place
// replacement for the long form rather than matched directly.
12214 instruct jmpDir_short(label labl) %{
12215   match(Goto);
12216   effect(USE labl);
12217
12218   ins_cost(300);
12219   format %{ "jmp,s   $labl" %}
12220   size(2);
12221   opcode(0xEB);
12222   ins_encode(OpcP, LblShort(labl));
12223   ins_pipe(pipe_jmp);
12224   ins_pc_relative(1);
12225   ins_short_branch(1);
12226 %}
12227 
12228 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon: 2 bytes, base opcode 0x70 combined with the
// condition by the JccShort encoding class.  Tagged ins_short_branch.
12229 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12230   match(If cop cr);
12231   effect(USE labl);
12232
12233   ins_cost(300);
12234   format %{ "j$cop,s   $labl" %}
12235   size(2);
12236   opcode(0x70);
12237   ins_encode(JccShort(cop, labl));
12238   ins_pipe(pipe_jcc);
12239   ins_pc_relative(1);
12240   ins_short_branch(1);
12241 %}
12242 
12243 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd: 2-byte conditional jump closing a counted loop
// on signed flags.  Tagged ins_short_branch.
12244 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12245   match(CountedLoopEnd cop cr);
12246   effect(USE labl);
12247
12248   ins_cost(300);
12249   format %{ "j$cop,s   $labl\t# loop end" %}
12250   size(2);
12251   opcode(0x70);
12252   ins_encode(JccShort(cop, labl));
12253   ins_pipe(pipe_jcc);
12254   ins_pc_relative(1);
12255   ins_short_branch(1);
12256 %}
12257 
12258 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEndU: 2-byte counted-loop-end jump on unsigned
// flags.  Tagged ins_short_branch.
12259 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12260   match(CountedLoopEnd cop cmp);
12261   effect(USE labl);
12262
12263   ins_cost(300);
12264   format %{ "j$cop,us  $labl\t# loop end" %}
12265   size(2);
12266   opcode(0x70);
12267   ins_encode(JccShort(cop, labl));
12268   ins_pipe(pipe_jcc);
12269   ins_pc_relative(1);
12270   ins_short_branch(1);
12271 %}
12272 
// Short form of jmpLoopEndUCF: 2-byte counted-loop-end jump for the
// cmpOpUCF / rFlagsRegUCF classes.  Tagged ins_short_branch.
// Note: cost is 300 here while the long form jmpLoopEndUCF uses 200.
12273 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12274   match(CountedLoopEnd cop cmp);
12275   effect(USE labl);
12276
12277   ins_cost(300);
12278   format %{ "j$cop,us  $labl\t# loop end" %}
12279   size(2);
12280   opcode(0x70);
12281   ins_encode(JccShort(cop, labl));
12282   ins_pipe(pipe_jcc);
12283   ins_pc_relative(1);
12284   ins_short_branch(1);
12285 %}
12286 
12287 // Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU: 2-byte conditional jump on unsigned flags.
// Tagged ins_short_branch.
12288 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12289   match(If cop cmp);
12290   effect(USE labl);
12291
12292   ins_cost(300);
12293   format %{ "j$cop,us  $labl" %}
12294   size(2);
12295   opcode(0x70);
12296   ins_encode(JccShort(cop, labl));
12297   ins_pipe(pipe_jcc);
12298   ins_pc_relative(1);
12299   ins_short_branch(1);
12300 %}
12301 
// Short form of jmpConUCF: 2-byte conditional jump for the cmpOpUCF /
// rFlagsRegUCF classes.  Tagged ins_short_branch.
// Note: cost is 300 here while the long form jmpConUCF uses 200.
12302 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12303   match(If cop cmp);
12304   effect(USE labl);
12305
12306   ins_cost(300);
12307   format %{ "j$cop,us  $labl" %}
12308   size(2);
12309   opcode(0x70);
12310   ins_encode(JccShort(cop, labl));
12311   ins_pipe(pipe_jcc);
12312   ins_pc_relative(1);
12313   ins_short_branch(1);
12314 %}
12315 
// Short form of jmpConUCF2: a parity jump followed by the conditional jump,
// each 2 bytes long (size 4).  Accepts only equal / notEqual:
//   notEqual: both jumps go to the label;
//   equal:    the parity jump skips over the conditional jump ("done").
// Both 8-bit displacements are range-checked by asserts after emission.
// Tagged ins_short_branch.
12316 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12317   match(If cop cmp);
12318   effect(USE labl);
12319
12320   ins_cost(300);
12321   format %{ $$template
12322     if ($cop$$cmpcode == Assembler::notEqual) {
12323       $$emit$$"jp,u,s   $labl\n\t"
12324       $$emit$$"j$cop,u,s   $labl"
12325     } else {
12326       $$emit$$"jp,u,s   done\n\t"
12327       $$emit$$"j$cop,u,s  $labl\n\t"
12328       $$emit$$"done:"
12329     }
12330   %}
12331   size(4);
12332   opcode(0x70);
12333   ins_encode %{
12334     Label* target = $labl$$label;
12335     int parity_disp = -1;
12336     emit_cc(cbuf, $primary, Assembler::parity);
12337     switch ($cop$$cmpcode) {
12338     case Assembler::notEqual:
12339       parity_disp = target->loc_pos() - (cbuf.insts_size() + 1);
12340       break;
12341     case Assembler::equal: parity_disp = 2; break;  // hop over the next 2-byte jump
12342     default: ShouldNotReachHere();
12343     }
12344     emit_d8(cbuf, parity_disp);
12345     emit_cc(cbuf, $primary, $cop$$cmpcode);
12346     int disp = target->loc_pos() - (cbuf.insts_size() + 1);
12347     emit_d8(cbuf, disp);
12348     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12349     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12350   %}
12351   ins_pipe(pipe_jcc);
12352   ins_pc_relative(1);
12353   ins_short_branch(1);
12354 %}
12355 
12356 // ============================================================================
12357 // inlined locking and unlocking
12358 
// Inlined monitor enter: matches FastLock object box and defines the
// flags.  tmp (an rax-class operand) and scr are scratch temporaries.
// The code is emitted by the Fast_Lock encoding class.
12359 instruct cmpFastLock(rFlagsReg cr,
12360                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12361 %{
12362   match(Set cr (FastLock object box));
12363   effect(TEMP tmp, TEMP scr);
12364
12365   ins_cost(300);
12366   format %{ "fastlock $object,$box,$tmp,$scr" %}
12367   ins_encode(Fast_Lock(object, box, tmp, scr));
12368   ins_pipe(pipe_slow);
12369   ins_pc_relative(1);
12370 %}
12371 
// Inlined monitor exit: matches FastUnlock object box and defines the
// flags.  box is an rax-class operand and tmp is a scratch temporary.
// The code is emitted by the Fast_Unlock encoding class.
12372 instruct cmpFastUnlock(rFlagsReg cr,
12373                        rRegP object, rax_RegP box, rRegP tmp)
12374 %{
12375   match(Set cr (FastUnlock object box));
12376   effect(TEMP tmp);
12377
12378   ins_cost(300);
12379   format %{ "fastunlock $object, $box, $tmp" %}
12380   ins_encode(Fast_Unlock(object, box, tmp));
12381   ins_pipe(pipe_slow);
12382   ins_pc_relative(1);
12383 %}
12384 
12385 
12386 // ============================================================================
12387 // Safepoint Instructions
// Safepoint poll used when the polling page is not "far": matches
// SafePoint with no inputs and kills the flags.  Emits a testl of rax
// against the polling page address, tagged with a poll_type relocation
// (formatted as a rip-relative access).
12388 instruct safePoint_poll(rFlagsReg cr)
12389 %{
12390   predicate(!Assembler::is_polling_page_far());
12391   match(SafePoint);
12392   effect(KILL cr);
12393
12394   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12395             "# Safepoint: poll for GC" %}
12396   ins_cost(125);
12397   ins_encode %{
12398     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12399     __ testl(rax, addr);
12400   %}
12401   ins_pipe(ialu_reg_mem);
12402 %}
12403 
// Safepoint poll used when the polling page is "far": the page address
// arrives in the register operand poll (SafePoint poll).  Records a
// poll_type relocation and then emits testl rax, [poll + 0].  Kills flags.
12404 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12405 %{
12406   predicate(Assembler::is_polling_page_far());
12407   match(SafePoint poll);
12408   effect(KILL cr, USE poll);
12409
12410   format %{ "testl  rax, [$poll]\t"
12411             "# Safepoint: poll for GC" %}
12412   ins_cost(125);
12413   ins_encode %{
12414     __ relocate(relocInfo::poll_type);
12415     __ testl(rax, Address($poll$$Register, 0));
12416   %}
12417   ins_pipe(ialu_reg_mem);
12418 %}
12419 
12420 // ============================================================================
12421 // Procedure Call/Return Instructions
12422 // Call Java Static Instruction
12423 // Note: If this code changes, the corresponding ret_addr_offset() and
12424 //       compute_padding() functions will have to be adjusted.
// Static Java call for nodes that are not method-handle invokes (see the
// predicate; CallStaticJavaHandle covers the other case).  Opcode 0xE8,
// emitted by Java_Static_Call followed by call_epilog; requests an
// instruction alignment of 4.
12425 instruct CallStaticJavaDirect(method meth) %{
12426   match(CallStaticJava);
12427   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12428   effect(USE meth);
12429
12430   ins_cost(300);
12431   format %{ "call,static " %}
12432   opcode(0xE8); /* E8 cd */
12433   ins_encode(Java_Static_Call(meth), call_epilog);
12434   ins_pipe(pipe_slow);
12435   ins_pc_relative(1);
12436   ins_alignment(4);
12437 %}
12438 
12439 // Call Java Static Instruction (method handle version)
12440 // Note: If this code changes, the corresponding ret_addr_offset() and
12441 //       compute_padding() functions will have to be adjusted.
// Static Java call for method-handle invokes (see the predicate).  Same
// call encoding as CallStaticJavaDirect, but bracketed by the preserve_SP
// and restore_SP encoding classes.  The rbp-class operand rbp_mh_SP_save
// is declared but not referenced by the match rule or the effect list.
12442 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12443   match(CallStaticJava);
12444   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12445   effect(USE meth);
12446   // RBP is saved by all callees (for interpreter stack correction).
12447   // We use it here for a similar purpose, in {preserve,restore}_SP.
12448
12449   ins_cost(300);
12450   format %{ "call,static/MethodHandle " %}
12451   opcode(0xE8); /* E8 cd */
12452   ins_encode(preserve_SP,
12453              Java_Static_Call(meth),
12454              restore_SP,
12455              call_epilog);
12456   ins_pipe(pipe_slow);
12457   ins_pc_relative(1);
12458   ins_alignment(4);
12459 %}
12460 
12461 // Call Java Dynamic Instruction: matches CallDynamicJava and encodes it via Java_Dynamic_Call.
12462 // Note: If this code changes, the corresponding ret_addr_offset() and
12463 //       compute_padding() functions will have to be adjusted.
12464 instruct CallDynamicJavaDirect(method meth)
12465 %{
12466   match(CallDynamicJava);
12467   effect(USE meth);
12468   // The format text describes two steps: a movq of Universe::non_oop_word() into rax, then the call.
12469   ins_cost(300);
12470   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12471             "call,dynamic " %}
12472   opcode(0xE8); /* E8 cd: primary opcode made available to the encoding */
12473   ins_encode(Java_Dynamic_Call(meth), call_epilog);  // both enc_classes are defined outside this section
12474   ins_pipe(pipe_slow);
12475   ins_pc_relative(1);
12476   ins_alignment(4);  // NOTE(review): presumably tied to compute_padding() mentioned above -- confirm
12477 %}
12478 
12479 // Call Runtime Instruction: matches CallRuntime and encodes it via Java_To_Runtime.
12480 instruct CallRuntimeDirect(method meth)
12481 %{
12482   match(CallRuntime);
12483   effect(USE meth);
12484   // Unlike the Java call rules in this section, no call_epilog encoding and no ins_alignment are specified.
12485   ins_cost(300);
12486   format %{ "call,runtime " %}
12487   opcode(0xE8); /* E8 cd: primary opcode made available to the encoding */
12488   ins_encode(Java_To_Runtime(meth));  // enc_class defined outside this section
12489   ins_pipe(pipe_slow);
12490   ins_pc_relative(1);
12491 %}
12492 
12493 // Call runtime without safepoint: matches CallLeaf nodes.
12494 instruct CallLeafDirect(method meth)
12495 %{
12496   match(CallLeaf);
12497   effect(USE meth);
12498   // Apart from the matched node and format text, this is the same specification as CallRuntimeDirect.
12499   ins_cost(300);
12500   format %{ "call_leaf,runtime " %}
12501   opcode(0xE8); /* E8 cd: primary opcode made available to the encoding */
12502   ins_encode(Java_To_Runtime(meth));  // enc_class defined outside this section
12503   ins_pipe(pipe_slow);
12504   ins_pc_relative(1);
12505 %}
12506 
12507 // Call runtime without safepoint: matches CallLeafNoFP nodes (presumably the no-floating-point leaf call -- confirm).
12508 instruct CallLeafNoFPDirect(method meth)
12509 %{
12510   match(CallLeafNoFP);
12511   effect(USE meth);
12512   // Apart from the matched node and format text, this is the same specification as CallLeafDirect.
12513   ins_cost(300);
12514   format %{ "call_leaf_nofp,runtime " %}
12515   opcode(0xE8); /* E8 cd: primary opcode made available to the encoding */
12516   ins_encode(Java_To_Runtime(meth));  // enc_class defined outside this section
12517   ins_pipe(pipe_slow);
12518   ins_pc_relative(1);
12519 %}
12520 
12521 // Return Instruction
12522 // Remove the return address & jump to it.
12523 // NOTE(review): an older note here said a nop is always emitted after the ret to leave room for
12524 // safepoint patching, but this definition encodes only OpcP with primary opcode 0xC3 -- confirm.
12525 instruct Ret()
12526 %{
12527   match(Return);
12528   // No operands and no effect clause: the rule consists solely of the primary opcode below.
12529   format %{ "ret" %}
12530   opcode(0xC3);      // primary opcode consumed by OpcP
12531   ins_encode(OpcP);
12532   ins_pipe(pipe_jmp);
12533 %}
12534 
12535 // Tail Call; Jump from runtime stub to Java code.
12536 // Also known as an 'interprocedural jump'.
12537 // Target of jump will eventually return to caller.
12538 // Unlike tailjmpInd below, this rule does not pop the return address before jumping.
12539 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12540 %{
12541   match(TailCall jump_target method_oop);
12542   // method_oop is constrained to the rbx_RegP class and is not referenced by the encoding.
12543   ins_cost(300);
12544   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12545   opcode(0xFF, 0x4); /* Opcode FF /4: primary opcode plus the /4 opcode-extension value */
12546   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));  // presumably REX prefix, opcode byte, then register/extension byte -- confirm against the enc_classes
12547   ins_pipe(pipe_jmp);
12548 %}
12549 
12550 // Tail Jump; remove the return address (popped into rdx); jump to target.
12551 // TailCalljmpInd above leaves the return address around.
12552 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12553 %{
12554   match(TailJump jump_target ex_oop);
12555   // ex_oop is constrained to the rax_RegP class and is not referenced by the encoding.
12556   ins_cost(300);
12557   format %{ "popq    rdx\t# pop return address\n\t"
12558             "jmp     $jump_target" %}
12559   opcode(0xFF, 0x4); /* Opcode FF /4: primary opcode plus the /4 opcode-extension value */
12560   ins_encode(Opcode(0x5a), // popq rdx, emitted ahead of the jump encoding
12561              REX_reg(jump_target), OpcP, reg_opc(jump_target));  // same jump encoding list as TailCalljmpInd
12562   ins_pipe(pipe_jmp);
12563 %}
12564 
12565 // Create exception oop: created by stack-crawling runtime code.
12566 // Created exception is now available to this handler, and is set up
12567 // just prior to jumping to this handler.  No code emitted.
12568 instruct CreateException(rax_RegP ex_oop)
12569 %{
12570   match(Set ex_oop (CreateEx));  // result operand is constrained to the rax_RegP class
12571 
12572   size(0);                       // declared size is zero, consistent with the empty encoding below
12573   // the format text is descriptive only; ins_encode() below lists no encodings
12574   format %{ "# exception oop is in rax; no code emitted" %}
12575   ins_encode();
12576   ins_pipe(empty);
12577 %}
12578 
12579 // Rethrow exception:
12580 // The exception oop will come in the first argument position.
12581 // Then JUMP (not call) to the rethrow stub code.
12582 instruct RethrowException()
12583 %{
12584   match(Rethrow);
12585   // No operands are declared; the whole encoding is delegated to enc_rethrow (defined outside this section).
12586   // the format text below is descriptive only
12587   format %{ "jmp     rethrow_stub" %}
12588   ins_encode(enc_rethrow);
12589   ins_pipe(pipe_jmp);
12590 %}
12591 
12592 
12593 //----------PEEPHOLE RULES-----------------------------------------------------
12594 // These must follow all instruction definitions as they use the names
12595 // defined in the instruction definitions.
12596 //
12597 // peepmatch ( root_instr_name [preceding_instruction]* );
12598 //
12599 // peepconstraint %{
12600 // (instruction_number.operand_name relational_op instruction_number.operand_name
12601 //  [, ...] );
12602 // // instruction numbers are zero-based using left to right order in peepmatch
12603 //
12604 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12605 // // provide an instruction_number.operand_name for each operand that appears
12606 // // in the replacement instruction's match rule
12607 //
12608 // ---------VM FLAGS---------------------------------------------------------
12609 //
12610 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12611 //
12612 // Each peephole rule is given an identifying number starting with zero and
12613 // increasing by one in the order seen by the parser.  An individual peephole
12614 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12615 // on the command-line.
12616 //
12617 // ---------CURRENT LIMITATIONS----------------------------------------------
12618 //
12619 // Only match adjacent instructions in same basic block
12620 // Only equality constraints
12621 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12622 // Only one replacement instruction
12623 //
12624 // ---------EXAMPLE----------------------------------------------------------
12625 //
12626 // // pertinent parts of existing instructions in architecture description
12627 // instruct movI(rRegI dst, rRegI src)
12628 // %{
12629 //   match(Set dst (CopyI src));
12630 // %}
12631 //
12632 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12633 // %{
12634 //   match(Set dst (AddI dst src));
12635 //   effect(KILL cr);
12636 // %}
12637 //
12638 // // Change (inc mov) to lea
12639 // peephole %{
12640 //   // increment preceded by register-register move
12641 //   peepmatch ( incI_rReg movI );
12642 //   // require that the destination register of the increment
12643 //   // match the destination register of the move
12644 //   peepconstraint ( 0.dst == 1.dst );
12645 //   // construct a replacement instruction that sets
12646 //   // the destination to ( move's source register + one )
12647 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12648 // %}
12649 //
12650 
12651 // The peephole rules below are commented out: the implementation no longer uses
12652 // movX instructions since the machine-independent system no longer uses CopyX nodes.
12653 //
12654 // peephole
12655 // %{
12656 //   peepmatch (incI_rReg movI);
12657 //   peepconstraint (0.dst == 1.dst);
12658 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12659 // %}
12660 
12661 // peephole
12662 // %{
12663 //   peepmatch (decI_rReg movI);
12664 //   peepconstraint (0.dst == 1.dst);
12665 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12666 // %}
12667 
12668 // peephole
12669 // %{
12670 //   peepmatch (addI_rReg_imm movI);
12671 //   peepconstraint (0.dst == 1.dst);
12672 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12673 // %}
12674 
12675 // peephole
12676 // %{
12677 //   peepmatch (incL_rReg movL);
12678 //   peepconstraint (0.dst == 1.dst);
12679 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12680 // %}
12681 
12682 // peephole
12683 // %{
12684 //   peepmatch (decL_rReg movL);
12685 //   peepconstraint (0.dst == 1.dst);
12686 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12687 // %}
12688 
12689 // peephole
12690 // %{
12691 //   peepmatch (addL_rReg_imm movL);
12692 //   peepconstraint (0.dst == 1.dst);
12693 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12694 // %}
12695 
12696 // peephole
12697 // %{
12698 //   peepmatch (addP_rReg_imm movP);
12699 //   peepconstraint (0.dst == 1.dst);
12700 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12701 // %}
12702 
12703 // Peephole: change a load of a just-spilled value into only the spill (store).
12704 // Pertinent parts of the two instructions involved, shown as a sketch:
12705 //
12706 //   instruct storeI(memory mem, rRegI src)  %{ match(Set mem (StoreI mem src)); %}
12707 //   instruct loadI(rRegI dst, memory mem)   %{ match(Set dst (LoadI mem)); %}
12708 //
12709 // In the rule below, instruction 0 is the root (loadI) and instruction 1 is the
12710 // storeI that precedes it.  The constraint requires the store's source register
12711 // to equal the load's destination and both to name the same memory operand; the
12712 // pair is then replaced by a single storeI.  1.mem is listed twice because mem
12713 // appears twice in storeI's match rule.
12714 
12715 peephole
12716 %{
12717   peepmatch (loadI storeI);
12718   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
12719   peepreplace (storeI(1.mem 1.mem 1.src));
12720 %}
12721 // Peephole for the loadL/storeL pair: drop a loadL that follows a storeL of the same register to the same memory.
12722 peephole
12723 %{
12724   peepmatch (loadL storeL);                         // 0 = loadL (root), 1 = the storeL preceding it
12725   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // store source == load destination, and same memory operand
12726   peepreplace (storeL(1.mem 1.mem 1.src));          // keep only the store, rebuilt from the original store's operands
12727 %}
12728 
12729 //----------SMARTSPILL RULES---------------------------------------------------
12730 // These must follow all instruction definitions as they use the names
12731 // defined in the instruction definitions.