1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
     // NOTE: every reg_def in this block also passes a fifth argument, a VMReg
     // expression: <reg>->as_VMReg() for the base entry, ->next() of that for the
     // matching *_H entry, and VMRegImpl::Bad() for RFLAGS.
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
     // NOTE(review): in the definitions below RSI and RDI have a register save
     // type of SOC on every platform; SOE appears only as their C convention
     // save type under _WIN64.  The last sentence above looks stale -- confirm.
  69
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88
     // RSI and RDI differ only in their C convention save type: SOE when
     // _WIN64 is defined, SOC otherwise.
  89 #ifdef _WIN64
  90
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96
  97 #else
  98
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104
 105 #endif
 106
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130
 131
 132 // Floating Point Registers
 133
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161
     // XMM6-XMM15 differ only in their C convention save type: SOE when _WIN64
     // is defined (matching the "Windows ABI" note above), SOC otherwise.
 162 #ifdef _WIN64
 163
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193
 194 #else
 195
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225
 226 #endif // _WIN64
 227
     // Condition-code register (see int_flags below): ideal register type 0,
     // encoding 16, and VMRegImpl::Bad() in place of a real VMReg.
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237
     // chunk0: all sixteen general-purpose registers, each followed by its _H half.
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254
     // chunk1: XMM0-XMM15 in numeric order, each followed by its _H half.
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272
     // chunk2: the flags register on its own.
 273 alloc_class chunk2(RFLAGS);
 274
 275
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
     // NOTE: apart from any_reg, none of the multi-register general-purpose
     // classes below lists R12 or R15, even where the class comment only
     // mentions RSP.  R15 has its own singleton class (ptr_r15_reg, the TLS
     // pointer); the reason for leaving out R12 is not stated in this block.
 284
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302
 303 // Class for all pointer registers except RSP
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331
     // Class for all pointer registers except RBP and RSP
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378
 379 // Class for all long registers (except RSP)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443
     // The int classes below list only the base entry of each register (no _H
     // half).
 444 // Class for all int registers (except RSP)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506
 507 // Class for all float registers
     // (base entries only; the _H halves appear in double_reg)
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
     // Short aliases for two Assembler operand-format constants.  RELOC_DISP32 is
     // what the emit_d32_reloc() callers in this block pass as `format`.
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551
     // Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so a
     // MacroAssembler named _masm must be in scope wherever it is used.
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
 613 #ifndef PRODUCT
 614 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 615 {
 616   st->print("INT3");
 617 }
 618 #endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
 650 // EMIT_D16()
 651 void emit_d16(CodeBuffer &cbuf, int d16) {
 652   cbuf.insts()->emit_int16(d16);
 653 }
 654 
 655 // EMIT_D32()
 656 void emit_d32(CodeBuffer &cbuf, int d32) {
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // EMIT_D64()
 661 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 662   cbuf.insts()->emit_int64(d64);
 663 }
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
     // Emit the addressing bytes for a register + memory operand: the R/M byte,
     // an optional SIB byte and an optional displacement.  No opcode or prefix
     // byte is emitted here, and only the low three bits of reg, base and index
     // are encoded (presumably the caller deals with REX -- confirm).
     //   reg          register number for the middle field of the R/M byte
     //   base         base register number; -1 flags an absolute address
     //   index/scale  index == 0x4 together with scale == 0 means "no index"
     //   disp         displacement; 8 bits when in [-0x80, 0x80), else 32 bits
     //   disp_is_oop  asserted to be false, so the oop-relocation arms below
     //                can only run when the assert is compiled out
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739
 740   // There is no index & no scale, use form without SIB byte
     // (RSP and R12 are excluded as base: their low three bits are 0x4, the rm
     // value that the SIB forms below use to announce a SIB byte.)
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
     // (their low three bits are 0x5, which with mode 0x0 is the absolute-address
     // form emitted further down.)
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
     // NOTE(review): the base == -1 test is only reached when disp is neither 0
     // nor representable in a signed byte; smaller displacements take one of the
     // two arms above even for base == -1 -- confirm that this is intended.
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
     // SIB byte: scale, index, base
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
     // NOTE(review): both arms of this test emit the same two bytes, because
     // baseenc is 0x4 whenever base == 0x04.
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 801 {
 802   if (dstenc != srcenc) {
 803     if (dstenc < 8) {
 804       if (srcenc >= 8) {
 805         emit_opcode(cbuf, Assembler::REX_B);
 806         srcenc -= 8;
 807       }
 808     } else {
 809       if (srcenc < 8) {
 810         emit_opcode(cbuf, Assembler::REX_R);
 811       } else {
 812         emit_opcode(cbuf, Assembler::REX_RB);
 813         srcenc -= 8;
 814       }
 815       dstenc -= 8;
 816     }
 817 
 818     emit_opcode(cbuf, 0x8B);
 819     emit_rm(cbuf, 0x3, dstenc, srcenc);
 820   }
 821 }
 822 
 823 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 824   if( dst_encoding == src_encoding ) {
 825     // reg-reg copy, use an empty encoding
 826   } else {
 827     MacroAssembler _masm(&cbuf);
 828 
 829     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 830   }
 831 }
 832 
 833 // This could be in MacroAssembler but it's fairly C2 specific
 834 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 835   Label exit;
 836   __ jccb(Assembler::noParity, exit);
 837   __ pushf();
 838   __ andq(Address(rsp, 0), 0xffffff2b);
 839   __ popf();
 840   __ bind(exit);
 841   __ nop(); // (target for branch to avoid branch to branch)
 842 }
 843 
 844 
 845 //=============================================================================
 846 const bool Matcher::constant_table_absolute_addressing = true;
 847 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 848 
 849 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 850   // Empty encoding
 851 }
 852 
 853 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 854   return 0;
 855 }
 856 
 857 #ifndef PRODUCT
 858 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 859   st->print("# MachConstantBaseNode (empty encoding)");
 860 }
 861 #endif
 862 
 863 
 864 //=============================================================================
 865 #ifndef PRODUCT
 866 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 867 {
 868   Compile* C = ra_->C;
 869 
 870   int framesize = C->frame_slots() << LogBytesPerInt;
 871   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 872   // Remove wordSize for return adr already pushed
 873   // and another for the RBP we are going to save
 874   framesize -= 2*wordSize;
 875   bool need_nop = true;
 876 
 877   // Calls to C2R adapters often do not accept exceptional returns.
 878   // We require that their callers must bang for them.  But be
 879   // careful, because some VM calls (such as call site linkage) can
 880   // use several kilobytes of stack.  But the stack safety zone should
 881   // account for that.  See bugs 4446381, 4468289, 4497237.
 882   if (C->need_stack_bang(framesize)) {
 883     st->print_cr("# stack bang"); st->print("\t");
 884     need_nop = false;
 885   }
 886   st->print_cr("pushq   rbp"); st->print("\t");
 887 
 888   if (VerifyStackAtCalls) {
 889     // Majik cookie to verify stack depth
 890     st->print_cr("pushq   0xffffffffbadb100d"
 891                   "\t# Majik cookie for stack depth check");
 892     st->print("\t");
 893     framesize -= wordSize; // Remove 2 for cookie
 894     need_nop = false;
 895   }
 896 
 897   if (framesize) {
 898     st->print("subq    rsp, #%d\t# Create frame", framesize);
 899     if (framesize < 0x80 && need_nop) {
 900       st->print("\n\tnop\t# nop for patch_verified_entry");
 901     }
 902   }
 903 }
 904 #endif
 905 
 906 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 907 {
 908   Compile* C = ra_->C;
 909 
 910   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 911   // NativeJump::patch_verified_entry will be able to patch out the entry
 912   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 913   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 914   // 6 bytes. So if we don't do the fldcw or the push then we must
 915   // use the 6 byte frame allocation even if we have no frame. :-(
 916   // If method sets FPU control word do it now
 917 
 918   int framesize = C->frame_slots() << LogBytesPerInt;
 919   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 920   // Remove wordSize for return adr already pushed
 921   // and another for the RBP we are going to save
 922   framesize -= 2*wordSize;
 923   bool need_nop = true;
 924 
 925   // Calls to C2R adapters often do not accept exceptional returns.
 926   // We require that their callers must bang for them.  But be
 927   // careful, because some VM calls (such as call site linkage) can
 928   // use several kilobytes of stack.  But the stack safety zone should
 929   // account for that.  See bugs 4446381, 4468289, 4497237.
 930   if (C->need_stack_bang(framesize)) {
 931     MacroAssembler masm(&cbuf);
 932     masm.generate_stack_overflow_check(framesize);
 933     need_nop = false;
 934   }
 935 
 936   // We always push rbp so that on return to interpreter rbp will be
 937   // restored correctly and we can correct the stack.
 938   emit_opcode(cbuf, 0x50 | RBP_enc);
 939 
 940   if (VerifyStackAtCalls) {
 941     // Majik cookie to verify stack depth
 942     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 943     emit_d32(cbuf, 0xbadb100d);
 944     framesize -= wordSize; // Remove 2 for cookie
 945     need_nop = false;
 946   }
 947 
 948   if (framesize) {
 949     emit_opcode(cbuf, Assembler::REX_W);
 950     if (framesize < 0x80) {
 951       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 952       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 953       emit_d8(cbuf, framesize);
 954       if (need_nop) {
 955         emit_opcode(cbuf, 0x90); // nop
 956       }
 957     } else {
 958       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 959       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 960       emit_d32(cbuf, framesize);
 961     }
 962   }
 963 
 964   C->set_frame_complete(cbuf.insts_size());
 965 
 966 #ifdef ASSERT
 967   if (VerifyStackAtCalls) {
 968     Label L;
 969     MacroAssembler masm(&cbuf);
 970     masm.push(rax);
 971     masm.mov(rax, rsp);
 972     masm.andptr(rax, StackAlignmentInBytes-1);
 973     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 974     masm.pop(rax);
 975     masm.jcc(Assembler::equal, L);
 976     masm.stop("Stack is not properly aligned!");
 977     masm.bind(L);
 978   }
 979 #endif
 980 }
 981 
 982 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 983 {
 984   return MachNode::size(ra_); // too many variables; just compute it
 985                               // the hard way
 986 }
 987 
 988 int MachPrologNode::reloc() const
 989 {
 990   return 0; // a large enough number
 991 }
 992 
 993 //=============================================================================
 994 #ifndef PRODUCT
 995 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 996 {
 997   Compile* C = ra_->C;
 998   int framesize = C->frame_slots() << LogBytesPerInt;
 999   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1000   // Remove word for return adr already pushed
1001   // and RBP
1002   framesize -= 2*wordSize;
1003 
1004   if (framesize) {
1005     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
1006     st->print("\t");
1007   }
1008 
1009   st->print_cr("popq   rbp");
1010   if (do_polling() && C->is_method_compilation()) {
1011     st->print("\t");
1012     if (Assembler::is_polling_page_far()) {
1013       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
1014                    "testl  rax, [rscratch1]\t"
1015                    "# Safepoint: poll for GC");
1016     } else {
1017       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
1018                    "# Safepoint: poll for GC");
1019     }
1020   }
1021 }
1022 #endif
1023 
1024 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1025 {
1026   Compile* C = ra_->C;
1027   int framesize = C->frame_slots() << LogBytesPerInt;
1028   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1029   // Remove word for return adr already pushed
1030   // and RBP
1031   framesize -= 2*wordSize;
1032 
1033   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1034 
1035   if (framesize) {
1036     emit_opcode(cbuf, Assembler::REX_W);
1037     if (framesize < 0x80) {
1038       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1039       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1040       emit_d8(cbuf, framesize);
1041     } else {
1042       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1043       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1044       emit_d32(cbuf, framesize);
1045     }
1046   }
1047 
1048   // popq rbp
1049   emit_opcode(cbuf, 0x58 | RBP_enc);
1050 
1051   if (do_polling() && C->is_method_compilation()) {
1052     MacroAssembler _masm(&cbuf);
1053     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1054     if (Assembler::is_polling_page_far()) {
1055       __ lea(rscratch1, polling_page);
1056       __ relocate(relocInfo::poll_return_type);
1057       __ testl(rax, Address(rscratch1, 0));
1058     } else {
1059       __ testl(rax, polling_page);
1060     }
1061   }
1062 }
1063 
1064 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1065 {
1066   return MachNode::size(ra_); // too many variables; just compute it
1067                               // the hard way
1068 }
1069 
1070 int MachEpilogNode::reloc() const
1071 {
1072   return 2; // a large enough number
1073 }
1074 
1075 const Pipeline* MachEpilogNode::pipeline() const
1076 {
1077   return MachNode::pipeline_class();
1078 }
1079 
1080 int MachEpilogNode::safepoint_offset() const
1081 {
1082   return 0;
1083 }
1084 
1085 //=============================================================================
1086 
// Register classes used when emitting spill copies; rc_class() maps an
// OptoReg::Name onto one of these.
enum RC {
  rc_bad,    // not a valid OptoReg
  rc_int,    // general-purpose register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1093 
1094 static enum RC rc_class(OptoReg::Name reg)
1095 {
1096   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1097 
1098   if (OptoReg::is_stack(reg)) return rc_stack;
1099 
1100   VMReg r = OptoReg::as_VMReg(reg);
1101 
1102   if (r->is_Register()) return rc_int;
1103 
1104   assert(r->is_XMMRegister(), "must be");
1105   return rc_float;
1106 }
1107 
1108 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1109                                        PhaseRegAlloc* ra_,
1110                                        bool do_size,
1111                                        outputStream* st) const
1112 {
1113 
1114   // Get registers to move
1115   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1116   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1117   OptoReg::Name dst_second = ra_->get_reg_second(this);
1118   OptoReg::Name dst_first = ra_->get_reg_first(this);
1119 
1120   enum RC src_second_rc = rc_class(src_second);
1121   enum RC src_first_rc = rc_class(src_first);
1122   enum RC dst_second_rc = rc_class(dst_second);
1123   enum RC dst_first_rc = rc_class(dst_first);
1124 
1125   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1126          "must move at least 1 register" );
1127 
1128   if (src_first == dst_first && src_second == dst_second) {
1129     // Self copy, no move
1130     return 0;
1131   } else if (src_first_rc == rc_stack) {
1132     // mem ->
1133     if (dst_first_rc == rc_stack) {
1134       // mem -> mem
1135       assert(src_second != dst_first, "overlap");
1136       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1137           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1138         // 64-bit
1139         int src_offset = ra_->reg2offset(src_first);
1140         int dst_offset = ra_->reg2offset(dst_first);
1141         if (cbuf) {
1142           emit_opcode(*cbuf, 0xFF);
1143           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1144 
1145           emit_opcode(*cbuf, 0x8F);
1146           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1147 
1148 #ifndef PRODUCT
1149         } else if (!do_size) {
1150           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1151                      "popq    [rsp + #%d]",
1152                      src_offset,
1153                      dst_offset);
1154 #endif
1155         }
1156         return
1157           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1158           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1159       } else {
1160         // 32-bit
1161         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1162         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1163         // No pushl/popl, so:
1164         int src_offset = ra_->reg2offset(src_first);
1165         int dst_offset = ra_->reg2offset(dst_first);
1166         if (cbuf) {
1167           emit_opcode(*cbuf, Assembler::REX_W);
1168           emit_opcode(*cbuf, 0x89);
1169           emit_opcode(*cbuf, 0x44);
1170           emit_opcode(*cbuf, 0x24);
1171           emit_opcode(*cbuf, 0xF8);
1172 
1173           emit_opcode(*cbuf, 0x8B);
1174           encode_RegMem(*cbuf,
1175                         RAX_enc,
1176                         RSP_enc, 0x4, 0, src_offset,
1177                         false);
1178 
1179           emit_opcode(*cbuf, 0x89);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, dst_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, Assembler::REX_W);
1186           emit_opcode(*cbuf, 0x8B);
1187           emit_opcode(*cbuf, 0x44);
1188           emit_opcode(*cbuf, 0x24);
1189           emit_opcode(*cbuf, 0xF8);
1190 
1191 #ifndef PRODUCT
1192         } else if (!do_size) {
1193           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1194                      "movl    rax, [rsp + #%d]\n\t"
1195                      "movl    [rsp + #%d], rax\n\t"
1196                      "movq    rax, [rsp - #8]",
1197                      src_offset,
1198                      dst_offset);
1199 #endif
1200         }
1201         return
1202           5 + // movq
1203           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1204           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1205           5; // movq
1206       }
1207     } else if (dst_first_rc == rc_int) {
1208       // mem -> gpr
1209       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1210           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1211         // 64-bit
1212         int offset = ra_->reg2offset(src_first);
1213         if (cbuf) {
1214           if (Matcher::_regEncode[dst_first] < 8) {
1215             emit_opcode(*cbuf, Assembler::REX_W);
1216           } else {
1217             emit_opcode(*cbuf, Assembler::REX_WR);
1218           }
1219           emit_opcode(*cbuf, 0x8B);
1220           encode_RegMem(*cbuf,
1221                         Matcher::_regEncode[dst_first],
1222                         RSP_enc, 0x4, 0, offset,
1223                         false);
1224 #ifndef PRODUCT
1225         } else if (!do_size) {
1226           st->print("movq    %s, [rsp + #%d]\t# spill",
1227                      Matcher::regName[dst_first],
1228                      offset);
1229 #endif
1230         }
1231         return
1232           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1233       } else {
1234         // 32-bit
1235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1237         int offset = ra_->reg2offset(src_first);
1238         if (cbuf) {
1239           if (Matcher::_regEncode[dst_first] >= 8) {
1240             emit_opcode(*cbuf, Assembler::REX_R);
1241           }
1242           emit_opcode(*cbuf, 0x8B);
1243           encode_RegMem(*cbuf,
1244                         Matcher::_regEncode[dst_first],
1245                         RSP_enc, 0x4, 0, offset,
1246                         false);
1247 #ifndef PRODUCT
1248         } else if (!do_size) {
1249           st->print("movl    %s, [rsp + #%d]\t# spill",
1250                      Matcher::regName[dst_first],
1251                      offset);
1252 #endif
1253         }
1254         return
1255           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1256           ((Matcher::_regEncode[dst_first] < 8)
1257            ? 3
1258            : 4); // REX
1259       }
1260     } else if (dst_first_rc == rc_float) {
1261       // mem-> xmm
1262       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1263           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1264         // 64-bit
1265         int offset = ra_->reg2offset(src_first);
1266         if (cbuf) {
1267           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1268           if (Matcher::_regEncode[dst_first] >= 8) {
1269             emit_opcode(*cbuf, Assembler::REX_R);
1270           }
1271           emit_opcode(*cbuf, 0x0F);
1272           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1273           encode_RegMem(*cbuf,
1274                         Matcher::_regEncode[dst_first],
1275                         RSP_enc, 0x4, 0, offset,
1276                         false);
1277 #ifndef PRODUCT
1278         } else if (!do_size) {
1279           st->print("%s  %s, [rsp + #%d]\t# spill",
1280                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1281                      Matcher::regName[dst_first],
1282                      offset);
1283 #endif
1284         }
1285         return
1286           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1287           ((Matcher::_regEncode[dst_first] < 8)
1288            ? 5
1289            : 6); // REX
1290       } else {
1291         // 32-bit
1292         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1293         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1294         int offset = ra_->reg2offset(src_first);
1295         if (cbuf) {
1296           emit_opcode(*cbuf, 0xF3);
1297           if (Matcher::_regEncode[dst_first] >= 8) {
1298             emit_opcode(*cbuf, Assembler::REX_R);
1299           }
1300           emit_opcode(*cbuf, 0x0F);
1301           emit_opcode(*cbuf, 0x10);
1302           encode_RegMem(*cbuf,
1303                         Matcher::_regEncode[dst_first],
1304                         RSP_enc, 0x4, 0, offset,
1305                         false);
1306 #ifndef PRODUCT
1307         } else if (!do_size) {
1308           st->print("movss   %s, [rsp + #%d]\t# spill",
1309                      Matcher::regName[dst_first],
1310                      offset);
1311 #endif
1312         }
1313         return
1314           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1315           ((Matcher::_regEncode[dst_first] < 8)
1316            ? 5
1317            : 6); // REX
1318       }
1319     }
1320   } else if (src_first_rc == rc_int) {
1321     // gpr ->
1322     if (dst_first_rc == rc_stack) {
1323       // gpr -> mem
1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1326         // 64-bit
1327         int offset = ra_->reg2offset(dst_first);
1328         if (cbuf) {
1329           if (Matcher::_regEncode[src_first] < 8) {
1330             emit_opcode(*cbuf, Assembler::REX_W);
1331           } else {
1332             emit_opcode(*cbuf, Assembler::REX_WR);
1333           }
1334           emit_opcode(*cbuf, 0x89);
1335           encode_RegMem(*cbuf,
1336                         Matcher::_regEncode[src_first],
1337                         RSP_enc, 0x4, 0, offset,
1338                         false);
1339 #ifndef PRODUCT
1340         } else if (!do_size) {
1341           st->print("movq    [rsp + #%d], %s\t# spill",
1342                      offset,
1343                      Matcher::regName[src_first]);
1344 #endif
1345         }
1346         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1347       } else {
1348         // 32-bit
1349         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1350         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1351         int offset = ra_->reg2offset(dst_first);
1352         if (cbuf) {
1353           if (Matcher::_regEncode[src_first] >= 8) {
1354             emit_opcode(*cbuf, Assembler::REX_R);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movl    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return
1369           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1370           ((Matcher::_regEncode[src_first] < 8)
1371            ? 3
1372            : 4); // REX
1373       }
1374     } else if (dst_first_rc == rc_int) {
1375       // gpr -> gpr
1376       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1377           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1378         // 64-bit
1379         if (cbuf) {
1380           if (Matcher::_regEncode[dst_first] < 8) {
1381             if (Matcher::_regEncode[src_first] < 8) {
1382               emit_opcode(*cbuf, Assembler::REX_W);
1383             } else {
1384               emit_opcode(*cbuf, Assembler::REX_WB);
1385             }
1386           } else {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_WR);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WRB);
1391             }
1392           }
1393           emit_opcode(*cbuf, 0x8B);
1394           emit_rm(*cbuf, 0x3,
1395                   Matcher::_regEncode[dst_first] & 7,
1396                   Matcher::_regEncode[src_first] & 7);
1397 #ifndef PRODUCT
1398         } else if (!do_size) {
1399           st->print("movq    %s, %s\t# spill",
1400                      Matcher::regName[dst_first],
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404         return 3; // REX
1405       } else {
1406         // 32-bit
1407         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1408         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1409         if (cbuf) {
1410           if (Matcher::_regEncode[dst_first] < 8) {
1411             if (Matcher::_regEncode[src_first] >= 8) {
1412               emit_opcode(*cbuf, Assembler::REX_B);
1413             }
1414           } else {
1415             if (Matcher::_regEncode[src_first] < 8) {
1416               emit_opcode(*cbuf, Assembler::REX_R);
1417             } else {
1418               emit_opcode(*cbuf, Assembler::REX_RB);
1419             }
1420           }
1421           emit_opcode(*cbuf, 0x8B);
1422           emit_rm(*cbuf, 0x3,
1423                   Matcher::_regEncode[dst_first] & 7,
1424                   Matcher::_regEncode[src_first] & 7);
1425 #ifndef PRODUCT
1426         } else if (!do_size) {
1427           st->print("movl    %s, %s\t# spill",
1428                      Matcher::regName[dst_first],
1429                      Matcher::regName[src_first]);
1430 #endif
1431         }
1432         return
1433           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1434           ? 2
1435           : 3; // REX
1436       }
1437     } else if (dst_first_rc == rc_float) {
1438       // gpr -> xmm
1439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1441         // 64-bit
1442         if (cbuf) {
1443           emit_opcode(*cbuf, 0x66);
1444           if (Matcher::_regEncode[dst_first] < 8) {
1445             if (Matcher::_regEncode[src_first] < 8) {
1446               emit_opcode(*cbuf, Assembler::REX_W);
1447             } else {
1448               emit_opcode(*cbuf, Assembler::REX_WB);
1449             }
1450           } else {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_WR);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WRB);
1455             }
1456           }
1457           emit_opcode(*cbuf, 0x0F);
1458           emit_opcode(*cbuf, 0x6E);
1459           emit_rm(*cbuf, 0x3,
1460                   Matcher::_regEncode[dst_first] & 7,
1461                   Matcher::_regEncode[src_first] & 7);
1462 #ifndef PRODUCT
1463         } else if (!do_size) {
1464           st->print("movdq   %s, %s\t# spill",
1465                      Matcher::regName[dst_first],
1466                      Matcher::regName[src_first]);
1467 #endif
1468         }
1469         return 5; // REX
1470       } else {
1471         // 32-bit
1472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1474         if (cbuf) {
1475           emit_opcode(*cbuf, 0x66);
1476           if (Matcher::_regEncode[dst_first] < 8) {
1477             if (Matcher::_regEncode[src_first] >= 8) {
1478               emit_opcode(*cbuf, Assembler::REX_B);
1479             }
1480           } else {
1481             if (Matcher::_regEncode[src_first] < 8) {
1482               emit_opcode(*cbuf, Assembler::REX_R);
1483             } else {
1484               emit_opcode(*cbuf, Assembler::REX_RB);
1485             }
1486           }
1487           emit_opcode(*cbuf, 0x0F);
1488           emit_opcode(*cbuf, 0x6E);
1489           emit_rm(*cbuf, 0x3,
1490                   Matcher::_regEncode[dst_first] & 7,
1491                   Matcher::_regEncode[src_first] & 7);
1492 #ifndef PRODUCT
1493         } else if (!do_size) {
1494           st->print("movdl   %s, %s\t# spill",
1495                      Matcher::regName[dst_first],
1496                      Matcher::regName[src_first]);
1497 #endif
1498         }
1499         return
1500           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1501           ? 4
1502           : 5; // REX
1503       }
1504     }
1505   } else if (src_first_rc == rc_float) {
1506     // xmm ->
1507     if (dst_first_rc == rc_stack) {
1508       // xmm -> mem
1509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1511         // 64-bit
1512         int offset = ra_->reg2offset(dst_first);
1513         if (cbuf) {
1514           emit_opcode(*cbuf, 0xF2);
1515           if (Matcher::_regEncode[src_first] >= 8) {
1516               emit_opcode(*cbuf, Assembler::REX_R);
1517           }
1518           emit_opcode(*cbuf, 0x0F);
1519           emit_opcode(*cbuf, 0x11);
1520           encode_RegMem(*cbuf,
1521                         Matcher::_regEncode[src_first],
1522                         RSP_enc, 0x4, 0, offset,
1523                         false);
1524 #ifndef PRODUCT
1525         } else if (!do_size) {
1526           st->print("movsd   [rsp + #%d], %s\t# spill",
1527                      offset,
1528                      Matcher::regName[src_first]);
1529 #endif
1530         }
1531         return
1532           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1533           ((Matcher::_regEncode[src_first] < 8)
1534            ? 5
1535            : 6); // REX
1536       } else {
1537         // 32-bit
1538         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1539         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1540         int offset = ra_->reg2offset(dst_first);
1541         if (cbuf) {
1542           emit_opcode(*cbuf, 0xF3);
1543           if (Matcher::_regEncode[src_first] >= 8) {
1544               emit_opcode(*cbuf, Assembler::REX_R);
1545           }
1546           emit_opcode(*cbuf, 0x0F);
1547           emit_opcode(*cbuf, 0x11);
1548           encode_RegMem(*cbuf,
1549                         Matcher::_regEncode[src_first],
1550                         RSP_enc, 0x4, 0, offset,
1551                         false);
1552 #ifndef PRODUCT
1553         } else if (!do_size) {
1554           st->print("movss   [rsp + #%d], %s\t# spill",
1555                      offset,
1556                      Matcher::regName[src_first]);
1557 #endif
1558         }
1559         return
1560           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1561           ((Matcher::_regEncode[src_first] < 8)
1562            ? 5
1563            : 6); // REX
1564       }
1565     } else if (dst_first_rc == rc_int) {
1566       // xmm -> gpr
1567       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1568           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1569         // 64-bit
1570         if (cbuf) {
1571           emit_opcode(*cbuf, 0x66);
1572           if (Matcher::_regEncode[dst_first] < 8) {
1573             if (Matcher::_regEncode[src_first] < 8) {
1574               emit_opcode(*cbuf, Assembler::REX_W);
1575             } else {
1576               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1577             }
1578           } else {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WRB);
1583             }
1584           }
1585           emit_opcode(*cbuf, 0x0F);
1586           emit_opcode(*cbuf, 0x7E);
1587           emit_rm(*cbuf, 0x3,
1588                   Matcher::_regEncode[src_first] & 7,
1589                   Matcher::_regEncode[dst_first] & 7);
1590 #ifndef PRODUCT
1591         } else if (!do_size) {
1592           st->print("movdq   %s, %s\t# spill",
1593                      Matcher::regName[dst_first],
1594                      Matcher::regName[src_first]);
1595 #endif
1596         }
1597         return 5; // REX
1598       } else {
1599         // 32-bit
1600         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1601         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1602         if (cbuf) {
1603           emit_opcode(*cbuf, 0x66);
1604           if (Matcher::_regEncode[dst_first] < 8) {
1605             if (Matcher::_regEncode[src_first] >= 8) {
1606               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1607             }
1608           } else {
1609             if (Matcher::_regEncode[src_first] < 8) {
1610               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1611             } else {
1612               emit_opcode(*cbuf, Assembler::REX_RB);
1613             }
1614           }
1615           emit_opcode(*cbuf, 0x0F);
1616           emit_opcode(*cbuf, 0x7E);
1617           emit_rm(*cbuf, 0x3,
1618                   Matcher::_regEncode[src_first] & 7,
1619                   Matcher::_regEncode[dst_first] & 7);
1620 #ifndef PRODUCT
1621         } else if (!do_size) {
1622           st->print("movdl   %s, %s\t# spill",
1623                      Matcher::regName[dst_first],
1624                      Matcher::regName[src_first]);
1625 #endif
1626         }
1627         return
1628           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1629           ? 4
1630           : 5; // REX
1631       }
1632     } else if (dst_first_rc == rc_float) {
1633       // xmm -> xmm
1634       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1635           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1636         // 64-bit
1637         if (cbuf) {
1638           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1639           if (Matcher::_regEncode[dst_first] < 8) {
1640             if (Matcher::_regEncode[src_first] >= 8) {
1641               emit_opcode(*cbuf, Assembler::REX_B);
1642             }
1643           } else {
1644             if (Matcher::_regEncode[src_first] < 8) {
1645               emit_opcode(*cbuf, Assembler::REX_R);
1646             } else {
1647               emit_opcode(*cbuf, Assembler::REX_RB);
1648             }
1649           }
1650           emit_opcode(*cbuf, 0x0F);
1651           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1652           emit_rm(*cbuf, 0x3,
1653                   Matcher::_regEncode[dst_first] & 7,
1654                   Matcher::_regEncode[src_first] & 7);
1655 #ifndef PRODUCT
1656         } else if (!do_size) {
1657           st->print("%s  %s, %s\t# spill",
1658                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1659                      Matcher::regName[dst_first],
1660                      Matcher::regName[src_first]);
1661 #endif
1662         }
1663         return
1664           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1665           ? 4
1666           : 5; // REX
1667       } else {
1668         // 32-bit
1669         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1670         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1671         if (cbuf) {
1672           if (!UseXmmRegToRegMoveAll)
1673             emit_opcode(*cbuf, 0xF3);
1674           if (Matcher::_regEncode[dst_first] < 8) {
1675             if (Matcher::_regEncode[src_first] >= 8) {
1676               emit_opcode(*cbuf, Assembler::REX_B);
1677             }
1678           } else {
1679             if (Matcher::_regEncode[src_first] < 8) {
1680               emit_opcode(*cbuf, Assembler::REX_R);
1681             } else {
1682               emit_opcode(*cbuf, Assembler::REX_RB);
1683             }
1684           }
1685           emit_opcode(*cbuf, 0x0F);
1686           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1687           emit_rm(*cbuf, 0x3,
1688                   Matcher::_regEncode[dst_first] & 7,
1689                   Matcher::_regEncode[src_first] & 7);
1690 #ifndef PRODUCT
1691         } else if (!do_size) {
1692           st->print("%s  %s, %s\t# spill",
1693                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1694                      Matcher::regName[dst_first],
1695                      Matcher::regName[src_first]);
1696 #endif
1697         }
1698         return
1699           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1700           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1701           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1702       }
1703     }
1704   }
1705 
1706   assert(0," foo ");
1707   Unimplemented();
1708 
1709   return 0;
1710 }
1711 
1712 #ifndef PRODUCT
1713 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1714 {
1715   implementation(NULL, ra_, false, st);
1716 }
1717 #endif
1718 
1719 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1720 {
1721   implementation(&cbuf, ra_, false, NULL);
1722 }
1723 
1724 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1725 {
1726   return implementation(NULL, ra_, true, NULL);
1727 }
1728 
1729 //=============================================================================
1730 #ifndef PRODUCT
1731 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1732 {
1733   st->print("nop \t# %d bytes pad for loops and calls", _count);
1734 }
1735 #endif
1736 
1737 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1738 {
1739   MacroAssembler _masm(&cbuf);
1740   __ nop(_count);
1741 }
1742 
1743 uint MachNopNode::size(PhaseRegAlloc*) const
1744 {
1745   return _count;
1746 }
1747 
1748 
1749 //=============================================================================
1750 #ifndef PRODUCT
1751 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1752 {
1753   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1754   int reg = ra_->get_reg_first(this);
1755   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1756             Matcher::regName[reg], offset);
1757 }
1758 #endif
1759 
1760 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1761 {
1762   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1763   int reg = ra_->get_encode(this);
1764   if (offset >= 0x80) {
1765     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1766     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1767     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1768     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1769     emit_d32(cbuf, offset);
1770   } else {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d8(cbuf, offset);
1776   }
1777 }
1778 
1779 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1780 {
1781   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1782   return (offset < 0x80) ? 5 : 8; // REX
1783 }
1784 
1785 //=============================================================================
1786 
1787 // emit call stub, compiled java to interpreter
1788 void emit_java_to_interp(CodeBuffer& cbuf)
1789 {
1790   // Stub is fixed up when the corresponding call is converted from
1791   // calling compiled code to calling interpreted code.
1792   // movq rbx, 0
1793   // jmp -5 # to self
1794 
1795   address mark = cbuf.insts_mark();  // get mark within main instrs section
1796 
1797   // Note that the code buffer's insts_mark is always relative to insts.
1798   // That's why we must use the macroassembler to generate a stub.
1799   MacroAssembler _masm(&cbuf);
1800 
1801   address base =
1802   __ start_a_stub(Compile::MAX_stubs_size);
1803   if (base == NULL)  return;  // CodeBuffer::expand failed
1804   // static stub relocation stores the instruction address of the call
1805   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1806   // static stub relocation also tags the methodOop in the code-stream.
1807   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1808   // This is recognized as unresolved by relocs/nativeinst/ic code
1809   __ jump(RuntimeAddress(__ pc()));
1810 
1811   // Update current stubs pointer and restore insts_end.
1812   __ end_a_stub();
1813 }
1814 
1815 // size of call stub, compiled java to interpretor
1816 uint size_java_to_interp()
1817 {
1818   return 15;  // movq (1+1+8); jmp (1+4)
1819 }
1820 
1821 // relocation entries for call stub, compiled java to interpretor
1822 uint reloc_java_to_interp()
1823 {
1824   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1825 }
1826 
1827 //=============================================================================
1828 #ifndef PRODUCT
1829 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1830 {
1831   if (UseCompressedOops) {
1832     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1833     if (Universe::narrow_oop_shift() != 0) {
1834       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1835     }
1836     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1837   } else {
1838     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1839                  "# Inline cache check");
1840   }
1841   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1842   st->print_cr("\tnop\t# nops to align entry point");
1843 }
1844 #endif
1845 
1846 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1847 {
1848   MacroAssembler masm(&cbuf);
1849   uint insts_size = cbuf.insts_size();
1850   if (UseCompressedOops) {
1851     masm.load_klass(rscratch1, j_rarg0);
1852     masm.cmpptr(rax, rscratch1);
1853   } else {
1854     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1855   }
1856 
1857   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1858 
1859   /* WARNING these NOPs are critical so that verified entry point is properly
1860      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1861   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1862   if (OptoBreakpoint) {
1863     // Leave space for int3
1864     nops_cnt -= 1;
1865   }
1866   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1867   if (nops_cnt > 0)
1868     masm.nop(nops_cnt);
1869 }
1870 
1871 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1872 {
1873   return MachNode::size(ra_); // too many variables; just compute it
1874                               // the hard way
1875 }
1876 
1877 
1878 //=============================================================================
1879 uint size_exception_handler()
1880 {
1881   // NativeCall instruction size is the same as NativeJump.
1882   // Note that this value is also credited (in output.cpp) to
1883   // the size of the code section.
1884   return NativeJump::instruction_size;
1885 }
1886 
1887 // Emit exception handler code.
1888 int emit_exception_handler(CodeBuffer& cbuf)
1889 {
1890 
1891   // Note that the code buffer's insts_mark is always relative to insts.
1892   // That's why we must use the macroassembler to generate a handler.
1893   MacroAssembler _masm(&cbuf);
1894   address base =
1895   __ start_a_stub(size_exception_handler());
1896   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1897   int offset = __ offset();
1898   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1899   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1900   __ end_a_stub();
1901   return offset;
1902 }
1903 
1904 uint size_deopt_handler()
1905 {
1906   // three 5 byte instructions
1907   return 15;
1908 }
1909 
1910 // Emit deopt handler code.
1911 int emit_deopt_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's insts_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_deopt_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   address the_pc = (address) __ pc();
1922   Label next;
1923   // push a "the_pc" on the stack without destroying any registers
1924   // as they all may be live.
1925 
1926   // push address of "next"
1927   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1928   __ bind(next);
1929   // adjust it so it matches "the_pc"
1930   __ subptr(Address(rsp, 0), __ offset() - offset);
1931   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1932   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1933   __ end_a_stub();
1934   return offset;
1935 }
1936 
1937 
1938 const bool Matcher::match_rule_supported(int opcode) {
1939   if (!has_match_rule(opcode))
1940     return false;
1941 
1942   return true;  // Per default match rules are supported.
1943 }
1944 
1945 int Matcher::regnum_to_fpu_offset(int regnum)
1946 {
1947   return regnum - 32; // The FP registers are in the second chunk
1948 }
1949 
1950 // This is UltraSparc specific, true just means we have fast l2f conversion
1951 const bool Matcher::convL2FSupported(void) {
1952   return true;
1953 }
1954 
1955 // Vector width in bytes
1956 const uint Matcher::vector_width_in_bytes(void) {
1957   return 8;
1958 }
1959 
1960 // Vector ideal reg
1961 const uint Matcher::vector_ideal_reg(void) {
1962   return Op_RegD;
1963 }
1964 
1965 // Is this branch offset short enough that a short branch can be used?
1966 //
1967 // NOTE: If the platform does not provide any short branch variants, then
1968 //       this method should return false for offset 0.
1969 bool Matcher::is_short_branch_offset(int rule, int offset) {
1970   // the short version of jmpConUCF2 contains multiple branches,
1971   // making the reach slightly less
1972   if (rule == jmpConUCF2_rule)
1973     return (-126 <= offset && offset <= 125);
1974   return (-128 <= offset && offset <= 127);
1975 }
1976 
1977 const bool Matcher::isSimpleConstant64(jlong value) {
1978   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1979   //return value == (int) value;  // Cf. storeImmL and immL32.
1980 
1981   // Probably always true, even if a temp register is required.
1982   return true;
1983 }
1984 
1985 // The ecx parameter to rep stosq for the ClearArray node is in words.
1986 const bool Matcher::init_array_count_is_in_bytes = false;
1987 
1988 // Threshold size for cleararray.
1989 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1990 
1991 // Should the Matcher clone shifts on addressing modes, expecting them
1992 // to be subsumed into complex addressing expressions or compute them
1993 // into registers?  True for Intel but false for most RISCs
1994 const bool Matcher::clone_shift_expressions = true;
1995 
1996 // Do we need to mask the count passed to shift instructions or does
1997 // the cpu only look at the lower 5/6 bits anyway?
1998 const bool Matcher::need_masked_shift_count = false;
1999 
2000 bool Matcher::narrow_oop_use_complex_address() {
2001   assert(UseCompressedOops, "only for compressed oops code");
2002   return (LogMinObjAlignmentInBytes <= 3);
2003 }
2004 
2005 // Is it better to copy float constants, or load them directly from
2006 // memory?  Intel can load a float constant from a direct address,
2007 // requiring no extra registers.  Most RISCs will have to materialize
2008 // an address into a register first, so they would do better to copy
2009 // the constant from stack.
2010 const bool Matcher::rematerialize_float_constants = true; // XXX
2011 
2012 // If CPU can load and store mis-aligned doubles directly then no
2013 // fixup is needed.  Else we split the double into 2 integer pieces
2014 // and move it piece-by-piece.  Only happens when passing doubles into
2015 // C code as the Java calling convention forces doubles to be aligned.
2016 const bool Matcher::misaligned_doubles_ok = true;
2017 
2018 // No-op on amd64
2019 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2020 
2021 // Advertise here if the CPU requires explicit rounding operations to
2022 // implement the UseStrictFP mode.
2023 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2024 
2025 // Are floats conerted to double when stored to stack during deoptimization?
2026 // On x64 it is stored without convertion so we can use normal access.
2027 bool Matcher::float_in_double() { return false; }
2028 
2029 // Do ints take an entire long register or just half?
2030 const bool Matcher::int_in_long = true;
2031 
2032 // Return whether or not this register is ever used as an argument.
2033 // This function is used on startup to build the trampoline stubs in
2034 // generateOptoStub.  Registers not mentioned will be killed by the VM
2035 // call in the trampoline, and arguments in those registers not be
2036 // available to the callee.
2037 bool Matcher::can_be_java_arg(int reg)
2038 {
2039   return
2040     reg ==  RDI_num || reg ==  RDI_H_num ||
2041     reg ==  RSI_num || reg ==  RSI_H_num ||
2042     reg ==  RDX_num || reg ==  RDX_H_num ||
2043     reg ==  RCX_num || reg ==  RCX_H_num ||
2044     reg ==   R8_num || reg ==   R8_H_num ||
2045     reg ==   R9_num || reg ==   R9_H_num ||
2046     reg ==  R12_num || reg ==  R12_H_num ||
2047     reg == XMM0_num || reg == XMM0_H_num ||
2048     reg == XMM1_num || reg == XMM1_H_num ||
2049     reg == XMM2_num || reg == XMM2_H_num ||
2050     reg == XMM3_num || reg == XMM3_H_num ||
2051     reg == XMM4_num || reg == XMM4_H_num ||
2052     reg == XMM5_num || reg == XMM5_H_num ||
2053     reg == XMM6_num || reg == XMM6_H_num ||
2054     reg == XMM7_num || reg == XMM7_H_num;
2055 }
2056 
2057 bool Matcher::is_spillable_arg(int reg)
2058 {
2059   return can_be_java_arg(reg);
2060 }
2061 
2062 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2063   // In 64 bit mode a code which use multiply when
2064   // devisor is constant is faster than hardware
2065   // DIV instruction (it uses MulHiL).
2066   return false;
2067 }
2068 
2069 // Register for DIVI projection of divmodI
2070 RegMask Matcher::divI_proj_mask() {
2071   return INT_RAX_REG_mask;
2072 }
2073 
2074 // Register for MODI projection of divmodI
2075 RegMask Matcher::modI_proj_mask() {
2076   return INT_RDX_REG_mask;
2077 }
2078 
2079 // Register for DIVL projection of divmodL
2080 RegMask Matcher::divL_proj_mask() {
2081   return LONG_RAX_REG_mask;
2082 }
2083 
2084 // Register for MODL projection of divmodL
2085 RegMask Matcher::modL_proj_mask() {
2086   return LONG_RDX_REG_mask;
2087 }
2088 
2089 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2090   return PTR_RBP_REG_mask;
2091 }
2092 
2093 static Address build_address(int b, int i, int s, int d) {
2094   Register index = as_Register(i);
2095   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2096   if (index == rsp) {
2097     index = noreg;
2098     scale = Address::no_scale;
2099   }
2100   Address addr(as_Register(b), index, scale, d);
2101   return addr;
2102 }
2103 
2104 %}
2105 
2106 //----------ENCODING BLOCK-----------------------------------------------------
2107 // This block specifies the encoding classes used by the compiler to
2108 // output byte streams.  Encoding classes are parameterized macros
2109 // used by Machine Instruction Nodes in order to generate the bit
2110 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2114 // which returns its register number when queried.  CONST_INTER causes
2115 // an operand to generate a function which returns the value of the
2116 // constant when queried.  MEMORY_INTER causes an operand to generate
2117 // four functions which return the Base Register, the Index Register,
2118 // the Scale Value, and the Offset Value of the operand when queried.
2119 // COND_INTER causes an operand to generate six functions which return
2120 // the encoding code (ie - encoding bits for the instruction)
2121 // associated with each basic boolean condition for a conditional
2122 // instruction.
2123 //
2124 // Instructions specify two basic values for encoding.  Again, a
2125 // function is available to check if the constant displacement is an
2126 // oop. They use the ins_encode keyword to specify their encoding
2127 // classes (which must be a sequence of enc_class names, and their
2128 // parameters, specified in the encoding block), and they use the
2129 // opcode keyword to specify, in order, their primary, secondary, and
2130 // tertiary opcode.  Only the opcode sections which a particular
2131 // instruction needs for encoding need to be specified.
2132 encode %{
2133   // Build emit functions for each basic byte or larger field in the
2134   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2135   // from C++ code in the enc_class source block.  Emit functions will
2136   // live in the main source block for now.  In future, we can
2137   // generalize this by adding a syntax that specifies the sizes of
2138   // fields in an order, so that the adlc can build the emit functions
2139   // automagically
2140 
2141   // Emit primary opcode
2142   enc_class OpcP
2143   %{
2144     emit_opcode(cbuf, $primary);
2145   %}
2146 
2147   // Emit secondary opcode
2148   enc_class OpcS
2149   %{
2150     emit_opcode(cbuf, $secondary);
2151   %}
2152 
2153   // Emit tertiary opcode
2154   enc_class OpcT
2155   %{
2156     emit_opcode(cbuf, $tertiary);
2157   %}
2158 
2159   // Emit opcode directly
2160   enc_class Opcode(immI d8)
2161   %{
2162     emit_opcode(cbuf, $d8$$constant);
2163   %}
2164 
2165   // Emit size prefix
2166   enc_class SizePrefix
2167   %{
2168     emit_opcode(cbuf, 0x66);
2169   %}
2170 
2171   enc_class reg(rRegI reg)
2172   %{
2173     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2174   %}
2175 
2176   enc_class reg_reg(rRegI dst, rRegI src)
2177   %{
2178     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2179   %}
2180 
2181   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2182   %{
2183     emit_opcode(cbuf, $opcode$$constant);
2184     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2185   %}
2186 
2187   enc_class cmpfp_fixup() %{
2188       MacroAssembler _masm(&cbuf);
2189       emit_cmpfp_fixup(_masm);
2190   %}
2191 
2192   enc_class cmpfp3(rRegI dst)
2193   %{
2194     int dstenc = $dst$$reg;
2195 
2196     // movl $dst, -1
2197     if (dstenc >= 8) {
2198       emit_opcode(cbuf, Assembler::REX_B);
2199     }
2200     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2201     emit_d32(cbuf, -1);
2202 
2203     // jp,s done
2204     emit_opcode(cbuf, 0x7A);
2205     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2206 
2207     // jb,s done
2208     emit_opcode(cbuf, 0x72);
2209     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2210 
2211     // setne $dst
2212     if (dstenc >= 4) {
2213       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2214     }
2215     emit_opcode(cbuf, 0x0F);
2216     emit_opcode(cbuf, 0x95);
2217     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2218 
2219     // movzbl $dst, $dst
2220     if (dstenc >= 4) {
2221       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2222     }
2223     emit_opcode(cbuf, 0x0F);
2224     emit_opcode(cbuf, 0xB6);
2225     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2226   %}
2227 
2228   enc_class cdql_enc(no_rax_rdx_RegI div)
2229   %{
2230     // Full implementation of Java idiv and irem; checks for
2231     // special case as described in JVM spec., p.243 & p.271.
2232     //
2233     //         normal case                           special case
2234     //
2235     // input : rax: dividend                         min_int
2236     //         reg: divisor                          -1
2237     //
2238     // output: rax: quotient  (= rax idiv reg)       min_int
2239     //         rdx: remainder (= rax irem reg)       0
2240     //
2241     //  Code sequnce:
2242     //
2243     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2244     //    5:   75 07/08                jne    e <normal>
2245     //    7:   33 d2                   xor    %edx,%edx
2246     //  [div >= 8 -> offset + 1]
2247     //  [REX_B]
2248     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2249     //    c:   74 03/04                je     11 <done>
2250     // 000000000000000e <normal>:
2251     //    e:   99                      cltd
2252     //  [div >= 8 -> offset + 1]
2253     //  [REX_B]
2254     //    f:   f7 f9                   idiv   $div
2255     // 0000000000000011 <done>:
2256 
2257     // cmp    $0x80000000,%eax
2258     emit_opcode(cbuf, 0x3d);
2259     emit_d8(cbuf, 0x00);
2260     emit_d8(cbuf, 0x00);
2261     emit_d8(cbuf, 0x00);
2262     emit_d8(cbuf, 0x80);
2263 
2264     // jne    e <normal>
2265     emit_opcode(cbuf, 0x75);
2266     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2267 
2268     // xor    %edx,%edx
2269     emit_opcode(cbuf, 0x33);
2270     emit_d8(cbuf, 0xD2);
2271 
2272     // cmp    $0xffffffffffffffff,%ecx
2273     if ($div$$reg >= 8) {
2274       emit_opcode(cbuf, Assembler::REX_B);
2275     }
2276     emit_opcode(cbuf, 0x83);
2277     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2278     emit_d8(cbuf, 0xFF);
2279 
2280     // je     11 <done>
2281     emit_opcode(cbuf, 0x74);
2282     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2283 
2284     // <normal>
2285     // cltd
2286     emit_opcode(cbuf, 0x99);
2287 
2288     // idivl (note: must be emitted by the user of this rule)
2289     // <done>
2290   %}
2291 
2292   enc_class cdqq_enc(no_rax_rdx_RegL div)
2293   %{
2294     // Full implementation of Java ldiv and lrem; checks for
2295     // special case as described in JVM spec., p.243 & p.271.
2296     //
2297     //         normal case                           special case
2298     //
2299     // input : rax: dividend                         min_long
2300     //         reg: divisor                          -1
2301     //
2302     // output: rax: quotient  (= rax idiv reg)       min_long
2303     //         rdx: remainder (= rax irem reg)       0
2304     //
2305     //  Code sequnce:
2306     //
2307     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2308     //    7:   00 00 80
2309     //    a:   48 39 d0                cmp    %rdx,%rax
2310     //    d:   75 08                   jne    17 <normal>
2311     //    f:   33 d2                   xor    %edx,%edx
2312     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2313     //   15:   74 05                   je     1c <done>
2314     // 0000000000000017 <normal>:
2315     //   17:   48 99                   cqto
2316     //   19:   48 f7 f9                idiv   $div
2317     // 000000000000001c <done>:
2318 
2319     // mov    $0x8000000000000000,%rdx
2320     emit_opcode(cbuf, Assembler::REX_W);
2321     emit_opcode(cbuf, 0xBA);
2322     emit_d8(cbuf, 0x00);
2323     emit_d8(cbuf, 0x00);
2324     emit_d8(cbuf, 0x00);
2325     emit_d8(cbuf, 0x00);
2326     emit_d8(cbuf, 0x00);
2327     emit_d8(cbuf, 0x00);
2328     emit_d8(cbuf, 0x00);
2329     emit_d8(cbuf, 0x80);
2330 
2331     // cmp    %rdx,%rax
2332     emit_opcode(cbuf, Assembler::REX_W);
2333     emit_opcode(cbuf, 0x39);
2334     emit_d8(cbuf, 0xD0);
2335 
2336     // jne    17 <normal>
2337     emit_opcode(cbuf, 0x75);
2338     emit_d8(cbuf, 0x08);
2339 
2340     // xor    %edx,%edx
2341     emit_opcode(cbuf, 0x33);
2342     emit_d8(cbuf, 0xD2);
2343 
2344     // cmp    $0xffffffffffffffff,$div
2345     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2346     emit_opcode(cbuf, 0x83);
2347     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2348     emit_d8(cbuf, 0xFF);
2349 
2350     // je     1e <done>
2351     emit_opcode(cbuf, 0x74);
2352     emit_d8(cbuf, 0x05);
2353 
2354     // <normal>
2355     // cqto
2356     emit_opcode(cbuf, Assembler::REX_W);
2357     emit_opcode(cbuf, 0x99);
2358 
2359     // idivq (note: must be emitted by the user of this rule)
2360     // <done>
2361   %}
2362 
2363   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2364   enc_class OpcSE(immI imm)
2365   %{
2366     // Emit primary opcode and set sign-extend bit
2367     // Check for 8-bit immediate, and set sign extend bit in opcode
2368     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2369       emit_opcode(cbuf, $primary | 0x02);
2370     } else {
2371       // 32-bit immediate
2372       emit_opcode(cbuf, $primary);
2373     }
2374   %}
2375 
2376   enc_class OpcSErm(rRegI dst, immI imm)
2377   %{
2378     // OpcSEr/m
2379     int dstenc = $dst$$reg;
2380     if (dstenc >= 8) {
2381       emit_opcode(cbuf, Assembler::REX_B);
2382       dstenc -= 8;
2383     }
2384     // Emit primary opcode and set sign-extend bit
2385     // Check for 8-bit immediate, and set sign extend bit in opcode
2386     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2387       emit_opcode(cbuf, $primary | 0x02);
2388     } else {
2389       // 32-bit immediate
2390       emit_opcode(cbuf, $primary);
2391     }
2392     // Emit r/m byte with secondary opcode, after primary opcode.
2393     emit_rm(cbuf, 0x3, $secondary, dstenc);
2394   %}
2395 
2396   enc_class OpcSErm_wide(rRegL dst, immI imm)
2397   %{
2398     // OpcSEr/m
2399     int dstenc = $dst$$reg;
2400     if (dstenc < 8) {
2401       emit_opcode(cbuf, Assembler::REX_W);
2402     } else {
2403       emit_opcode(cbuf, Assembler::REX_WB);
2404       dstenc -= 8;
2405     }
2406     // Emit primary opcode and set sign-extend bit
2407     // Check for 8-bit immediate, and set sign extend bit in opcode
2408     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2409       emit_opcode(cbuf, $primary | 0x02);
2410     } else {
2411       // 32-bit immediate
2412       emit_opcode(cbuf, $primary);
2413     }
2414     // Emit r/m byte with secondary opcode, after primary opcode.
2415     emit_rm(cbuf, 0x3, $secondary, dstenc);
2416   %}
2417 
2418   enc_class Con8or32(immI imm)
2419   %{
2420     // Check for 8-bit immediate, and set sign extend bit in opcode
2421     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2422       $$$emit8$imm$$constant;
2423     } else {
2424       // 32-bit immediate
2425       $$$emit32$imm$$constant;
2426     }
2427   %}
2428 
2429   enc_class Lbl(label labl)
2430   %{
2431     // GOTO
2432     Label* l = $labl$$label;
2433     emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4)));
2434   %}
2435 
2436   enc_class LblShort(label labl)
2437   %{
2438     // GOTO
2439     Label* l = $labl$$label;
2440     int disp = l->loc_pos() - (cbuf.insts_size() + 1);
2441     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2442     emit_d8(cbuf, disp);
2443   %}
2444 
2445   enc_class opc2_reg(rRegI dst)
2446   %{
2447     // BSWAP
2448     emit_cc(cbuf, $secondary, $dst$$reg);
2449   %}
2450 
2451   enc_class opc3_reg(rRegI dst)
2452   %{
2453     // BSWAP
2454     emit_cc(cbuf, $tertiary, $dst$$reg);
2455   %}
2456 
2457   enc_class reg_opc(rRegI div)
2458   %{
2459     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2460     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2461   %}
2462 
2463   enc_class Jcc(cmpOp cop, label labl)
2464   %{
2465     // JCC
2466     Label* l = $labl$$label;
2467     assert(l != NULL, "need Label");
2468     $$$emit8$primary;
2469     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2470     emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4)));
2471   %}
2472 
2473   enc_class JccShort (cmpOp cop, label labl)
2474   %{
2475   // JCC
2476     Label *l = $labl$$label;
2477     assert(l != NULL, "need Label");
2478     emit_cc(cbuf, $primary, $cop$$cmpcode);
2479     int disp = l->loc_pos() - (cbuf.insts_size() + 1);
2480     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2481     emit_d8(cbuf, disp);
2482   %}
2483 
2484   enc_class enc_cmov(cmpOp cop)
2485   %{
2486     // CMOV
2487     $$$emit8$primary;
2488     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2489   %}
2490 
2491   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2492   %{  // Conditional float move emitted as a short branch (inverted condition) over an XMM register-to-register move.
2493     // Invert sense of branch from sense of cmov
2494     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2495     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)  // branch distance = byte length of the move emitted below
2496                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2497                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2498     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2499     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);  // prefix byte emitted only for the movss form
2500     if ($dst$$reg < 8) {
2501       if ($src$$reg >= 8) {
2502         emit_opcode(cbuf, Assembler::REX_B);
2503       }
2504     } else {
2505       if ($src$$reg < 8) {
2506         emit_opcode(cbuf, Assembler::REX_R);
2507       } else {
2508         emit_opcode(cbuf, Assembler::REX_RB);
2509       }
2510     }
2511     emit_opcode(cbuf, 0x0F);
2512     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2513     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2514   %}
2515 
2516   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2517   %{  // Conditional double move emitted as a short branch (inverted condition) over an XMM register-to-register move.
2518     // Invert sense of branch from sense of cmov
2519     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2520     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // branch distance: 4-byte move, plus one byte when a REX prefix is needed
2521 
2522     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2523     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);  // a prefix byte is always emitted, so both forms have the same length
2524     if ($dst$$reg < 8) {
2525       if ($src$$reg >= 8) {
2526         emit_opcode(cbuf, Assembler::REX_B);
2527       }
2528     } else {
2529       if ($src$$reg < 8) {
2530         emit_opcode(cbuf, Assembler::REX_R);
2531       } else {
2532         emit_opcode(cbuf, Assembler::REX_RB);
2533       }
2534     }
2535     emit_opcode(cbuf, 0x0F);
2536     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2537     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2538   %}
2539 
2540   enc_class enc_PartialSubtypeCheck()
2541   %{  // Emit the slow-path subtype check on fixed registers; when the primary opcode is non-zero, RDI is cleared on the path that does not branch to miss.
2542     Register Rrdi = as_Register(RDI_enc); // result register
2543     Register Rrax = as_Register(RAX_enc); // super class
2544     Register Rrcx = as_Register(RCX_enc); // killed
2545     Register Rrsi = as_Register(RSI_enc); // sub class
2546     Label miss;
2547     const bool set_cond_codes = true;
2548 
2549     MacroAssembler _masm(&cbuf);
2550     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2551                                      NULL, &miss,
2552                                      set_cond_codes);  // named constant replaces the duplicate literal
2553     if ($primary) {
2554       __ xorptr(Rrdi, Rrdi);  // only reached when the check did not branch to miss
2555     }
2556     __ bind(miss);
2557   %}
2558 
2559   enc_class Java_To_Interpreter(method meth)
2560   %{  // Emit the primary opcode byte followed by a 32-bit displacement to meth, recorded with a runtime-call relocation.
2561     // CALL Java_To_Interpreter
2562     // This is the instruction starting address for relocation info.
2563     cbuf.set_insts_mark();
2564     $$$emit8$primary;
2565     // CALL directly to the runtime
2566     emit_d32_reloc(cbuf,
2567                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),  // target minus the address just past the 4-byte displacement
2568                    runtime_call_Relocation::spec(),
2569                    RELOC_DISP32);
2570   %}
2571 
2572   enc_class preserve_SP %{  // Copy RSP into rbp_mh_SP_save; assert-enabled builds also check the emitted size against preserve_SP_size().
2573     debug_only(int off0 = cbuf.insts_size());
2574     MacroAssembler _masm(&cbuf);
2575     // RBP is preserved across all calls, even compiled calls.
2576     // Use it to preserve RSP in places where the callee might change the SP.
2577     __ movptr(rbp_mh_SP_save, rsp);
2578     debug_only(int off1 = cbuf.insts_size());
2579     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");  // off0/off1 exist only inside debug_only
2580   %}
2581 
2582   enc_class restore_SP %{  // Copy rbp_mh_SP_save back into RSP, undoing the move emitted by preserve_SP.
2583     MacroAssembler _masm(&cbuf);
2584     __ movptr(rsp, rbp_mh_SP_save);
2585   %}
2586 
2587   enc_class Java_Static_Call(method meth)
2588   %{  // Emit the primary opcode byte plus a 32-bit displacement to meth; the relocation type depends on _method and _optimized_virtual.
2589     // JAVA STATIC CALL
2590     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2591     // determine who we intended to call.
2592     cbuf.set_insts_mark();
2593     $$$emit8$primary;
2594 
2595     if (!_method) {  // no method attached: record a runtime-call relocation
2596       emit_d32_reloc(cbuf,
2597                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2598                      runtime_call_Relocation::spec(),
2599                      RELOC_DISP32);
2600     } else if (_optimized_virtual) {  // record an opt-virtual-call relocation
2601       emit_d32_reloc(cbuf,
2602                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2603                      opt_virtual_call_Relocation::spec(),
2604                      RELOC_DISP32);
2605     } else {  // otherwise record a static-call relocation
2606       emit_d32_reloc(cbuf,
2607                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2608                      static_call_Relocation::spec(),
2609                      RELOC_DISP32);
2610     }
2611     if (_method) {
2612       // Emit stub for static call
2613       emit_java_to_interp(cbuf);
2614     }
2615   %}
2616 
2617   enc_class Java_Dynamic_Call(method meth)
2618   %{  // Emit a 64-bit load of the non-oop placeholder word into RAX, then the call with a virtual-call relocation that refers back to that load.
2619     // JAVA DYNAMIC CALL
2620     // !!!!!
2621     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2622     // emit_call_dynamic_prologue( cbuf );
2623     cbuf.set_insts_mark();
2624 
2625     // movq rax, -1
2626     emit_opcode(cbuf, Assembler::REX_W);
2627     emit_opcode(cbuf, 0xB8 | RAX_enc);
2628     emit_d64_reloc(cbuf,
2629                    (int64_t) Universe::non_oop_word(),
2630                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2631     address virtual_call_oop_addr = cbuf.insts_mark();  // the mark set above, i.e. the start of the movq
2632     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2633     // who we intended to call.
2634     cbuf.set_insts_mark();
2635     $$$emit8$primary;
2636     emit_d32_reloc(cbuf,
2637                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2638                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2639                    RELOC_DISP32);
2640   %}
2641 
2642   enc_class Java_Compiled_Call(method meth)
2643   %{
2644     // JAVA COMPILED CALL: indirect call through RAX plus from_compiled_offset, using an 8-bit displacement when it fits and 32-bit otherwise.
2645     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2646 
2647     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2648     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2649 
2650     // callq *disp(%rax)
2651     cbuf.set_insts_mark();
2652     $$$emit8$primary;
2653     if (-0x80 <= disp && disp < 0x80) {  // a one-byte displacement is sign-extended, so it only covers -128..127
2654       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2655       emit_d8(cbuf, disp); // Displacement
2656     } else {
2657       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2658       emit_d32(cbuf, disp); // Displacement
2659     }
2660   %}
2661 
2662   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2663   %{
2664     // SAL, SAR, SHR: [REX.B] primary opcode, register-direct ModRM carrying the secondary opcode, then the 8-bit shift count.
2665     int dstenc = $dst$$reg;
2666     if (dstenc >= 8) {
2667       emit_opcode(cbuf, Assembler::REX_B);  // encodings 8 and up need the REX.B extension bit
2668       dstenc -= 8;                          // only the low 3 bits go into the ModRM byte
2669     }
2670     $$$emit8$primary;
2671     emit_rm(cbuf, 0x3, $secondary, dstenc);
2672     $$$emit8$shift$$constant;
2673   %}
2674 
2675   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2676   %{
2677     // SAL, SAR, SHR (64-bit): same layout as reg_opc_imm, but a REX prefix with the W bit is always emitted.
2678     int dstenc = $dst$$reg;
2679     if (dstenc < 8) {
2680       emit_opcode(cbuf, Assembler::REX_W);
2681     } else {
2682       emit_opcode(cbuf, Assembler::REX_WB);
2683       dstenc -= 8;  // only the low 3 bits go into the ModRM byte
2684     }
2685     $$$emit8$primary;
2686     emit_rm(cbuf, 0x3, $secondary, dstenc);
2687     $$$emit8$shift$$constant;
2688   %}
2689 
2690   enc_class load_immI(rRegI dst, immI src)
2691   %{  // Load a 32-bit immediate into dst: [REX.B] opcode 0xB8 + register, then the 4-byte constant.
2692     int dstenc = $dst$$reg;
2693     if (dstenc >= 8) {
2694       emit_opcode(cbuf, Assembler::REX_B);
2695       dstenc -= 8;
2696     }
2697     emit_opcode(cbuf, 0xB8 | dstenc);  // the register number is folded into the opcode byte
2698     $$$emit32$src$$constant;
2699   %}
2700 
2701   enc_class load_immL(rRegL dst, immL src)
2702   %{  // Load a full 64-bit immediate into dst: REX.W (or REX.WB), opcode 0xB8 + register, then the 8-byte constant.
2703     int dstenc = $dst$$reg;
2704     if (dstenc < 8) {
2705       emit_opcode(cbuf, Assembler::REX_W);
2706     } else {
2707       emit_opcode(cbuf, Assembler::REX_WB);
2708       dstenc -= 8;
2709     }
2710     emit_opcode(cbuf, 0xB8 | dstenc);  // the register number is folded into the opcode byte
2711     emit_d64(cbuf, $src$$constant);
2712   %}
2713 
2714   enc_class load_immUL32(rRegL dst, immUL32 src)
2715   %{  // Byte-for-byte the same sequence as load_immI, applied to a long register with an unsigned 32-bit constant.
2716     // same as load_immI, but this time we care about zeroes in the high word
2717     int dstenc = $dst$$reg;
2718     if (dstenc >= 8) {
2719       emit_opcode(cbuf, Assembler::REX_B);
2720       dstenc -= 8;
2721     }
2722     emit_opcode(cbuf, 0xB8 | dstenc);  // no REX.W: this is the 32-bit move form
2723     $$$emit32$src$$constant;
2724   %}
2725 
2726   enc_class load_immL32(rRegL dst, immL32 src)
2727   %{  // Load a 32-bit immediate into a long register using the REX.W + 0xC7 /0 form, which takes a 4-byte constant.
2728     int dstenc = $dst$$reg;
2729     if (dstenc < 8) {
2730       emit_opcode(cbuf, Assembler::REX_W);
2731     } else {
2732       emit_opcode(cbuf, Assembler::REX_WB);
2733       dstenc -= 8;
2734     }
2735     emit_opcode(cbuf, 0xC7);
2736     emit_rm(cbuf, 0x03, 0x00, dstenc);  // register-direct, opcode extension 0
2737     $$$emit32$src$$constant;
2738   %}
2739 
2740   enc_class load_immP31(rRegP dst, immP32 src)
2741   %{  // Byte-for-byte the same sequence as load_immI, applied to a pointer register with a 32-bit pointer constant.
2742     // same as load_immI, but this time we care about zeroes in the high word
2743     int dstenc = $dst$$reg;
2744     if (dstenc >= 8) {
2745       emit_opcode(cbuf, Assembler::REX_B);
2746       dstenc -= 8;
2747     }
2748     emit_opcode(cbuf, 0xB8 | dstenc);  // no REX.W: this is the 32-bit move form
2749     $$$emit32$src$$constant;
2750   %}
2751 
2752   enc_class load_immP(rRegP dst, immP src)
2753   %{  // Load a 64-bit pointer constant into dst; an oop constant is emitted with an oop relocation, any other constant as raw bytes.
2754     int dstenc = $dst$$reg;
2755     if (dstenc < 8) {
2756       emit_opcode(cbuf, Assembler::REX_W);
2757     } else {
2758       emit_opcode(cbuf, Assembler::REX_WB);
2759       dstenc -= 8;
2760     }
2761     emit_opcode(cbuf, 0xB8 | dstenc);
2762     // This next line should be generated from ADLC
2763     if ($src->constant_is_oop()) {
2764       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2765     } else {
2766       emit_d64(cbuf, $src$$constant);
2767     }
2768   %}
2769 
2770   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2771   enc_class enc_copy(rRegI dst, rRegI src)
2772   %{  // Thin wrapper: hands both register encodings to encode_copy, which is defined outside this section.
2773     encode_copy(cbuf, $dst$$reg, $src$$reg);
2774   %}
2775 
2776   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2777   enc_class enc_CopyXD( RegD dst, RegD src ) %{  // Thin wrapper: hands both register encodings to encode_CopyXD, defined outside this section.
2778     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2779   %}
2780 
2781   enc_class enc_copy_always(rRegI dst, rRegI src)
2782   %{  // Emit a 32-bit register-to-register move (opcode 0x8B) unconditionally; there is no dst == src shortcut here.
2783     int srcenc = $src$$reg;
2784     int dstenc = $dst$$reg;
2785 
2786     if (dstenc < 8) {
2787       if (srcenc >= 8) {
2788         emit_opcode(cbuf, Assembler::REX_B);  // only src is an extended register
2789         srcenc -= 8;
2790       }
2791     } else {
2792       if (srcenc < 8) {
2793         emit_opcode(cbuf, Assembler::REX_R);  // only dst is an extended register
2794       } else {
2795         emit_opcode(cbuf, Assembler::REX_RB);  // both are extended registers
2796         srcenc -= 8;
2797       }
2798       dstenc -= 8;
2799     }
2800 
2801     emit_opcode(cbuf, 0x8B);
2802     emit_rm(cbuf, 0x3, dstenc, srcenc);
2803   %}
2804 
2805   enc_class enc_copy_wide(rRegL dst, rRegL src)
2806   %{  // Emit a 64-bit register-to-register move (REX.W + 0x8B); nothing is emitted when dst and src have the same encoding.
2807     int srcenc = $src$$reg;
2808     int dstenc = $dst$$reg;
2809 
2810     if (dstenc != srcenc) {  // a self-copy produces an empty encoding
2811       if (dstenc < 8) {
2812         if (srcenc < 8) {
2813           emit_opcode(cbuf, Assembler::REX_W);
2814         } else {
2815           emit_opcode(cbuf, Assembler::REX_WB);
2816           srcenc -= 8;
2817         }
2818       } else {
2819         if (srcenc < 8) {
2820           emit_opcode(cbuf, Assembler::REX_WR);
2821         } else {
2822           emit_opcode(cbuf, Assembler::REX_WRB);
2823           srcenc -= 8;
2824         }
2825         dstenc -= 8;
2826       }
2827       emit_opcode(cbuf, 0x8B);
2828       emit_rm(cbuf, 0x3, dstenc, srcenc);
2829     }
2830   %}
2831 
2832   enc_class Con32(immI src)
2833   %{
2834     // Output immediate: the constant is emitted as 4 bytes through the ADLC emit32 macro.
2835     $$$emit32$src$$constant;
2836   %}
2837 
2838   enc_class Con64(immL src)
2839   %{
2840     // Output immediate: the constant is emitted as 8 bytes into the code buffer.
2841     emit_d64(cbuf, $src$$constant);
2842   %}
2843 
2844   enc_class Con32F_as_bits(immF src)
2845   %{
2846     // Output Float immediate bits
2847     jfloat jf = $src$$constant;
2848     jint jf_as_bits = jint_cast(jf);  // converted with jint_cast so the float's bits are emitted (per the comment above)
2849     emit_d32(cbuf, jf_as_bits);
2850   %}
2851 
2852   enc_class Con16(immI src)
2853   %{
2854     // Output immediate: the constant is emitted as 2 bytes through the ADLC emit16 macro.
2855     $$$emit16$src$$constant;
2856   %}
2857 
2858   // How is this different from Con32??? XXX
2859   enc_class Con_d32(immI src)
2860   %{  // Emit the constant as 4 bytes by calling emit_d32 directly rather than through the emit32 macro.
2861     emit_d32(cbuf,$src$$constant);
2862   %}
2863 
2864   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2865     // Output immediate memory reference: an emit_rm byte (0x00, t1's encoding, 0x05) followed by a zero 32-bit displacement.
2866     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2867     emit_d32(cbuf, 0x00);
2868   %}
2869 
2870   enc_class lock_prefix()
2871   %{  // Emit the 0xF0 lock prefix byte, but only when os::is_MP() is true.
2872     if (os::is_MP()) {
2873       emit_opcode(cbuf, 0xF0); // lock
2874     }
2875   %}
2876 
2877   enc_class REX_mem(memory mem)
2878   %{  // Emit a REX prefix only if the memory operand's base and/or index encoding is 8 or higher.
2879     if ($mem$$base >= 8) {
2880       if ($mem$$index < 8) {
2881         emit_opcode(cbuf, Assembler::REX_B);  // extended base only
2882       } else {
2883         emit_opcode(cbuf, Assembler::REX_XB);  // extended base and index
2884       }
2885     } else {
2886       if ($mem$$index >= 8) {
2887         emit_opcode(cbuf, Assembler::REX_X);  // extended index only
2888       }
2889     }
2890   %}
2891 
2892   enc_class REX_mem_wide(memory mem)
2893   %{  // Always emit a REX prefix with the W bit; B and X are added when base or index encoding is 8 or higher.
2894     if ($mem$$base >= 8) {
2895       if ($mem$$index < 8) {
2896         emit_opcode(cbuf, Assembler::REX_WB);
2897       } else {
2898         emit_opcode(cbuf, Assembler::REX_WXB);
2899       }
2900     } else {
2901       if ($mem$$index < 8) {
2902         emit_opcode(cbuf, Assembler::REX_W);
2903       } else {
2904         emit_opcode(cbuf, Assembler::REX_WX);
2905       }
2906     }
2907   %}
2908 
2909   // for byte regs
2910   enc_class REX_breg(rRegI reg)
2911   %{  // Byte-register prefix: nothing for encodings 0-3, a plain REX for 4-7, REX.B for 8 and up.
2912     if ($reg$$reg >= 4) {
2913       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2914     }
2915   %}
2916 
2917   // for byte regs
2918   enc_class REX_reg_breg(rRegI dst, rRegI src)
2919   %{  // REX prefix for a register pair whose src is used as a byte register: src encodings 4-7 force a plain REX even when no extension bit is set.
2920     if ($dst$$reg < 8) {
2921       if ($src$$reg >= 4) {
2922         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2923       }
2924     } else {
2925       if ($src$$reg < 8) {
2926         emit_opcode(cbuf, Assembler::REX_R);  // a prefix is emitted anyway for the extended dst
2927       } else {
2928         emit_opcode(cbuf, Assembler::REX_RB);
2929       }
2930     }
2931   %}
2932 
2933   // for byte regs
2934   enc_class REX_breg_mem(rRegI reg, memory mem)
2935   %{  // REX prefix for a byte register plus memory operand: R/X/B follow reg/index/base encodings of 8 or higher, with a plain REX for reg 4-7 when nothing else is set.
2936     if ($reg$$reg < 8) {
2937       if ($mem$$base < 8) {
2938         if ($mem$$index >= 8) {
2939           emit_opcode(cbuf, Assembler::REX_X);
2940         } else if ($reg$$reg >= 4) {  // no extension bit needed, but byte registers 4-7 still get a bare REX
2941           emit_opcode(cbuf, Assembler::REX);
2942         }
2943       } else {
2944         if ($mem$$index < 8) {
2945           emit_opcode(cbuf, Assembler::REX_B);
2946         } else {
2947           emit_opcode(cbuf, Assembler::REX_XB);
2948         }
2949       }
2950     } else {
2951       if ($mem$$base < 8) {
2952         if ($mem$$index < 8) {
2953           emit_opcode(cbuf, Assembler::REX_R);
2954         } else {
2955           emit_opcode(cbuf, Assembler::REX_RX);
2956         }
2957       } else {
2958         if ($mem$$index < 8) {
2959           emit_opcode(cbuf, Assembler::REX_RB);
2960         } else {
2961           emit_opcode(cbuf, Assembler::REX_RXB);
2962         }
2963       }
2964     }
2965   %}
2966 
2967   enc_class REX_reg(rRegI reg)
2968   %{  // Emit REX.B only when the register encoding is 8 or higher; otherwise emit nothing.
2969     if ($reg$$reg >= 8) {
2970       emit_opcode(cbuf, Assembler::REX_B);
2971     }
2972   %}
2973 
2974   enc_class REX_reg_wide(rRegI reg)
2975   %{  // Always emit a REX prefix with the W bit; the B bit is added when the register encoding is 8 or higher.
2976     if ($reg$$reg < 8) {
2977       emit_opcode(cbuf, Assembler::REX_W);
2978     } else {
2979       emit_opcode(cbuf, Assembler::REX_WB);
2980     }
2981   %}
2982 
2983   enc_class REX_reg_reg(rRegI dst, rRegI src)
2984   %{  // Emit a REX prefix only when needed: R for dst encoding 8 or higher, B for src encoding 8 or higher.
2985     if ($dst$$reg < 8) {
2986       if ($src$$reg >= 8) {
2987         emit_opcode(cbuf, Assembler::REX_B);
2988       }
2989     } else {
2990       if ($src$$reg < 8) {
2991         emit_opcode(cbuf, Assembler::REX_R);
2992       } else {
2993         emit_opcode(cbuf, Assembler::REX_RB);
2994       }
2995     }
2996   %}
2997 
2998   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2999   %{  // Always emit a REX prefix with the W bit; R is added for dst encoding 8 or higher, B for src encoding 8 or higher.
3000     if ($dst$$reg < 8) {
3001       if ($src$$reg < 8) {
3002         emit_opcode(cbuf, Assembler::REX_W);
3003       } else {
3004         emit_opcode(cbuf, Assembler::REX_WB);
3005       }
3006     } else {
3007       if ($src$$reg < 8) {
3008         emit_opcode(cbuf, Assembler::REX_WR);
3009       } else {
3010         emit_opcode(cbuf, Assembler::REX_WRB);
3011       }
3012     }
3013   %}
3014 
3015   enc_class REX_reg_mem(rRegI reg, memory mem)
3016   %{  // Emit a REX prefix only when needed: R, X and B follow reg, index and base encodings of 8 or higher.
3017     if ($reg$$reg < 8) {
3018       if ($mem$$base < 8) {
3019         if ($mem$$index >= 8) {
3020           emit_opcode(cbuf, Assembler::REX_X);
3021         }  // all three below 8: no prefix at all
3022       } else {
3023         if ($mem$$index < 8) {
3024           emit_opcode(cbuf, Assembler::REX_B);
3025         } else {
3026           emit_opcode(cbuf, Assembler::REX_XB);
3027         }
3028       }
3029     } else {
3030       if ($mem$$base < 8) {
3031         if ($mem$$index < 8) {
3032           emit_opcode(cbuf, Assembler::REX_R);
3033         } else {
3034           emit_opcode(cbuf, Assembler::REX_RX);
3035         }
3036       } else {
3037         if ($mem$$index < 8) {
3038           emit_opcode(cbuf, Assembler::REX_RB);
3039         } else {
3040           emit_opcode(cbuf, Assembler::REX_RXB);
3041         }
3042       }
3043     }
3044   %}
3045 
3046   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3047   %{  // Always emit a REX prefix with the W bit; R, X and B follow reg, index and base encodings of 8 or higher.
3048     if ($reg$$reg < 8) {
3049       if ($mem$$base < 8) {
3050         if ($mem$$index < 8) {
3051           emit_opcode(cbuf, Assembler::REX_W);
3052         } else {
3053           emit_opcode(cbuf, Assembler::REX_WX);
3054         }
3055       } else {
3056         if ($mem$$index < 8) {
3057           emit_opcode(cbuf, Assembler::REX_WB);
3058         } else {
3059           emit_opcode(cbuf, Assembler::REX_WXB);
3060         }
3061       }
3062     } else {
3063       if ($mem$$base < 8) {
3064         if ($mem$$index < 8) {
3065           emit_opcode(cbuf, Assembler::REX_WR);
3066         } else {
3067           emit_opcode(cbuf, Assembler::REX_WRX);
3068         }
3069       } else {
3070         if ($mem$$index < 8) {
3071           emit_opcode(cbuf, Assembler::REX_WRB);
3072         } else {
3073           emit_opcode(cbuf, Assembler::REX_WRXB);
3074         }
3075       }
3076     }
3077   %}
3078 
3079   enc_class reg_mem(rRegI ereg, memory mem)
3080   %{  // Unpack the register and memory operand fields and pass them to encode_RegMem (defined outside this section).
3081     // High registers are handled in encode_RegMem
3082     int reg = $ereg$$reg;
3083     int base = $mem$$base;
3084     int index = $mem$$index;
3085     int scale = $mem$$scale;
3086     int disp = $mem$$disp;
3087     bool disp_is_oop = $mem->disp_is_oop();
3088 
3089     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3090   %}
3091 
3092   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3093   %{  // Like reg_mem, but the value passed in the register position of encode_RegMem is the constant rm_opcode.
3094     int rm_byte_opcode = $rm_opcode$$constant;
3095 
3096     // High registers are handled in encode_RegMem
3097     int base = $mem$$base;
3098     int index = $mem$$index;
3099     int scale = $mem$$scale;
3100     int displace = $mem$$disp;
3101 
3102     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3103                                             // working with static
3104                                             // globals
3105     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3106                   disp_is_oop);
3107   %}
3108 
3109   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3110   %{  // Encode dst with a base-plus-displacement operand (base = src0, displacement = src1, no index or scale) via encode_RegMem.
3111     int reg_encoding = $dst$$reg;
3112     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3113     int index        = 0x04;            // 0x04 indicates no index
3114     int scale        = 0x00;            // 0x00 indicates no scale
3115     int displace     = $src1$$constant; // 0x00 indicates no displacement
3116     bool disp_is_oop = false;
3117     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3118                   disp_is_oop);
3119   %}
3120 
3121   enc_class neg_reg(rRegI dst)
3122   %{  // Emit a 32-bit NEG of dst: [REX.B] 0xF7 with opcode extension 3 in a register-direct ModRM byte.
3123     int dstenc = $dst$$reg;
3124     if (dstenc >= 8) {
3125       emit_opcode(cbuf, Assembler::REX_B);
3126       dstenc -= 8;
3127     }
3128     // NEG $dst
3129     emit_opcode(cbuf, 0xF7);
3130     emit_rm(cbuf, 0x3, 0x03, dstenc);
3131   %}
3132 
3133   enc_class neg_reg_wide(rRegI dst)
3134   %{  // Emit a 64-bit NEG of dst: same bytes as neg_reg, but a REX prefix with the W bit is always emitted.
3135     int dstenc = $dst$$reg;
3136     if (dstenc < 8) {
3137       emit_opcode(cbuf, Assembler::REX_W);
3138     } else {
3139       emit_opcode(cbuf, Assembler::REX_WB);
3140       dstenc -= 8;
3141     }
3142     // NEG $dst
3143     emit_opcode(cbuf, 0xF7);
3144     emit_rm(cbuf, 0x3, 0x03, dstenc);
3145   %}
3146 
3147   enc_class setLT_reg(rRegI dst)
3148   %{  // Emit SETLT on dst's byte register (0x0F 0x9C), preceded by whatever REX prefix that byte register needs.
3149     int dstenc = $dst$$reg;
3150     if (dstenc >= 8) {
3151       emit_opcode(cbuf, Assembler::REX_B);
3152       dstenc -= 8;
3153     } else if (dstenc >= 4) {  // encodings 4-7 need a bare REX when used as byte registers
3154       emit_opcode(cbuf, Assembler::REX);
3155     }
3156     // SETLT $dst
3157     emit_opcode(cbuf, 0x0F);
3158     emit_opcode(cbuf, 0x9C);
3159     emit_rm(cbuf, 0x3, 0x0, dstenc);
3160   %}
3161 
3162   enc_class setNZ_reg(rRegI dst)
3163   %{  // Emit SETNZ on dst's byte register (0x0F 0x95), preceded by whatever REX prefix that byte register needs.
3164     int dstenc = $dst$$reg;
3165     if (dstenc >= 8) {
3166       emit_opcode(cbuf, Assembler::REX_B);
3167       dstenc -= 8;
3168     } else if (dstenc >= 4) {  // encodings 4-7 need a bare REX when used as byte registers
3169       emit_opcode(cbuf, Assembler::REX);
3170     }
3171     // SETNZ $dst
3172     emit_opcode(cbuf, 0x0F);
3173     emit_opcode(cbuf, 0x95);
3174     emit_rm(cbuf, 0x3, 0x0, dstenc);
3175   %}
3176 
3177 
3178   // Compare the longs and set -1, 0, or 1 into dst
3179   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3180   %{  // Emits: cmpq src1,src2; movl dst,-1; jl done; setne dst; movzbl dst,dst -- leaving -1, 0 or 1 in dst.
3181     int src1enc = $src1$$reg;
3182     int src2enc = $src2$$reg;
3183     int dstenc = $dst$$reg;
3184 
3185     // cmpq $src1, $src2
3186     if (src1enc < 8) {
3187       if (src2enc < 8) {
3188         emit_opcode(cbuf, Assembler::REX_W);
3189       } else {
3190         emit_opcode(cbuf, Assembler::REX_WB);
3191       }
3192     } else {
3193       if (src2enc < 8) {
3194         emit_opcode(cbuf, Assembler::REX_WR);
3195       } else {
3196         emit_opcode(cbuf, Assembler::REX_WRB);
3197       }
3198     }
3199     emit_opcode(cbuf, 0x3B);
3200     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3201 
3202     // movl $dst, -1
3203     if (dstenc >= 8) {
3204       emit_opcode(cbuf, Assembler::REX_B);
3205     }
3206     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3207     emit_d32(cbuf, -1);
3208 
3209     // jl,s done
3210     emit_opcode(cbuf, 0x7C);
3211     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);  // skips setne + movzbl: 3 bytes each, plus one REX byte each when dstenc >= 4
3212 
3213     // setne $dst
3214     if (dstenc >= 4) {
3215       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3216     }
3217     emit_opcode(cbuf, 0x0F);
3218     emit_opcode(cbuf, 0x95);
3219     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3220 
3221     // movzbl $dst, $dst
3222     if (dstenc >= 4) {
3223       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);  // dst fills both ModRM register fields, hence R and B together
3224     }
3225     emit_opcode(cbuf, 0x0F);
3226     emit_opcode(cbuf, 0xB6);
3227     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3228   %}
3229 
3230   enc_class Push_ResultXD(regD dst) %{  // Store the x87 result to [RSP], load it from there into XMM dst, then add 8 to RSP.
3231     int dstenc = $dst$$reg;
3232 
3233     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3234 
3235     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3236     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3237     if (dstenc >= 8) {
3238       emit_opcode(cbuf, Assembler::REX_R);  // the REX byte goes between the mandatory prefix and the 0x0F escape
3239     }
3240     emit_opcode  (cbuf, 0x0F );
3241     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3242     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3243 
3244     // add rsp,8
3245     emit_opcode(cbuf, Assembler::REX_W);
3246     emit_opcode(cbuf,0x83);
3247     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3248     emit_d8(cbuf,0x08);
3249   %}
3250 
3251   enc_class Push_SrcXD(regD src) %{  // Subtract 8 from RSP, store XMM src at [RSP], then load that slot onto the x87 stack.
3252     int srcenc = $src$$reg;
3253 
3254     // subq rsp,#8
3255     emit_opcode(cbuf, Assembler::REX_W);
3256     emit_opcode(cbuf, 0x83);
3257     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3258     emit_d8(cbuf, 0x8);
3259 
3260     // movsd [rsp],src
3261     emit_opcode(cbuf, 0xF2);
3262     if (srcenc >= 8) {
3263       emit_opcode(cbuf, Assembler::REX_R);
3264     }
3265     emit_opcode(cbuf, 0x0F);
3266     emit_opcode(cbuf, 0x11);
3267     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3268 
3269     // fldd [rsp]
3270     emit_opcode(cbuf, 0x66);  // NOTE(review): a 0x66 prefix byte ahead of the 0xDD opcode -- confirm it is intentional
3271     emit_opcode(cbuf, 0xDD);
3272     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3273   %}
3274 
3275 
3276   enc_class movq_ld(regD dst, memory mem) %{  // movq load: memory operand into XMM register dst, emitted through MacroAssembler.
3277     MacroAssembler _masm(&cbuf);
3278     __ movq($dst$$XMMRegister, $mem$$Address);
3279   %}
3280 
3281   enc_class movq_st(memory mem, regD src) %{  // movq store: XMM register src into the memory operand, emitted through MacroAssembler.
3282     MacroAssembler _masm(&cbuf);
3283     __ movq($mem$$Address, $src$$XMMRegister);
3284   %}
3285 
3286   enc_class pshufd_8x8(regF dst, regF src) %{  // Copy src to dst via encode_CopyXD, then punpcklbw dst,dst and pshuflw dst,dst with selector 0x00.
3287     MacroAssembler _masm(&cbuf);
3288 
3289     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3290     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3291     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3292   %}
3293 
3294   enc_class pshufd_4x16(regF dst, regF src) %{  // Single pshuflw from src into dst with selector 0x00.
3295     MacroAssembler _masm(&cbuf);
3296 
3297     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3298   %}
3299 
3300   enc_class pshufd(regD dst, regD src, int mode) %{  // Single pshufd from src into dst; mode is passed through as the shuffle selector.
3301     MacroAssembler _masm(&cbuf);
3302 
3303     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3304   %}
3305 
3306   enc_class pxor(regD dst, regD src) %{  // Single pxor of XMM dst with XMM src, emitted through MacroAssembler.
3307     MacroAssembler _masm(&cbuf);
3308 
3309     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3310   %}
3311 
3312   enc_class mov_i2x(regD dst, rRegI src) %{  // movdl from general register src into XMM register dst, emitted through MacroAssembler.
3313     MacroAssembler _masm(&cbuf);
3314 
3315     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3316   %}
3317 
3318   // obj: object to lock
3319   // box: box address (header location) -- killed
3320   // tmp: rax -- killed
3321   // scr: rbx -- killed
3322   //
3323   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3324   // from i486.ad.  See that file for comments.
3325   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3326   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3327 
3328 
3329   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3330   %{  // Emit the inline monitor-enter sequence. EmitSync bit 1 or bit 2 selects a reduced variant; otherwise the full stack-lock / inflated-monitor path is emitted.
3331     Register objReg = as_Register((int)$obj$$reg);
3332     Register boxReg = as_Register((int)$box$$reg);
3333     Register tmpReg = as_Register($tmp$$reg);
3334     Register scrReg = as_Register($scr$$reg);
3335     MacroAssembler masm(&cbuf);
3336 
3337     // Verify uniqueness of register assignments -- necessary but not sufficient
3338     assert (objReg != boxReg && objReg != tmpReg &&
3339             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3340 
3341     if (_counters != NULL) {
3342       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3343     }
3344     if (EmitSync & 1) {  // variant 1: store unused_mark into the box and compare RSP against NULL_WORD; no locking attempt is emitted
3345         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3346         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3347         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;  // NOTE(review): presumably leaves "not equal" in the flags to signal failure -- confirm against the consumer of this encoding
3348     } else
3349     if (EmitSync & 2) {  // variant 2: stack-lock attempt only, no inflated-monitor handling
3350         Label DONE_LABEL;
3351         if (UseBiasedLocking) {
3352            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3353           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3354         }
3355         // QQQ was movl...
3356         masm.movptr(tmpReg, 0x1);
3357         masm.orptr(tmpReg, Address(objReg, 0));  // tmpReg = word at [obj] with bit 0 set
3358         masm.movptr(Address(boxReg, 0), tmpReg);  // save it in the box before the compare-and-exchange
3359         if (os::is_MP()) {
3360           masm.lock();
3361         }
3362         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3363         masm.jcc(Assembler::equal, DONE_LABEL);
3364 
3365         // Recursive locking
3366         masm.subptr(tmpReg, rsp);
3367         masm.andptr(tmpReg, 7 - os::vm_page_size());
3368         masm.movptr(Address(boxReg, 0), tmpReg);
3369 
3370         masm.bind(DONE_LABEL);
3371         masm.nop(); // avoid branch to branch
3372     } else {  // default: full path, including the inflated-monitor case
3373         Label DONE_LABEL, IsInflated, Egress;
3374 
3375         masm.movptr(tmpReg, Address(objReg, 0)) ;
3376         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3377         masm.jcc   (Assembler::notZero, IsInflated) ;
3378 
3379         // it's stack-locked, biased or neutral
3380         // TODO: optimize markword triage order to reduce the number of
3381         // conditional branches in the most common cases.
3382         // Beware -- there's a subtle invariant that fetch of the markword
3383         // at [FETCH], below, will never observe a biased encoding (*101b).
3384         // If this invariant is not held we'll suffer exclusion (safety) failure.
3385 
3386         if (UseBiasedLocking && !UseOptoBiasInlining) {
3387           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3388           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3389         }
3390 
3391         // was q will it destroy high?
3392         masm.orl   (tmpReg, 1) ;
3393         masm.movptr(Address(boxReg, 0), tmpReg) ;  // save tmpReg (bit 0 set) in the box before the compare-and-exchange
3394         if (os::is_MP()) { masm.lock(); }
3395         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3396         if (_counters != NULL) {
3397            masm.cond_inc32(Assembler::equal,
3398                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3399         }
3400         masm.jcc   (Assembler::equal, DONE_LABEL);
3401 
3402         // Recursive locking
3403         masm.subptr(tmpReg, rsp);
3404         masm.andptr(tmpReg, 7 - os::vm_page_size());
3405         masm.movptr(Address(boxReg, 0), tmpReg);
3406         if (_counters != NULL) {
3407            masm.cond_inc32(Assembler::equal,
3408                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3409         }
3410         masm.jmp   (DONE_LABEL) ;
3411 
3412         masm.bind  (IsInflated) ;
3413         // It's inflated
3414 
3415         // TODO: someday avoid the ST-before-CAS penalty by
3416         // relocating (deferring) the following ST.
3417         // We should also think about trying a CAS without having
3418         // fetched _owner.  If the CAS is successful we may
3419         // avoid an RTO->RTS upgrade on the $line.
3420         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3421         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3422 
3423         masm.mov    (boxReg, tmpReg) ;  // boxReg now holds the word loaded from [obj]; it is the base for the owner field below
3424         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // -2: the base still carries the 0x02 bit tested above
3425         masm.testptr(tmpReg, tmpReg) ;
3426         masm.jcc    (Assembler::notZero, DONE_LABEL) ;  // owner field is non-zero: leave with "not zero" in the flags
3427 
3428         // It's inflated and appears unlocked
3429         if (os::is_MP()) { masm.lock(); }
3430         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3431         // Intentional fall-through into DONE_LABEL ...
3432 
3433         masm.bind  (DONE_LABEL) ;
3434         masm.nop   () ;                 // avoid jmp to jmp
3435     }
3436   %}
3437 
3438   // obj: object to unlock
3439   // box: box address (displaced header location), killed
3440   // tmp: killed temporary (RBX according to the original note); cannot be obj nor box
       //
       // Emits the inline unlock sequence.  One of three variants is generated,
       // selected by EmitSync bits at code-generation time.  As the comments at
       // LGoSlowPath/LSuccess below state, the outcome is reported in ZF:
       // ZF=1 indicates success, ZF=0 indicates failure.
3441   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3442   %{
3443 
3444     Register objReg = as_Register($obj$$reg);
3445     Register boxReg = as_Register($box$$reg);
3446     Register tmpReg = as_Register($tmp$$reg);
3447     MacroAssembler masm(&cbuf);
3448 
3449     if (EmitSync & 4) {
            // Variant 1: a single compare of rsp against 0.
            // NOTE(review): presumably this forces ZF=0 (rsp is never 0) so that
            // every unlock is treated as a fast-path failure -- confirm.
3450        masm.cmpptr(rsp, 0) ;
3451     } else
3452     if (EmitSync & 8) {
            // Variant 2: biased-locking exit (optional), recursive check, then a
            // single CAS of the displaced header back into the object.
3453        Label DONE_LABEL;
3454        if (UseBiasedLocking) {
3455          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3456        }
3457 
3458        // Check whether the displaced header is 0
3459        //(=> recursive unlock)
3460        masm.movptr(tmpReg, Address(boxReg, 0));
3461        masm.testptr(tmpReg, tmpReg);
3462        masm.jcc(Assembler::zero, DONE_LABEL);
3463 
3464        // If not recursive lock, reset the header to displaced header
3465        if (os::is_MP()) {
3466          masm.lock();
3467        }
3468        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3469        masm.bind(DONE_LABEL);
3470        masm.nop(); // avoid branch to branch
3471     } else {
            // Variant 3 (neither EmitSync bit 4 nor 8 set): handles recursive,
            // stack-locked and inflated cases inline.
3472        Label DONE_LABEL, Stacked, CheckSucc ;
3473 
3474        if (UseBiasedLocking && !UseOptoBiasInlining) {
3475          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3476        }
3477 
            // tmpReg = word at obj+0; a zero word in the box means nothing more
            // to do (ZF is already 1 from the compare).
3478        masm.movptr(tmpReg, Address(objReg, 0)) ;
3479        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
3480        masm.jcc   (Assembler::zero, DONE_LABEL) ;
            // Bit 0x02 clear in the header word => take the Stacked path.
3481        masm.testl (tmpReg, 0x02) ;
3482        masm.jcc   (Assembler::zero, Stacked) ;
3483 
3484        // It's inflated
            // NOTE(review): the "-2" on every ObjectMonitor offset presumably
            // compensates for the 0x02 tag bit still present in tmpReg -- confirm.
            // boxReg = (owner ^ current thread) | recursions; non-zero means we
            // are not the owner or the lock is held recursively => exit with ZF=0.
3485        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3486        masm.xorptr(boxReg, r15_thread) ;
3487        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3488        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
            // boxReg = cxq | EntryList; non-zero => go to CheckSucc, otherwise
            // simply clear the owner field.
3489        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3490        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3491        masm.jcc   (Assembler::notZero, CheckSucc) ;
3492        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3493        masm.jmp   (DONE_LABEL) ;
3494 
3495        if ((EmitSync & 65536) == 0) {
3496          Label LSuccess, LGoSlowPath ;
3497          masm.bind  (CheckSucc) ;
              // succ field is null => slow path.
3498          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3499          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3500 
3501          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3502          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3503          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3504          // are all faster when the write buffer is populated.
3505          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3506          if (os::is_MP()) {
                 // lock:addl of 0 to the top of stack serves as the memory barrier
                 // mentioned above.
3507             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3508          }
              // Re-check succ after the store of owner; still non-null => success.
3509          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3510          masm.jcc   (Assembler::notZero, LSuccess) ;
3511 
              // succ became null: try to CAS the owner field from null (RAX) back
              // to the current thread.  If the CAS fails (notEqual) the unlock is
              // treated as successful; if it succeeds we fall into the slow path.
3512          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really RAX (rax_RegP operand)
3513          if (os::is_MP()) { masm.lock(); }
3514          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3515          masm.jcc   (Assembler::notEqual, LSuccess) ;
3516          // Intentional fall-through into slow-path
3517 
3518          masm.bind  (LGoSlowPath) ;
3519          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3520          masm.jmp   (DONE_LABEL) ;
3521 
3522          masm.bind  (LSuccess) ;
3523          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3524          masm.jmp   (DONE_LABEL) ;
3525        }
3526 
            // Stacked path: CAS the displaced header (re-read from the box) back
            // into the object's header word.
3527        masm.bind  (Stacked) ;
3528        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3529        if (os::is_MP()) { masm.lock(); }
3530        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3531 
            // When the CheckSucc code above is not emitted, the label is bound
            // here so the earlier branch to it still has a target.
3532        if (EmitSync & 65536) {
3533           masm.bind (CheckSucc) ;
3534        }
3535        masm.bind(DONE_LABEL);
3536        if (EmitSync & 32768) {
3537           masm.nop();                      // avoid branch to branch
3538        }
3539     }
3540   %}
3541 
3542 
3543   enc_class enc_rethrow()
3544   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub().
         // The displacement is computed relative to the end of the instruction:
         // insts_end() is taken after the opcode byte, and the extra 4 accounts
         // for the displacement field itself.  A runtime-call relocation is
         // recorded for the displacement.
3545     cbuf.set_insts_mark();
3546     emit_opcode(cbuf, 0xE9); // jmp entry
3547     emit_d32_reloc(cbuf,
3548                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3549                    runtime_call_Relocation::spec(),
3550                    RELOC_DISP32);
3551   %}
3552 
3553   enc_class absF_encoding(regF dst)
3554   %{
         // Hand-encodes the two-byte opcode 0F 54 (ANDPS) with $dst as the
         // register operand and the constant at
         // StubRoutines::x86::float_sign_mask() as the memory operand.
         // NOTE(review): presumably the mask clears the sign bit, giving |dst|
         // -- the mask value is not visible here.
3555     int dstenc = $dst$$reg;
3556     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3557 
3558     cbuf.set_insts_mark();
         // Registers 8..15 need a REX.R prefix; the ModRM reg field then only
         // carries the low three bits.
3559     if (dstenc >= 8) {
3560       emit_opcode(cbuf, Assembler::REX_R);
3561       dstenc -= 8;
3562     }
3563     // XXX reg_mem doesn't support RIP-relative addressing yet
3564     emit_opcode(cbuf, 0x0F);
3565     emit_opcode(cbuf, 0x54);
3566     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // 32-bit displacement for the memory operand, emitted with relocation.
3567     emit_d32_reloc(cbuf, signmask_address);
3568   %}
3569 
3570   enc_class absD_encoding(regD dst)
3571   %{
         // Hand-encodes 66 0F 54 (ANDPD) with $dst as the register operand and
         // the constant at StubRoutines::x86::double_sign_mask() as the memory
         // operand.
         // NOTE(review): presumably the mask clears the sign bit, giving |dst|
         // -- the mask value is not visible here.
3572     int dstenc = $dst$$reg;
3573     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3574 
3575     cbuf.set_insts_mark();
         // The 0x66 operand-size prefix must come before any REX prefix.
3576     emit_opcode(cbuf, 0x66);
3577     if (dstenc >= 8) {
3578       emit_opcode(cbuf, Assembler::REX_R);
3579       dstenc -= 8;
3580     }
3581     // XXX reg_mem doesn't support RIP-relative addressing yet
3582     emit_opcode(cbuf, 0x0F);
3583     emit_opcode(cbuf, 0x54);
3584     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // 32-bit displacement for the memory operand, emitted with relocation.
3585     emit_d32_reloc(cbuf, signmask_address);
3586   %}
3587 
3588   enc_class negF_encoding(regF dst)
3589   %{
         // Hand-encodes 0F 57 (XORPS) with $dst as the register operand and the
         // constant at StubRoutines::x86::float_sign_flip() as the memory operand.
         // NOTE(review): presumably the constant has only the sign bit set, so
         // the XOR negates dst -- the constant's value is not visible here.
3590     int dstenc = $dst$$reg;
3591     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3592 
3593     cbuf.set_insts_mark();
         // Registers 8..15 need a REX.R prefix; the ModRM reg field then only
         // carries the low three bits.
3594     if (dstenc >= 8) {
3595       emit_opcode(cbuf, Assembler::REX_R);
3596       dstenc -= 8;
3597     }
3598     // XXX reg_mem doesn't support RIP-relative addressing yet
3599     emit_opcode(cbuf, 0x0F);
3600     emit_opcode(cbuf, 0x57);
3601     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // 32-bit displacement for the memory operand, emitted with relocation.
3602     emit_d32_reloc(cbuf, signflip_address);
3603   %}
3604 
3605   enc_class negD_encoding(regD dst)
3606   %{
         // Hand-encodes 66 0F 57 (XORPD) with $dst as the register operand and
         // the constant at StubRoutines::x86::double_sign_flip() as the memory
         // operand.
         // NOTE(review): presumably the constant has only the sign bit set, so
         // the XOR negates dst -- the constant's value is not visible here.
3607     int dstenc = $dst$$reg;
3608     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3609 
3610     cbuf.set_insts_mark();
         // The 0x66 operand-size prefix must come before any REX prefix.
3611     emit_opcode(cbuf, 0x66);
3612     if (dstenc >= 8) {
3613       emit_opcode(cbuf, Assembler::REX_R);
3614       dstenc -= 8;
3615     }
3616     // XXX reg_mem doesn't support RIP-relative addressing yet
3617     emit_opcode(cbuf, 0x0F);
3618     emit_opcode(cbuf, 0x57);
3619     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // 32-bit displacement for the memory operand, emitted with relocation.
3620     emit_d32_reloc(cbuf, signflip_address);
3621   %}
3622 
3623   enc_class f2i_fixup(rRegI dst, regF src)
3624   %{
         // Emits the fix-up that follows a float -> int conversion.
         // If $dst != 0x80000000 the code branches straight to "done".
         // Otherwise $src is stored into a freshly reserved 8-byte stack slot,
         // StubRoutines::x86::f2i_fixup() is called, and the slot is popped
         // into $dst.
         // NOTE(review): presumably 0x80000000 is what the preceding conversion
         // instruction produces for NaN / out-of-range inputs -- confirm.
3625     int dstenc = $dst$$reg;
3626     int srcenc = $src$$reg;
3627 
3628     // cmpl $dst, #0x80000000
3629     if (dstenc >= 8) {
3630       emit_opcode(cbuf, Assembler::REX_B);
3631     }
3632     emit_opcode(cbuf, 0x81);
3633     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3634     emit_d32(cbuf, 0x80000000);
3635 
3636     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted below:
         // subq (4) + movss (5, +1 with REX) + call (5) + popq (1, +1 with REX),
         // i.e. 0xF, 0x10 or 0x11.  It must be kept in sync with that code.
3637     emit_opcode(cbuf, 0x75);
3638     if (srcenc < 8 && dstenc < 8) {
3639       emit_d8(cbuf, 0xF);
3640     } else if (srcenc >= 8 && dstenc >= 8) {
3641       emit_d8(cbuf, 0x11);
3642     } else {
3643       emit_d8(cbuf, 0x10);
3644     }
3645 
3646     // subq rsp, #8
3647     emit_opcode(cbuf, Assembler::REX_W);
3648     emit_opcode(cbuf, 0x83);
3649     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3650     emit_d8(cbuf, 8);
3651 
3652     // movss [rsp], $src
3653     emit_opcode(cbuf, 0xF3);
3654     if (srcenc >= 8) {
3655       emit_opcode(cbuf, Assembler::REX_R);
3656     }
3657     emit_opcode(cbuf, 0x0F);
3658     emit_opcode(cbuf, 0x11);
3659     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3660 
3661     // call f2i_fixup
         // rel32 is relative to the end of the call: insts_end() is read after
         // the opcode byte and 4 more bytes are taken up by the displacement.
3662     cbuf.set_insts_mark();
3663     emit_opcode(cbuf, 0xE8);
3664     emit_d32_reloc(cbuf,
3665                    (int)
3666                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3667                    runtime_call_Relocation::spec(),
3668                    RELOC_DISP32);
3669 
3670     // popq $dst
         // Pops the 8-byte slot reserved above, restoring rsp.
3671     if (dstenc >= 8) {
3672       emit_opcode(cbuf, Assembler::REX_B);
3673     }
3674     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3675 
3676     // done:
3677   %}
3678 
3679   enc_class f2l_fixup(rRegL dst, regF src)
3680   %{
         // Emits the fix-up that follows a float -> long conversion.
         // $dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if they differ the code
         // branches to "done".  Otherwise $src is stored into a freshly
         // reserved 8-byte stack slot, StubRoutines::x86::f2l_fixup() is
         // called, and the slot is popped into $dst.
         // NOTE(review): presumably 0x8000000000000000 is what the preceding
         // conversion produces for NaN / out-of-range inputs -- confirm.
3681     int dstenc = $dst$$reg;
3682     int srcenc = $src$$reg;
3683     address const_address = (address) StubRoutines::x86::double_sign_flip();
3684 
3685     // cmpq $dst, [0x8000000000000000]
         // Encoded as opcode 0x39 with $dst in the ModRM reg field and a
         // mod=00/rm=101 memory operand whose 32-bit displacement is relocated
         // to const_address.  REX.W selects 64-bit operands; REX_WR additionally
         // extends the reg field for r8..r15.
3686     cbuf.set_insts_mark();
3687     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3688     emit_opcode(cbuf, 0x39);
3689     // XXX reg_mem doesn't support RIP-relative addressing yet
3690     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3691     emit_d32_reloc(cbuf, const_address);
3692 
3693 
3694     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted below:
         // subq (4) + movss (5, +1 with REX) + call (5) + popq (1, +1 with REX),
         // i.e. 0xF, 0x10 or 0x11.  It must be kept in sync with that code.
3695     emit_opcode(cbuf, 0x75);
3696     if (srcenc < 8 && dstenc < 8) {
3697       emit_d8(cbuf, 0xF);
3698     } else if (srcenc >= 8 && dstenc >= 8) {
3699       emit_d8(cbuf, 0x11);
3700     } else {
3701       emit_d8(cbuf, 0x10);
3702     }
3703 
3704     // subq rsp, #8
3705     emit_opcode(cbuf, Assembler::REX_W);
3706     emit_opcode(cbuf, 0x83);
3707     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3708     emit_d8(cbuf, 8);
3709 
3710     // movss [rsp], $src
3711     emit_opcode(cbuf, 0xF3);
3712     if (srcenc >= 8) {
3713       emit_opcode(cbuf, Assembler::REX_R);
3714     }
3715     emit_opcode(cbuf, 0x0F);
3716     emit_opcode(cbuf, 0x11);
3717     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3718 
3719     // call f2l_fixup
         // rel32 is relative to the end of the call: insts_end() is read after
         // the opcode byte and 4 more bytes are taken up by the displacement.
3720     cbuf.set_insts_mark();
3721     emit_opcode(cbuf, 0xE8);
3722     emit_d32_reloc(cbuf,
3723                    (int)
3724                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3725                    runtime_call_Relocation::spec(),
3726                    RELOC_DISP32);
3727 
3728     // popq $dst
         // Pops the 8-byte slot reserved above, restoring rsp.
3729     if (dstenc >= 8) {
3730       emit_opcode(cbuf, Assembler::REX_B);
3731     }
3732     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3733 
3734     // done:
3735   %}
3736 
3737   enc_class d2i_fixup(rRegI dst, regD src)
3738   %{
         // Emits the fix-up that follows a double -> int conversion.
         // If $dst != 0x80000000 the code branches straight to "done".
         // Otherwise $src is stored into a freshly reserved 8-byte stack slot,
         // StubRoutines::x86::d2i_fixup() is called, and the slot is popped
         // into $dst.
         // NOTE(review): presumably 0x80000000 is what the preceding conversion
         // instruction produces for NaN / out-of-range inputs -- confirm.
3739     int dstenc = $dst$$reg;
3740     int srcenc = $src$$reg;
3741 
3742     // cmpl $dst, #0x80000000
3743     if (dstenc >= 8) {
3744       emit_opcode(cbuf, Assembler::REX_B);
3745     }
3746     emit_opcode(cbuf, 0x81);
3747     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3748     emit_d32(cbuf, 0x80000000);
3749 
3750     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted below:
         // subq (4) + movsd (5, +1 with REX) + call (5) + popq (1, +1 with REX),
         // i.e. 0xF, 0x10 or 0x11.  It must be kept in sync with that code.
3751     emit_opcode(cbuf, 0x75);
3752     if (srcenc < 8 && dstenc < 8) {
3753       emit_d8(cbuf, 0xF);
3754     } else if (srcenc >= 8 && dstenc >= 8) {
3755       emit_d8(cbuf, 0x11);
3756     } else {
3757       emit_d8(cbuf, 0x10);
3758     }
3759 
3760     // subq rsp, #8
3761     emit_opcode(cbuf, Assembler::REX_W);
3762     emit_opcode(cbuf, 0x83);
3763     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3764     emit_d8(cbuf, 8);
3765 
3766     // movsd [rsp], $src
3767     emit_opcode(cbuf, 0xF2);
3768     if (srcenc >= 8) {
3769       emit_opcode(cbuf, Assembler::REX_R);
3770     }
3771     emit_opcode(cbuf, 0x0F);
3772     emit_opcode(cbuf, 0x11);
3773     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3774 
3775     // call d2i_fixup
         // rel32 is relative to the end of the call: insts_end() is read after
         // the opcode byte and 4 more bytes are taken up by the displacement.
3776     cbuf.set_insts_mark();
3777     emit_opcode(cbuf, 0xE8);
3778     emit_d32_reloc(cbuf,
3779                    (int)
3780                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3781                    runtime_call_Relocation::spec(),
3782                    RELOC_DISP32);
3783 
3784     // popq $dst
         // Pops the 8-byte slot reserved above, restoring rsp.
3785     if (dstenc >= 8) {
3786       emit_opcode(cbuf, Assembler::REX_B);
3787     }
3788     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3789 
3790     // done:
3791   %}
3792 
3793   enc_class d2l_fixup(rRegL dst, regD src)
3794   %{
         // Emits the fix-up that follows a double -> long conversion.
         // $dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if they differ the code
         // branches to "done".  Otherwise $src is stored into a freshly
         // reserved 8-byte stack slot, StubRoutines::x86::d2l_fixup() is
         // called, and the slot is popped into $dst.
         // NOTE(review): presumably 0x8000000000000000 is what the preceding
         // conversion produces for NaN / out-of-range inputs -- confirm.
3795     int dstenc = $dst$$reg;
3796     int srcenc = $src$$reg;
3797     address const_address = (address) StubRoutines::x86::double_sign_flip();
3798 
3799     // cmpq $dst, [0x8000000000000000]
         // Encoded as opcode 0x39 with $dst in the ModRM reg field and a
         // mod=00/rm=101 memory operand whose 32-bit displacement is relocated
         // to const_address.  REX.W selects 64-bit operands; REX_WR additionally
         // extends the reg field for r8..r15.
3800     cbuf.set_insts_mark();
3801     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3802     emit_opcode(cbuf, 0x39);
3803     // XXX reg_mem doesn't support RIP-relative addressing yet
3804     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3805     emit_d32_reloc(cbuf, const_address);
3806 
3807 
3808     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted below:
         // subq (4) + movsd (5, +1 with REX) + call (5) + popq (1, +1 with REX),
         // i.e. 0xF, 0x10 or 0x11.  It must be kept in sync with that code.
3809     emit_opcode(cbuf, 0x75);
3810     if (srcenc < 8 && dstenc < 8) {
3811       emit_d8(cbuf, 0xF);
3812     } else if (srcenc >= 8 && dstenc >= 8) {
3813       emit_d8(cbuf, 0x11);
3814     } else {
3815       emit_d8(cbuf, 0x10);
3816     }
3817 
3818     // subq rsp, #8
3819     emit_opcode(cbuf, Assembler::REX_W);
3820     emit_opcode(cbuf, 0x83);
3821     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3822     emit_d8(cbuf, 8);
3823 
3824     // movsd [rsp], $src
3825     emit_opcode(cbuf, 0xF2);
3826     if (srcenc >= 8) {
3827       emit_opcode(cbuf, Assembler::REX_R);
3828     }
3829     emit_opcode(cbuf, 0x0F);
3830     emit_opcode(cbuf, 0x11);
3831     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3832 
3833     // call d2l_fixup
         // rel32 is relative to the end of the call: insts_end() is read after
         // the opcode byte and 4 more bytes are taken up by the displacement.
3834     cbuf.set_insts_mark();
3835     emit_opcode(cbuf, 0xE8);
3836     emit_d32_reloc(cbuf,
3837                    (int)
3838                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3839                    runtime_call_Relocation::spec(),
3840                    RELOC_DISP32);
3841 
3842     // popq $dst
         // Pops the 8-byte slot reserved above, restoring rsp.
3843     if (dstenc >= 8) {
3844       emit_opcode(cbuf, Assembler::REX_B);
3845     }
3846     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3847 
3848     // done:
3849   %}
3850 %}
3851 
3852 
3853 
3854 //----------FRAME--------------------------------------------------------------
3855 // Definition of frame structure and management information.
3856 //
3857 //  S T A C K   L A Y O U T    Allocators stack-slot number
3858 //                             |   (to get allocators register number
3859 //  G  Owned by    |        |  v    add OptoReg::stack0())
3860 //  r   CALLER     |        |
3861 //  o     |        +--------+      pad to even-align allocators stack-slot
3862 //  w     V        |  pad0  |        numbers; owned by CALLER
3863 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3864 //  h     ^        |   in   |  5
3865 //        |        |  args  |  4   Holes in incoming args owned by SELF
3866 //  |     |        |        |  3
3867 //  |     |        +--------+
3868 //  V     |        | old out|      Empty on Intel, window on Sparc
3869 //        |    old |preserve|      Must be even aligned.
3870 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3871 //        |        |   in   |  3   area for Intel ret address
3872 //     Owned by    |preserve|      Empty on Sparc.
3873 //       SELF      +--------+
3874 //        |        |  pad2  |  2   pad to align old SP
3875 //        |        +--------+  1
3876 //        |        | locks  |  0
3877 //        |        +--------+----> OptoReg::stack0(), even aligned
3878 //        |        |  pad1  | 11   pad to align new SP
3879 //        |        +--------+
3880 //        |        |        | 10
3881 //        |        | spills |  9   spills
3882 //        V        |        |  8   (pad0 slot for callee)
3883 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3884 //        ^        |  out   |  7
3885 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3886 //     Owned by    +--------+
3887 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3888 //        |    new |preserve|      Must be even-aligned.
3889 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3890 //        |        |        |
3891 //
3892 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3893 //         known from SELF's arguments and the Java calling convention.
3894 //         Region 6-7 is determined per call site.
3895 // Note 2: If the calling convention leaves holes in the incoming argument
3896 //         area, those holes are owned by SELF.  Holes in the outgoing area
3897 //         are owned by the CALLEE.  Holes should not be necessary in the
3898 //         incoming area, as the Java calling convention is completely under
3899 //         the control of the AD file.  Doubles can be sorted and packed to
3900 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3901 //         varargs C calling conventions.
3902 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3903 //         even aligned with pad0 as needed.
3904 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3905 //         region 6-11 is even aligned; it may be padded out more so that
3906 //         the region from SP to FP meets the minimum stack alignment.
3907 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3908 //         alignment.  Region 11, pad1, may be dynamically extended so that
3909 //         SP meets the minimum alignment.
3910 
3911 frame
3912 %{
       // Declares the frame layout and calling-convention facts for this
       // architecture: stack direction, the registers shared with the
       // interpreter, preserved-slot counts, the return-address location,
       // and where arguments and return values live.
3913   // What direction does stack grow in (assumed to be same for C & Java)
3914   stack_direction(TOWARDS_LOW);
3915 
3916   // These registers define part of the calling convention
3917   // between compiled code and the interpreter.
3918   inline_cache_reg(RAX);                // Inline Cache Register
3919   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3920                                         // calling interpreter
3921 
3922   // Optional: name the operand used by cisc-spilling to access
3923   // [stack_pointer + offset]
3924   cisc_spilling_operand_name(indOffset32);
3925 
3926   // Number of stack slots consumed by locking an object
3927   sync_stack_slots(2);
3928 
3929   // Compiled code's Frame Pointer
3930   frame_pointer(RSP);
3931 
3932   // Interpreter stores its frame pointer in a register which is
3933   // stored to the stack by I2CAdaptors.
3934   // I2CAdaptors convert from interpreted java to compiled java.
3935   interpreter_frame_pointer(RBP);
3936 
3937   // Stack alignment requirement
3938   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3939 
3940   // Number of stack slots between incoming argument block and the start of
3941   // a new frame.  The PROLOG must add this many slots to the stack.  The
3942   // EPILOG must remove this many slots.  amd64 needs two slots for
3943   // return address.
       // NOTE(review): the value below is 4 slots (plus 2 more when
       // VerifyStackAtCalls is set), not 2; presumably the other two slots
       // hold the saved frame pointer -- confirm and update the text above.
3944   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3945 
3946   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3947   // for calls to C.  Supports the var-args backing area for register parms.
3948   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3949 
3950   // The after-PROLOG location of the return address.  Location of
3951   // return address specifies a type (REG or STACK) and a number
3952   // representing the register number (i.e. - use a register name) or
3953   // stack slot.
3954   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3955   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + fixed slots) is rounded up
       // to a multiple of WordsPerLong * 2 before the 2 is subtracted again.
3956   return_addr(STACK - 2 +
3957               round_to(2 + 2 * VerifyStackAtCalls +
3958                        Compile::current()->fixed_slots(),
3959                        WordsPerLong * 2));
3960 
3961   // Body of function which returns an integer array locating
3962   // arguments either in registers or in stack slots.  Passed an array
3963   // of ideal registers called "sig" and a "length" count.  Stack-slot
3964   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3965   // arguments for a CALLEE.  Incoming stack arguments are
3966   // automatically biased by the preserve_stack_slots field above.
3967 
3968   calling_convention
3969   %{
3970     // No difference between ingoing/outgoing just pass false
3971     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3972   %}
3973 
3974   c_calling_convention
3975   %{
3976     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
3977     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3978   %}
3979 
3980   // Location of compiled Java return values.  Same as C for now.
3981   return_value
3982   %{
         // Both tables are indexed by the ideal register opcode; the result is
         // the (hi, lo) register pair for that kind of value.  Integer-like
         // and pointer values come back in RAX, float/double in XMM0.  The
         // Op_RegN, Op_RegI and Op_RegF entries have no upper half
         // (OptoReg::Bad).
3983     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3984            "only return normal values");
3985 
3986     static const int lo[Op_RegL + 1] = {
3987       0,
3988       0,
3989       RAX_num,  // Op_RegN
3990       RAX_num,  // Op_RegI
3991       RAX_num,  // Op_RegP
3992       XMM0_num, // Op_RegF
3993       XMM0_num, // Op_RegD
3994       RAX_num   // Op_RegL
3995     };
3996     static const int hi[Op_RegL + 1] = {
3997       0,
3998       0,
3999       OptoReg::Bad, // Op_RegN
4000       OptoReg::Bad, // Op_RegI
4001       RAX_H_num,    // Op_RegP
4002       OptoReg::Bad, // Op_RegF
4003       XMM0_H_num,   // Op_RegD
4004       RAX_H_num     // Op_RegL
4005     };
         // Guards against a new ideal register type being added without a
         // matching table entry.
4006     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4007     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4008   %}
4009 %}
4010 
4011 //----------ATTRIBUTES---------------------------------------------------------
     // NOTE(review): the parenthesized value in each declaration below is
     // presumably the default used when an operand/instruction does not set
     // the attribute itself -- confirm against the ADLC documentation.
4012 //----------Operand Attributes-------------------------------------------------
4013 op_attrib op_cost(0);        // Required cost attribute
4014 
4015 //----------Instruction Attributes---------------------------------------------
4016 ins_attrib ins_cost(100);       // Required cost attribute
4017 ins_attrib ins_size(8);         // Required size attribute (in bits)
4018 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4019 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4020                                 // a non-matching short branch variant
4021                                 // of some long branch?
4022 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4023                                 // be a power of 2) specifies the
4024                                 // alignment that some part of the
4025                                 // instruction (not necessarily the
4026                                 // start) requires.  If > 1, a
4027                                 // compute_padding() function must be
4028                                 // provided for the instruction
4029 
4030 //----------OPERANDS-----------------------------------------------------------
4031 // Operand definitions must precede instruction definitions for correct parsing
4032 // in the ADLC because operands constitute user defined types which are used in
4033 // instruction definitions.
4034 
4035 //----------Simple Operands----------------------------------------------------
4036 // Immediate Operands
4037 // Integer Immediate: matches any ConI node (no predicate).
4038 operand immI()
4039 %{
4040   match(ConI);
4041 
4042   op_cost(10);
4043   format %{ %}
4044   interface(CONST_INTER);
4045 %}
4046 
4047 // Constant for test vs zero: a ConI whose value is exactly 0.
4048 operand immI0()
4049 %{
4050   predicate(n->get_int() == 0);
4051   match(ConI);
4052 
4053   op_cost(0);
4054   format %{ %}
4055   interface(CONST_INTER);
4056 %}
4057 
4058 // Constant for increment: a ConI whose value is exactly 1.
4059 operand immI1()
4060 %{
4061   predicate(n->get_int() == 1);
4062   match(ConI);
4063 
4064   op_cost(0);
4065   format %{ %}
4066   interface(CONST_INTER);
4067 %}
4068 
4069 // Constant for decrement: a ConI whose value is exactly -1.
4070 operand immI_M1()
4071 %{
4072   predicate(n->get_int() == -1);
4073   match(ConI);
4074 
4075   op_cost(0);
4076   format %{ %}
4077   interface(CONST_INTER);
4078 %}
4079 
4080 // Valid scale values for addressing modes: a ConI in the range [0, 3].
4081 operand immI2()
4082 %{
4083   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4084   match(ConI);
4085 
4086   format %{ %}
4087   interface(CONST_INTER);
4088 %}
4089 
4090 operand immI8()
4091 %{
       // A ConI that fits in a signed 8-bit immediate: -0x80 <= value < 0x80.
4092   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4093   match(ConI);
4094 
4095   op_cost(5);
4096   format %{ %}
4097   interface(CONST_INTER);
4098 %}
4099 
4100 operand immI16()
4101 %{
       // A ConI that fits in a signed 16-bit immediate: -32768 <= value <= 32767.
4102   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4103   match(ConI);
4104 
4105   op_cost(10);
4106   format %{ %}
4107   interface(CONST_INTER);
4108 %}
4109 
4110 // Constant for long shifts: a ConI whose value is exactly 32.
4111 operand immI_32()
4112 %{
4113   predicate( n->get_int() == 32 );
4114   match(ConI);
4115 
4116   op_cost(0);
4117   format %{ %}
4118   interface(CONST_INTER);
4119 %}
4120 
4121 // Constant for long shifts: a ConI whose value is exactly 64.
4122 operand immI_64()
4123 %{
4124   predicate( n->get_int() == 64 );
4125   match(ConI);
4126 
4127   op_cost(0);
4128   format %{ %}
4129   interface(CONST_INTER);
4130 %}
4131 
4132 // Pointer Immediate: matches any ConP node (no predicate).
4133 operand immP()
4134 %{
4135   match(ConP);
4136 
4137   op_cost(10);
4138   format %{ %}
4139   interface(CONST_INTER);
4140 %}
4141 
4142 // NULL Pointer Immediate: a ConP whose pointer value is 0.
4143 operand immP0()
4144 %{
4145   predicate(n->get_ptr() == 0);
4146   match(ConP);
4147 
4148   op_cost(5);
4149   format %{ %}
4150   interface(CONST_INTER);
4151 %}
4152 
4153 operand immP_poll() %{
       // A non-null ConP whose value equals the address returned by
       // os::get_polling_page().
4154   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
4155   match(ConP);
4156 
4157   // formats are generated automatically for constants and base registers
4158   format %{ %}
4159   interface(CONST_INTER);
4160 %}
4161 
4162 // Narrow Pointer Immediate: matches any ConN node (no predicate).
4163 operand immN() %{
4164   match(ConN);
4165 
4166   op_cost(10);
4167   format %{ %}
4168   interface(CONST_INTER);
4169 %}
4170 
4171 // NULL Narrow Pointer Immediate: a ConN whose narrow constant is 0.
4172 operand immN0() %{
4173   predicate(n->get_narrowcon() == 0);
4174   match(ConN);
4175 
4176   op_cost(5);
4177   format %{ %}
4178   interface(CONST_INTER);
4179 %}
4180 
4181 operand immP31()
4182 %{
       // A ConP that is not an oop pointer and whose value has no bits set
       // at or above bit 31 (value >> 31 == 0).
4183   predicate(!n->as_Type()->type()->isa_oopptr()
4184             && (n->get_ptr() >> 31) == 0);
4185   match(ConP);
4186 
4187   op_cost(5);
4188   format %{ %}
4189   interface(CONST_INTER);
4190 %}
4191 
4192 
4193 // Long Immediate: matches any ConL node (no predicate).
4194 operand immL()
4195 %{
4196   match(ConL);
4197 
4198   op_cost(20);
4199   format %{ %}
4200   interface(CONST_INTER);
4201 %}
4202 
4203 // Long Immediate 8-bit: a ConL with -0x80 <= value < 0x80.
4204 operand immL8()
4205 %{
4206   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4207   match(ConL);
4208 
4209   op_cost(5);
4210   format %{ %}
4211   interface(CONST_INTER);
4212 %}
4213 
4214 // Long Immediate 32-bit unsigned: a ConL whose value survives a round
     // trip through unsigned int unchanged.
4215 operand immUL32()
4216 %{
4217   predicate(n->get_long() == (unsigned int) (n->get_long()));
4218   match(ConL);
4219 
4220   op_cost(10);
4221   format %{ %}
4222   interface(CONST_INTER);
4223 %}
4224 
4225 // Long Immediate 32-bit signed: a ConL whose value survives a round
     // trip through int unchanged.
4226 operand immL32()
4227 %{
4228   predicate(n->get_long() == (int) (n->get_long()));
4229   match(ConL);
4230 
4231   op_cost(15);
4232   format %{ %}
4233   interface(CONST_INTER);
4234 %}
4235 
4236 // Long Immediate zero: a ConL whose value is exactly 0.
4237 operand immL0()
4238 %{
4239   predicate(n->get_long() == 0L);
4240   match(ConL);
4241 
4242   op_cost(10);
4243   format %{ %}
4244   interface(CONST_INTER);
4245 %}
4246 
4247 // Constant for increment: a ConL whose value is exactly 1.
4248 operand immL1()
4249 %{
4250   predicate(n->get_long() == 1);
4251   match(ConL);
4252 
4253   format %{ %}
4254   interface(CONST_INTER);
4255 %}
4256 
4257 // Constant for decrement: a ConL whose value is exactly -1.
4258 operand immL_M1()
4259 %{
4260   predicate(n->get_long() == -1);
4261   match(ConL);
4262 
4263   format %{ %}
4264   interface(CONST_INTER);
4265 %}
4266 
4267 // Long Immediate: a ConL whose value is exactly 10.
4268 operand immL10()
4269 %{
4270   predicate(n->get_long() == 10);
4271   match(ConL);
4272 
4273   format %{ %}
4274   interface(CONST_INTER);
4275 %}
4276 
4277 // Long immediate from 0 to 127 inclusive (0 <= value < 0x80).
4278 // Used for a shorter form of long mul by 10.
4279 operand immL_127()
4280 %{
4281   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4282   match(ConL);
4283 
4284   op_cost(10);
4285   format %{ %}
4286   interface(CONST_INTER);
4287 %}
4288 
4289 // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
4290 operand immL_32bits()
4291 %{
4292   predicate(n->get_long() == 0xFFFFFFFFL);
4293   match(ConL);
4294   op_cost(20);
4295 
4296   format %{ %}
4297   interface(CONST_INTER);
4298 %}
4299 
4300 // Float Immediate zero: a ConF for which jint_cast(value) == 0.
     // NOTE(review): jint_cast presumably reinterprets the bits, in which
     // case only +0.0f matches and -0.0f does not -- confirm.
4301 operand immF0()
4302 %{
4303   predicate(jint_cast(n->getf()) == 0);
4304   match(ConF);
4305 
4306   op_cost(5);
4307   format %{ %}
4308   interface(CONST_INTER);
4309 %}
4310 
4311 // Float Immediate: matches any ConF node (no predicate).
4312 operand immF()
4313 %{
4314   match(ConF);
4315 
4316   op_cost(15);
4317   format %{ %}
4318   interface(CONST_INTER);
4319 %}
4320 
4321 // Double Immediate zero: a ConD for which jlong_cast(value) == 0.
     // NOTE(review): jlong_cast presumably reinterprets the bits, in which
     // case only +0.0 matches and -0.0 does not -- confirm.
4322 operand immD0()
4323 %{
4324   predicate(jlong_cast(n->getd()) == 0);
4325   match(ConD);
4326 
4327   op_cost(5);
4328   format %{ %}
4329   interface(CONST_INTER);
4330 %}
4331 
4332 // Double Immediate: matches any ConD node (no predicate).
4333 operand immD()
4334 %{
4335   match(ConD);
4336 
4337   op_cost(15);
4338   format %{ %}
4339   interface(CONST_INTER);
4340 %}
4341 
4342 // Immediates for special shifts (sign extend)
4343 
4344 // Constant 16: a ConI whose value is exactly 16.
4345 operand immI_16()
4346 %{
4347   predicate(n->get_int() == 16);
4348   match(ConI);
4349 
4350   format %{ %}
4351   interface(CONST_INTER);
4352 %}
4353 
4354 operand immI_24()
4355 %{
       // A ConI whose value is exactly 24.
4356   predicate(n->get_int() == 24);
4357   match(ConI);
4358 
4359   format %{ %}
4360   interface(CONST_INTER);
4361 %}
4362 
4363 // Constant for byte-wide masking: a ConI whose value is exactly 255.
4364 operand immI_255()
4365 %{
4366   predicate(n->get_int() == 255);
4367   match(ConI);
4368 
4369   format %{ %}
4370   interface(CONST_INTER);
4371 %}
4372 
4373 // Constant for short-wide masking: a ConI whose value is exactly 65535.
4374 operand immI_65535()
4375 %{
4376   predicate(n->get_int() == 65535);
4377   match(ConI);
4378 
4379   format %{ %}
4380   interface(CONST_INTER);
4381 %}
4382 
4383 // Constant for byte-wide masking: a ConL whose value is exactly 255.
4384 operand immL_255()
4385 %{
4386   predicate(n->get_long() == 255);
4387   match(ConL);
4388 
4389   format %{ %}
4390   interface(CONST_INTER);
4391 %}
4392 
4393 // Constant for short-wide masking: a ConL whose value is exactly 65535.
4394 operand immL_65535()
4395 %{
4396   predicate(n->get_long() == 65535);
4397   match(ConL);
4398 
4399   format %{ %}
4400   interface(CONST_INTER);
4401 %}
4402 
4403 // Register Operands
4404 // Integer Register: allocated from register class int_reg.
4405 operand rRegI()
4406 %{
4407   constraint(ALLOC_IN_RC(int_reg));
4408   match(RegI);
4409 
       // Also accept the fixed-register integer operands.
4410   match(rax_RegI);
4411   match(rbx_RegI);
4412   match(rcx_RegI);
4413   match(rdx_RegI);
4414   match(rdi_RegI);
4415 
4416   format %{ %}
4417   interface(REG_INTER);
4418 %}
4419 
4420 // Special Registers: integer operand allocated from class int_rax_reg.
4421 operand rax_RegI()
4422 %{
4423   constraint(ALLOC_IN_RC(int_rax_reg));
4424   match(RegI);
4425   match(rRegI);
4426 
4427   format %{ "RAX" %}
4428   interface(REG_INTER);
4429 %}
4430 
4431 // Special Registers: integer operand allocated from class int_rbx_reg.
4432 operand rbx_RegI()
4433 %{
4434   constraint(ALLOC_IN_RC(int_rbx_reg));
4435   match(RegI);
4436   match(rRegI);
4437 
4438   format %{ "RBX" %}
4439   interface(REG_INTER);
4440 %}
4441 
4442 operand rcx_RegI()
4443 %{
       // Integer operand allocated from class int_rcx_reg.
4444   constraint(ALLOC_IN_RC(int_rcx_reg));
4445   match(RegI);
4446   match(rRegI);
4447 
4448   format %{ "RCX" %}
4449   interface(REG_INTER);
4450 %}
4451 
4452 operand rdx_RegI()
4453 %{
       // Integer operand allocated from class int_rdx_reg.
4454   constraint(ALLOC_IN_RC(int_rdx_reg));
4455   match(RegI);
4456   match(rRegI);
4457 
4458   format %{ "RDX" %}
4459   interface(REG_INTER);
4460 %}
4461 
4462 operand rdi_RegI()
4463 %{
       // Integer operand allocated from class int_rdi_reg.
4464   constraint(ALLOC_IN_RC(int_rdi_reg));
4465   match(RegI);
4466   match(rRegI);
4467 
4468   format %{ "RDI" %}
4469   interface(REG_INTER);
4470 %}
4471 
4472 operand no_rcx_RegI()
4473 %{
       // Integer operand allocated from class int_no_rcx_reg; accepts the
       // fixed-register operands other than rcx_RegI.
4474   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4475   match(RegI);
4476   match(rax_RegI);
4477   match(rbx_RegI);
4478   match(rdx_RegI);
4479   match(rdi_RegI);
4480 
4481   format %{ %}
4482   interface(REG_INTER);
4483 %}
4484 
4485 operand no_rax_rdx_RegI()
4486 %{
       // Integer operand allocated from class int_no_rax_rdx_reg; accepts the
       // fixed-register operands other than rax_RegI and rdx_RegI.
4487   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4488   match(RegI);
4489   match(rbx_RegI);
4490   match(rcx_RegI);
4491   match(rdi_RegI);
4492 
4493   format %{ %}
4494   interface(REG_INTER);
4495 %}
4496 
4497 // Pointer Register: allocated from register class any_reg.
4498 operand any_RegP()
4499 %{
4500   constraint(ALLOC_IN_RC(any_reg));
4501   match(RegP);
       // Also accept the fixed-register pointer operands and rRegP.
4502   match(rax_RegP);
4503   match(rbx_RegP);
4504   match(rdi_RegP);
4505   match(rsi_RegP);
4506   match(rbp_RegP);
4507   match(r15_RegP);
4508   match(rRegP);
4509 
4510   format %{ %}
4511   interface(REG_INTER);
4512 %}
4513 
4514 operand rRegP()
4515 %{
4516   constraint(ALLOC_IN_RC(ptr_reg));  // pointer operand; allocation limited to the ptr_reg class
4517   match(RegP);
4518   match(rax_RegP);
4519   match(rbx_RegP);
4520   match(rdi_RegP);
4521   match(rsi_RegP);
4522   match(rbp_RegP);
4523   match(r15_RegP);  // See Q&A below about r15_RegP.
4524 
4525   format %{ %}
4526   interface(REG_INTER);
4527 %}
4528 
4529 operand rRegN() %{
4530   constraint(ALLOC_IN_RC(int_reg));  // RegN operand; allocation uses the int_reg class
4531   match(RegN);
4532 
4533   format %{ %}
4534   interface(REG_INTER);
4535 %}
4536 
4537 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4538 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4539 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4540 // The output of an instruction is controlled by the allocator, which respects
4541 // register class masks, not match rules.  Unless an instruction mentions
4542 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4543 // by the allocator as an input.
4544 
4545 operand no_rax_RegP()
4546 %{
4547   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // pointer operand; allocation limited to the ptr_no_rax_reg class
4548   match(RegP);
4549   match(rbx_RegP);
4550   match(rsi_RegP);
4551   match(rdi_RegP);  // rax_RegP is not in the match list
4552 
4553   format %{ %}
4554   interface(REG_INTER);
4555 %}
4556 
4557 operand no_rbp_RegP()
4558 %{
4559   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // pointer operand; allocation limited to the ptr_no_rbp_reg class
4560   match(RegP);
4561   match(rbx_RegP);
4562   match(rsi_RegP);
4563   match(rdi_RegP);  // NOTE(review): rax_RegP is not listed either, same list as no_rax_RegP - confirm intended
4564 
4565   format %{ %}
4566   interface(REG_INTER);
4567 %}
4568 
4569 operand no_rax_rbx_RegP()
4570 %{
4571   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // pointer operand; allocation limited to the ptr_no_rax_rbx_reg class
4572   match(RegP);
4573   match(rsi_RegP);
4574   match(rdi_RegP);  // neither rax_RegP nor rbx_RegP is in the match list
4575 
4576   format %{ %}
4577   interface(REG_INTER);
4578 %}
4579 
4580 // Special Registers
4581 // Return a pointer value: pointer operand constrained to the ptr_rax_reg class
4582 operand rax_RegP()
4583 %{
4584   constraint(ALLOC_IN_RC(ptr_rax_reg));  // allocation limited to this register class
4585   match(RegP);
4586   match(rRegP);  // an input expecting rax_RegP may also match an rRegP
4587 
4588   format %{ %}
4589   interface(REG_INTER);
4590 %}
4591 
4592 // Special Registers
4593 // Return a compressed pointer value: RegN operand constrained to the int_rax_reg class
4594 operand rax_RegN()
4595 %{
4596   constraint(ALLOC_IN_RC(int_rax_reg));  // same register class as rax_RegI
4597   match(RegN);
4598   match(rRegN);  // an input expecting rax_RegN may also match an rRegN
4599 
4600   format %{ %}
4601   interface(REG_INTER);
4602 %}
4603 
4604 // Used in AtomicAdd: pointer operand constrained to the ptr_rbx_reg class
4605 operand rbx_RegP()
4606 %{
4607   constraint(ALLOC_IN_RC(ptr_rbx_reg));  // allocation limited to this register class
4608   match(RegP);
4609   match(rRegP);  // an input expecting rbx_RegP may also match an rRegP
4610 
4611   format %{ %}
4612   interface(REG_INTER);
4613 %}
4614 
4615 operand rsi_RegP()
4616 %{
4617   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand; allocation limited to the ptr_rsi_reg class
4618   match(RegP);
4619   match(rRegP);  // an input expecting rsi_RegP may also match an rRegP
4620 
4621   format %{ %}
4622   interface(REG_INTER);
4623 %}
4624 
4625 // Used in rep stosq: pointer operand constrained to the ptr_rdi_reg class
4626 operand rdi_RegP()
4627 %{
4628   constraint(ALLOC_IN_RC(ptr_rdi_reg));  // allocation limited to this register class
4629   match(RegP);
4630   match(rRegP);  // an input expecting rdi_RegP may also match an rRegP
4631 
4632   format %{ %}
4633   interface(REG_INTER);
4634 %}
4635 
4636 operand rbp_RegP()
4637 %{
4638   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand; allocation limited to the ptr_rbp_reg class
4639   match(RegP);
4640   match(rRegP);  // an input expecting rbp_RegP may also match an rRegP
4641 
4642   format %{ %}
4643   interface(REG_INTER);
4644 %}
4645 
4646 operand r15_RegP()
4647 %{
4648   constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand; allocation limited to the ptr_r15_reg class
4649   match(RegP);
4650   match(rRegP);  // the Q&A comment after rRegN describes r15 as the read-only TLS register
4651 
4652   format %{ %}
4653   interface(REG_INTER);
4654 %}
4655 
4656 operand rRegL()
4657 %{
4658   constraint(ALLOC_IN_RC(long_reg));  // long operand; allocation limited to the long_reg class
4659   match(RegL);
4660   match(rax_RegL);
4661   match(rdx_RegL);  // only rax_RegL and rdx_RegL are listed; rcx_RegL is not
4662 
4663   format %{ %}
4664   interface(REG_INTER);
4665 %}
4666 
4667 // Special Registers: long operand constrained to the long_no_rax_rdx_reg class
4668 operand no_rax_rdx_RegL()
4669 %{
4670   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // allocation limited to this register class
4671   match(RegL);
4672   match(rRegL);  // an input expecting this operand may also match an rRegL
4673 
4674   format %{ %}
4675   interface(REG_INTER);
4676 %}
4677 
4678 operand no_rax_RegL()
4679 %{
4680   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although rdx_RegL is matched below - confirm
4681   match(RegL);
4682   match(rRegL);
4683   match(rdx_RegL);  // the only match rule that differs from no_rax_rdx_RegL
4684 
4685   format %{ %}
4686   interface(REG_INTER);
4687 %}
4688 
4689 operand no_rcx_RegL()
4690 %{
4691   constraint(ALLOC_IN_RC(long_no_rcx_reg));  // long operand; allocation limited to the long_no_rcx_reg class
4692   match(RegL);
4693   match(rRegL);  // an input expecting this operand may also match an rRegL
4694 
4695   format %{ %}
4696   interface(REG_INTER);
4697 %}
4698 
4699 operand rax_RegL()
4700 %{
4701   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand; allocation limited to the long_rax_reg class
4702   match(RegL);
4703   match(rRegL);  // an input expecting rax_RegL may also match an rRegL
4704 
4705   format %{ "RAX" %}
4706   interface(REG_INTER);
4707 %}
4708 
4709 operand rcx_RegL()
4710 %{
4711   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand; allocation limited to the long_rcx_reg class
4712   match(RegL);
4713   match(rRegL);  // an input expecting rcx_RegL may also match an rRegL
4714 
4715   format %{ %}
4716   interface(REG_INTER);
4717 %}
4718 
4719 operand rdx_RegL()
4720 %{
4721   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand; allocation limited to the long_rdx_reg class
4722   match(RegL);
4723   match(rRegL);  // an input expecting rdx_RegL may also match an rRegL
4724 
4725   format %{ %}
4726   interface(REG_INTER);
4727 %}
4728 
4729 // Flags register, used as output of compare instructions
4730 operand rFlagsReg()
4731 %{
4732   constraint(ALLOC_IN_RC(int_flags));  // allocation limited to the int_flags class
4733   match(RegFlags);
4734 
4735   format %{ "RFLAGS" %}
4736   interface(REG_INTER);
4737 %}
4738 
4739 // Flags register, used as output of FLOATING POINT compare instructions
4740 operand rFlagsRegU()
4741 %{
4742   constraint(ALLOC_IN_RC(int_flags));  // same register class and match rule as rFlagsReg; only the operand type differs
4743   match(RegFlags);
4744 
4745   format %{ "RFLAGS_U" %}
4746   interface(REG_INTER);
4747 %}
4748 
4749 operand rFlagsRegUCF() %{
4750   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg and rFlagsRegU
4751   match(RegFlags);
4752   predicate(false);  // constant-false predicate; presumably keeps the matcher from choosing this operand for a bare RegFlags - TODO confirm
4753 
4754   format %{ "RFLAGS_U_CF" %}
4755   interface(REG_INTER);
4756 %}
4757 
4758 // Float register operands
4759 operand regF()
4760 %{
4761   constraint(ALLOC_IN_RC(float_reg));  // allocation limited to the float_reg class
4762   match(RegF);
4763 
4764   format %{ %}
4765   interface(REG_INTER);
4766 %}
4767 
4768 // Double register operands
4769 operand regD()
4770 %{
4771   constraint(ALLOC_IN_RC(double_reg));  // allocation limited to the double_reg class
4772   match(RegD);
4773 
4774   format %{ %}
4775   interface(REG_INTER);
4776 %}
4777 
4778 
4779 //----------Memory Operands----------------------------------------------------
4780 // Direct Memory Operand
4781 // operand direct(immP addr)
4782 // %{
4783 //   match(addr);
4784 
4785 //   format %{ "[$addr]" %}
4786 //   interface(MEMORY_INTER) %{
4787 //     base(0xFFFFFFFF);
4788 //     index(0x4);
4789 //     scale(0x0);
4790 //     disp($addr);
4791 //   %}
4792 // %}
4793 
4794 // Indirect Memory Operand: the address is the base register alone
4795 operand indirect(any_RegP reg)
4796 %{
4797   constraint(ALLOC_IN_RC(ptr_reg));
4798   match(reg);
4799 
4800   format %{ "[$reg]" %}
4801   interface(MEMORY_INTER) %{
4802     base($reg);
4803     index(0x4);  // No Index
4804     scale(0x0);  // No Scale
4805     disp(0x0);   // No Displacement
4806   %}
4807 %}
4808 
4809 // Indirect Memory Plus Short Offset Operand (offset is an immL8)
4810 operand indOffset8(any_RegP reg, immL8 off)
4811 %{
4812   constraint(ALLOC_IN_RC(ptr_reg));
4813   match(AddP reg off);
4814 
4815   format %{ "[$reg + $off (8-bit)]" %}
4816   interface(MEMORY_INTER) %{
4817     base($reg);
4818     index(0x4);  // No Index
4819     scale(0x0);  // No Scale
4820     disp($off);
4821   %}
4822 %}
4823 
4824 // Indirect Memory Plus Long Offset Operand (offset is an immL32)
4825 operand indOffset32(any_RegP reg, immL32 off)
4826 %{
4827   constraint(ALLOC_IN_RC(ptr_reg));
4828   match(AddP reg off);
4829 
4830   format %{ "[$reg + $off (32-bit)]" %}
4831   interface(MEMORY_INTER) %{
4832     base($reg);
4833     index(0x4);  // No Index
4834     scale(0x0);  // No Scale
4835     disp($off);
4836   %}
4837 %}
4838 
4839 // Indirect Memory Plus Index Register Plus Offset Operand
4840 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4841 %{
4842   constraint(ALLOC_IN_RC(ptr_reg));
4843   match(AddP (AddP reg lreg) off);
4844 
4845   op_cost(10);
4846   format %{"[$reg + $off + $lreg]" %}
4847   interface(MEMORY_INTER) %{
4848     base($reg);
4849     index($lreg);
4850     scale(0x0);  // No Scale
4851     disp($off);
4852   %}
4853 %}
4854 
4855 // Indirect Memory Plus Index Register Operand (no offset)
4856 operand indIndex(any_RegP reg, rRegL lreg)
4857 %{
4858   constraint(ALLOC_IN_RC(ptr_reg));
4859   match(AddP reg lreg);
4860 
4861   op_cost(10);
4862   format %{"[$reg + $lreg]" %}
4863   interface(MEMORY_INTER) %{
4864     base($reg);
4865     index($lreg);
4866     scale(0x0);  // No Scale
4867     disp(0x0);   // No Displacement
4868   %}
4869 %}
4870 
4871 // Indirect Memory Plus Index Register Times Scale (no offset)
4872 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4873 %{
4874   constraint(ALLOC_IN_RC(ptr_reg));
4875   match(AddP reg (LShiftL lreg scale));
4876 
4877   op_cost(10);
4878   format %{"[$reg + $lreg << $scale]" %}
4879   interface(MEMORY_INTER) %{
4880     base($reg);
4881     index($lreg);
4882     scale($scale);  // shift count taken from the matched LShiftL
4883     disp(0x0);      // No Displacement
4884   %}
4885 %}
4886 
4887 // Indirect Memory Plus Index Register Times Scale Plus Offset Operand
4888 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4889 %{
4890   constraint(ALLOC_IN_RC(ptr_reg));
4891   match(AddP (AddP reg (LShiftL lreg scale)) off);
4892 
4893   op_cost(10);
4894   format %{"[$reg + $off + $lreg << $scale]" %}
4895   interface(MEMORY_INTER) %{
4896     base($reg);
4897     index($lreg);
4898     scale($scale);  // shift count taken from the matched LShiftL
4899     disp($off);
4900   %}
4901 %}
4902 
4903 // Indirect Memory Plus Positive (int, widened by ConvI2L) Index Register Times Scale Plus Offset Operand
4904 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4905 %{
4906   constraint(ALLOC_IN_RC(ptr_reg));
4907   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound must be >= 0; node path presumably reaches the ConvI2L of idx - confirm
4908   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4909 
4910   op_cost(10);
4911   format %{"[$reg + $off + $idx << $scale]" %}
4912   interface(MEMORY_INTER) %{
4913     base($reg);
4914     index($idx);    // the int register itself is used as the index
4915     scale($scale);
4916     disp($off);
4917   %}
4918 %}
4919 
4920 // Indirect Narrow Oop Plus Offset Operand
4921 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
4922 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4923 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4924   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only with compressed oops and a times_8 shift
4925   constraint(ALLOC_IN_RC(ptr_reg));
4926   match(AddP (DecodeN reg) off);
4927 
4928   op_cost(10);
4929   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4930   interface(MEMORY_INTER) %{
4931     base(0xc); // R12
4932     index($reg);  // the narrow register is used as the index
4933     scale(0x3);   // corresponds to the "<< 3" shown in the format string
4934     disp($off);
4935   %}
4936 %}
4937 
4938 // Indirect Memory Operand (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
4939 operand indirectNarrow(rRegN reg)
4940 %{
4941   predicate(Universe::narrow_oop_shift() == 0);
4942   constraint(ALLOC_IN_RC(ptr_reg));
4943   match(DecodeN reg);
4944 
4945   format %{ "[$reg]" %}
4946   interface(MEMORY_INTER) %{
4947     base($reg);
4948     index(0x4);  // No Index
4949     scale(0x0);  // No Scale
4950     disp(0x0);   // No Displacement
4951   %}
4952 %}
4953 
4954 // Indirect Memory Plus Short Offset Operand (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
4955 operand indOffset8Narrow(rRegN reg, immL8 off)
4956 %{
4957   predicate(Universe::narrow_oop_shift() == 0);
4958   constraint(ALLOC_IN_RC(ptr_reg));
4959   match(AddP (DecodeN reg) off);
4960 
4961   format %{ "[$reg + $off (8-bit)]" %}
4962   interface(MEMORY_INTER) %{
4963     base($reg);
4964     index(0x4);  // No Index
4965     scale(0x0);  // No Scale
4966     disp($off);
4967   %}
4968 %}
4969 
4970 // Indirect Memory Plus Long Offset Operand (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
4971 operand indOffset32Narrow(rRegN reg, immL32 off)
4972 %{
4973   predicate(Universe::narrow_oop_shift() == 0);
4974   constraint(ALLOC_IN_RC(ptr_reg));
4975   match(AddP (DecodeN reg) off);
4976 
4977   format %{ "[$reg + $off (32-bit)]" %}
4978   interface(MEMORY_INTER) %{
4979     base($reg);
4980     index(0x4);  // No Index
4981     scale(0x0);  // No Scale
4982     disp($off);
4983   %}
4984 %}
4985 
4986 // Indirect Memory Plus Index Register Plus Offset Operand (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
4987 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4988 %{
4989   predicate(Universe::narrow_oop_shift() == 0);
4990   constraint(ALLOC_IN_RC(ptr_reg));
4991   match(AddP (AddP (DecodeN reg) lreg) off);
4992 
4993   op_cost(10);
4994   format %{"[$reg + $off + $lreg]" %}
4995   interface(MEMORY_INTER) %{
4996     base($reg);
4997     index($lreg);
4998     scale(0x0);  // No Scale
4999     disp($off);
5000   %}
5001 %}
5002 
5003 // Indirect Memory Plus Index Register Operand, no offset (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
5004 operand indIndexNarrow(rRegN reg, rRegL lreg)
5005 %{
5006   predicate(Universe::narrow_oop_shift() == 0);
5007   constraint(ALLOC_IN_RC(ptr_reg));
5008   match(AddP (DecodeN reg) lreg);
5009 
5010   op_cost(10);
5011   format %{"[$reg + $lreg]" %}
5012   interface(MEMORY_INTER) %{
5013     base($reg);
5014     index($lreg);
5015     scale(0x0);  // No Scale
5016     disp(0x0);   // No Displacement
5017   %}
5018 %}
5019 
5020 // Indirect Memory Plus Index Register Times Scale (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
5021 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5022 %{
5023   predicate(Universe::narrow_oop_shift() == 0);
5024   constraint(ALLOC_IN_RC(ptr_reg));
5025   match(AddP (DecodeN reg) (LShiftL lreg scale));
5026 
5027   op_cost(10);
5028   format %{"[$reg + $lreg << $scale]" %}
5029   interface(MEMORY_INTER) %{
5030     base($reg);
5031     index($lreg);
5032     scale($scale);  // shift count taken from the matched LShiftL
5033     disp(0x0);      // No Displacement
5034   %}
5035 %}
5036 
5037 // Indirect Memory Plus Index Register Times Scale Plus Offset Operand (base is a DecodeN of an rRegN; requires narrow_oop_shift() == 0)
5038 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5039 %{
5040   predicate(Universe::narrow_oop_shift() == 0);
5041   constraint(ALLOC_IN_RC(ptr_reg));
5042   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5043 
5044   op_cost(10);
5045   format %{"[$reg + $off + $lreg << $scale]" %}
5046   interface(MEMORY_INTER) %{
5047     base($reg);
5048     index($lreg);
5049     scale($scale);  // shift count taken from the matched LShiftL
5050     disp($off);
5051   %}
5052 %}
5053 
5054 // Indirect Memory Plus Positive (int, widened by ConvI2L) Index Register Times Scale Plus Offset Operand; base is a DecodeN of an rRegN
5055 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5056 %{
5057   constraint(ALLOC_IN_RC(ptr_reg));
5058   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero narrow-oop shift AND long type's lower bound >= 0; node path presumably reaches the ConvI2L of idx - confirm
5059   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5060 
5061   op_cost(10);
5062   format %{"[$reg + $off + $idx << $scale]" %}
5063   interface(MEMORY_INTER) %{
5064     base($reg);
5065     index($idx);    // the int register itself is used as the index
5066     scale($scale);
5067     disp($off);
5068   %}
5069 %}
5070 
5071 
5072 //----------Special Memory Operands--------------------------------------------
5073 // Stack Slot Operand - This operand is used for loading and storing temporary
5074 //                      values on the stack where a match requires a value to
5075 //                      flow through memory.  This one wraps an sRegP.
5076 operand stackSlotP(sRegP reg)
5077 %{
5078   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots class
5079   // No match rule because this operand is only generated in matching
5080 
5081   format %{ "[$reg]" %}
5082   interface(MEMORY_INTER) %{
5083     base(0x4);   // RSP
5084     index(0x4);  // No Index
5085     scale(0x0);  // No Scale
5086     disp($reg);  // Stack Offset
5087   %}
5088 %}
5089 
5090 operand stackSlotI(sRegI reg)
5091 %{
5092   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand wrapping an sRegI
5093   // No match rule because this operand is only generated in matching
5094 
5095   format %{ "[$reg]" %}
5096   interface(MEMORY_INTER) %{
5097     base(0x4);   // RSP
5098     index(0x4);  // No Index
5099     scale(0x0);  // No Scale
5100     disp($reg);  // Stack Offset
5101   %}
5102 %}
5103 
5104 operand stackSlotF(sRegF reg)
5105 %{
5106   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand wrapping an sRegF
5107   // No match rule because this operand is only generated in matching
5108 
5109   format %{ "[$reg]" %}
5110   interface(MEMORY_INTER) %{
5111     base(0x4);   // RSP
5112     index(0x4);  // No Index
5113     scale(0x0);  // No Scale
5114     disp($reg);  // Stack Offset
5115   %}
5116 %}
5117 
5118 operand stackSlotD(sRegD reg)
5119 %{
5120   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand wrapping an sRegD
5121   // No match rule because this operand is only generated in matching
5122 
5123   format %{ "[$reg]" %}
5124   interface(MEMORY_INTER) %{
5125     base(0x4);   // RSP
5126     index(0x4);  // No Index
5127     scale(0x0);  // No Scale
5128     disp($reg);  // Stack Offset
5129   %}
5130 %}
5131 operand stackSlotL(sRegL reg)
5132 %{
5133   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand wrapping an sRegL
5134   // No match rule because this operand is only generated in matching
5135 
5136   format %{ "[$reg]" %}
5137   interface(MEMORY_INTER) %{
5138     base(0x4);   // RSP
5139     index(0x4);  // No Index
5140     scale(0x0);  // No Scale
5141     disp($reg);  // Stack Offset
5142   %}
5143 %}
5144 
5145 //----------Conditional Branch Operands----------------------------------------
5146 // Comparison Op  - This is the operation of the comparison, and is limited to
5147 //                  the following set of codes:
5148 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5149 //
5150 // Other attributes of the comparison, such as unsignedness, are specified
5151 // by the comparison instruction that sets a condition code flags register.
5152 // That result is represented by a flags operand whose subtype is appropriate
5153 // to the unsignedness (etc.) of the comparison.
5154 //
5155 // Later, the instruction which matches both the Comparison Op (a Bool) and
5156 // the flags (produced by the Cmp) specifies the coding of the comparison op
5157 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5158 
5159 // Comparison Code: matches a Bool and maps each test to a condition encoding and mnemonic (e, ne, l, ge, le, g)
5160 operand cmpOp()
5161 %{
5162   match(Bool);
5163 
5164   format %{ "" %}
5165   interface(COND_INTER) %{
5166     equal(0x4, "e");
5167     not_equal(0x5, "ne");
5168     less(0xC, "l");
5169     greater_equal(0xD, "ge");
5170     less_equal(0xE, "le");
5171     greater(0xF, "g");
5172   %}
5173 %}
5174 
5175 // Comparison Code, unsigned compare (mnemonics b, nb, be, nbe).  Used by FP also, with
5176 // C2 (unordered) turned into GT or LT already.  The other bits
5177 // C0 and C3 are turned into Carry & Zero flags.
5178 operand cmpOpU()
5179 %{
5180   match(Bool);
5181 
5182   format %{ "" %}
5183   interface(COND_INTER) %{
5184     equal(0x4, "e");
5185     not_equal(0x5, "ne");
5186     less(0x2, "b");
5187     greater_equal(0x3, "nb");
5188     less_equal(0x6, "be");
5189     greater(0x7, "nbe");
5190   %}
5191 %}
5192 
5193 
5194 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt tests only)
5195 operand cmpOpUCF() %{
5196   match(Bool);  // accepted only when the predicate below holds
5197   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5198             n->as_Bool()->_test._test == BoolTest::ge ||
5199             n->as_Bool()->_test._test == BoolTest::le ||
5200             n->as_Bool()->_test._test == BoolTest::gt);
5201   format %{ "" %}
5202   interface(COND_INTER) %{
5203     equal(0x4, "e");
5204     not_equal(0x5, "ne");
5205     less(0x2, "b");
5206     greater_equal(0x3, "nb");
5207     less_equal(0x6, "be");
5208     greater(0x7, "nbe");
5209   %}
5210 %}
5211 
5212 
5213 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq tests only)
5214 operand cmpOpUCF2() %{
5215   match(Bool);  // accepted only when the predicate below holds
5216   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5217             n->as_Bool()->_test._test == BoolTest::eq);
5218   format %{ "" %}
5219   interface(COND_INTER) %{
5220     equal(0x4, "e");
5221     not_equal(0x5, "ne");
5222     less(0x2, "b");
5223     greater_equal(0x3, "nb");
5224     less_equal(0x6, "be");
5225     greater(0x7, "nbe");
5226   %}
5227 %}
5228 
5229 
5230 //----------OPERAND CLASSES----------------------------------------------------
5231 // Operand Classes are groups of operands that are used to simplify
5232 // instruction definitions by not requiring the AD writer to specify separate
5233 // instructions for every form of operand when the instruction accepts
5234 // multiple operand types with the same basic encoding and format.  The classic
5235 // case of this is memory operands.
5236 
5237 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5238                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5239                indCompressedOopOffset,
5240                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5241                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5242                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5243 
5244 //----------PIPELINE-----------------------------------------------------------
5245 // Rules which define the behavior of the target architectures pipeline.
5246 pipeline %{
5247 
5248 //----------ATTRIBUTES---------------------------------------------------------
5249 attributes %{
5250   variable_size_instructions;        // Instructions are variable-size, not fixed-size
5251   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5252   instruction_unit_size = 1;         // Instruction size is counted in units of 1 byte
5253   instruction_fetch_unit_size = 16;  // The processor fetches one line
5254   instruction_fetch_units = 1;       // of 16 bytes
5255 
5256   // List of nop instructions
5257   nops( MachNop );
5258 %}
5259 
5260 //----------RESOURCES----------------------------------------------------------
5261 // Resources are the functional units available to the machine
5262 
5263 // Generic P2/P3 pipeline
5264 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5265 // 3 instructions decoded per cycle.
5266 // 2 load/store ops per cycle (NOTE(review): three memory units MS0-MS2 are declared below - confirm), 1 branch, 1 FPU,
5267 // 3 ALU ops, only ALU0 handles mul instructions.
5268 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5269            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5270            BR, FPU,
5271            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5272 
5273 //----------PIPELINE DESCRIPTION-----------------------------------------------
5274 // Pipeline Description specifies the stages in the machine's pipeline
5275 
5276 // Generic P2/P3 pipeline: six stages, S0 through S5
5277 pipe_desc(S0, S1, S2, S3, S4, S5);
5278 
5279 //----------PIPELINE CLASSES---------------------------------------------------
5280 // Pipeline Classes describe the stages in which input and output are
5281 // referenced by the hardware pipeline.
5282 
5283 // Naming convention: ialu or fpu
5284 // Then: _reg
5285 // Then: _reg if there is a 2nd register
5286 // Then: _long if it's a pair of instructions implementing a long
5287 // Then: _fat if it requires the big decoder
5288 //   Or: _mem if it requires the big decoder and a memory unit.
5289 
5290 // Integer ALU reg operation (dst is both read and written)
5291 pipe_class ialu_reg(rRegI dst)
5292 %{
5293     single_instruction;
5294     dst    : S4(write);
5295     dst    : S3(read);
5296     DECODE : S0;        // any decoder
5297     ALU    : S3;        // any alu
5298 %}
5299 
5300 // Long ALU reg operation (two instructions; dst is both read and written)
5301 pipe_class ialu_reg_long(rRegL dst)
5302 %{
5303     instruction_count(2);
5304     dst    : S4(write);
5305     dst    : S3(read);
5306     DECODE : S0(2);     // any 2 decoders
5307     ALU    : S3(2);     // any 2 alus
5308 %}
5309 
5310 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
5311 pipe_class ialu_reg_fat(rRegI dst)
5312 %{
5313     single_instruction;
5314     dst    : S4(write);
5315     dst    : S3(read);
5316     D0     : S0;        // big decoder only
5317     ALU    : S3;        // any alu
5318 %}
5319 
5320 // Long ALU reg operation using big decoder (two instructions)
5321 pipe_class ialu_reg_long_fat(rRegL dst)
5322 %{
5323     instruction_count(2);
5324     dst    : S4(write);
5325     dst    : S3(read);
5326     D0     : S0(2);     // big decoder only; twice
5327     ALU    : S3(2);     // any 2 alus
5328 %}
5329 
5330 // Integer ALU reg-reg operation (src read in S3, dst written in S4)
5331 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5332 %{
5333     single_instruction;
5334     dst    : S4(write);
5335     src    : S3(read);
5336     DECODE : S0;        // any decoder
5337     ALU    : S3;        // any alu
5338 %}
5339 
5340 // Long ALU reg-reg operation (two instructions)
5341 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5342 %{
5343     instruction_count(2);
5344     dst    : S4(write);
5345     src    : S3(read);
5346     DECODE : S0(2);     // any 2 decoders
5347     ALU    : S3(2);     // any 2 alus
5348 %}
5349 
5350 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand, not a register - confirm)
5351 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5352 %{
5353     single_instruction;
5354     dst    : S4(write);
5355     src    : S3(read);
5356     D0     : S0;        // big decoder only
5357     ALU    : S3;        // any alu
5358 %}
5359 
5360 // Long ALU reg-reg operation using big decoder (two instructions)
5361 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5362 %{
5363     instruction_count(2);
5364     dst    : S4(write);
5365     src    : S3(read);
5366     D0     : S0(2);     // big decoder only; twice
5367     ALU    : S3(2);     // any 2 alus
5368 %}
5369 
5370 // Integer ALU reg-mem operation (memory read in S3, ALU in S4, dst written in S5)
5371 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5372 %{
5373     single_instruction;
5374     dst    : S5(write);
5375     mem    : S3(read);
5376     D0     : S0;        // big decoder only
5377     ALU    : S4;        // any alu
5378     MEM    : S3;        // any mem
5379 %}
5380 
5381 // Integer mem operation (prefetch): uses a decoder and a memory unit, no ALU
5382 pipe_class ialu_mem(memory mem)
5383 %{
5384     single_instruction;
5385     mem    : S3(read);
5386     D0     : S0;        // big decoder only
5387     MEM    : S3;        // any mem
5388 %}
5389 
5390 // Integer Store to Memory (register source, read in S5)
5391 pipe_class ialu_mem_reg(memory mem, rRegI src)
5392 %{
5393     single_instruction;
5394     mem    : S3(read);
5395     src    : S5(read);
5396     D0     : S0;        // big decoder only
5397     ALU    : S4;        // any alu
5398     MEM    : S3;        // any mem
5399 %}
5400 
5401 // // Long Store to Memory
5402 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5403 // %{
5404 //     instruction_count(2);
5405 //     mem    : S3(read);
5406 //     src    : S5(read);
5407 //     D0     : S0(2);          // big decoder only; twice
5408 //     ALU    : S4(2);     // any 2 alus
5409 //     MEM    : S3(2);  // Both mems
5410 // %}
5411 
5412 // Integer Store to Memory (no register source operand)
5413 pipe_class ialu_mem_imm(memory mem)
5414 %{
5415     single_instruction;
5416     mem    : S3(read);
5417     D0     : S0;        // big decoder only
5418     ALU    : S4;        // any alu
5419     MEM    : S3;        // any mem
5420 %}
5421 
5422 // Integer ALU0 reg-reg operation (restricted to ALU0 and the big decoder)
5423 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5424 %{
5425     single_instruction;
5426     dst    : S4(write);
5427     src    : S3(read);
5428     D0     : S0;        // Big decoder only
5429     ALU0   : S3;        // only alu0
5430 %}
5431 
5432 // Integer ALU0 reg-mem operation (restricted to ALU0 and the big decoder)
5433 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5434 %{
5435     single_instruction;
5436     dst    : S5(write);
5437     mem    : S3(read);
5438     D0     : S0;        // big decoder only
5439     ALU0   : S4;        // ALU0 only
5440     MEM    : S3;        // any mem
5441 %}
5442 
5443 // Integer ALU reg-reg operation that writes the flags register (cr)
5444 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5445 %{
5446     single_instruction;
5447     cr     : S4(write);
5448     src1   : S3(read);
5449     src2   : S3(read);
5450     DECODE : S0;        // any decoder
5451     ALU    : S3;        // any alu
5452 %}
5453 
5454 // Integer ALU reg-imm operation that writes the flags register (cr)
5455 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5456 %{
5457     single_instruction;
5458     cr     : S4(write);
5459     src1   : S3(read);
5460     DECODE : S0;        // any decoder
5461     ALU    : S3;        // any alu
5462 %}
5463 
5464 // Integer ALU reg-mem operation that writes the flags register (cr)
5465 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5466 %{
5467     single_instruction;
5468     cr     : S4(write);
5469     src1   : S3(read);
5470     src2   : S3(read);
5471     D0     : S0;        // big decoder only
5472     ALU    : S4;        // any alu
5473     MEM    : S3;        // any mem
5474 %}
5475 
5476 // Conditional move reg-reg (NOTE(review): this class is a 4-instruction sequence that only reads p, q and y - confirm the header)
5477 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5478 %{
5479     instruction_count(4);
5480     y      : S4(read);
5481     q      : S3(read);
5482     p      : S3(read);
5483     DECODE : S0(4);     // any decoder
5484 %}
5485 
5486 // Conditional move reg-reg (reads src and the flags in S3, writes dst in S4; no ALU resource claimed)
5487 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5488 %{
5489     single_instruction;
5490     dst    : S4(write);
5491     src    : S3(read);
5492     cr     : S3(read);
5493     DECODE : S0;        // any decoder
5494 %}
5495 
5496 // Conditional move reg-mem (source operand comes from memory)
5497 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5498 %{
5499     single_instruction;
5500     dst    : S4(write);
5501     src    : S3(read);
5502     cr     : S3(read);
5503     DECODE : S0;        // any decoder
5504     MEM    : S3;        // any mem
5505 %}
5506 
5507 // Conditional move reg-reg long
5508 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5509 %{
5510     single_instruction;  // NOTE(review): single instruction, yet DECODE below uses a count of 2 - confirm
5511     dst    : S4(write);
5512     src    : S3(read);
5513     cr     : S3(read);
5514     DECODE : S0(2);     // any 2 decoders
5515 %}
5516 
5517 // XXX
5518 // // Conditional move double reg-reg
5519 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5520 // %{
5521 //     single_instruction;
5522 //     dst    : S4(write);
5523 //     src    : S3(read);
5524 //     cr     : S3(read);
5525 //     DECODE : S0;     // any decoder
5526 // %}
5527 
5528 // Float reg operation (single register operand; two instructions)
5529 pipe_class fpu_reg(regD dst)
5530 %{
5531     instruction_count(2);
5532     dst    : S3(read);  // NOTE(review): dst is only declared as a read here - confirm
5533     DECODE : S0(2);     // any 2 decoders
5534     FPU    : S3;
5535 %}
5536 
5537 // Float reg-reg operation (two instructions)
5538 pipe_class fpu_reg_reg(regD dst, regD src)
5539 %{
5540     instruction_count(2);
5541     dst    : S4(write);
5542     src    : S3(read);
5543     DECODE : S0(2);     // any 2 decoders
5544     FPU    : S3;
5545 %}
5546 
5547 // Float reg-reg-reg operation (three instructions)
5548 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5549 %{
5550     instruction_count(3);
5551     dst    : S4(write);
5552     src1   : S3(read);
5553     src2   : S3(read);
5554     DECODE : S0(3);     // any 3 decoders
5555     FPU    : S3(2);
5556 %}
5557 
5558 // Float reg-reg-reg-reg operation (four instructions)
5559 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5560 %{
5561     instruction_count(4);
5562     dst    : S4(write);
5563     src1   : S3(read);
5564     src2   : S3(read);
5565     src3   : S3(read);
5566     DECODE : S0(4);     // any decoders, count of 4
5567     FPU    : S3(2);
5568 %}
5569 
5570 // Float reg-mem-reg-reg operation (four instructions; src1 comes from memory)
5571 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5572 %{
5573     instruction_count(4);
5574     dst    : S4(write);
5575     src1   : S3(read);
5576     src2   : S3(read);
5577     src3   : S3(read);
5578     DECODE : S1(3);     // any 3 decoders
5579     D0     : S0;        // Big decoder only
5580     FPU    : S3(2);
5581     MEM    : S3;        // any mem
5582 %}
5583 
5584 // Float reg-mem operation (two instructions)
5585 pipe_class fpu_reg_mem(regD dst, memory mem)
5586 %{
5587     instruction_count(2);
5588     dst    : S5(write);
5589     mem    : S3(read);
5590     D0     : S0;        // big decoder only
5591     DECODE : S1;        // any decoder for FPU POP
5592     FPU    : S4;
5593     MEM    : S3;        // any mem
5594 %}
5595 
5596 // Float reg-reg-mem operation (three instructions)
5597 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5598 %{
5599     instruction_count(3);
5600     dst    : S5(write);
5601     src1   : S3(read);
5602     mem    : S3(read);
5603     D0     : S0;        // big decoder only
5604     DECODE : S1(2);     // any decoder for FPU POP
5605     FPU    : S4;
5606     MEM    : S3;        // any mem
5607 %}
5608 
5609 // Float mem-reg operation (two instructions; src register read in S5)
5610 pipe_class fpu_mem_reg(memory mem, regD src)
5611 %{
5612     instruction_count(2);
5613     src    : S5(read);
5614     mem    : S3(read);
5615     DECODE : S0;        // any decoder for FPU PUSH
5616     D0     : S1;        // big decoder only
5617     FPU    : S4;
5618     MEM    : S3;        // any mem
5619 %}
5620 
5621 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5622 %{
5623     instruction_count(3);  // Float mem-reg-reg operation: three instructions
5624     src1   : S3(read);
5625     src2   : S3(read);
5626     mem    : S3(read);
5627     DECODE : S0(2);     // any decoder for FPU PUSH
5628     D0     : S1;        // big decoder only
5629     FPU    : S4;
5630     MEM    : S3;        // any mem
5631 %}
5632 
5633 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5634 %{
     // Float mem-reg-mem operation: memory operand `mem`, one register source and
     // one memory source. Modeled as 3 instructions; the sources are read in S3
     // and `mem` is declared as a read in S4. MEM is listed with a count of 2.
5635     instruction_count(3);
5636     src1   : S3(read);
5637     src2   : S3(read);
5638     mem    : S4(read);
5639     DECODE : S0;        // any decoder for FPU PUSH
5640     D0     : S0(2);     // big decoder only
5641     FPU    : S4;
5642     MEM    : S3(2);     // any mem
5643 %}
5644 
5645 pipe_class fpu_mem_mem(memory dst, memory src1)
5646 %{
     // Float mem-mem operation: both operands are memory. Modeled as 2
     // instructions; src1 is read in S3 and dst is declared as a read in S4.
     // Only D0 and MEM are listed as resources (no FPU entry).
5647     instruction_count(2);
5648     src1   : S3(read);
5649     dst    : S4(read);
5650     D0     : S0(2);     // big decoder only
5651     MEM    : S3(2);     // any mem
5652 %}
5653 
5654 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5655 %{
     // Float mem-mem-mem operation: all three operands are memory. Modeled as 3
     // instructions; the sources are read in S3 and dst is declared as a read in S4.
5656     instruction_count(3);
5657     src1   : S3(read);
5658     src2   : S3(read);
5659     dst    : S4(read);
5660     D0     : S0(3);     // big decoder only
5661     FPU    : S4;
5662     MEM    : S3(3);     // any mem
5663 %}
5664 
5665 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5666 %{
     // Float mem-reg operation that also involves a constant (per the class name;
     // the constant is not an operand here). Modeled as 3 instructions; src1 and
     // mem are both declared as reads in S4. MEM is listed with a count of 2.
5667     instruction_count(3);
5668     src1   : S4(read);
5669     mem    : S4(read);
5670     DECODE : S0;        // any decoder for FPU PUSH
5671     D0     : S0(2);     // big decoder only
5672     FPU    : S4;
5673     MEM    : S3(2);     // any mem
5674 %}
5675 
5676 // Float load constant
     // Register destination only; modeled as 2 instructions with dst written in S5.
     // A MEM resource is listed in S3 for the load of the constant.
5677 pipe_class fpu_reg_con(regD dst)
5678 %{
5679     instruction_count(2);
5680     dst    : S5(write);
5681     D0     : S0;        // big decoder only for the load
5682     DECODE : S1;        // any decoder for FPU POP
5683     FPU    : S4;
5684     MEM    : S3;        // any mem
5685 %}
5686 
5687 // Float load constant combined with a register source
     // Modeled as 3 instructions: src is read in S3 and dst is written in S5.
     // A MEM resource is listed in S3 for the load of the constant.
5688 pipe_class fpu_reg_reg_con(regD dst, regD src)
5689 %{
5690     instruction_count(3);
5691     dst    : S5(write);
5692     src    : S3(read);
5693     D0     : S0;        // big decoder only for the load
5694     DECODE : S1(2);     // any decoder for FPU POP
5695     FPU    : S4;
5696     MEM    : S3;        // any mem
5697 %}
5698 
5699 // Unconditional branch
     // Single instruction whose only listed resource is the BR unit in S3.
5700 pipe_class pipe_jmp(label labl)
5701 %{
5702     single_instruction;
5703     BR   : S3;
5704 %}
5705 
5706 // Conditional branch
     // Single instruction: the flags register cr is read in S1 and the BR unit
     // is used in S3.
5707 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5708 %{
5709     single_instruction;
5710     cr    : S1(read);
5711     BR    : S3;
5712 %}
5713 
5714 // Allocation idiom
     // One serializing instruction with a fixed latency of 6: heap_ptr is read
     // in S3 and dst is written in S5. Lists DECODE, D0, MEM, ALU and BR resources.
5715 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5716 %{
5717     instruction_count(1); force_serialization;
5718     fixed_latency(6);
5719     heap_ptr : S3(read);
5720     DECODE   : S0(3);
5721     D0       : S2;
5722     MEM      : S3;
5723     ALU      : S3(2);
5724     dst      : S5(write);
5725     BR       : S5;
5726 %}
5727 
5728 // Generic big/slow expanded idiom
     // Catch-all class with no operands: declared as 10 instructions spanning
     // multiple bundles, serializing, with a fixed latency of 100.
5729 pipe_class pipe_slow()
5730 %{
5731     instruction_count(10); multiple_bundles; force_serialization;
5732     fixed_latency(100);
5733     D0  : S0(2);
5734     MEM : S3(2);
5735 %}
5736 
5737 // The real do-nothing guy
     // Zero instructions and no resources; this is the class assigned to MachNop
     // by the define block that follows.
5738 pipe_class empty()
5739 %{
5740     instruction_count(0);
5741 %}
5742 
5743 // Define the class for the Nop node
     // Binds MachNop to the `empty` pipe_class declared just above.
5744 define
5745 %{
5746    MachNop = empty;
5747 %}
5748 
5749 %}
5750 
5751 //----------INSTRUCTIONS-------------------------------------------------------
5752 //
5753 // match      -- States which machine-independent subtree may be replaced
5754 //               by this instruction.
5755 // ins_cost   -- The estimated cost of this instruction is used by instruction
5756 //               selection to identify a minimum cost tree of machine
5757 //               instructions that matches a tree of machine-independent
5758 //               instructions.
5759 // format     -- A string providing the disassembly for this instruction.
5760 //               The value of an instruction's operand may be inserted
5761 //               by referring to it with a '$' prefix.
5762 // opcode     -- Three instruction opcodes may be provided.  These are referred
5763 //               to within an encode class as $primary, $secondary, and $tertiary
5764 //               respectively.  The primary opcode is commonly used to
5765 //               indicate the type of machine instruction, while secondary
5766 //               and tertiary are often used for prefix options or addressing
5767 //               modes.
5768 // ins_encode -- A list of encode classes with parameters. The encode class
5769 //               name must have been defined in an 'enc_class' specification
5770 //               in the encode section of the architecture description.
5771 
5772 
5773 //----------Load/Store/Move Instructions---------------------------------------
5774 //----------Load Instructions--------------------------------------------------
5775 
5776 // Load Byte (8 bit signed)
     // Matches (LoadB mem) and emits one movsbl, sign-extending the byte at
     // $mem into the int register $dst.
5777 instruct loadB(rRegI dst, memory mem)
5778 %{
5779   match(Set dst (LoadB mem));
5780 
5781   ins_cost(125);
5782   format %{ "movsbl  $dst, $mem\t# byte" %}
5783 
5784   ins_encode %{
5785     __ movsbl($dst$$Register, $mem$$Address);
5786   %}
5787 
5788   ins_pipe(ialu_reg_mem);
5789 %}
5790 
5791 // Load Byte (8 bit signed) into Long Register
     // Folds (ConvI2L (LoadB mem)) into one movsbq, so the byte is sign-extended
     // directly to 64 bits.
5792 instruct loadB2L(rRegL dst, memory mem)
5793 %{
5794   match(Set dst (ConvI2L (LoadB mem)));
5795 
5796   ins_cost(125);
5797   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5798 
5799   ins_encode %{
5800     __ movsbq($dst$$Register, $mem$$Address);
5801   %}
5802 
5803   ins_pipe(ialu_reg_mem);
5804 %}
5805 
5806 // Load Unsigned Byte (8 bit UNsigned)
     // Matches (LoadUB mem) and emits one movzbl, zero-extending the byte at
     // $mem into the int register $dst.
5807 instruct loadUB(rRegI dst, memory mem)
5808 %{
5809   match(Set dst (LoadUB mem));
5810 
5811   ins_cost(125);
5812   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5813 
5814   ins_encode %{
5815     __ movzbl($dst$$Register, $mem$$Address);
5816   %}
5817 
5818   ins_pipe(ialu_reg_mem);
5819 %}
5820 
5821 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds (ConvI2L (LoadUB mem)) into one movzbq, zero-extending the byte
     // directly to 64 bits.
5822 instruct loadUB2L(rRegL dst, memory mem)
5823 %{
5824   match(Set dst (ConvI2L (LoadUB mem)));
5825 
5826   ins_cost(125);
5827   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5828 
5829   ins_encode %{
5830     __ movzbq($dst$$Register, $mem$$Address);
5831   %}
5832 
5833   ins_pipe(ialu_reg_mem);
5834 %}
5835 
5836 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
     // Folds (ConvI2L (AndI (LoadUB mem) mask)) into a movzbq followed by an andl
     // with the immediate. The andl writes the flags, hence KILL cr.
     // No explicit ins_cost is given for this rule.
5837 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5838   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5839   effect(KILL cr);
5840 
5841   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5842             "andl    $dst, $mask" %}
5843   ins_encode %{
5844     Register Rdst = $dst$$Register;
5845     __ movzbq(Rdst, $mem$$Address);   // zero-extending load first ...
5846     __ andl(Rdst, $mask$$constant);   // ... then mask in place
5847   %}
5848   ins_pipe(ialu_reg_mem);
5849 %}
5850 
5851 // Load Short (16 bit signed)
     // Matches (LoadS mem) and emits one movswl, sign-extending the 16-bit value
     // at $mem into the int register $dst.
5852 instruct loadS(rRegI dst, memory mem)
5853 %{
5854   match(Set dst (LoadS mem));
5855 
5856   ins_cost(125);
5857   format %{ "movswl $dst, $mem\t# short" %}
5858 
5859   ins_encode %{
5860     __ movswl($dst$$Register, $mem$$Address);
5861   %}
5862 
5863   ins_pipe(ialu_reg_mem);
5864 %}
5865 
5866 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a LoadS and replaces
     // load + two shifts with one sign-extending byte load (movsbl) from $mem.
5867 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5868   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5869 
5870   ins_cost(125);
5871   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5872   ins_encode %{
5873     __ movsbl($dst$$Register, $mem$$Address);
5874   %}
5875   ins_pipe(ialu_reg_mem);
5876 %}
5877 
5878 // Load Short (16 bit signed) into Long Register
     // Folds (ConvI2L (LoadS mem)) into one movswq, sign-extending the 16-bit
     // value directly to 64 bits.
5879 instruct loadS2L(rRegL dst, memory mem)
5880 %{
5881   match(Set dst (ConvI2L (LoadS mem)));
5882 
5883   ins_cost(125);
5884   format %{ "movswq $dst, $mem\t# short -> long" %}
5885 
5886   ins_encode %{
5887     __ movswq($dst$$Register, $mem$$Address);
5888   %}
5889 
5890   ins_pipe(ialu_reg_mem);
5891 %}
5892 
5893 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches (LoadUS mem) and emits one movzwl, zero-extending the 16-bit
     // value at $mem into the int register $dst.
5894 instruct loadUS(rRegI dst, memory mem)
5895 %{
5896   match(Set dst (LoadUS mem));
5897 
5898   ins_cost(125);
5899   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5900 
5901   ins_encode %{
5902     __ movzwl($dst$$Register, $mem$$Address);
5903   %}
5904 
5905   ins_pipe(ialu_reg_mem);
5906 %}
5907 
5908 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a LoadUS and replaces
     // load + two shifts with one sign-extending byte load (movsbl) from $mem.
5909 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5910   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5911 
5912   ins_cost(125);
5913   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5914   ins_encode %{
5915     __ movsbl($dst$$Register, $mem$$Address);
5916   %}
5917   ins_pipe(ialu_reg_mem);
5918 %}
5919 
5920 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds (ConvI2L (LoadUS mem)) into one movzwq, zero-extending the 16-bit
     // value directly to 64 bits.
5921 instruct loadUS2L(rRegL dst, memory mem)
5922 %{
5923   match(Set dst (ConvI2L (LoadUS mem)));
5924 
5925   ins_cost(125);
5926   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5927 
5928   ins_encode %{
5929     __ movzwq($dst$$Register, $mem$$Address);
5930   %}
5931 
5932   ins_pipe(ialu_reg_mem);
5933 %}
5934 
5935 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // With a 0xFF mask only the low byte survives, so the load, mask and
     // conversion collapse into one zero-extending byte load (movzbq).
     // No explicit ins_cost is given for this rule.
5936 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5937   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5938 
5939   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5940   ins_encode %{
5941     __ movzbq($dst$$Register, $mem$$Address);
5942   %}
5943   ins_pipe(ialu_reg_mem);
5944 %}
5945 
5946 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Folds (ConvI2L (AndI (LoadUS mem) mask)) into a movzwq followed by an andl
     // with the immediate. The andl writes the flags, hence KILL cr.
     // No explicit ins_cost is given for this rule.
5947 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5948   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5949   effect(KILL cr);
5950 
5951   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5952             "andl    $dst, $mask" %}
5953   ins_encode %{
5954     Register Rdst = $dst$$Register;
5955     __ movzwq(Rdst, $mem$$Address);   // zero-extending load first ...
5956     __ andl(Rdst, $mask$$constant);   // ... then mask in place
5957   %}
5958   ins_pipe(ialu_reg_mem);
5959 %}
5960 
5961 // Load Integer
     // Matches (LoadI mem) and emits one 32-bit movl from $mem into $dst.
5962 instruct loadI(rRegI dst, memory mem)
5963 %{
5964   match(Set dst (LoadI mem));
5965 
5966   ins_cost(125);
5967   format %{ "movl    $dst, $mem\t# int" %}
5968 
5969   ins_encode %{
5970     __ movl($dst$$Register, $mem$$Address);
5971   %}
5972 
5973   ins_pipe(ialu_reg_mem);
5974 %}
5975 
5976 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Matches the (x << 24) >> 24 narrowing idiom applied to a LoadI and replaces
     // load + two shifts with one sign-extending byte load (movsbl) from $mem.
5977 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5978   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5979 
5980   ins_cost(125);
5981   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5982   ins_encode %{
5983     __ movsbl($dst$$Register, $mem$$Address);
5984   %}
5985   ins_pipe(ialu_reg_mem);
5986 %}
5987 
5988 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Matches (AndI (LoadI mem) mask) with an immI_255 mask and replaces
     // load + and with one zero-extending byte load (movzbl) from $mem.
5989 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5990   match(Set dst (AndI (LoadI mem) mask));
5991 
5992   ins_cost(125);
5993   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5994   ins_encode %{
5995     __ movzbl($dst$$Register, $mem$$Address);
5996   %}
5997   ins_pipe(ialu_reg_mem);
5998 %}
5999 
6000 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Matches the (x << 16) >> 16 narrowing idiom applied to a LoadI and replaces
     // load + two shifts with one sign-extending 16-bit load (movswl) from $mem.
6001 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6002   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6003 
6004   ins_cost(125);
6005   format %{ "movswl  $dst, $mem\t# int -> short" %}
6006   ins_encode %{
6007     __ movswl($dst$$Register, $mem$$Address);
6008   %}
6009   ins_pipe(ialu_reg_mem);
6010 %}
6011 
6012 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Matches (AndI (LoadI mem) mask) with an immI_65535 mask and replaces
     // load + and with one zero-extending 16-bit load (movzwl) from $mem.
6013 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6014   match(Set dst (AndI (LoadI mem) mask));
6015 
6016   ins_cost(125);
6017   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6018   ins_encode %{
6019     __ movzwl($dst$$Register, $mem$$Address);
6020   %}
6021   ins_pipe(ialu_reg_mem);
6022 %}
6023 
6024 // Load Integer into Long Register
     // Folds (ConvI2L (LoadI mem)) into one movslq, sign-extending the 32-bit
     // value at $mem directly to 64 bits.
6025 instruct loadI2L(rRegL dst, memory mem)
6026 %{
6027   match(Set dst (ConvI2L (LoadI mem)));
6028 
6029   ins_cost(125);
6030   format %{ "movslq  $dst, $mem\t# int -> long" %}
6031 
6032   ins_encode %{
6033     __ movslq($dst$$Register, $mem$$Address);
6034   %}
6035 
6036   ins_pipe(ialu_reg_mem);
6037 %}
6038 
6039 // Load Integer with mask 0xFF into Long Register
     // With a 0xFF mask only the low byte survives, so the load, mask and
     // conversion collapse into one zero-extending byte load (movzbq).
     // No explicit ins_cost is given for this rule.
6040 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6041   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6042 
6043   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6044   ins_encode %{
6045     __ movzbq($dst$$Register, $mem$$Address);
6046   %}
6047   ins_pipe(ialu_reg_mem);
6048 %}
6049 
6050 // Load Integer with mask 0xFFFF into Long Register
     // With a 0xFFFF mask only the low 16 bits survive, so the load, mask and
     // conversion collapse into one zero-extending 16-bit load (movzwq).
     // No explicit ins_cost is given for this rule.
6051 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6052   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6053 
6054   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6055   ins_encode %{
6056     __ movzwq($dst$$Register, $mem$$Address);
6057   %}
6058   ins_pipe(ialu_reg_mem);
6059 %}
6060 
6061 // Load Integer with a 32-bit mask into Long Register
     // Folds (ConvI2L (AndI (LoadI mem) mask)) into a 32-bit load and a 32-bit and.
     // Both movl and andl zero the upper 32 bits of $dst, which matches ConvI2L only
     // while the masked value is non-negative. A negative mask keeps bit 31, so the
     // int result can be negative and must be sign-extended; in that case an extra
     // movslq $dst, $dst is emitted (not shown in the format string). The andl
     // writes the flags, hence KILL cr.
6062 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6063   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6064   effect(KILL cr);
6065 
6066   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6067             "andl    $dst, $mask" %}
6068   ins_encode %{
6069     Register Rdst = $dst$$Register;
6070     __ movl(Rdst, $mem$$Address);
6071     __ andl(Rdst, $mask$$constant);
         if ((int)$mask$$constant < 0) {
           // Bit 31 may be set in the result: sign-extend to honor ConvI2L.
           __ movslq(Rdst, Rdst);
         }
6072   %}
6073   ins_pipe(ialu_reg_mem);
6074 %}
6075 
6076 // Load Unsigned Integer into Long Register
     // Matches (LoadUI2L mem) and emits a plain 32-bit movl; on x86-64 a 32-bit
     // register write zeroes the upper half, which gives the unsigned widening.
6077 instruct loadUI2L(rRegL dst, memory mem)
6078 %{
6079   match(Set dst (LoadUI2L mem));
6080 
6081   ins_cost(125);
6082   format %{ "movl    $dst, $mem\t# uint -> long" %}
6083 
6084   ins_encode %{
6085     __ movl($dst$$Register, $mem$$Address);
6086   %}
6087 
6088   ins_pipe(ialu_reg_mem);
6089 %}
6090 
6091 // Load Long
     // Matches (LoadL mem) and emits one 64-bit movq from $mem into $dst.
6092 instruct loadL(rRegL dst, memory mem)
6093 %{
6094   match(Set dst (LoadL mem));
6095 
6096   ins_cost(125);
6097   format %{ "movq    $dst, $mem\t# long" %}
6098 
6099   ins_encode %{
6100     __ movq($dst$$Register, $mem$$Address);
6101   %}
6102 
6103   ins_pipe(ialu_reg_mem); // XXX
6104 %}
6105 
6106 // Load Range
     // Matches (LoadRange mem) into an int register. Encoded through the
     // REX_reg_mem / OpcP / reg_mem encode classes with primary opcode 0x8B
     // (32-bit mov, per the format string).
6107 instruct loadRange(rRegI dst, memory mem)
6108 %{
6109   match(Set dst (LoadRange mem));
6110 
6111   ins_cost(125); // XXX
6112   format %{ "movl    $dst, $mem\t# range" %}
6113   opcode(0x8B);
6114   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6115   ins_pipe(ialu_reg_mem);
6116 %}
6117 
6118 // Load Pointer
     // Matches (LoadP mem) into a pointer register. Encoded through the
     // REX_reg_mem_wide / OpcP / reg_mem encode classes with primary opcode 0x8B
     // (64-bit mov, per the format string).
6119 instruct loadP(rRegP dst, memory mem)
6120 %{
6121   match(Set dst (LoadP mem));
6122 
6123   ins_cost(125); // XXX
6124   format %{ "movq    $dst, $mem\t# ptr" %}
6125   opcode(0x8B);
6126   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6127   ins_pipe(ialu_reg_mem); // XXX
6128 %}
6129 
6130 // Load Compressed Pointer
     // Matches (LoadN mem) and emits one 32-bit movl into the narrow-oop
     // register $dst; no decoding is performed by this rule.
6131 instruct loadN(rRegN dst, memory mem)
6132 %{
6133    match(Set dst (LoadN mem));
6134 
6135    ins_cost(125); // XXX
6136    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6137    ins_encode %{
6138      __ movl($dst$$Register, $mem$$Address);
6139    %}
6140    ins_pipe(ialu_reg_mem); // XXX
6141 %}
6142 
6143 
6144 // Load Klass Pointer
     // Matches (LoadKlass mem) into a pointer register. Encoded through the
     // REX_reg_mem_wide / OpcP / reg_mem encode classes with primary opcode 0x8B
     // (64-bit mov, per the format string).
6145 instruct loadKlass(rRegP dst, memory mem)
6146 %{
6147   match(Set dst (LoadKlass mem));
6148 
6149   ins_cost(125); // XXX
6150   format %{ "movq    $dst, $mem\t# class" %}
6151   opcode(0x8B);
6152   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6153   ins_pipe(ialu_reg_mem); // XXX
6154 %}
6155 
6156 // Load narrow Klass Pointer
     // Matches (LoadNKlass mem) and emits one 32-bit movl into the narrow
     // register $dst; no decoding is performed by this rule.
6157 instruct loadNKlass(rRegN dst, memory mem)
6158 %{
6159   match(Set dst (LoadNKlass mem));
6160 
6161   ins_cost(125); // XXX
6162   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6163   ins_encode %{
6164     __ movl($dst$$Register, $mem$$Address);
6165   %}
6166   ins_pipe(ialu_reg_mem); // XXX
6167 %}
6168 
6169 // Load Float
     // Matches (LoadF mem) into an XMM float register. Hand-encoded as the
     // opcode bytes F3 0F 10 (movss, per the format string), with the REX prefix
     // emitted between the F3 prefix byte and the 0F escape.
6170 instruct loadF(regF dst, memory mem)
6171 %{
6172   match(Set dst (LoadF mem));
6173 
6174   ins_cost(145); // XXX
6175   format %{ "movss   $dst, $mem\t# float" %}
6176   opcode(0xF3, 0x0F, 0x10);
6177   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6178   ins_pipe(pipe_slow); // XXX
6179 %}
6180 
6181 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is false: matches (LoadD mem)
     // and is hand-encoded as 66 0F 12 (movlpd, per the format string).
     // The loadD rule below covers the opposite flag setting.
6182 instruct loadD_partial(regD dst, memory mem)
6183 %{
6184   predicate(!UseXmmLoadAndClearUpper);
6185   match(Set dst (LoadD mem));
6186 
6187   ins_cost(145); // XXX
6188   format %{ "movlpd  $dst, $mem\t# double" %}
6189   opcode(0x66, 0x0F, 0x12);
6190   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6191   ins_pipe(pipe_slow); // XXX
6192 %}
6193 
6194 instruct loadD(regD dst, memory mem)
6195 %{
       // Load Double, variant selected when UseXmmLoadAndClearUpper is true:
       // matches (LoadD mem) and is hand-encoded as F2 0F 10 (movsd, per the
       // format string).
6196   predicate(UseXmmLoadAndClearUpper);
6197   match(Set dst (LoadD mem));
6198 
6199   ins_cost(145); // XXX
6200   format %{ "movsd   $dst, $mem\t# double" %}
6201   opcode(0xF2, 0x0F, 0x10);
6202   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6203   ins_pipe(pipe_slow); // XXX
6204 %}
6205 
6206 // Load Aligned Packed Byte to XMM register
     // Matches (Load8B mem); the bytes are emitted by the movq_ld encode class
     // (MOVQ, per the format string).
6207 instruct loadA8B(regD dst, memory mem) %{
6208   match(Set dst (Load8B mem));
6209   ins_cost(125);
6210   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6211   ins_encode( movq_ld(dst, mem));
6212   ins_pipe( pipe_slow );
6213 %}
6214 
6215 // Load Aligned Packed Short to XMM register
     // Matches (Load4S mem); the bytes are emitted by the movq_ld encode class
     // (MOVQ, per the format string).
6216 instruct loadA4S(regD dst, memory mem) %{
6217   match(Set dst (Load4S mem));
6218   ins_cost(125);
6219   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6220   ins_encode( movq_ld(dst, mem));
6221   ins_pipe( pipe_slow );
6222 %}
6223 
6224 // Load Aligned Packed Char to XMM register
     // Matches (Load4C mem); the bytes are emitted by the movq_ld encode class
     // (MOVQ, per the format string).
6225 instruct loadA4C(regD dst, memory mem) %{
6226   match(Set dst (Load4C mem));
6227   ins_cost(125);
6228   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6229   ins_encode( movq_ld(dst, mem));
6230   ins_pipe( pipe_slow );
6231 %}
6232 
6233 // Load Aligned Packed Integer to XMM register
     // Matches (Load2I mem); the bytes are emitted by the movq_ld encode class
     // (MOVQ, per the format string). Note the rule name (load2IU) does not
     // follow the loadA<n><type> pattern of its siblings.
6234 instruct load2IU(regD dst, memory mem) %{
6235   match(Set dst (Load2I mem));
6236   ins_cost(125);
6237   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6238   ins_encode( movq_ld(dst, mem));
6239   ins_pipe( pipe_slow );
6240 %}
6241 
6242 // Load Aligned Packed Single to XMM
     // Matches (Load2F mem); the bytes are emitted by the movq_ld encode class
     // (MOVQ, per the format string). Costed at 145 versus 125 for the packed
     // integer loads above.
6243 instruct loadA2F(regD dst, memory mem) %{
6244   match(Set dst (Load2F mem));
6245   ins_cost(145);
6246   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6247   ins_encode( movq_ld(dst, mem));
6248   ins_pipe( pipe_slow );
6249 %}
6250 
6251 // Load Effective Address
     // Materializes the address described by an indOffset8 operand into the
     // pointer register $dst. Encoded with REX_reg_mem_wide and primary opcode
     // 0x8D (leaq, per the format string).
6252 instruct leaP8(rRegP dst, indOffset8 mem)
6253 %{
6254   match(Set dst mem);
6255 
6256   ins_cost(110); // XXX
6257   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6258   opcode(0x8D);
6259   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6260   ins_pipe(ialu_reg_reg_fat);
6261 %}
6262 
6263 instruct leaP32(rRegP dst, indOffset32 mem)
6264 %{
       // Load Effective Address for an indOffset32 operand: same leaq encoding
       // (opcode 0x8D with REX_reg_mem_wide) as leaP8.
6265   match(Set dst mem);
6266 
6267   ins_cost(110);
6268   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6269   opcode(0x8D);
6270   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6271   ins_pipe(ialu_reg_reg_fat);
6272 %}
6273 
6274 // instruct leaPIdx(rRegP dst, indIndex mem)
6275 // %{
6276 //   match(Set dst mem);
6277 
6278 //   ins_cost(110);
6279 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6280 //   opcode(0x8D);
6281 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6282 //   ins_pipe(ialu_reg_reg_fat);
6283 // %}
6284 
6285 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6286 %{
       // Load Effective Address for an indIndexOffset operand, using leaq
       // (opcode 0x8D with REX_reg_mem_wide).
6287   match(Set dst mem);
6288 
6289   ins_cost(110);
6290   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6291   opcode(0x8D);
6292   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6293   ins_pipe(ialu_reg_reg_fat);
6294 %}
6295 
6296 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6297 %{
       // Load Effective Address for an indIndexScale operand, using leaq
       // (opcode 0x8D with REX_reg_mem_wide).
6298   match(Set dst mem);
6299 
6300   ins_cost(110);
6301   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6302   opcode(0x8D);
6303   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6304   ins_pipe(ialu_reg_reg_fat);
6305 %}
6306 
6307 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6308 %{
       // Load Effective Address for an indIndexScaleOffset operand, using leaq
       // (opcode 0x8D with REX_reg_mem_wide).
6309   match(Set dst mem);
6310 
6311   ins_cost(110);
6312   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6313   opcode(0x8D);
6314   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6315   ins_pipe(ialu_reg_reg_fat);
6316 %}
6317 
6318 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6319 %{
       // Load Effective Address for an indPosIndexScaleOffset operand, using
       // leaq (opcode 0x8D with REX_reg_mem_wide).
6320   match(Set dst mem);
6321 
6322   ins_cost(110);
6323   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6324   opcode(0x8D);
6325   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6326   ins_pipe(ialu_reg_reg_fat);
6327 %}
6328 
6329 // Load Effective Address which uses Narrow (32-bits) oop
     // Only selected when compressed oops are enabled with a non-zero shift.
     // Materializes an indCompressedOopOffset operand with leaq (opcode 0x8D
     // with REX_reg_mem_wide).
6330 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6331 %{
6332   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6333   match(Set dst mem);
6334 
6335   ins_cost(110);
6336   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6337   opcode(0x8D);
6338   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6339   ins_pipe(ialu_reg_reg_fat);
6340 %}
6341 
6342 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6343 %{
       // Load Effective Address for an indOffset8Narrow operand. Only selected
       // when the narrow-oop shift is 0. Uses leaq (opcode 0x8D with
       // REX_reg_mem_wide).
6344   predicate(Universe::narrow_oop_shift() == 0);
6345   match(Set dst mem);
6346 
6347   ins_cost(110); // XXX
6348   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6349   opcode(0x8D);
6350   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6351   ins_pipe(ialu_reg_reg_fat);
6352 %}
6353 
6354 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6355 %{
       // Load Effective Address for an indOffset32Narrow operand. Only selected
       // when the narrow-oop shift is 0. Uses leaq (opcode 0x8D with
       // REX_reg_mem_wide).
6356   predicate(Universe::narrow_oop_shift() == 0);
6357   match(Set dst mem);
6358 
6359   ins_cost(110);
6360   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6361   opcode(0x8D);
6362   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6363   ins_pipe(ialu_reg_reg_fat);
6364 %}
6365 
6366 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6367 %{
       // Load Effective Address for an indIndexOffsetNarrow operand. Only
       // selected when the narrow-oop shift is 0. Uses leaq (opcode 0x8D with
       // REX_reg_mem_wide).
6368   predicate(Universe::narrow_oop_shift() == 0);
6369   match(Set dst mem);
6370 
6371   ins_cost(110);
6372   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6373   opcode(0x8D);
6374   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6375   ins_pipe(ialu_reg_reg_fat);
6376 %}
6377 
6378 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6379 %{
       // Load Effective Address for an indIndexScaleNarrow operand. Only
       // selected when the narrow-oop shift is 0. Uses leaq (opcode 0x8D with
       // REX_reg_mem_wide).
6380   predicate(Universe::narrow_oop_shift() == 0);
6381   match(Set dst mem);
6382 
6383   ins_cost(110);
6384   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6385   opcode(0x8D);
6386   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6387   ins_pipe(ialu_reg_reg_fat);
6388 %}
6389 
6390 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6391 %{
       // Load Effective Address for an indIndexScaleOffsetNarrow operand. Only
       // selected when the narrow-oop shift is 0. Uses leaq (opcode 0x8D with
       // REX_reg_mem_wide).
6392   predicate(Universe::narrow_oop_shift() == 0);
6393   match(Set dst mem);
6394 
6395   ins_cost(110);
6396   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6397   opcode(0x8D);
6398   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6399   ins_pipe(ialu_reg_reg_fat);
6400 %}
6401 
6402 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6403 %{
       // Load Effective Address for an indPosIndexScaleOffsetNarrow operand.
       // Only selected when the narrow-oop shift is 0. Uses leaq (opcode 0x8D
       // with REX_reg_mem_wide).
6404   predicate(Universe::narrow_oop_shift() == 0);
6405   match(Set dst mem);
6406 
6407   ins_cost(110);
6408   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6409   opcode(0x8D);
6410   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6411   ins_pipe(ialu_reg_reg_fat);
6412 %}
6413 
6414 instruct loadConI(rRegI dst, immI src)
6415 %{
       // Load an arbitrary int constant into $dst; the bytes are emitted by the
       // load_immI encode class (movl, per the format string). No explicit
       // ins_cost; the zero constant has a cheaper dedicated rule (loadConI0).
6416   match(Set dst src);
6417 
6418   format %{ "movl    $dst, $src\t# int" %}
6419   ins_encode(load_immI(dst, src));
6420   ins_pipe(ialu_reg_fat); // XXX
6421 %}
6422 
6423 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6424 %{
       // Load int constant 0 by xor-ing $dst with itself (opcode 0x33). The xor
       // writes the flags, hence KILL cr. Costed at 50.
6425   match(Set dst src);
6426   effect(KILL cr);
6427 
6428   ins_cost(50);
6429   format %{ "xorl    $dst, $dst\t# int" %}
6430   opcode(0x33); /* + rd */
6431   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6432   ins_pipe(ialu_reg);
6433 %}
6434 
6435 instruct loadConL(rRegL dst, immL src)
6436 %{
       // Load an arbitrary long constant into $dst via the load_immL encode
       // class (movq, per the format string). Costed at 150, the most expensive
       // of the long-constant rules, so the narrower variants (loadConL0,
       // loadConUL32, loadConL32) win when their operand types match.
6437   match(Set dst src);
6438 
6439   ins_cost(150);
6440   format %{ "movq    $dst, $src\t# long" %}
6441   ins_encode(load_immL(dst, src));
6442   ins_pipe(ialu_reg);
6443 %}
6444 
6445 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6446 %{
       // Load long constant 0 with a 32-bit xor of $dst with itself (opcode 0x33,
       // non-wide REX_reg_reg); on x86-64 a 32-bit register write also zeroes the
       // upper half. The xor writes the flags, hence KILL cr. Costed at 50.
6447   match(Set dst src);
6448   effect(KILL cr);
6449 
6450   ins_cost(50);
6451   format %{ "xorl    $dst, $dst\t# long" %}
6452   opcode(0x33); /* + rd */
6453   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6454   ins_pipe(ialu_reg); // XXX
6455 %}
6456 
6457 instruct loadConUL32(rRegL dst, immUL32 src)
6458 %{
       // Load a long constant that fits the immUL32 operand (unsigned 32-bit,
       // per the format comment) via the load_immUL32 encode class (movl, per
       // the format string). Costed at 60, cheaper than loadConL32 (70).
6459   match(Set dst src);
6460 
6461   ins_cost(60);
6462   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6463   ins_encode(load_immUL32(dst, src));
6464   ins_pipe(ialu_reg);
6465 %}
6466 
6467 instruct loadConL32(rRegL dst, immL32 src)
6468 %{
       // Load a long constant that fits the immL32 operand (32-bit, per the
       // format comment) via the load_immL32 encode class (movq, per the format
       // string). Costed at 70, between loadConUL32 (60) and loadConL (150).
6469   match(Set dst src);
6470 
6471   ins_cost(70);
6472   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6473   ins_encode(load_immL32(dst, src));
6474   ins_pipe(ialu_reg);
6475 %}
6476 
6477 instruct loadConP(rRegP dst, immP con) %{
       // Load an arbitrary pointer constant into $dst via the load_immP encode
       // class (movq, per the format string). No explicit ins_cost is given.
6478   match(Set dst con);
6479 
6480   format %{ "movq    $dst, $con\t# ptr" %}
6481   ins_encode(load_immP(dst, con));
6482   ins_pipe(ialu_reg_fat); // XXX
6483 %}
6484 
6485 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6486 %{
       // Load the null pointer constant with a 32-bit xor of $dst with itself
       // (opcode 0x33, non-wide REX_reg_reg); on x86-64 a 32-bit register write
       // also zeroes the upper half. The xor writes the flags, hence KILL cr.
6487   match(Set dst src);
6488   effect(KILL cr);
6489 
6490   ins_cost(50);
6491   format %{ "xorl    $dst, $dst\t# ptr" %}
6492   opcode(0x33); /* + rd */
6493   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6494   ins_pipe(ialu_reg);
6495 %}
6496 
6497 instruct loadConP_poll(rRegP dst, immP_poll src) %{
       // Load the polling-page address into $dst. The operand's own value is not
       // used by the encoding: the address comes from os::get_polling_page() and
       // is wrapped in an AddressLiteral carrying a poll_type relocation.
6498   match(Set dst src);
6499   format %{ "movq    $dst, $src\t!ptr" %}
6500   ins_encode %{
6501     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
6502     __ lea($dst$$Register, polling_page);
6503   %}
6504   ins_pipe(ialu_reg_fat);
6505 %}
6506 
6507 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6508 %{
       // Load a pointer constant that fits the immP31 operand (positive 32-bit,
       // per the format comment) via the load_immP31 encode class. Costed at 60.
       // NOTE(review): KILL cr is declared although the format shows a plain
       // movl; confirm against the load_immP31 encode class.
6509   match(Set dst src);
6510   effect(KILL cr);
6511 
6512   ins_cost(60);
6513   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6514   ins_encode(load_immP31(dst, src));
6515   ins_pipe(ialu_reg);
6516 %}
6517 
6518 instruct loadConF(regF dst, immF con) %{
       // Load a float constant: the value is referenced through
       // $constantaddress($con) and loaded into the XMM register with movflt.
6519   match(Set dst con);
6520   ins_cost(125);
6521   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6522   ins_encode %{
6523     __ movflt($dst$$XMMRegister, $constantaddress($con));
6524   %}
6525   ins_pipe(pipe_slow);
6526 %}
6527 
6528 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL constant by xor-ing $dst with itself. The xor
       // writes the flags, hence KILL cr. The format string names $dst for both
       // operands so the printed assembly matches the emitted xorq dst, dst.
6529   match(Set dst src);
6530   effect(KILL cr);
6531   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6532   ins_encode %{
6533     __ xorq($dst$$Register, $dst$$Register);
6534   %}
6535   ins_pipe(ialu_reg);
6536 %}
6537 
6538 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-null compressed oop constant via set_narrow_oop. A NULL
       // constant is not expected here (loadConN0 handles immN0) and trips
       // ShouldNotReachHere().
6539   match(Set dst src);
6540 
6541   ins_cost(125);
6542   format %{ "movl    $dst, $src\t# compressed ptr" %}
6543   ins_encode %{
6544     address con = (address)$src$$constant;
6545     if (con == NULL) {
6546       ShouldNotReachHere();
6547     } else {
6548       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6549     }
6550   %}
6551   ins_pipe(ialu_reg_fat); // XXX
6552 %}
6553 
6554 instruct loadConF0(regF dst, immF0 src)
6555 %{
       // Load float 0.0 by xor-ing the XMM register with itself; hand-encoded as
       // opcode bytes 0F 57 (xorps, per the format string). Costed at 100,
       // cheaper than the constant-table load in loadConF (125).
6556   match(Set dst src);
6557   ins_cost(100);
6558 
6559   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6560   opcode(0x0F, 0x57);
6561   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6562   ins_pipe(pipe_slow);
6563 %}
6564 
6565 // Use the same format since predicate() can not be used here.
     // Load a double constant: the value is referenced through
     // $constantaddress($con) and loaded into the XMM register with movdbl.
6566 instruct loadConD(regD dst, immD con) %{
6567   match(Set dst con);
6568   ins_cost(125);
6569   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6570   ins_encode %{
6571     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6572   %}
6573   ins_pipe(pipe_slow);
6574 %}
6575 
6576 instruct loadConD0(regD dst, immD0 src)
6577 %{
       // Load double 0.0 by xor-ing the XMM register with itself; hand-encoded as
       // opcode bytes 66 0F 57 (xorpd, per the format string). Costed at 100,
       // cheaper than the constant-table load in loadConD (125).
6578   match(Set dst src);
6579   ins_cost(100);
6580 
6581   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6582   opcode(0x66, 0x0F, 0x57);
6583   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6584   ins_pipe(pipe_slow);
6585 %}
6586 
6587 instruct loadSSI(rRegI dst, stackSlotI src)
6588 %{
       // Load an int from a stack slot into $dst. Encoded with primary opcode
       // 0x8B and the non-wide REX_reg_mem prefix (movl, per the format string).
6589   match(Set dst src);
6590 
6591   ins_cost(125);
6592   format %{ "movl    $dst, $src\t# int stk" %}
6593   opcode(0x8B);
6594   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6595   ins_pipe(ialu_reg_mem);
6596 %}
6597 
6598 instruct loadSSL(rRegL dst, stackSlotL src)
6599 %{
       // Load a long from a stack slot into $dst. Encoded with primary opcode
       // 0x8B and the REX_reg_mem_wide prefix (movq, per the format string).
6600   match(Set dst src);
6601 
6602   ins_cost(125);
6603   format %{ "movq    $dst, $src\t# long stk" %}
6604   opcode(0x8B);
6605   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6606   ins_pipe(ialu_reg_mem);
6607 %}
6608 
6609 instruct loadSSP(rRegP dst, stackSlotP src)
6610 %{
       // Load a pointer from a stack slot into $dst. Encoded with primary opcode
       // 0x8B and the REX_reg_mem_wide prefix (movq, per the format string).
6611   match(Set dst src);
6612 
6613   ins_cost(125);
6614   format %{ "movq    $dst, $src\t# ptr stk" %}
6615   opcode(0x8B);
6616   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6617   ins_pipe(ialu_reg_mem);
6618 %}
6619 
6620 instruct loadSSF(regF dst, stackSlotF src)
6621 %{
       // Load a float from a stack slot into an XMM register. Hand-encoded as
       // opcode bytes F3 0F 10 (movss, per the format string), same as loadF.
6622   match(Set dst src);
6623 
6624   ins_cost(125);
6625   format %{ "movss   $dst, $src\t# float stk" %}
6626   opcode(0xF3, 0x0F, 0x10);
6627   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6628   ins_pipe(pipe_slow); // XXX
6629 %}
6630 
6631 // Use the same format since predicate() can not be used here.
     // Load a double from a stack slot into an XMM register. The slot is
     // addressed as rsp + $src$$disp and loaded with movdbl.
6632 instruct loadSSD(regD dst, stackSlotD src)
6633 %{
6634   match(Set dst src);
6635 
6636   ins_cost(125);
6637   format %{ "movsd   $dst, $src\t# double stk" %}
6638   ins_encode  %{
6639     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6640   %}
6641   ins_pipe(pipe_slow); // XXX
6642 %}
6643 
6644 // Prefetch instructions.
6645 // Must be safe to execute with invalid address (cannot fault).
6646 
6647 instruct prefetchr( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 3. Hand-encoded as
       // 0F 0D with ModRM reg field /0 (PREFETCHR in the format string).
6648   predicate(ReadPrefetchInstr==3);
6649   match(PrefetchRead mem);
6650   ins_cost(125);
6651 
6652   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6653   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6654   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6655   ins_pipe(ialu_mem);
6656 %}
6657 
6658 instruct prefetchrNTA( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 0. Hand-encoded as
       // 0F 18 with ModRM reg field /0 (PREFETCHNTA).
6659   predicate(ReadPrefetchInstr==0);
6660   match(PrefetchRead mem);
6661   ins_cost(125);
6662 
6663   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6664   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6665   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6666   ins_pipe(ialu_mem);
6667 %}
6668 
6669 instruct prefetchrT0( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 1. Hand-encoded as
       // 0F 18 with ModRM reg field /1 (PREFETCHT0).
6670   predicate(ReadPrefetchInstr==1);
6671   match(PrefetchRead mem);
6672   ins_cost(125);
6673 
6674   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6675   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6676   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6677   ins_pipe(ialu_mem);
6678 %}
6679 
6680 instruct prefetchrT2( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 2. Hand-encoded as
       // 0F 18 with ModRM reg field /3 (PREFETCHT2).
6681   predicate(ReadPrefetchInstr==2);
6682   match(PrefetchRead mem);
6683   ins_cost(125);
6684 
6685   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6686   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6687   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6688   ins_pipe(ialu_mem);
6689 %}
6690 
6691 instruct prefetchw( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr == 3.
       // Same 0F 0D opcode bytes as prefetchr, but with ModRM reg field /1.
6692   predicate(AllocatePrefetchInstr==3);
6693   match(PrefetchWrite mem);
6694   ins_cost(125);
6695 
6696   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6697   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6698   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6699   ins_pipe(ialu_mem);
6700 %}
6701 
6702 instruct prefetchwNTA( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr == 0.
       // Byte-for-byte the same encoding as prefetchrNTA (0F 18 /0).
6703   predicate(AllocatePrefetchInstr==0);
6704   match(PrefetchWrite mem);
6705   ins_cost(125);
6706 
6707   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6708   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6709   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6710   ins_pipe(ialu_mem);
6711 %}
6712 
6713 instruct prefetchwT0( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr == 1.
       // Byte-for-byte the same encoding as prefetchrT0 (0F 18 /1).
6714   predicate(AllocatePrefetchInstr==1);
6715   match(PrefetchWrite mem);
6716   ins_cost(125);
6717 
6718   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6719   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6720   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6721   ins_pipe(ialu_mem);
6722 %}
6723 
6724 instruct prefetchwT2( memory mem ) %{
       // PrefetchWrite variant selected when AllocatePrefetchInstr == 2.
       // Byte-for-byte the same encoding as prefetchrT2 (0F 18 /3).
6725   predicate(AllocatePrefetchInstr==2);
6726   match(PrefetchWrite mem);
6727   ins_cost(125);
6728 
6729   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6730   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6731   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6732   ins_pipe(ialu_mem);
6733 %}
6734 
6735 //----------Store Instructions-------------------------------------------------
6736 
6737 // Store Byte
     // Register-to-memory StoreB: REX_breg_mem prefix, opcode 0x88, reg/mem ModRM.
     // NOTE(review): REX_breg_mem is presumably the byte-register REX form - confirm.
6738 instruct storeB(memory mem, rRegI src)
6739 %{
6740   match(Set mem (StoreB mem src));
6741 
6742   ins_cost(125); // XXX
6743   format %{ "movb    $mem, $src\t# byte" %}
6744   opcode(0x88);
6745   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6746   ins_pipe(ialu_mem_reg);
6747 %}
6748 
6749 // Store Char/Short
     // Register-to-memory StoreC: same opcode 0x89 as storeI, but with
     // SizePrefix emitted ahead of the REX prefix.
6750 instruct storeC(memory mem, rRegI src)
6751 %{
6752   match(Set mem (StoreC mem src));
6753 
6754   ins_cost(125); // XXX
6755   format %{ "movw    $mem, $src\t# char/short" %}
6756   opcode(0x89);
6757   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6758   ins_pipe(ialu_mem_reg);
6759 %}
6760 
6761 // Store Integer
     // Register-to-memory StoreI: REX_reg_mem prefix, opcode 0x89, reg/mem ModRM.
6762 instruct storeI(memory mem, rRegI src)
6763 %{
6764   match(Set mem (StoreI mem src));
6765 
6766   ins_cost(125); // XXX
6767   format %{ "movl    $mem, $src\t# int" %}
6768   opcode(0x89);
6769   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6770   ins_pipe(ialu_mem_reg);
6771 %}
6772 
6773 // Store Long
     // Register-to-memory StoreL: opcode 0x89 like storeI, but using the
     // REX_reg_mem_wide prefix encoding class.
6774 instruct storeL(memory mem, rRegL src)
6775 %{
6776   match(Set mem (StoreL mem src));
6777 
6778   ins_cost(125); // XXX
6779   format %{ "movq    $mem, $src\t# long" %}
6780   opcode(0x89);
6781   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6782   ins_pipe(ialu_mem_reg); // XXX
6783 %}
6784 
6785 // Store Pointer
     // Register-to-memory StoreP: identical encoding to storeL (wide REX + 0x89);
     // the source operand class is any_RegP rather than rRegP.
6786 instruct storeP(memory mem, any_RegP src)
6787 %{
6788   match(Set mem (StoreP mem src));
6789 
6790   ins_cost(125); // XXX
6791   format %{ "movq    $mem, $src\t# ptr" %}
6792   opcode(0x89);
6793   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6794   ins_pipe(ialu_mem_reg);
6795 %}
6796 
6797 instruct storeImmP0(memory mem, immP0 zero)
6798 %{
       // Stores an immP0 pointer constant by writing r12 instead of an immediate.
       // Only eligible with compressed oops and a NULL narrow-oop base; the format
       // string records the assumption that r12 (heap base) is then zero.
       // Cost 125 is lower than storeImmP's 150.
6799   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6800   match(Set mem (StoreP mem zero));
6801 
6802   ins_cost(125); // XXX
6803   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6804   ins_encode %{
6805     __ movq($mem$$Address, r12);
6806   %}
6807   ins_pipe(ialu_mem_reg);
6808 %}
6809 
6810 // Store NULL Pointer, mark word, or other simple pointer constant.
     // The constant operand is restricted to immP31 and is emitted through Con32
     // as the immediate of a wide-REX C7 /0 store.
6811 instruct storeImmP(memory mem, immP31 src)
6812 %{
6813   match(Set mem (StoreP mem src));
6814 
6815   ins_cost(150); // XXX
6816   format %{ "movq    $mem, $src\t# ptr" %}
6817   opcode(0xC7); /* C7 /0 */
6818   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6819   ins_pipe(ialu_mem_imm);
6820 %}
6821 
6822 // Store Compressed Pointer
     // Register-to-memory StoreN, emitted through the macro assembler as a
     // 32-bit movl rather than through opcode/encoding classes.
6823 instruct storeN(memory mem, rRegN src)
6824 %{
6825   match(Set mem (StoreN mem src));
6826 
6827   ins_cost(125); // XXX
6828   format %{ "movl    $mem, $src\t# compressed ptr" %}
6829   ins_encode %{
6830     __ movl($mem$$Address, $src$$Register);
6831   %}
6832   ins_pipe(ialu_mem_reg);
6833 %}
6834 
6835 instruct storeImmN0(memory mem, immN0 zero)
6836 %{
       // Stores an immN0 compressed-pointer constant by writing the low 32 bits
       // of r12. Unlike the other r12-based zero stores, the predicate checks only
       // the narrow-oop base, not UseCompressedOops.
6837   predicate(Universe::narrow_oop_base() == NULL);
6838   match(Set mem (StoreN mem zero));
6839 
6840   ins_cost(125); // XXX
6841   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6842   ins_encode %{
6843     __ movl($mem$$Address, r12);
6844   %}
6845   ins_pipe(ialu_mem_reg);
6846 %}
6847 
6848 instruct storeImmN(memory mem, immN src)
6849 %{
       // Store of a compressed-pointer constant. A NULL constant becomes a plain
       // 32-bit store of zero; any other constant is handed to set_narrow_oop.
6850   match(Set mem (StoreN mem src));
6851 
6852   ins_cost(150); // XXX
6853   format %{ "movl    $mem, $src\t# compressed ptr" %}
6854   ins_encode %{
6855     const bool is_null_const = ((address)$src$$constant == NULL);
6856     if (!is_null_const) {
6857       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6858     } else {
6859       __ movl($mem$$Address, (int32_t)0);
6860     }
6861   %}
6862   ins_pipe(ialu_mem_imm);
6863 %}
6864 
6865 // Store Integer Immediate
     // Zero variant: writes the low 32 bits of r12 instead of an immediate.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0). Cost 125 vs. 150 for storeImmI.
6866 instruct storeImmI0(memory mem, immI0 zero)
6867 %{
6868   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6869   match(Set mem (StoreI mem zero));
6870 
6871   ins_cost(125); // XXX
6872   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6873   ins_encode %{
6874     __ movl($mem$$Address, r12);
6875   %}
6876   ins_pipe(ialu_mem_reg);
6877 %}
6878 
6879 instruct storeImmI(memory mem, immI src)
6880 %{
       // General int-immediate store: C7 /0 followed by the constant via Con32.
6881   match(Set mem (StoreI mem src));
6882 
6883   ins_cost(150);
6884   format %{ "movl    $mem, $src\t# int" %}
6885   opcode(0xC7); /* C7 /0 */
6886   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6887   ins_pipe(ialu_mem_imm);
6888 %}
6889 
6890 // Store Long Immediate
     // Zero variant: writes all 64 bits of r12 (movq) instead of an immediate.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0). Cost 125 vs. 150 for storeImmL.
6891 instruct storeImmL0(memory mem, immL0 zero)
6892 %{
6893   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6894   match(Set mem (StoreL mem zero));
6895 
6896   ins_cost(125); // XXX
6897   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6898   ins_encode %{
6899     __ movq($mem$$Address, r12);
6900   %}
6901   ins_pipe(ialu_mem_reg);
6902 %}
6903 
6904 instruct storeImmL(memory mem, immL32 src)
6905 %{
       // Long-immediate store limited to immL32 constants: wide-REX C7 /0 with
       // the constant emitted via Con32.
6906   match(Set mem (StoreL mem src));
6907 
6908   ins_cost(150);
6909   format %{ "movq    $mem, $src\t# long" %}
6910   opcode(0xC7); /* C7 /0 */
6911   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6912   ins_pipe(ialu_mem_imm);
6913 %}
6914 
6915 // Store Short/Char Immediate
     // Zero variant: writes the low 16 bits of r12 (movw) instead of an immediate.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0).
6916 instruct storeImmC0(memory mem, immI0 zero)
6917 %{
6918   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6919   match(Set mem (StoreC mem zero));
6920 
6921   ins_cost(125); // XXX
6922   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6923   ins_encode %{
6924     __ movw($mem$$Address, r12);
6925   %}
6926   ins_pipe(ialu_mem_reg);
6927 %}
6928 
6929 instruct storeImmI16(memory mem, immI16 src)
6930 %{
       // 16-bit immediate StoreC, enabled only when UseStoreImmI16 is set.
       // SizePrefix is emitted first, then C7 /0 with the constant via Con16.
6931   predicate(UseStoreImmI16);
6932   match(Set mem (StoreC mem src));
6933 
6934   ins_cost(150);
6935   format %{ "movw    $mem, $src\t# short/char" %}
6936   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6937   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6938   ins_pipe(ialu_mem_imm);
6939 %}
6940 
6941 // Store Byte Immediate
     // Zero variant: writes the low byte of r12 (movb) instead of an immediate.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0). Cost 125 vs. 150 for storeImmB.
6942 instruct storeImmB0(memory mem, immI0 zero)
6943 %{
6944   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6945   match(Set mem (StoreB mem zero));
6946 
6947   ins_cost(125); // XXX
       // The annotation reads "byte" to match the movb/StoreB this rule emits.
6948   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6949   ins_encode %{
6950     __ movb($mem$$Address, r12);
6951   %}
6952   ins_pipe(ialu_mem_reg);
6953 %}
6954 
6955 instruct storeImmB(memory mem, immI8 src)
6956 %{
       // Byte-immediate store: opcode C6 /0 with the immI8 constant emitted
       // through the Con8or32 encoding class.
6957   match(Set mem (StoreB mem src));
6958 
6959   ins_cost(150); // XXX
6960   format %{ "movb    $mem, $src\t# byte" %}
6961   opcode(0xC6); /* C6 /0 */
6962   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6963   ins_pipe(ialu_mem_imm);
6964 %}
6965 
6966 // Store Aligned Packed Byte XMM register to memory
     // Matches Store8B; the source is a regD operand and the store is emitted
     // by the shared movq_st encoding class.
6967 instruct storeA8B(memory mem, regD src) %{
6968   match(Set mem (Store8B mem src));
6969   ins_cost(145);
6970   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6971   ins_encode( movq_st(mem, src));
6972   ins_pipe( pipe_slow );
6973 %}
6974 
6975 // Store Aligned Packed Char/Short XMM register to memory
     // Matches Store4C; same movq_st encoding and cost as storeA8B.
6976 instruct storeA4C(memory mem, regD src) %{
6977   match(Set mem (Store4C mem src));
6978   ins_cost(145);
6979   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6980   ins_encode( movq_st(mem, src));
6981   ins_pipe( pipe_slow );
6982 %}
6983 
6984 // Store Aligned Packed Integer XMM register to memory
     // Matches Store2I; same movq_st encoding and cost as storeA8B.
6985 instruct storeA2I(memory mem, regD src) %{
6986   match(Set mem (Store2I mem src));
6987   ins_cost(145);
6988   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6989   ins_encode( movq_st(mem, src));
6990   ins_pipe( pipe_slow );
6991 %}
6992 
6993 // Store CMS card-mark Immediate
     // Zero card-mark variant: writes the low byte of r12 instead of an immediate.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0). Cost 125 vs. 150 for storeImmCM0.
6994 instruct storeImmCM0_reg(memory mem, immI0 zero)
6995 %{
6996   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6997   match(Set mem (StoreCM mem zero));
6998 
6999   ins_cost(125); // XXX
7000   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7001   ins_encode %{
7002     __ movb($mem$$Address, r12);
7003   %}
7004   ins_pipe(ialu_mem_reg);
7005 %}
7006 
7007 instruct storeImmCM0(memory mem, immI0 src)
7008 %{
       // Unpredicated card-mark store of the immI0 constant: C6 /0 with the
       // immediate emitted through Con8or32.
7009   match(Set mem (StoreCM mem src));
7010 
7011   ins_cost(150); // XXX
7012   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7013   opcode(0xC6); /* C6 /0 */
7014   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7015   ins_pipe(ialu_mem_imm);
7016 %}
7017 
7018 // Store Aligned Packed Single Float XMM register to memory
     // Matches Store2F; same movq_st encoding and cost as the other packed stores.
7019 instruct storeA2F(memory mem, regD src) %{
7020   match(Set mem (Store2F mem src));
7021   ins_cost(145);
7022   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7023   ins_encode( movq_st(mem, src));
7024   ins_pipe( pipe_slow );
7025 %}
7026 
7027 // Store Float
     // Register-to-memory StoreF. Byte order is F3 (OpcP), then the REX prefix,
     // then 0F 11 (OpcS, OpcT), then the reg/mem ModRM.
7028 instruct storeF(memory mem, regF src)
7029 %{
7030   match(Set mem (StoreF mem src));
7031 
7032   ins_cost(95); // XXX
7033   format %{ "movss   $mem, $src\t# float" %}
7034   opcode(0xF3, 0x0F, 0x11);
7035   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7036   ins_pipe(pipe_slow); // XXX
7037 %}
7038 
7039 // Store immediate Float value (it is faster than store from XMM register)
     // Zero variant (immF0): writes the low 32 bits of r12 with an integer movl.
     // Only eligible with compressed oops and a NULL narrow-oop base
     // (format string: R12_heapbase==0). Cost 25 vs. 50 for storeF_imm.
7040 instruct storeF0(memory mem, immF0 zero)
7041 %{
7042   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7043   match(Set mem (StoreF mem zero));
7044 
7045   ins_cost(25); // XXX
7046   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7047   ins_encode %{
7048     __ movl($mem$$Address, r12);
7049   %}
7050   ins_pipe(ialu_mem_reg);
7051 %}
7052 
7053 instruct storeF_imm(memory mem, immF src)
7054 %{
       // Float-constant store done as an integer C7 /0 immediate store; the
       // constant is emitted by Con32F_as_bits.
       // NOTE(review): presumably that is the raw 32-bit float pattern - confirm.
7055   match(Set mem (StoreF mem src));
7056 
7057   ins_cost(50);
7058   format %{ "movl    $mem, $src\t# float" %}
7059   opcode(0xC7); /* C7 /0 */
7060   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7061   ins_pipe(ialu_mem_imm);
7062 %}
7063 
7064 // Store Double
     // Register-to-memory StoreD. Same layout as storeF, with an F2 prefix byte
     // instead of F3: F2, REX prefix, 0F 11, reg/mem ModRM.
7065 instruct storeD(memory mem, regD src)
7066 %{
7067   match(Set mem (StoreD mem src));
7068 
7069   ins_cost(95); // XXX
7070   format %{ "movsd   $mem, $src\t# double" %}
7071   opcode(0xF2, 0x0F, 0x11);
7072   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7073   ins_pipe(pipe_slow); // XXX
7074 %}
7075 
7076 // Store immediate double 0.0 (it is faster than store from XMM register)
     // Immediate form of the double-zero store (wide-REX C7 /0). Its predicate is
     // the exact negation of storeD0's, so exactly one of the two is eligible.
7077 instruct storeD0_imm(memory mem, immD0 src)
7078 %{
7079   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7080   match(Set mem (StoreD mem src));
7081 
7082   ins_cost(50);
7083   format %{ "movq    $mem, $src\t# double 0." %}
7084   opcode(0xC7); /* C7 /0 */
7085   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7086   ins_pipe(ialu_mem_imm);
7087 %}
7088 
7089 instruct storeD0(memory mem, immD0 zero)
7090 %{
       // Double-zero store done by writing all 64 bits of r12 (movq).
       // Only eligible with compressed oops and a NULL narrow-oop base
       // (format string: R12_heapbase==0); storeD0_imm covers the other case.
7091   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7092   match(Set mem (StoreD mem zero));
7093 
7094   ins_cost(25); // XXX
7095   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7096   ins_encode %{
7097     __ movq($mem$$Address, r12);
7098   %}
7099   ins_pipe(ialu_mem_reg);
7100 %}
7101 
7102 instruct storeSSI(stackSlotI dst, rRegI src)
7103 %{
       // Bare (Set dst src) from an int register into a stackSlotI operand;
       // same opcode 0x89 as storeI, with the stack slot as the memory operand.
7104   match(Set dst src);
7105 
7106   ins_cost(100);
7107   format %{ "movl    $dst, $src\t# int stk" %}
7108   opcode(0x89);
7109   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7110   ins_pipe( ialu_mem_reg );
7111 %}
7112 
7113 instruct storeSSL(stackSlotL dst, rRegL src)
7114 %{
       // Bare (Set dst src) from a long register into a stackSlotL operand;
       // wide-REX prefix with opcode 0x89, as in storeL.
7115   match(Set dst src);
7116 
7117   ins_cost(100);
7118   format %{ "movq    $dst, $src\t# long stk" %}
7119   opcode(0x89);
7120   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7121   ins_pipe(ialu_mem_reg);
7122 %}
7123 
7124 instruct storeSSP(stackSlotP dst, rRegP src)
7125 %{
       // Bare (Set dst src) from a pointer register into a stackSlotP operand;
       // encoded exactly like storeSSL (wide REX + 0x89).
7126   match(Set dst src);
7127 
7128   ins_cost(100);
7129   format %{ "movq    $dst, $src\t# ptr stk" %}
7130   opcode(0x89);
7131   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7132   ins_pipe(ialu_mem_reg);
7133 %}
7134 
7135 instruct storeSSF(stackSlotF dst, regF src)
7136 %{
       // Bare (Set dst src) from a float register into a stackSlotF operand;
       // same F3 0F 11 opcode bytes as storeF.
7137   match(Set dst src);
7138 
7139   ins_cost(95); // XXX
7140   format %{ "movss   $dst, $src\t# float stk" %}
7141   opcode(0xF3, 0x0F, 0x11);
7142   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7143   ins_pipe(pipe_slow); // XXX
7144 %}
7145 
7146 instruct storeSSD(stackSlotD dst, regD src)
7147 %{
       // Bare (Set dst src) from a double register into a stackSlotD operand;
       // same F2 0F 11 opcode bytes as storeD.
7148   match(Set dst src);
7149 
7150   ins_cost(95); // XXX
7151   format %{ "movsd   $dst, $src\t# double stk" %}
7152   opcode(0xF2, 0x0F, 0x11);
7153   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7154   ins_pipe(pipe_slow); // XXX
7155 %}
7156 
7157 //----------BSWAP Instructions-------------------------------------------------
7158 instruct bytes_reverse_int(rRegI dst) %{
       // In-place ReverseBytesI: dst is both the input and the result.
       // Encoded from opcode bytes 0F and C8, with dst passed to opc2_reg.
7159   match(Set dst (ReverseBytesI dst));
7160 
7161   format %{ "bswapl  $dst" %}
7162   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7163   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7164   ins_pipe( ialu_reg );
7165 %}
7166 
7167 instruct bytes_reverse_long(rRegL dst) %{
       // In-place ReverseBytesL: same opcode bytes as bytes_reverse_int, but with
       // the REX_reg_wide prefix encoding class.
7168   match(Set dst (ReverseBytesL dst));
7169 
7170   format %{ "bswapq  $dst" %}
7171 
7172   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7173   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7174   ins_pipe( ialu_reg);
7175 %}
7176 
7177 instruct bytes_reverse_unsigned_short(rRegI dst) %{
       // In-place ReverseBytesUS: a 32-bit byte swap followed by a logical right
       // shift by 16 (shrl) to bring the swapped halfword back down.
7178   match(Set dst (ReverseBytesUS dst));
7179 
7180   format %{ "bswapl  $dst\n\t"
7181             "shrl    $dst,16\n\t" %}
7182   ins_encode %{
7183     __ bswapl($dst$$Register);
7184     __ shrl($dst$$Register, 16);
7185   %}
7186   ins_pipe( ialu_reg );
7187 %}
7188 
7189 instruct bytes_reverse_short(rRegI dst) %{
       // In-place ReverseBytesS: a 32-bit byte swap followed by an arithmetic
       // right shift by 16 (sarl), where the unsigned variant uses shrl.
7190   match(Set dst (ReverseBytesS dst));
7191 
       // The printed mnemonic is "sarl", matching the sarl() call in the encoding.
7192   format %{ "bswapl  $dst\n\t"
7193             "sarl    $dst,16\n\t" %}
7194   ins_encode %{
7195     __ bswapl($dst$$Register);
7196     __ sarl($dst$$Register, 16);
7197   %}
7198   ins_pipe( ialu_reg );
7199 %}
7200 
7201 //---------- Zeros Count Instructions ------------------------------------------
7202 
7203 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI as a single lzcntl, chosen when
       // UseCountLeadingZerosInstruction is set. The flags register is declared
       // killed.
7204   predicate(UseCountLeadingZerosInstruction);
7205   match(Set dst (CountLeadingZerosI src));
7206   effect(KILL cr);
7207 
7208   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7209   ins_encode %{
7210     __ lzcntl($dst$$Register, $src$$Register);
7211   %}
7212   ins_pipe(ialu_reg);
7213 %}
7214 
7215 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosI for when UseCountLeadingZerosInstruction is
       // off: a bsrl, a fix-up to -1 when the notZero branch is not taken, then
       // dst = (BitsPerInt - 1) - dst via negl + addl.
7216   predicate(!UseCountLeadingZerosInstruction);
7217   match(Set dst (CountLeadingZerosI src));
7218   effect(KILL cr);
7219 
7220   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7221             "jnz     skip\n\t"
7222             "movl    $dst, -1\n"
7223       "skip:\n\t"
7224             "negl    $dst\n\t"
7225             "addl    $dst, 31" %}
7226   ins_encode %{
7227     Label have_index;
7228     const Register value  = $src$$Register;
7229     const Register result = $dst$$Register;
7230     __ bsrl(result, value);
7231     __ jccb(Assembler::notZero, have_index);
7232     __ movl(result, -1);   // fall-through case: makes the final result BitsPerInt
7233     __ bind(have_index);
7234     __ negl(result);
7235     __ addl(result, BitsPerInt - 1);
7236   %}
7237   ins_pipe(ialu_reg);
7238 %}
7239 
7240 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL as a single lzcntq, chosen when
       // UseCountLeadingZerosInstruction is set. The result operand is an int
       // register even though the source is a long.
7241   predicate(UseCountLeadingZerosInstruction);
7242   match(Set dst (CountLeadingZerosL src));
7243   effect(KILL cr);
7244 
7245   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7246   ins_encode %{
7247     __ lzcntq($dst$$Register, $src$$Register);
7248   %}
7249   ins_pipe(ialu_reg);
7250 %}
7251 
7252 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback CountLeadingZerosL for when UseCountLeadingZerosInstruction is
       // off: a bsrq, a fix-up to -1 when the notZero branch is not taken, then
       // dst = (BitsPerLong - 1) - dst via 32-bit negl + addl.
7253   predicate(!UseCountLeadingZerosInstruction);
7254   match(Set dst (CountLeadingZerosL src));
7255   effect(KILL cr);
7256 
7257   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7258             "jnz     skip\n\t"
7259             "movl    $dst, -1\n"
7260       "skip:\n\t"
7261             "negl    $dst\n\t"
7262             "addl    $dst, 63" %}
7263   ins_encode %{
7264     Label have_index;
7265     const Register value  = $src$$Register;
7266     const Register result = $dst$$Register;
7267     __ bsrq(result, value);
7268     __ jccb(Assembler::notZero, have_index);
7269     __ movl(result, -1);   // fall-through case: makes the final result BitsPerLong
7270     __ bind(have_index);
7271     __ negl(result);
7272     __ addl(result, BitsPerLong - 1);
7273   %}
7274   ins_pipe(ialu_reg);
7275 %}
7276 
7277 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl. When the notZero branch is not taken, dst
       // is overwritten with BitsPerInt. The flags register is declared killed.
7278   match(Set dst (CountTrailingZerosI src));
7279   effect(KILL cr);
7280 
7281   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7282             "jnz     done\n\t"
7283             "movl    $dst, 32\n"
7284       "done:" %}
7285   ins_encode %{
7286     Label found_bit;
7287     const Register result = $dst$$Register;
7288     __ bsfl(result, $src$$Register);
7289     __ jccb(Assembler::notZero, found_bit);
7290     __ movl(result, BitsPerInt);
7291     __ bind(found_bit);
7292   %}
7293   ins_pipe(ialu_reg);
7294 %}
7295 
7296 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq. When the notZero branch is not taken, dst
       // is overwritten with BitsPerLong. The flags register is declared killed.
7297   match(Set dst (CountTrailingZerosL src));
7298   effect(KILL cr);
7299 
7300   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7301             "jnz     done\n\t"
7302             "movl    $dst, 64\n"
7303       "done:" %}
7304   ins_encode %{
7305     Label found_bit;
7306     const Register result = $dst$$Register;
7307     __ bsfq(result, $src$$Register);
7308     __ jccb(Assembler::notZero, found_bit);
7309     __ movl(result, BitsPerLong);
7310     __ bind(found_bit);
7311   %}
7312   ins_pipe(ialu_reg);
7313 %}
7314 
7315 
7316 //---------- Population Count Instructions -------------------------------------
7317 
7318 instruct popCountI(rRegI dst, rRegI src) %{
       // Register form of PopCountI, available only when UsePopCountInstruction
       // is set; emitted as popcntl.
7319   predicate(UsePopCountInstruction);
7320   match(Set dst (PopCountI src));
7321 
7322   format %{ "popcnt  $dst, $src" %}
7323   ins_encode %{
7324     __ popcntl($dst$$Register, $src$$Register);
7325   %}
7326   ins_pipe(ialu_reg);
7327 %}
7328 
7329 instruct popCountI_mem(rRegI dst, memory mem) %{
       // Memory form of PopCountI: folds the LoadI into popcntl's memory operand.
       // Available only when UsePopCountInstruction is set.
7330   predicate(UsePopCountInstruction);
7331   match(Set dst (PopCountI (LoadI mem)));
7332 
7333   format %{ "popcnt  $dst, $mem" %}
7334   ins_encode %{
7335     __ popcntl($dst$$Register, $mem$$Address);
7336   %}
7337   ins_pipe(ialu_reg);
7338 %}
7339 
7340 // Note: Long.bitCount(long) returns an int.
     // Register form of PopCountL (popcntq): long source, int destination.
     // Available only when UsePopCountInstruction is set.
7341 instruct popCountL(rRegI dst, rRegL src) %{
7342   predicate(UsePopCountInstruction);
7343   match(Set dst (PopCountL src));
7344 
7345   format %{ "popcnt  $dst, $src" %}
7346   ins_encode %{
7347     __ popcntq($dst$$Register, $src$$Register);
7348   %}
7349   ins_pipe(ialu_reg);
7350 %}
7351 
7352 // Note: Long.bitCount(long) returns an int.
     // Memory form of PopCountL: folds the LoadL into popcntq's memory operand.
     // Available only when UsePopCountInstruction is set.
7353 instruct popCountL_mem(rRegI dst, memory mem) %{
7354   predicate(UsePopCountInstruction);
7355   match(Set dst (PopCountL (LoadL mem)));
7356 
7357   format %{ "popcnt  $dst, $mem" %}
7358   ins_encode %{
7359     __ popcntq($dst$$Register, $mem$$Address);
7360   %}
7361   ins_pipe(ialu_reg);
7362 %}
7363 
7364 
7365 //----------MemBar Instructions-----------------------------------------------
7366 // Memory barrier flavors
7367 
7368 instruct membar_acquire()
7369 %{
       // MemBarAcquire emits no code: size is 0, the encoding is empty, and the
       // cost is 0.
7370   match(MemBarAcquire);
7371   ins_cost(0);
7372 
7373   size(0);
7374   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7375   ins_encode();
7376   ins_pipe(empty);
7377 %}
7378 
7379 instruct membar_acquire_lock()
7380 %{
       // MemBarAcquire for nodes where Matcher::prior_fast_lock(n) holds. Also an
       // empty encoding; the format string attributes this to the CMPXCHG in the
       // preceding FastLock.
7381   match(MemBarAcquire);
7382   predicate(Matcher::prior_fast_lock(n));
7383   ins_cost(0);
7384 
7385   size(0);
7386   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7387   ins_encode();
7388   ins_pipe(empty);
7389 %}
7390 
7391 instruct membar_release()
7392 %{
       // MemBarRelease emits no code: size is 0, the encoding is empty, and the
       // cost is 0.
7393   match(MemBarRelease);
7394   ins_cost(0);
7395 
7396   size(0);
7397   format %{ "MEMBAR-release ! (empty encoding)" %}
7398   ins_encode();
7399   ins_pipe(empty);
7400 %}
7401 
7402 instruct membar_release_lock()
7403 %{
       // MemBarRelease for nodes where Matcher::post_fast_unlock(n) holds. Also an
       // empty encoding; the format string attributes this to the FastUnlock that
       // follows.
7404   match(MemBarRelease);
7405   predicate(Matcher::post_fast_unlock(n));
7406   ins_cost(0);
7407 
7408   size(0);
7409   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7410   ins_encode();
7411   ins_pipe(empty);
7412 %}
7413 
7414 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile: the encoding always calls membar(StoreLoad) and the rule
       // declares the flags register killed. Only the printed text depends on
       // os::is_MP(). The high cost (400) leaves the zero-cost
       // unnecessary_membar_volatile rule cheaper wherever its predicate holds.
7415   match(MemBarVolatile);
7416   effect(KILL cr);
7417   ins_cost(400);
7418 
7419   format %{
7420     $$template
7421     if (os::is_MP()) {
7422       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7423     } else {
7424       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7425     }
7426   %}
7427   ins_encode %{
7428     __ membar(Assembler::StoreLoad);
7429   %}
7430   ins_pipe(pipe_slow);
7431 %}
7432 
7433 instruct unnecessary_membar_volatile()
7434 %{
       // MemBarVolatile for nodes where Matcher::post_store_load_barrier(n) holds:
       // zero cost, zero size, empty encoding.
7435   match(MemBarVolatile);
7436   predicate(Matcher::post_store_load_barrier(n));
7437   ins_cost(0);
7438 
7439   size(0);
7440   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7441   ins_encode();
7442   ins_pipe(empty);
7443 %}
7444 
7445 //----------Move Instructions--------------------------------------------------
7446 
7447 instruct castX2P(rRegP dst, rRegL src)
7448 %{
       // CastX2P (long -> pointer) as a register copy through the enc_copy_wide
       // encoding class.
7449   match(Set dst (CastX2P src));
7450 
7451   format %{ "movq    $dst, $src\t# long->ptr" %}
7452   ins_encode(enc_copy_wide(dst, src));
7453   ins_pipe(ialu_reg_reg); // XXX
7454 %}
7455 
7456 instruct castP2X(rRegL dst, rRegP src)
7457 %{
       // CastP2X (pointer -> long) as a register copy through the enc_copy_wide
       // encoding class.
7458   match(Set dst (CastP2X src));
7459 
7460   format %{ "movq    $dst, $src\t# ptr -> long" %}
7461   ins_encode(enc_copy_wide(dst, src));
7462   ins_pipe(ialu_reg_reg); // XXX
7463 %}
7464 
7465 
7466 // Convert oop pointer into compressed form
     // General EncodeP, used whenever the node's pointer type is not NotNull.
     // The source is copied into dst first (if they differ) and then compressed
     // in place.
7467 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7468   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7469   match(Set dst (EncodeP src));
7470   effect(KILL cr);
7471   format %{ "encode_heap_oop $dst,$src" %}
7472   ins_encode %{
7473     const Register narrow = $dst$$Register;
7474     const Register oop    = $src$$Register;
7475     if (narrow != oop) {
7476       __ movq(narrow, oop);
7477     }
7478     __ encode_heap_oop(narrow);
7479   %}
7480   ins_pipe(ialu_reg_long);
7481 %}
7482 
7483 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for nodes whose pointer type is exactly NotNull; delegates to the
       // two-register encode_heap_oop_not_null(dst, src).
7484   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7485   match(Set dst (EncodeP src));
7486   effect(KILL cr);
7487   format %{ "encode_heap_oop_not_null $dst,$src" %}
7488   ins_encode %{
7489     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7490   %}
7491   ins_pipe(ialu_reg_long);
7492 %}
7493 
7494 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // General DecodeN, used when the node's oop type is neither NotNull nor
       // Constant. The source is copied into dst first (if they differ) and then
       // decoded in place.
7495   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7496             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7497   match(Set dst (DecodeN src));
7498   effect(KILL cr);
7499   format %{ "decode_heap_oop $dst,$src" %}
7500   ins_encode %{
7501     const Register wide   = $dst$$Register;
7502     const Register narrow = $src$$Register;
7503     if (wide != narrow) {
7504       __ movq(wide, narrow);
7505     }
7506     __ decode_heap_oop(wide);
7507   %}
7508   ins_pipe(ialu_reg_long);
7509 %}
7510 
7511 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for nodes whose oop type is NotNull or Constant. Uses the
       // one-register decode when dst and src are the same register, otherwise
       // the two-register form.
7512   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7513             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7514   match(Set dst (DecodeN src));
7515   effect(KILL cr);
7516   format %{ "decode_heap_oop_not_null $dst,$src" %}
7517   ins_encode %{
7518     const Register wide   = $dst$$Register;
7519     const Register narrow = $src$$Register;
7520     if (wide == narrow) {
7521       __ decode_heap_oop_not_null(wide);
7522     } else {
7523       __ decode_heap_oop_not_null(wide, narrow);
7524     }
7525   %}
7526   ins_pipe(ialu_reg_long);
7527 %}
7528 
7529 
7530 //----------Conditional Move---------------------------------------------------
7531 // Jump
7532 // dummy instruction for generating temp registers
     // predicate(false) means this rule is never selected by the matcher. Its
     // shape is Jump over a shifted switch value, with dest as a TEMP register
     // that receives the jump-table base.
7533 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7534   match(Jump (LShiftL switch_val shift));
7535   ins_cost(350);
7536   predicate(false);
7537   effect(TEMP dest);
7538 
7539   format %{ "leaq    $dest, [$constantaddress]\n\t"
7540             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7541   ins_encode %{
7542     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7543     // to do that and the compiler is using that register as one it can allocate.
7544     // So we build it all by hand.
7545     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7546     // ArrayAddress dispatch(table, index);
         // dispatch only describes the operand [dest + switch_val * scale]; it is
         // consumed by jmp after lea has loaded the table address into dest.
7547     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7548     __ lea($dest$$Register, $constantaddress);
7549     __ jmp(dispatch);
7550   %}
7551   ins_pipe(pipe_jmp);
7552   ins_pc_relative(1);
7553 %}
7554 
7555 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump over (switch_val << shift) + offset. The shift becomes the address
       // scale factor and the offset (cast to int) becomes the displacement of
       // the indirect jmp; dest is a TEMP that receives the table base.
7556   match(Jump (AddL (LShiftL switch_val shift) offset));
7557   ins_cost(350);
7558   effect(TEMP dest);
7559 
7560   format %{ "leaq    $dest, [$constantaddress]\n\t"
7561             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7562   ins_encode %{
7563     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7564     // to do that and the compiler is using that register as one it can allocate.
7565     // So we build it all by hand.
7566     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7567     // ArrayAddress dispatch(table, index);
         // dispatch only describes the operand; it is consumed by jmp after lea
         // has loaded the table address into dest.
7568     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7569     __ lea($dest$$Register, $constantaddress);
7570     __ jmp(dispatch);
7571   %}
7572   ins_pipe(pipe_jmp);
7573   ins_pc_relative(1);
7574 %}
7575 
7576 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Plain Jump on an unscaled switch value: indirect jmp through
       // [dest + switch_val] with scale times_1; dest is a TEMP that receives
       // the table base.
7577   match(Jump switch_val);
7578   ins_cost(350);
7579   effect(TEMP dest);
7580 
7581   format %{ "leaq    $dest, [$constantaddress]\n\t"
7582             "jmp     [$dest + $switch_val]\n\t" %}
7583   ins_encode %{
7584     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7585     // to do that and the compiler is using that register as one it can allocate.
7586     // So we build it all by hand.
7587     // Address index(noreg, switch_reg, Address::times_1);
7588     // ArrayAddress dispatch(table, index);
         // dispatch only describes the operand; it is consumed by jmp after lea
         // has loaded the table address into dest.
7589     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7590     __ lea($dest$$Register, $constantaddress);
7591     __ jmp(dispatch);
7592   %}
7593   ins_pipe(pipe_jmp);
7594   ins_pc_relative(1);
7595 %}
7596 
7597 // Conditional move
     // Int register-register CMoveI under signed flags (cmpOp / rFlagsReg).
     // dst is both an input and the result. Base opcode bytes are 0F 40.
     // NOTE(review): enc_cmov(cop) presumably merges the condition code into the
     // second opcode byte - confirm against the enc_class definition.
7598 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7599 %{
7600   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7601 
7602   ins_cost(200); // XXX
7603   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7604   opcode(0x0F, 0x40);
7605   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7606   ins_pipe(pipe_cmov_reg);
7607 %}
7608 
7609 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // Unsigned-flags counterpart of cmovI_reg (cmpOpU / rFlagsRegU); the
       // encoding line is identical.
7610   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7611 
7612   ins_cost(200); // XXX
7613   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7614   opcode(0x0F, 0x40);
7615   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7616   ins_pipe(pipe_cmov_reg);
7617 %}
7618 
7619 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Same match shape for the cmpOpUCF / rFlagsRegUCF operand classes. It has
       // no encoding of its own and expands to cmovI_regU with the same operands.
7620   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7621   ins_cost(200);
7622   expand %{
7623     cmovI_regU(cop, cr, dst, src);
7624   %}
7625 %}
7626 
7627 // Conditional move
     // Int CMoveI under signed flags whose second value is a LoadI folded into
     // the instruction's memory operand. Cost 250 vs. 200 for the register form.
7628 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7629   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7630 
7631   ins_cost(250); // XXX
7632   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7633   opcode(0x0F, 0x40);
7634   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7635   ins_pipe(pipe_cmov_mem);
7636 %}
7637 
7638 // Conditional move
     // Unsigned-flags counterpart of cmovI_mem (cmpOpU / rFlagsRegU); the
     // encoding line is identical.
7639 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7640 %{
7641   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7642 
7643   ins_cost(250); // XXX
7644   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7645   opcode(0x0F, 0x40);
7646   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7647   ins_pipe(pipe_cmov_mem);
7648 %}
7649 
7650 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Same match shape for the cmpOpUCF / rFlagsRegUCF operand classes. It has
       // no encoding of its own and expands to cmovI_memU with the same operands.
7651   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7652   ins_cost(250);
7653   expand %{
7654     cmovI_memU(cop, cr, dst, src);
7655   %}
7656 %}
7657 
7658 // Conditional move
     // Compressed-pointer CMoveN under signed flags. Uses the same non-wide
     // encoding line as cmovI_reg; the format prints it as cmovl.
7659 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7660 %{
7661   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7662 
7663   ins_cost(200); // XXX
7664   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7665   opcode(0x0F, 0x40);
7666   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7667   ins_pipe(pipe_cmov_reg);
7668 %}
7669 
7670 // Conditional move
     // Unsigned-flags counterpart of cmovN_reg (cmpOpU / rFlagsRegU); the
     // encoding line is identical.
7671 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7672 %{
7673   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7674 
7675   ins_cost(200); // XXX
7676   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7677   opcode(0x0F, 0x40);
7678   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7679   ins_pipe(pipe_cmov_reg);
7680 %}
7681 
7682 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Same match shape for the cmpOpUCF / rFlagsRegUCF operand classes. It has
       // no encoding of its own and expands to cmovN_regU with the same operands.
7683   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7684   ins_cost(200);
7685   expand %{
7686     cmovN_regU(cop, cr, dst, src);
7687   %}
7688 %}
7689 
7690 // Conditional move
     // Pointer CMoveP under signed flags. Differs from the int form only in using
     // the REX_reg_reg_wide prefix class; the format prints it as cmovq.
7691 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7692 %{
7693   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7694 
7695   ins_cost(200); // XXX
7696   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7697   opcode(0x0F, 0x40);
7698   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7699   ins_pipe(pipe_cmov_reg);  // XXX
7700 %}
7701 
7702 // Conditional move
7703 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7704 %{
       // Unsigned-compare variant of cmovP_reg: same CMoveP match rule and
       // wide encoding, typed on cmpOpU / rFlagsRegU.
7705   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7706 
7707   ins_cost(200); // XXX
7708   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7709   opcode(0x0F, 0x40);
7710   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7711   ins_pipe(pipe_cmov_reg); // XXX
7712 %}
7713 
7714 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // CMoveP for the cmpOpUCF / rFlagsRegUCF operand pair; expands to
       // cmovP_regU rather than defining its own encoding.
7715   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7716   ins_cost(200);
7717   expand %{
7718     cmovP_regU(cop, cr, dst, src);
7719   %}
7720 %}
7721 
7722 // DISABLED: Requires the ADLC to emit a bottom_type call that
7723 // correctly meets the two pointer arguments; one is an incoming
7724 // register but the other is a memory operand.  ALSO appears to
7725 // be buggy with implicit null checks.
7726 //
7727 //// Conditional move
7728 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7729 //%{
7730 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7731 //  ins_cost(250);
7732 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7733 //  opcode(0x0F,0x40);
7734 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7735 //  ins_pipe( pipe_cmov_mem );
7736 //%}
7737 //
7738 //// Conditional move
7739 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7740 //%{
7741 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7742 //  ins_cost(250);
7743 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7744 //  opcode(0x0F,0x40);
7745 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7746 //  ins_pipe( pipe_cmov_mem );
7747 //%}
7748 
7749 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7750 %{
       // Register-to-register conditional move of a long (CMoveL) under
       // signed-compare flags; printed as "cmovq", wide REX encoding.
7751   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7752 
7753   ins_cost(200); // XXX
7754   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7755   opcode(0x0F, 0x40);
7756   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7757   ins_pipe(pipe_cmov_reg);  // XXX
7758 %}
7759 
7760 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7761 %{
       // Conditional move of a long loaded from memory (CMoveL with a LoadL
       // second input) under signed-compare flags; printed as "cmovq".
       // NOTE(review): cost is 200 here while the int memory form
       // cmovI_memU uses 250 - confirm whether the difference is intended.
7762   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7763 
7764   ins_cost(200); // XXX
7765   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7766   opcode(0x0F, 0x40);
7767   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7768   ins_pipe(pipe_cmov_mem);  // XXX
7769 %}
7770 
7771 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7772 %{
       // Unsigned-compare variant of cmovL_reg: same CMoveL match rule and
       // wide encoding, typed on cmpOpU / rFlagsRegU.
7773   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7774 
7775   ins_cost(200); // XXX
7776   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7777   opcode(0x0F, 0x40);
7778   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7779   ins_pipe(pipe_cmov_reg); // XXX
7780 %}
7781 
7782 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
       // CMoveL for the cmpOpUCF / rFlagsRegUCF operand pair; expands to
       // cmovL_regU rather than defining its own encoding.
7783   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7784   ins_cost(200);
7785   expand %{
7786     cmovL_regU(cop, cr, dst, src);
7787   %}
7788 %}
7789 
7790 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7791 %{
       // Unsigned-compare variant of cmovL_mem: CMoveL whose second data
       // input is a LoadL from $src; printed as "cmovq", wide REX encoding.
7792   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7793 
7794   ins_cost(200); // XXX
7795   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7796   opcode(0x0F, 0x40);
7797   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7798   ins_pipe(pipe_cmov_mem); // XXX
7799 %}
7800 
7801 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
       // CMoveL-from-memory for the cmpOpUCF / rFlagsRegUCF operand pair;
       // expands to cmovL_memU rather than defining its own encoding.
7802   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7803   ins_cost(200);
7804   expand %{
7805     cmovL_memU(cop, cr, dst, src);
7806   %}
7807 %}
7808 
7809 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7810 %{
       // Conditional move of a float register (CMoveF) under signed-compare
       // flags.  As the format shows, this is not a cmov instruction but a
       // branch on the negated condition around a "movss"; the bytes are
       // produced by enc_cmovf_branch (defined elsewhere).
7811   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7812 
7813   ins_cost(200); // XXX
7814   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7815             "movss     $dst, $src\n"
7816     "skip:" %}
7817   ins_encode(enc_cmovf_branch(cop, dst, src));
7818   ins_pipe(pipe_slow);
7819 %}
7820 
7821 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7822 // %{
7823 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7824 
7825 //   ins_cost(200); // XXX
7826 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7827 //             "movss     $dst, $src\n"
7828 //     "skip:" %}
7829 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7830 //   ins_pipe(pipe_slow);
7831 // %}
7832 
7833 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7834 %{
       // Unsigned-compare variant of cmovF_reg: same CMoveF match rule and
       // the same enc_cmovf_branch encoding (branch around a "movss").
7835   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7836 
7837   ins_cost(200); // XXX
7838   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7839             "movss     $dst, $src\n"
7840     "skip:" %}
7841   ins_encode(enc_cmovf_branch(cop, dst, src));
7842   ins_pipe(pipe_slow);
7843 %}
7844 
7845 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
       // CMoveF for the cmpOpUCF / rFlagsRegUCF operand pair; expands to
       // cmovF_regU rather than defining its own encoding.
7846   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7847   ins_cost(200);
7848   expand %{
7849     cmovF_regU(cop, cr, dst, src);
7850   %}
7851 %}
7852 
7853 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7854 %{
       // Conditional move of a double register (CMoveD) under signed-compare
       // flags.  Per the format, implemented as a branch on the negated
       // condition around a "movsd"; bytes come from enc_cmovd_branch
       // (defined elsewhere).
7855   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7856 
7857   ins_cost(200); // XXX
7858   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7859             "movsd     $dst, $src\n"
7860     "skip:" %}
7861   ins_encode(enc_cmovd_branch(cop, dst, src));
7862   ins_pipe(pipe_slow);
7863 %}
7864 
7865 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7866 %{
       // Unsigned-compare variant of cmovD_reg: same CMoveD match rule and
       // the same enc_cmovd_branch encoding (branch around a "movsd").
7867   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7868 
7869   ins_cost(200); // XXX
7870   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7871             "movsd     $dst, $src\n"
7872     "skip:" %}
7873   ins_encode(enc_cmovd_branch(cop, dst, src));
7874   ins_pipe(pipe_slow);
7875 %}
7876 
7877 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
       // CMoveD for the cmpOpUCF / rFlagsRegUCF operand pair; expands to
       // cmovD_regU rather than defining its own encoding.
7878   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7879   ins_cost(200);
7880   expand %{
7881     cmovD_regU(cop, cr, dst, src);
7882   %}
7883 %}
7884 
7885 //----------Arithmetic Instructions--------------------------------------------
7886 //----------Addition Instructions----------------------------------------------
7887 
7888 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7889 %{
       // Int add, register += register (two-address form: dst is both an
       // input and the result).  Printed as "addl"; the flags register is
       // declared killed.
7890   match(Set dst (AddI dst src));
7891   effect(KILL cr);
7892 
7893   format %{ "addl    $dst, $src\t# int" %}
7894   opcode(0x03);
7895   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7896   ins_pipe(ialu_reg_reg);
7897 %}
7898 
7899 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7900 %{
       // Int add, register += immediate.  Printed as "addl"; flags killed.
       // OpcSErm / Con8or32 are defined elsewhere; by their names they
       // presumably choose between an 8-bit and a 32-bit immediate form.
7901   match(Set dst (AddI dst src));
7902   effect(KILL cr);
7903 
7904   format %{ "addl    $dst, $src\t# int" %}
7905   opcode(0x81, 0x00); /* /0 id */
7906   ins_encode(OpcSErm(dst, src), Con8or32(src));
7907   ins_pipe( ialu_reg );
7908 %}
7909 
7910 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7911 %{
       // Int add, register += value loaded from memory: the LoadI is folded
       // into the add as a memory operand.  Printed as "addl"; flags killed.
7912   match(Set dst (AddI dst (LoadI src)));
7913   effect(KILL cr);
7914 
7915   ins_cost(125); // XXX
7916   format %{ "addl    $dst, $src\t# int" %}
7917   opcode(0x03);
7918   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7919   ins_pipe(ialu_reg_mem);
7920 %}
7921 
7922 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7923 %{
       // Int add directly to memory: matches a StoreI to $dst of
       // (LoadI $dst + src), i.e. a load/add/store on the same address folded
       // into one "addl".  Note the encoding passes (src, dst): the register
       // is the reg field and the memory operand is the r/m field.
7924   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7925   effect(KILL cr);
7926 
7927   ins_cost(150); // XXX
7928   format %{ "addl    $dst, $src\t# int" %}
7929   opcode(0x01); /* Opcode 01 /r */
7930   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7931   ins_pipe(ialu_mem_reg);
7932 %}
7933 
7934 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7935 %{
       // Int add of an immediate directly to memory: matches a StoreI to $dst
       // of (LoadI $dst + src).  Printed as "addl"; flags killed.  The /0
       // opcode extension is supplied via RM_opc_mem(0x00, dst).
7936   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7937   effect(KILL cr);
7938 
7939   ins_cost(125); // XXX
7940   format %{ "addl    $dst, $src\t# int" %}
7941   opcode(0x81); /* Opcode 81 /0 id */
7942   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7943   ins_pipe(ialu_mem_imm);
7944 %}
7945 
7946 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7947 %{
       // Int increment of a register: matches AddI of dst with an immI1
       // operand, and is only eligible when the UseIncDec flag is set.
       // Printed as "incl"; flags killed.  $src is not encoded.
7948   predicate(UseIncDec);
7949   match(Set dst (AddI dst src));
7950   effect(KILL cr);
7951 
7952   format %{ "incl    $dst\t# int" %}
7953   opcode(0xFF, 0x00); // FF /0
7954   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7955   ins_pipe(ialu_reg);
7956 %}
7957 
7958 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7959 %{
       // Int increment directly in memory: matches a StoreI to $dst of
       // (LoadI $dst + immI1), only when UseIncDec is set.  Printed as
       // "incl"; flags killed.  $src is not encoded.
7960   predicate(UseIncDec);
7961   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7962   effect(KILL cr);
7963 
7964   ins_cost(125); // XXX
7965   format %{ "incl    $dst\t# int" %}
7966   opcode(0xFF); /* Opcode FF /0 */
7967   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7968   ins_pipe(ialu_mem_imm);
7969 %}
7970 
7971 // XXX why does that use AddI
7972 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7973 %{
       // Int decrement of a register, only when UseIncDec is set.  The rule
       // is written as AddI of dst with an immI_M1 operand (presumably the
       // constant -1 - operand defined elsewhere), which is why it matches
       // AddI rather than SubI.  Printed as "decl"; flags killed.
7974   predicate(UseIncDec);
7975   match(Set dst (AddI dst src));
7976   effect(KILL cr);
7977 
7978   format %{ "decl    $dst\t# int" %}
7979   opcode(0xFF, 0x01); // FF /1
7980   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7981   ins_pipe(ialu_reg);
7982 %}
7983 
7984 // XXX why does that use AddI
7985 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7986 %{
       // Int decrement directly in memory, only when UseIncDec is set:
       // matches a StoreI to $dst of (LoadI $dst + immI_M1).  Printed as
       // "decl"; the /1 opcode extension is supplied via RM_opc_mem(0x01, dst).
7987   predicate(UseIncDec);
7988   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7989   effect(KILL cr);
7990 
7991   ins_cost(125); // XXX
7992   format %{ "decl    $dst\t# int" %}
7993   opcode(0xFF); /* Opcode FF /1 */
7994   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7995   ins_pipe(ialu_mem_imm);
7996 %}
7997 
7998 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7999 %{
       // Three-operand int add of register + immediate: dst is distinct from
       // src0, and no flags operand is declared or killed.  Printed as
       // "addr32 leal"; the encoding starts with an explicit 0x67 byte
       // ahead of the REX prefix and the 0x8D opcode.
8000   match(Set dst (AddI src0 src1));
8001 
8002   ins_cost(110);
8003   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8004   opcode(0x8D); /* 0x8D /r */
8005   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8006   ins_pipe(ialu_reg_reg);
8007 %}
8008 
8009 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8010 %{
       // Long add, register += register (two-address form).  Printed as
       // "addq", wide REX encoding; flags killed.
8011   match(Set dst (AddL dst src));
8012   effect(KILL cr);
8013 
8014   format %{ "addq    $dst, $src\t# long" %}
8015   opcode(0x03);
8016   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8017   ins_pipe(ialu_reg_reg);
8018 %}
8019 
8020 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8021 %{
       // Long add, register += immediate restricted to the immL32 operand
       // class.  Printed as "addq"; flags killed.
8022   match(Set dst (AddL dst src));
8023   effect(KILL cr);
8024 
8025   format %{ "addq    $dst, $src\t# long" %}
8026   opcode(0x81, 0x00); /* /0 id */
8027   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8028   ins_pipe( ialu_reg );
8029 %}
8030 
8031 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8032 %{
       // Long add, register += value loaded from memory (LoadL folded into
       // the add).  Printed as "addq", wide REX encoding; flags killed.
8033   match(Set dst (AddL dst (LoadL src)));
8034   effect(KILL cr);
8035 
8036   ins_cost(125); // XXX
8037   format %{ "addq    $dst, $src\t# long" %}
8038   opcode(0x03);
8039   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8040   ins_pipe(ialu_reg_mem);
8041 %}
8042 
8043 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8044 %{
       // Long add directly to memory: matches a StoreL to $dst of
       // (LoadL $dst + src).  Printed as "addq"; flags killed.  The encoding
       // passes (src, dst): register in the reg field, memory as r/m.
8045   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8046   effect(KILL cr);
8047 
8048   ins_cost(150); // XXX
8049   format %{ "addq    $dst, $src\t# long" %}
8050   opcode(0x01); /* Opcode 01 /r */
8051   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8052   ins_pipe(ialu_mem_reg);
8053 %}
8054 
8055 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8056 %{
       // Long add of an immL32 immediate directly to memory: matches a StoreL
       // to $dst of (LoadL $dst + src).  Printed as "addq"; flags killed.
8057   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8058   effect(KILL cr);
8059 
8060   ins_cost(125); // XXX
8061   format %{ "addq    $dst, $src\t# long" %}
8062   opcode(0x81); /* Opcode 81 /0 id */
8063   ins_encode(REX_mem_wide(dst),
8064              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8065   ins_pipe(ialu_mem_imm);
8066 %}
8067 
8068 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8069 %{
       // Long increment of a register: matches AddL of dst with an immL1
       // operand, and is only eligible when the UseIncDec flag is set.
       // Printed as "incq" with the wide REX prefix; flags killed.
       // dst is declared as a long register (rRegL) so that the operand
       // class agrees with the AddL result and with the sibling decL_rReg
       // rule; it was previously declared rRegI.
8070   predicate(UseIncDec);
8071   match(Set dst (AddL dst src));
8072   effect(KILL cr);
8073 
8074   format %{ "incq    $dst\t# long" %}
8075   opcode(0xFF, 0x00); // FF /0
8076   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8077   ins_pipe(ialu_reg);
8078 %}
8079 
8080 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8081 %{
       // Long increment directly in memory, only when UseIncDec is set:
       // matches a StoreL to $dst of (LoadL $dst + immL1).  Printed as
       // "incq"; flags killed.  $src is not encoded.
8082   predicate(UseIncDec);
8083   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8084   effect(KILL cr);
8085 
8086   ins_cost(125); // XXX
8087   format %{ "incq    $dst\t# long" %}
8088   opcode(0xFF); /* Opcode FF /0 */
8089   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8090   ins_pipe(ialu_mem_imm);
8091 %}
8092 
8093 // XXX why does that use AddL
8094 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8095 %{
       // Long decrement of a register, only when UseIncDec is set.  Written
       // as AddL of dst with an immL_M1 operand (presumably the constant -1 -
       // operand defined elsewhere), hence AddL rather than SubL.
       // Printed as "decq"; flags killed.
8096   predicate(UseIncDec);
8097   match(Set dst (AddL dst src));
8098   effect(KILL cr);
8099 
8100   format %{ "decq    $dst\t# long" %}
8101   opcode(0xFF, 0x01); // FF /1
8102   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8103   ins_pipe(ialu_reg);
8104 %}
8105 
8106 // XXX why does that use AddL
8107 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8108 %{
       // Long decrement directly in memory, only when UseIncDec is set:
       // matches a StoreL to $dst of (LoadL $dst + immL_M1).  Printed as
       // "decq"; the /1 opcode extension is supplied via RM_opc_mem(0x01, dst).
8109   predicate(UseIncDec);
8110   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8111   effect(KILL cr);
8112 
8113   ins_cost(125); // XXX
8114   format %{ "decq    $dst\t# long" %}
8115   opcode(0xFF); /* Opcode FF /1 */
8116   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8117   ins_pipe(ialu_mem_imm);
8118 %}
8119 
8120 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8121 %{
       // Three-operand long add of register + immL32 immediate: dst is
       // distinct from src0 and no flags operand is declared or killed.
       // Printed as "leaq", wide REX encoding.
8122   match(Set dst (AddL src0 src1));
8123 
8124   ins_cost(110);
8125   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8126   opcode(0x8D); /* 0x8D /r */
8127   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8128   ins_pipe(ialu_reg_reg);
8129 %}
8130 
8131 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8132 %{
       // Pointer add, pointer register += long register offset (AddP).
       // Printed as "addq", wide REX encoding; flags killed.
8133   match(Set dst (AddP dst src));
8134   effect(KILL cr);
8135 
8136   format %{ "addq    $dst, $src\t# ptr" %}
8137   opcode(0x03);
8138   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8139   ins_pipe(ialu_reg_reg);
8140 %}
8141 
8142 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8143 %{
       // Pointer add, pointer register += immL32 immediate offset (AddP).
       // Printed as "addq"; flags killed.
8144   match(Set dst (AddP dst src));
8145   effect(KILL cr);
8146 
8147   format %{ "addq    $dst, $src\t# ptr" %}
8148   opcode(0x81, 0x00); /* /0 id */
8149   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8150   ins_pipe( ialu_reg );
8151 %}
8152 
8153 // XXX addP mem ops ????
8154 
8155 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8156 %{
       // Three-operand pointer add of register + immL32 immediate: dst is
       // distinct from src0 and no flags operand is declared or killed.
       // Printed as "leaq", wide REX encoding.
8157   match(Set dst (AddP src0 src1));
8158 
8159   ins_cost(110);
8160   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8161   opcode(0x8D); /* 0x8D /r */
8162   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8163   ins_pipe(ialu_reg_reg);
8164 %}
8165 
8166 instruct checkCastPP(rRegP dst)
8167 %{
       // CheckCastPP on a pointer register: emits no machine code (size 0,
       // empty encoding); dst is both input and result.
8168   match(Set dst (CheckCastPP dst));
8169 
8170   size(0);
8171   format %{ "# checkcastPP of $dst" %}
8172   ins_encode(/* empty encoding */);
8173   ins_pipe(empty);
8174 %}
8175 
8176 instruct castPP(rRegP dst)
8177 %{
       // CastPP on a pointer register: emits no machine code (size 0, empty
       // encoding); dst is both input and result.
8178   match(Set dst (CastPP dst));
8179 
8180   size(0);
8181   format %{ "# castPP of $dst" %}
8182   ins_encode(/* empty encoding */);
8183   ins_pipe(empty);
8184 %}
8185 
8186 instruct castII(rRegI dst)
8187 %{
       // CastII on an int register: emits no machine code (size 0, empty
       // encoding) and is given an explicit cost of 0.
8188   match(Set dst (CastII dst));
8189 
8190   size(0);
8191   format %{ "# castII of $dst" %}
8192   ins_encode(/* empty encoding */);
8193   ins_cost(0);
8194   ins_pipe(empty);
8195 %}
8196 
8197 // LoadP-locked same as a regular LoadP when used with compare-swap
8198 instruct loadPLocked(rRegP dst, memory mem)
8199 %{
       // LoadPLocked: loads a pointer from $mem into $dst.  Printed as a
       // plain "movq" with the wide REX prefix; no extra prefix or fence is
       // part of this encoding.
8200   match(Set dst (LoadPLocked mem));
8201 
8202   ins_cost(125); // XXX
8203   format %{ "movq    $dst, $mem\t# ptr locked" %}
8204   opcode(0x8B);
8205   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8206   ins_pipe(ialu_reg_mem); // XXX
8207 %}
8208 
8209 // LoadL-locked - same as a regular LoadL when used with compare-swap
8210 instruct loadLLocked(rRegL dst, memory mem)
8211 %{
       // LoadLLocked: loads a long from $mem into $dst.  Printed as a plain
       // "movq" with the wide REX prefix; same encoding as loadPLocked.
8212   match(Set dst (LoadLLocked mem));
8213 
8214   ins_cost(125); // XXX
8215   format %{ "movq    $dst, $mem\t# long locked" %}
8216   opcode(0x8B);
8217   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8218   ins_pipe(ialu_reg_mem); // XXX
8219 %}
8220 
8221 // Conditional-store of the updated heap-top.
8222 // Used during allocation of the shared heap.
8223 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8224 
8225 instruct storePConditional(memory heap_top_ptr,
8226                            rax_RegP oldval, rRegP newval,
8227                            rFlagsReg cr)
8228 %{
       // StorePConditional: the result of the rule is the flags register
       // (Set cr ...).  oldval is constrained to the rax_RegP operand class.
       // Encoded as a lock-prefixed, wide cmpxchg (0x0F 0xB1) of $newval
       // against $heap_top_ptr.
       // NOTE(review): unlike storeIConditional / storeLConditional, no
       // KILL oldval effect is declared here - confirm that is intended.
8229   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8230 
8231   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8232             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8233   opcode(0x0F, 0xB1);
8234   ins_encode(lock_prefix,
8235              REX_reg_mem_wide(newval, heap_top_ptr),
8236              OpcP, OpcS,
8237              reg_mem(newval, heap_top_ptr));
8238   ins_pipe(pipe_cmpxchg);
8239 %}
8240 
8241 // Conditional-store of an int value.
8242 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8243 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8244 %{
       // StoreIConditional: result is the flags register (Set cr ...).
       // oldval is constrained to rax_RegI and declared killed.  Encoded as
       // a lock-prefixed cmpxchg (0x0F 0xB1) with the non-wide REX prefix.
8245   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8246   effect(KILL oldval);
8247 
8248   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8249   opcode(0x0F, 0xB1);
8250   ins_encode(lock_prefix,
8251              REX_reg_mem(newval, mem),
8252              OpcP, OpcS,
8253              reg_mem(newval, mem));
8254   ins_pipe(pipe_cmpxchg);
8255 %}
8256 
8257 // Conditional-store of a long value.
8258 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8259 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8260 %{
       // StoreLConditional: result is the flags register (Set cr ...).
       // oldval is constrained to rax_RegL and declared killed.  Encoded as
       // a lock-prefixed cmpxchg (0x0F 0xB1) with the wide REX prefix.
8261   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8262   effect(KILL oldval);
8263 
8264   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8265   opcode(0x0F, 0xB1);
8266   ins_encode(lock_prefix,
8267              REX_reg_mem_wide(newval, mem),
8268              OpcP, OpcS,
8269              reg_mem(newval, mem));
8270   ins_pipe(pipe_cmpxchg);
8271 %}
8272 
8273 
8274 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8275 instruct compareAndSwapP(rRegI res,
8276                          memory mem_ptr,
8277                          rax_RegP oldval, rRegP newval,
8278                          rFlagsReg cr)
8279 %{
       // CompareAndSwapP producing an int result in $res.  oldval is
       // constrained to rax_RegP; both the flags and oldval are declared
       // killed.  The encoding is three instructions back to back:
       //   1. lock-prefixed wide cmpxchg (0x0F 0xB1) of $newval with $mem_ptr
       //   2. sete   (0x0F 0x94) into $res
       //   3. movzbl (0x0F 0xB6) $res, $res to widen the byte result
8280   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8281   effect(KILL cr, KILL oldval);
8282 
8283   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8284             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8285             "sete    $res\n\t"
8286             "movzbl  $res, $res" %}
8287   opcode(0x0F, 0xB1);
8288   ins_encode(lock_prefix,
8289              REX_reg_mem_wide(newval, mem_ptr),
8290              OpcP, OpcS,
8291              reg_mem(newval, mem_ptr),
8292              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8293              REX_reg_breg(res, res), // movzbl
8294              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8295   ins_pipe( pipe_cmpxchg );
8296 %}
8297 
8298 instruct compareAndSwapL(rRegI res,
8299                          memory mem_ptr,
8300                          rax_RegL oldval, rRegL newval,
8301                          rFlagsReg cr)
8302 %{
       // CompareAndSwapL producing an int result in $res.  oldval is
       // constrained to rax_RegL; flags and oldval are declared killed.
       // Encoding: lock-prefixed wide cmpxchg (0x0F 0xB1), then sete into
       // $res, then movzbl $res, $res.
8303   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8304   effect(KILL cr, KILL oldval);
8305 
8306   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8307             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8308             "sete    $res\n\t"
8309             "movzbl  $res, $res" %}
8310   opcode(0x0F, 0xB1);
8311   ins_encode(lock_prefix,
8312              REX_reg_mem_wide(newval, mem_ptr),
8313              OpcP, OpcS,
8314              reg_mem(newval, mem_ptr),
8315              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8316              REX_reg_breg(res, res), // movzbl
8317              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8318   ins_pipe( pipe_cmpxchg );
8319 %}
8320 
8321 instruct compareAndSwapI(rRegI res,
8322                          memory mem_ptr,
8323                          rax_RegI oldval, rRegI newval,
8324                          rFlagsReg cr)
8325 %{
       // CompareAndSwapI producing an int result in $res.  oldval is
       // constrained to rax_RegI; flags and oldval are declared killed.
       // Encoding: lock-prefixed cmpxchg (0x0F 0xB1) with the non-wide REX
       // prefix, then sete into $res, then movzbl $res, $res.
8326   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8327   effect(KILL cr, KILL oldval);
8328 
8329   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8330             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8331             "sete    $res\n\t"
8332             "movzbl  $res, $res" %}
8333   opcode(0x0F, 0xB1);
8334   ins_encode(lock_prefix,
8335              REX_reg_mem(newval, mem_ptr),
8336              OpcP, OpcS,
8337              reg_mem(newval, mem_ptr),
8338              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8339              REX_reg_breg(res, res), // movzbl
8340              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8341   ins_pipe( pipe_cmpxchg );
8342 %}
8343 
8344 
8345 instruct compareAndSwapN(rRegI res,
8346                           memory mem_ptr,
8347                           rax_RegN oldval, rRegN newval,
8348                           rFlagsReg cr) %{
       // CompareAndSwapN (compressed-pointer operands) producing an int
       // result in $res.  oldval is constrained to rax_RegN; flags and oldval
       // are declared killed.  Encoding is the same shape as compareAndSwapI:
       // lock-prefixed non-wide cmpxchg, sete into $res, movzbl $res, $res.
8349   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8350   effect(KILL cr, KILL oldval);
8351 
8352   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8353             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8354             "sete    $res\n\t"
8355             "movzbl  $res, $res" %}
8356   opcode(0x0F, 0xB1);
8357   ins_encode(lock_prefix,
8358              REX_reg_mem(newval, mem_ptr),
8359              OpcP, OpcS,
8360              reg_mem(newval, mem_ptr),
8361              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8362              REX_reg_breg(res, res), // movzbl
8363              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8364   ins_pipe( pipe_cmpxchg );
8365 %}
8366 
8367 //----------Subtraction Instructions-------------------------------------------
8368 
8369 // Integer Subtraction Instructions
8370 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8371 %{
       // Int subtract, register -= register (two-address form).  Printed as
       // "subl"; flags killed.
8372   match(Set dst (SubI dst src));
8373   effect(KILL cr);
8374 
8375   format %{ "subl    $dst, $src\t# int" %}
8376   opcode(0x2B);
8377   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8378   ins_pipe(ialu_reg_reg);
8379 %}
8380 
8381 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8382 %{
       // Int subtract, register -= immediate.  Printed as "subl"; flags
       // killed.  The secondary opcode value 0x05 is the /5 extension.
8383   match(Set dst (SubI dst src));
8384   effect(KILL cr);
8385 
8386   format %{ "subl    $dst, $src\t# int" %}
8387   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8388   ins_encode(OpcSErm(dst, src), Con8or32(src));
8389   ins_pipe(ialu_reg);
8390 %}
8391 
8392 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8393 %{
       // Int subtract, register -= value loaded from memory (LoadI folded
       // into the subtract).  Printed as "subl"; flags killed.
8394   match(Set dst (SubI dst (LoadI src)));
8395   effect(KILL cr);
8396 
8397   ins_cost(125);
8398   format %{ "subl    $dst, $src\t# int" %}
8399   opcode(0x2B);
8400   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8401   ins_pipe(ialu_reg_mem);
8402 %}
8403 
8404 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8405 %{
       // Int subtract directly from memory: matches a StoreI to $dst of
       // (LoadI $dst - src).  Printed as "subl"; flags killed.  The encoding
       // passes (src, dst): register in the reg field, memory as r/m.
8406   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8407   effect(KILL cr);
8408 
8409   ins_cost(150);
8410   format %{ "subl    $dst, $src\t# int" %}
8411   opcode(0x29); /* Opcode 29 /r */
8412   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8413   ins_pipe(ialu_mem_reg);
8414 %}
8415 
8416 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8417 %{
       // Int subtract of an immediate directly from memory: matches a StoreI
       // to $dst of (LoadI $dst - src).  Printed as "subl"; flags killed.
       // The /5 opcode extension is supplied via RM_opc_mem(0x05, dst).
8418   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8419   effect(KILL cr);
8420 
8421   ins_cost(125); // XXX
8422   format %{ "subl    $dst, $src\t# int" %}
8423   opcode(0x81); /* Opcode 81 /5 id */
8424   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8425   ins_pipe(ialu_mem_imm);
8426 %}
8427 
8428 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8429 %{
       // Long subtract, register -= register (two-address form).  Printed
       // as "subq", wide REX encoding; flags killed.
8430   match(Set dst (SubL dst src));
8431   effect(KILL cr);
8432 
8433   format %{ "subq    $dst, $src\t# long" %}
8434   opcode(0x2B);
8435   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8436   ins_pipe(ialu_reg_reg);
8437 %}
8438 
8439 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8440 %{
       // Long subtract, register -= immL32 immediate.  Printed as "subq"
       // with the wide encoding (OpcSErm_wide); flags killed.
       // dst is declared as a long register (rRegL) so that the operand
       // class agrees with the SubL result and with the sibling subL_rReg
       // rule; it was previously declared rRegI.
8441   match(Set dst (SubL dst src));
8442   effect(KILL cr);
8443 
8444   format %{ "subq    $dst, $src\t# long" %}
8445   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8446   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8447   ins_pipe(ialu_reg);
8448 %}
8449 
8450 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8451 %{
       // Long subtract, register -= value loaded from memory (LoadL folded
       // into the subtract).  Printed as "subq", wide REX; flags killed.
8452   match(Set dst (SubL dst (LoadL src)));
8453   effect(KILL cr);
8454 
8455   ins_cost(125);
8456   format %{ "subq    $dst, $src\t# long" %}
8457   opcode(0x2B);
8458   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8459   ins_pipe(ialu_reg_mem);
8460 %}
8461 
8462 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8463 %{
       // Long subtract directly from memory: matches a StoreL to $dst of
       // (LoadL $dst - src).  Printed as "subq"; flags killed.  The encoding
       // passes (src, dst): register in the reg field, memory as r/m.
8464   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8465   effect(KILL cr);
8466 
8467   ins_cost(150);
8468   format %{ "subq    $dst, $src\t# long" %}
8469   opcode(0x29); /* Opcode 29 /r */
8470   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8471   ins_pipe(ialu_mem_reg);
8472 %}
8473 
8474 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8475 %{
       // Long subtract of an immL32 immediate directly from memory: matches a
       // StoreL to $dst of (LoadL $dst - src).  Printed as "subq"; flags
       // killed.  The /5 extension is supplied via RM_opc_mem(0x05, dst).
8476   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8477   effect(KILL cr);
8478 
8479   ins_cost(125); // XXX
8480   format %{ "subq    $dst, $src\t# long" %}
8481   opcode(0x81); /* Opcode 81 /5 id */
8482   ins_encode(REX_mem_wide(dst),
8483              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8484   ins_pipe(ialu_mem_imm);
8485 %}
8486 
8487 // Subtract from a pointer
8488 // XXX hmpf???
8489 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8490 %{
       // Subtract an int from a pointer: matches AddP of dst with the
       // negation (SubI zero src) of an int register, and prints it as a
       // single "subq".  $zero is part of the match only and is not encoded.
       // NOTE(review): src is an int register (rRegI) but the encoding uses
       // the wide REX form - confirm this rule is reachable and intended.
8491   match(Set dst (AddP dst (SubI zero src)));
8492   effect(KILL cr);
8493 
8494   format %{ "subq    $dst, $src\t# ptr - int" %}
8495   opcode(0x2B);
8496   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8497   ins_pipe(ialu_reg_reg);
8498 %}
8499 
8500 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8501 %{
       // Int negate of a register: matches SubI of an immI0 operand and dst
       // (i.e. 0 - dst).  Printed as "negl"; flags killed.
8502   match(Set dst (SubI zero dst));
8503   effect(KILL cr);
8504 
8505   format %{ "negl    $dst\t# int" %}
8506   opcode(0xF7, 0x03);  // Opcode F7 /3
8507   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8508   ins_pipe(ialu_reg);
8509 %}
8510 
8511 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8512 %{
       // Int negate directly in memory: matches a StoreI to $dst of
       // (0 - LoadI $dst).  Printed as "negl"; flags killed.  The /3
       // extension comes from the secondary opcode via RM_opc_mem.
       // NOTE(review): uses the ialu_reg pipe class although dst is a memory
       // operand; the inc/dec memory forms use ialu_mem_imm - confirm.
8513   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8514   effect(KILL cr);
8515 
8516   format %{ "negl    $dst\t# int" %}
8517   opcode(0xF7, 0x03);  // Opcode F7 /3
8518   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8519   ins_pipe(ialu_reg);
8520 %}
8521 
8522 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8523 %{
       // Long negate of a register: matches SubL of an immL0 operand and dst
       // (i.e. 0 - dst).  Printed as "negq", wide REX; flags killed.
8524   match(Set dst (SubL zero dst));
8525   effect(KILL cr);
8526 
8527   format %{ "negq    $dst\t# long" %}
8528   opcode(0xF7, 0x03);  // Opcode F7 /3
8529   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8530   ins_pipe(ialu_reg);
8531 %}
8532 
8533 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8534 %{
       // Long negate directly in memory: matches a StoreL to $dst of
       // (0 - LoadL $dst).  Printed as "negq", wide REX; flags killed.
       // NOTE(review): uses the ialu_reg pipe class although dst is a memory
       // operand; the inc/dec memory forms use ialu_mem_imm - confirm.
8535   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8536   effect(KILL cr);
8537 
8538   format %{ "negq    $dst\t# long" %}
8539   opcode(0xF7, 0x03);  // Opcode F7 /3
8540   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8541   ins_pipe(ialu_reg);
8542 %}
8543 
8544 
8545 //----------Multiplication/Division Instructions-------------------------------
8546 // Integer Multiplication Instructions
8547 // Multiply Register
8548 
8549 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8550 %{
       // Int multiply, register *= register (two-address form).  Printed as
       // "imull" using the two-byte 0x0F 0xAF opcode; flags killed.
8551   match(Set dst (MulI dst src));
8552   effect(KILL cr);
8553 
8554   ins_cost(300);
8555   format %{ "imull   $dst, $src\t# int" %}
8556   opcode(0x0F, 0xAF);
8557   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8558   ins_pipe(ialu_reg_reg_alu0);
8559 %}
8560 
8561 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8562 %{
       // Three-operand int multiply: dst = src * immediate, with dst distinct
       // from src.  Printed as "imull $dst, $src, $imm"; flags killed.
8563   match(Set dst (MulI src imm));
8564   effect(KILL cr);
8565 
8566   ins_cost(300);
8567   format %{ "imull   $dst, $src, $imm\t# int" %}
8568   opcode(0x69); /* 69 /r id */
8569   ins_encode(REX_reg_reg(dst, src),
8570              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8571   ins_pipe(ialu_reg_reg_alu0);
8572 %}
8573 
8574 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8575 %{
       // Int multiply, register *= value loaded from memory (LoadI folded
       // into the multiply).  Printed as "imull"; flags killed.
8576   match(Set dst (MulI dst (LoadI src)));
8577   effect(KILL cr);
8578 
8579   ins_cost(350);
8580   format %{ "imull   $dst, $src\t# int" %}
8581   opcode(0x0F, 0xAF);
8582   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8583   ins_pipe(ialu_reg_mem_alu0);
8584 %}
8585 
8586 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8587 %{
       // Three-operand int multiply with a memory source: dst = (LoadI src)
       // * immediate.  Printed as "imull $dst, $src, $imm"; flags killed.
8588   match(Set dst (MulI (LoadI src) imm));
8589   effect(KILL cr);
8590 
8591   ins_cost(300);
8592   format %{ "imull   $dst, $src, $imm\t# int" %}
8593   opcode(0x69); /* 69 /r id */
8594   ins_encode(REX_reg_mem(dst, src),
8595              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8596   ins_pipe(ialu_reg_mem_alu0);
8597 %}
8598 
8599 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8600 %{
       // Long multiply, register *= register (two-address form).  Printed as
       // "imulq", wide REX with the 0x0F 0xAF opcode; flags killed.
8601   match(Set dst (MulL dst src));
8602   effect(KILL cr);
8603 
8604   ins_cost(300);
8605   format %{ "imulq   $dst, $src\t# long" %}
8606   opcode(0x0F, 0xAF);
8607   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8608   ins_pipe(ialu_reg_reg_alu0);
8609 %}
8610 
8611 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8612 %{
       // Three-operand long multiply: dst = src * immL32 immediate, with dst
       // distinct from src.  Printed as "imulq $dst, $src, $imm"; flags
       // killed.
8613   match(Set dst (MulL src imm));
8614   effect(KILL cr);
8615 
8616   ins_cost(300);
8617   format %{ "imulq   $dst, $src, $imm\t# long" %}
8618   opcode(0x69); /* 69 /r id */
8619   ins_encode(REX_reg_reg_wide(dst, src),
8620              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8621   ins_pipe(ialu_reg_reg_alu0);
8622 %}
8623 
8624 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8625 %{
       // Long multiply, register *= value loaded from memory (LoadL folded
       // into the multiply).  Printed as "imulq", wide REX; flags killed.
8626   match(Set dst (MulL dst (LoadL src)));
8627   effect(KILL cr);
8628 
8629   ins_cost(350);
8630   format %{ "imulq   $dst, $src\t# long" %}
8631   opcode(0x0F, 0xAF);
8632   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8633   ins_pipe(ialu_reg_mem_alu0);
8634 %}
8635 
8636 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8637 %{
       // Three-operand long multiply with a memory source: dst = (LoadL src)
       // * immL32 immediate.  Printed as "imulq $dst, $src, $imm"; flags
       // killed.
8638   match(Set dst (MulL (LoadL src) imm));
8639   effect(KILL cr);
8640 
8641   ins_cost(300);
8642   format %{ "imulq   $dst, $src, $imm\t# long" %}
8643   opcode(0x69); /* 69 /r id */
8644   ins_encode(REX_reg_mem_wide(dst, src),
8645              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8646   ins_pipe(ialu_reg_mem_alu0);
8647 %}
8648 
8649 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8650 %{
       // MulHiL (high half of a long multiply).  The operand classes pin
       // the result to rdx_RegL and one input to rax_RegL, while src is
       // taken from no_rax_RegL.  rax is both used and killed (USE_KILL);
       // flags are killed.  Only $src is encoded explicitly (F7 /5 with a
       // wide REX prefix); per the format the operation is on RDX:RAX.
8651   match(Set dst (MulHiL src rax));
8652   effect(USE_KILL rax, KILL cr);
8653 
8654   ins_cost(300);
8655   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8656   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8657   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8658   ins_pipe(ialu_reg_reg_alu0);
8659 %}
8660 
// 32-bit signed divide. The dividend and the quotient both live in RAX; the
// divisor may be any int register other than RAX/RDX. RDX is clobbered.
// As the format shows, the emitted sequence tests for the
// 0x80000000 / -1 case and bypasses idivl for it, leaving RAX unchanged.
8661 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8662                    rFlagsReg cr)
8663 %{
8664   match(Set rax (DivI rax div));
8665   effect(KILL rdx, KILL cr);
8666 
8667   ins_cost(30*100+10*100); // XXX
8668   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8669             "jne,s   normal\n\t"
8670             "xorl    rdx, rdx\n\t"
8671             "cmpl    $div, -1\n\t"
8672             "je,s    done\n"
8673     "normal: cdql\n\t"
8674             "idivl   $div\n"
8675     "done:"        %}
8676   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdql_enc presumably emits everything in the format up
       // to and including cdql; the remaining items encode the idivl itself.
8677   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8678   ins_pipe(ialu_reg_reg_alu0);
8679 %}
8680 
// 64-bit signed divide. The dividend and the quotient both live in RAX; the
// divisor may be any long register other than RAX/RDX. RDX is clobbered.
// As the format shows, the emitted sequence tests for the
// 0x8000000000000000 / -1 case and bypasses idivq for it.
8681 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8682                    rFlagsReg cr)
8683 %{
8684   match(Set rax (DivL rax div));
8685   effect(KILL rdx, KILL cr);
8686 
8687   ins_cost(30*100+10*100); // XXX
8688   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8689             "cmpq    rax, rdx\n\t"
8690             "jne,s   normal\n\t"
8691             "xorl    rdx, rdx\n\t"
8692             "cmpq    $div, -1\n\t"
8693             "je,s    done\n"
8694     "normal: cdqq\n\t"
8695             "idivq   $div\n"
8696     "done:"        %}
8697   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdqq_enc presumably emits the guard sequence plus the
       // sign extension; the remaining items encode the idivq itself.
8698   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8699   ins_pipe(ialu_reg_reg_alu0);
8700 %}
8701 
8702 // Integer DIVMOD with Register, both quotient and mod results.
// Matches a DivModI node (no Set): RAX and RDX are both operands of the
// rule, so only the flags register is declared killed. The emitted code is
// the same guarded idivl sequence used by divI_rReg.
8703 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8704                              rFlagsReg cr)
8705 %{
8706   match(DivModI rax div);
8707   effect(KILL cr);
8708 
8709   ins_cost(30*100+10*100); // XXX
8710   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8711             "jne,s   normal\n\t"
8712             "xorl    rdx, rdx\n\t"
8713             "cmpl    $div, -1\n\t"
8714             "je,s    done\n"
8715     "normal: cdql\n\t"
8716             "idivl   $div\n"
8717     "done:"        %}
8718   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8719   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8720   ins_pipe(pipe_slow);
8721 %}
8722 
8723 // Long DIVMOD with Register, both quotient and mod results.
// Matches a DivModL node (no Set): RAX and RDX are both operands of the
// rule, so only the flags register is declared killed. The emitted code is
// the same guarded idivq sequence used by divL_rReg.
8724 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8725                              rFlagsReg cr)
8726 %{
8727   match(DivModL rax div);
8728   effect(KILL cr);
8729 
8730   ins_cost(30*100+10*100); // XXX
8731   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8732             "cmpq    rax, rdx\n\t"
8733             "jne,s   normal\n\t"
8734             "xorl    rdx, rdx\n\t"
8735             "cmpq    $div, -1\n\t"
8736             "je,s    done\n"
8737     "normal: cdqq\n\t"
8738             "idivq   $div\n"
8739     "done:"        %}
8740   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8741   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8742   ins_pipe(pipe_slow);
8743 %}
8744 
8745 //----------- DivL-By-Constant-Expansions--------------------------------------
8746 // DivI cases are handled by the compiler
8747 
8748 // Magic constant, reciprocal of 10: 0x6666666666666667 == ceil(2^66 / 10).
// Expand-only helper (no match rule); used by divL_10 to load the
// multiplier into a register.
8749 instruct loadConL_0x6666666666666667(rRegL dst)
8750 %{
8751   effect(DEF dst);
8752 
       // The printed constant must match the encoded one (16 hex digits).
8753   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8754   ins_encode(load_immL(dst, 0x6666666666666667));
8755   ins_pipe(ialu_reg);
8756 %}
8757 
// Expand-only helper (no match rule): one-operand signed multiply of RAX by
// src, defining dst (pinned to RDX). RAX is used and then killed.
// Instantiated by divL_10.
8758 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8759 %{
8760   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8761 
8762   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8763   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8764   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8765   ins_pipe(ialu_reg_reg_alu0);
8766 %}
8767 
// Expand-only helper (no match rule): arithmetic right shift of a long
// register by the fixed count 63 (0x3F), in place. Instantiated by divL_10.
8768 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8769 %{
8770   effect(USE_DEF dst, KILL cr);
8771 
8772   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8773   opcode(0xC1, 0x7); /* C1 /7 ib */
8774   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8775   ins_pipe(ialu_reg);
8776 %}
8777 
// Expand-only helper (no match rule): arithmetic right shift of a long
// register by the fixed count 2, in place. Instantiated by divL_10.
8778 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8779 %{
8780   effect(USE_DEF dst, KILL cr);
8781 
8782   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8783   opcode(0xC1, 0x7); /* C1 /7 ib */
8784   ins_encode(reg_opc_imm_wide(dst, 0x2));
8785   ins_pipe(ialu_reg);
8786 %}
8787 
// Long divide by the immL10 constant (presumably 10; operand defined
// elsewhere), expanded into a multiply by a scaled reciprocal instead of
// idivq:
//   dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
// where both shifts are arithmetic.
8788 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8789 %{
8790   match(Set dst (DivL src div));
8791 
8792   ins_cost((5+8)*100);
8793   expand %{
8794     rax_RegL rax;                     // Killed temp
8795     rFlagsReg cr;                     // Killed
8796     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8797     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8798     sarL_rReg_63(src, cr);            // sarq  src, 63
8799     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8800     subL_rReg(dst, src, cr);          // subq  rdx, src
8801   %}
8802 %}
8803 
8804 //-----------------------------------------------------------------------------
8805 
// 32-bit signed remainder. The dividend is in RAX and the result is taken
// from RDX; RAX is clobbered. The emitted sequence is the same guarded
// idivl as divI_rReg: for 0x80000000 % -1 the idivl is skipped and RDX is
// left at zero by the xorl.
8806 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8807                    rFlagsReg cr)
8808 %{
8809   match(Set rdx (ModI rax div));
8810   effect(KILL rax, KILL cr);
8811 
8812   ins_cost(300); // XXX
8813   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8814             "jne,s   normal\n\t"
8815             "xorl    rdx, rdx\n\t"
8816             "cmpl    $div, -1\n\t"
8817             "je,s    done\n"
8818     "normal: cdql\n\t"
8819             "idivl   $div\n"
8820     "done:"        %}
8821   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8822   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8823   ins_pipe(ialu_reg_reg_alu0);
8824 %}
8825 
// 64-bit signed remainder. The dividend is in RAX and the result is taken
// from RDX; RAX is clobbered. The emitted sequence is the same guarded
// idivq as divL_rReg: for 0x8000000000000000 % -1 the idivq is skipped and
// RDX is left at zero by the xorl.
8826 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8827                    rFlagsReg cr)
8828 %{
8829   match(Set rdx (ModL rax div));
8830   effect(KILL rax, KILL cr);
8831 
8832   ins_cost(300); // XXX
8833   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8834             "cmpq    rax, rdx\n\t"
8835             "jne,s   normal\n\t"
8836             "xorl    rdx, rdx\n\t"
8837             "cmpq    $div, -1\n\t"
8838             "je,s    done\n"
8839     "normal: cdqq\n\t"
8840             "idivq   $div\n"
8841     "done:"        %}
8842   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8843   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8844   ins_pipe(ialu_reg_reg_alu0);
8845 %}
8846 
8847 // Integer Shift Instructions
8848 // Shift Left by one (32-bit, register, in place).
// The count is implied by the D1 opcode, so no immediate byte is encoded.
8849 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8850 %{
8851   match(Set dst (LShiftI dst shift));
8852   effect(KILL cr);
8853 
8854   format %{ "sall    $dst, $shift" %}
8855   opcode(0xD1, 0x4); /* D1 /4 */
8856   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8857   ins_pipe(ialu_reg);
8858 %}
8859 
8860 // Shift Left by one (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction. The count is implied by the D1 opcode.
8861 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8862 %{
8863   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8864   effect(KILL cr);
8865 
       // No trailing tab: keeps the printed form identical to the sibling
       // shift instructions.
8866   format %{ "sall    $dst, $shift" %}
8867   opcode(0xD1, 0x4); /* D1 /4 */
8868   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8869   ins_pipe(ialu_mem_imm);
8870 %}
8871 
8872 // Shift Left by 8-bit immediate (32-bit, register, in place).
8873 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8874 %{
8875   match(Set dst (LShiftI dst shift));
8876   effect(KILL cr);
8877 
8878   format %{ "sall    $dst, $shift" %}
8879   opcode(0xC1, 0x4); /* C1 /4 ib */
       // reg_opc_imm (defined elsewhere) encodes the whole instruction,
       // including the count byte.
8880   ins_encode(reg_opc_imm(dst, shift));
8881   ins_pipe(ialu_reg);
8882 %}
8883 
8884 // Shift Left by 8-bit immediate (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
8885 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8886 %{
8887   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8888   effect(KILL cr);
8889 
8890   format %{ "sall    $dst, $shift" %}
8891   opcode(0xC1, 0x4); /* C1 /4 ib */
8892   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8893   ins_pipe(ialu_mem_imm);
8894 %}
8895 
8896 // Shift Left by variable (32-bit, register, in place).
// The count operand is pinned to RCX via the rcx_RegI operand class; it is
// not encoded explicitly.
8897 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8898 %{
8899   match(Set dst (LShiftI dst shift));
8900   effect(KILL cr);
8901 
8902   format %{ "sall    $dst, $shift" %}
8903   opcode(0xD3, 0x4); /* D3 /4 */
8904   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8905   ins_pipe(ialu_reg_reg);
8906 %}
8907 
8908 // Shift Left by variable (32-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
8909 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8910 %{
8911   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8912   effect(KILL cr);
8913 
8914   format %{ "sall    $dst, $shift" %}
8915   opcode(0xD3, 0x4); /* D3 /4 */
8916   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8917   ins_pipe(ialu_mem_reg);
8918 %}
8919 
8920 // Arithmetic shift right by one (32-bit, register, in place).
// The count is implied by the D1 opcode, so no immediate byte is encoded.
8921 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8922 %{
8923   match(Set dst (RShiftI dst shift));
8924   effect(KILL cr);
8925 
8926   format %{ "sarl    $dst, $shift" %}
8927   opcode(0xD1, 0x7); /* D1 /7 */
8928   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8929   ins_pipe(ialu_reg);
8930 %}
8931 
8932 // Arithmetic shift right by one (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
8933 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8934 %{
8935   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8936   effect(KILL cr);
8937 
8938   format %{ "sarl    $dst, $shift" %}
8939   opcode(0xD1, 0x7); /* D1 /7 */
8940   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8941   ins_pipe(ialu_mem_imm);
8942 %}
8943 
8944 // Arithmetic Shift Right by 8-bit immediate (32-bit, register, in place).
8945 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8946 %{
8947   match(Set dst (RShiftI dst shift));
8948   effect(KILL cr);
8949 
8950   format %{ "sarl    $dst, $shift" %}
8951   opcode(0xC1, 0x7); /* C1 /7 ib */
8952   ins_encode(reg_opc_imm(dst, shift));
       // Register-only form: use the register pipeline class, matching
       // salI_rReg_imm and shrI_rReg_imm (the memory class does not apply).
8953   ins_pipe(ialu_reg);
8954 %}
8955 
8956 // Arithmetic Shift Right by 8-bit immediate (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
8957 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8958 %{
8959   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8960   effect(KILL cr);
8961 
8962   format %{ "sarl    $dst, $shift" %}
8963   opcode(0xC1, 0x7); /* C1 /7 ib */
8964   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8965   ins_pipe(ialu_mem_imm);
8966 %}
8967 
8968 // Arithmetic Shift Right by variable (32-bit, register, in place).
// The count operand is pinned to RCX via the rcx_RegI operand class.
8969 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8970 %{
8971   match(Set dst (RShiftI dst shift));
8972   effect(KILL cr);
8973 
8974   format %{ "sarl    $dst, $shift" %}
8975   opcode(0xD3, 0x7); /* D3 /7 */
8976   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8977   ins_pipe(ialu_reg_reg);
8978 %}
8979 
8980 // Arithmetic Shift Right by variable (32-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
8981 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8982 %{
8983   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8984   effect(KILL cr);
8985 
8986   format %{ "sarl    $dst, $shift" %}
8987   opcode(0xD3, 0x7); /* D3 /7 */
8988   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8989   ins_pipe(ialu_mem_reg);
8990 %}
8991 
8992 // Logical shift right by one (32-bit, register, in place).
// The count is implied by the D1 opcode, so no immediate byte is encoded.
8993 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8994 %{
8995   match(Set dst (URShiftI dst shift));
8996   effect(KILL cr);
8997 
8998   format %{ "shrl    $dst, $shift" %}
8999   opcode(0xD1, 0x5); /* D1 /5 */
9000   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9001   ins_pipe(ialu_reg);
9002 %}
9003 
9004 // Logical shift right by one (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9005 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9006 %{
9007   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9008   effect(KILL cr);
9009 
9010   format %{ "shrl    $dst, $shift" %}
9011   opcode(0xD1, 0x5); /* D1 /5 */
9012   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9013   ins_pipe(ialu_mem_imm);
9014 %}
9015 
9016 // Logical Shift Right by 8-bit immediate (32-bit, register, in place).
9017 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9018 %{
9019   match(Set dst (URShiftI dst shift));
9020   effect(KILL cr);
9021 
9022   format %{ "shrl    $dst, $shift" %}
9023   opcode(0xC1, 0x5); /* C1 /5 ib */
       // reg_opc_imm (defined elsewhere) encodes the whole instruction,
       // including the count byte.
9024   ins_encode(reg_opc_imm(dst, shift));
9025   ins_pipe(ialu_reg);
9026 %}
9027 
9028 // Logical Shift Right by 8-bit immediate (32-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9029 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9030 %{
9031   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9032   effect(KILL cr);
9033 
9034   format %{ "shrl    $dst, $shift" %}
9035   opcode(0xC1, 0x5); /* C1 /5 ib */
9036   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9037   ins_pipe(ialu_mem_imm);
9038 %}
9039 
9040 // Logical Shift Right by variable (32-bit, register, in place).
// The count operand is pinned to RCX via the rcx_RegI operand class.
9041 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9042 %{
9043   match(Set dst (URShiftI dst shift));
9044   effect(KILL cr);
9045 
9046   format %{ "shrl    $dst, $shift" %}
9047   opcode(0xD3, 0x5); /* D3 /5 */
9048   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9049   ins_pipe(ialu_reg_reg);
9050 %}
9051 
9052 // Logical Shift Right by variable (32-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
9053 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9054 %{
9055   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9056   effect(KILL cr);
9057 
9058   format %{ "shrl    $dst, $shift" %}
9059   opcode(0xD3, 0x5); /* D3 /5 */
9060   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9061   ins_pipe(ialu_mem_reg);
9062 %}
9063 
9064 // Long Shift Instructions
9065 // Shift Left by one (64-bit, register, in place).
// Same opcode as the int form; the wide REX prefix selects 64-bit operation.
9066 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9067 %{
9068   match(Set dst (LShiftL dst shift));
9069   effect(KILL cr);
9070 
9071   format %{ "salq    $dst, $shift" %}
9072   opcode(0xD1, 0x4); /* D1 /4 */
9073   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9074   ins_pipe(ialu_reg);
9075 %}
9076 
9077 // Shift Left by one (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9078 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9079 %{
9080   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9081   effect(KILL cr);
9082 
9083   format %{ "salq    $dst, $shift" %}
9084   opcode(0xD1, 0x4); /* D1 /4 */
9085   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9086   ins_pipe(ialu_mem_imm);
9087 %}
9088 
9089 // Shift Left by 8-bit immediate (64-bit, register, in place).
9090 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9091 %{
9092   match(Set dst (LShiftL dst shift));
9093   effect(KILL cr);
9094 
9095   format %{ "salq    $dst, $shift" %}
9096   opcode(0xC1, 0x4); /* C1 /4 ib */
       // reg_opc_imm_wide (defined elsewhere) encodes the whole instruction,
       // including the count byte.
9097   ins_encode(reg_opc_imm_wide(dst, shift));
9098   ins_pipe(ialu_reg);
9099 %}
9100 
9101 // Shift Left by 8-bit immediate (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9102 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9103 %{
9104   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9105   effect(KILL cr);
9106 
9107   format %{ "salq    $dst, $shift" %}
9108   opcode(0xC1, 0x4); /* C1 /4 ib */
9109   ins_encode(REX_mem_wide(dst), OpcP,
9110              RM_opc_mem(secondary, dst), Con8or32(shift));
9111   ins_pipe(ialu_mem_imm);
9112 %}
9113 
9114 // Shift Left by variable (64-bit, register, in place).
// The count is an int operand pinned to RCX via the rcx_RegI class.
9115 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9116 %{
9117   match(Set dst (LShiftL dst shift));
9118   effect(KILL cr);
9119 
9120   format %{ "salq    $dst, $shift" %}
9121   opcode(0xD3, 0x4); /* D3 /4 */
9122   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9123   ins_pipe(ialu_reg_reg);
9124 %}
9125 
9126 // Shift Left by variable (64-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
9127 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9128 %{
9129   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9130   effect(KILL cr);
9131 
9132   format %{ "salq    $dst, $shift" %}
9133   opcode(0xD3, 0x4); /* D3 /4 */
9134   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9135   ins_pipe(ialu_mem_reg);
9136 %}
9137 
9138 // Arithmetic shift right by one (64-bit, register, in place).
// The count is implied by the D1 opcode, so no immediate byte is encoded.
9139 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9140 %{
9141   match(Set dst (RShiftL dst shift));
9142   effect(KILL cr);
9143 
9144   format %{ "sarq    $dst, $shift" %}
9145   opcode(0xD1, 0x7); /* D1 /7 */
9146   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9147   ins_pipe(ialu_reg);
9148 %}
9149 
9150 // Arithmetic shift right by one (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9151 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9152 %{
9153   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9154   effect(KILL cr);
9155 
9156   format %{ "sarq    $dst, $shift" %}
9157   opcode(0xD1, 0x7); /* D1 /7 */
9158   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9159   ins_pipe(ialu_mem_imm);
9160 %}
9161 
9162 // Arithmetic Shift Right by 8-bit immediate (64-bit, register, in place).
9163 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9164 %{
9165   match(Set dst (RShiftL dst shift));
9166   effect(KILL cr);
9167 
9168   format %{ "sarq    $dst, $shift" %}
9169   opcode(0xC1, 0x7); /* C1 /7 ib */
9170   ins_encode(reg_opc_imm_wide(dst, shift));
       // Register-only form: use the register pipeline class, matching
       // salL_rReg_imm and shrL_rReg_imm (the memory class does not apply).
9171   ins_pipe(ialu_reg);
9172 %}
9173 
9174 // Arithmetic Shift Right by 8-bit immediate (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9175 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9176 %{
9177   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9178   effect(KILL cr);
9179 
9180   format %{ "sarq    $dst, $shift" %}
9181   opcode(0xC1, 0x7); /* C1 /7 ib */
9182   ins_encode(REX_mem_wide(dst), OpcP,
9183              RM_opc_mem(secondary, dst), Con8or32(shift));
9184   ins_pipe(ialu_mem_imm);
9185 %}
9186 
9187 // Arithmetic Shift Right by variable (64-bit, register, in place).
// The count is an int operand pinned to RCX via the rcx_RegI class.
9188 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9189 %{
9190   match(Set dst (RShiftL dst shift));
9191   effect(KILL cr);
9192 
9193   format %{ "sarq    $dst, $shift" %}
9194   opcode(0xD3, 0x7); /* D3 /7 */
9195   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9196   ins_pipe(ialu_reg_reg);
9197 %}
9198 
9199 // Arithmetic Shift Right by variable (64-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
9200 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9201 %{
9202   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9203   effect(KILL cr);
9204 
9205   format %{ "sarq    $dst, $shift" %}
9206   opcode(0xD3, 0x7); /* D3 /7 */
9207   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9208   ins_pipe(ialu_mem_reg);
9209 %}
9210 
9211 // Logical shift right by one (64-bit, register, in place).
// The count is implied by the D1 opcode, so no immediate byte is encoded.
9212 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9213 %{
9214   match(Set dst (URShiftL dst shift));
9215   effect(KILL cr);
9216 
9217   format %{ "shrq    $dst, $shift" %}
9218   opcode(0xD1, 0x5); /* D1 /5 */
9219   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9220   ins_pipe(ialu_reg);
9221 %}
9222 
9223 // Logical shift right by one (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9224 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9225 %{
9226   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9227   effect(KILL cr);
9228 
9229   format %{ "shrq    $dst, $shift" %}
9230   opcode(0xD1, 0x5); /* D1 /5 */
9231   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9232   ins_pipe(ialu_mem_imm);
9233 %}
9234 
9235 // Logical Shift Right by 8-bit immediate (64-bit, register, in place).
9236 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9237 %{
9238   match(Set dst (URShiftL dst shift));
9239   effect(KILL cr);
9240 
9241   format %{ "shrq    $dst, $shift" %}
9242   opcode(0xC1, 0x5); /* C1 /5 ib */
       // reg_opc_imm_wide (defined elsewhere) encodes the whole instruction,
       // including the count byte.
9243   ins_encode(reg_opc_imm_wide(dst, shift));
9244   ins_pipe(ialu_reg);
9245 %}
9246 
9247 
9248 // Logical Shift Right by 8-bit immediate (64-bit, memory operand).
// Folds the load / shift / store of the same address into one
// read-modify-write instruction.
9249 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9250 %{
9251   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9252   effect(KILL cr);
9253 
9254   format %{ "shrq    $dst, $shift" %}
9255   opcode(0xC1, 0x5); /* C1 /5 ib */
9256   ins_encode(REX_mem_wide(dst), OpcP,
9257              RM_opc_mem(secondary, dst), Con8or32(shift));
9258   ins_pipe(ialu_mem_imm);
9259 %}
9260 
9261 // Logical Shift Right by variable (64-bit, register, in place).
// The count is an int operand pinned to RCX via the rcx_RegI class.
9262 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9263 %{
9264   match(Set dst (URShiftL dst shift));
9265   effect(KILL cr);
9266 
9267   format %{ "shrq    $dst, $shift" %}
9268   opcode(0xD3, 0x5); /* D3 /5 */
9269   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9270   ins_pipe(ialu_reg_reg);
9271 %}
9272 
9273 // Logical Shift Right by variable (64-bit, memory operand).
// Read-modify-write form; the count operand is pinned to RCX.
9274 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9275 %{
9276   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9277   effect(KILL cr);
9278 
9279   format %{ "shrq    $dst, $shift" %}
9280   opcode(0xD3, 0x5); /* D3 /5 */
9281   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9282   ins_pipe(ialu_mem_reg);
9283 %}
9284 
9285 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9286 // This idiom is used by the compiler for the i2b bytecode.
// The shift pair is replaced by a single sign-extending byte move; no flags
// kill is declared for this rule.
9287 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9288 %{
9289   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9290 
9291   format %{ "movsbl  $dst, $src\t# i2b" %}
9292   opcode(0x0F, 0xBE);
       // Uses the byte-register REX variant for the source operand.
9293   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9294   ins_pipe(ialu_reg_reg);
9295 %}
9296 
9297 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9298 // This idiom is used by the compiler for the i2s bytecode.
// The shift pair is replaced by a single sign-extending word move; no flags
// kill is declared for this rule.
9299 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9300 %{
9301   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9302 
9303   format %{ "movswl  $dst, $src\t# i2s" %}
9304   opcode(0x0F, 0xBF);
9305   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9306   ins_pipe(ialu_reg_reg);
9307 %}
9308 
9309 // ROL/ROR instructions
9310 
9311 // ROL expand
// Expand-only helper (no match rule): 32-bit rotate left by one, in place.
// Instantiated by rolI_rReg_i1.
9312 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9313   effect(KILL cr, USE_DEF dst);
9314 
9315   format %{ "roll    $dst" %}
9316   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9317   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9318   ins_pipe(ialu_reg);
9319 %}
9320 
// Expand-only helper (no match rule): 32-bit rotate left by an 8-bit
// immediate count, in place. Instantiated by rolI_rReg_i8.
9321 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9322   effect(USE_DEF dst, USE shift, KILL cr);
9323 
9324   format %{ "roll    $dst, $shift" %}
9325   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9326   ins_encode( reg_opc_imm(dst, shift) );
9327   ins_pipe(ialu_reg);
9328 %}
9329 
// Expand-only helper (no match rule): 32-bit rotate left by a variable
// count held in RCX. dst is restricted to non-RCX registers so that it
// cannot be assigned the count register.
9330 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9331 %{
9332   effect(USE_DEF dst, USE shift, KILL cr);
9333 
9334   format %{ "roll    $dst, $shift" %}
9335   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9336   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9337   ins_pipe(ialu_reg_reg);
9338 %}
9339 // end of ROL expand
9340 
9341 // Rotate Left by one
// Recognizes (dst << lshift) | (dst >>> rshift) with lshift of class immI1
// and rshift of class immI_M1 (presumably the constants 1 and -1; operand
// classes are defined elsewhere) and expands to a single rotate.
9342 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9343 %{
9344   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9345 
9346   expand %{
9347     rolI_rReg_imm1(dst, cr);
9348   %}
9349 %}
9350 
9351 // Rotate Left by 8-bit immediate
// Recognizes (dst << lshift) | (dst >>> rshift) with constant counts and
// expands to a rotate left by lshift.
9352 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9353 %{
       // Only a rotate when the two counts sum to a multiple of 32.
9354   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9355   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9356 
9357   expand %{
9358     rolI_rReg_imm8(dst, lshift, cr);
9359   %}
9360 %}
9361 
9362 // Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (0 - shift)) and expands to a rotate
// left by the count in RCX.
9363 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9364 %{
9365   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9366 
9367   expand %{
9368     rolI_rReg_CL(dst, shift, cr);
9369   %}
9370 %}
9371 
9372 // Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (c32 - shift)), with c32 of class
// immI_32, and expands to a rotate left by the count in RCX.
9373 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9374 %{
9375   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9376 
9377   expand %{
9378     rolI_rReg_CL(dst, shift, cr);
9379   %}
9380 %}
9381 
9382 // ROR expand
// Expand-only helper (no match rule): 32-bit rotate right by one, in place.
// Instantiated by rorI_rReg_i1.
9383 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9384 %{
9385   effect(USE_DEF dst, KILL cr);
9386 
9387   format %{ "rorl    $dst" %}
9388   opcode(0xD1, 0x1); /* D1 /1 */
9389   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9390   ins_pipe(ialu_reg);
9391 %}
9392 
// Expand-only helper (no match rule): 32-bit rotate right by an 8-bit
// immediate count, in place. Instantiated by rorI_rReg_i8.
9393 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9394 %{
9395   effect(USE_DEF dst, USE shift, KILL cr);
9396 
9397   format %{ "rorl    $dst, $shift" %}
9398   opcode(0xC1, 0x1); /* C1 /1 ib */
9399   ins_encode(reg_opc_imm(dst, shift));
9400   ins_pipe(ialu_reg);
9401 %}
9402 
// Expand-only helper (no match rule): 32-bit rotate right by a variable
// count held in RCX. dst is restricted to non-RCX registers so that it
// cannot be assigned the count register.
9403 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9404 %{
9405   effect(USE_DEF dst, USE shift, KILL cr);
9406 
9407   format %{ "rorl    $dst, $shift" %}
9408   opcode(0xD3, 0x1); /* D3 /1 */
9409   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9410   ins_pipe(ialu_reg_reg);
9411 %}
9412 // end of ROR expand
9413 
9414 // Rotate Right by one
// Recognizes (dst >>> rshift) | (dst << lshift) with rshift of class immI1
// and lshift of class immI_M1 (presumably the constants 1 and -1; operand
// classes are defined elsewhere) and expands to a single rotate.
9415 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9416 %{
9417   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9418 
9419   expand %{
9420     rorI_rReg_imm1(dst, cr);
9421   %}
9422 %}
9423 
9424 // Rotate Right by 8-bit immediate
// Recognizes (dst >>> rshift) | (dst << lshift) with constant counts and
// expands to a rotate right by rshift.
9425 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9426 %{
       // Only a rotate when the two counts sum to a multiple of 32.
9427   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9428   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9429 
9430   expand %{
9431     rorI_rReg_imm8(dst, rshift, cr);
9432   %}
9433 %}
9434 
9435 // Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (0 - shift)) and expands to a rotate
// right by the count in RCX.
9436 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9437 %{
9438   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9439 
9440   expand %{
9441     rorI_rReg_CL(dst, shift, cr);
9442   %}
9443 %}
9444 
9445 // Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (c32 - shift)), with c32 of class
// immI_32, and expands to a rotate right by the count in RCX.
9446 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9447 %{
9448   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9449 
9450   expand %{
9451     rorI_rReg_CL(dst, shift, cr);
9452   %}
9453 %}
9454 
9455 // for long rotate
9456 // ROL expand
// Expand-only helper (no match rule): 64-bit rotate left by one, in place.
// Instantiated by rolL_rReg_i1.
9457 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9458   effect(USE_DEF dst, KILL cr);
9459 
9460   format %{ "rolq    $dst" %}
9461   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9462   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9463   ins_pipe(ialu_reg);
9464 %}
9465 
// Expand-only helper (no match rule): 64-bit rotate left by an 8-bit
// immediate count, in place. Instantiated by rolL_rReg_i8.
9466 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9467   effect(USE_DEF dst, USE shift, KILL cr);
9468 
9469   format %{ "rolq    $dst, $shift" %}
9470   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9471   ins_encode( reg_opc_imm_wide(dst, shift) );
9472   ins_pipe(ialu_reg);
9473 %}
9474 
// Expand-only helper (no match rule): 64-bit rotate left by a variable
// count held in RCX. dst is restricted to non-RCX registers so that it
// cannot be assigned the count register.
9475 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9476 %{
9477   effect(USE_DEF dst, USE shift, KILL cr);
9478 
9479   format %{ "rolq    $dst, $shift" %}
9480   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9481   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9482   ins_pipe(ialu_reg_reg);
9483 %}
9484 // end of ROL expand
9485 
9486 // Rotate Left by one
// Recognizes (dst << lshift) | (dst >>> rshift) on longs, with lshift of
// class immI1 and rshift of class immI_M1 (presumably the constants 1 and
// -1; operand classes are defined elsewhere), and expands to one rotate.
9487 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9488 %{
9489   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9490 
9491   expand %{
9492     rolL_rReg_imm1(dst, cr);
9493   %}
9494 %}
9495 
9496 // Rotate Left by 8-bit immediate
// Recognizes (dst << lshift) | (dst >>> rshift) on longs with constant
// counts and expands to a rotate left by lshift.
9497 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9498 %{
       // Only a rotate when the two counts sum to a multiple of 64.
9499   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9500   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9501 
9502   expand %{
9503     rolL_rReg_imm8(dst, lshift, cr);
9504   %}
9505 %}
9506 
9507 // Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (0 - shift)) on longs and expands to
// a rotate left by the count in RCX. The count arithmetic is int (SubI).
9508 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9509 %{
9510   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9511 
9512   expand %{
9513     rolL_rReg_CL(dst, shift, cr);
9514   %}
9515 %}
9516 
9517 // Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (c64 - shift)) on longs, with c64 of
// class immI_64, and expands to a rotate left by the count in RCX.
9518 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9519 %{
9520   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9521 
9522   expand %{
9523     rolL_rReg_CL(dst, shift, cr);
9524   %}
9525 %}
9526 
9527 // ROR expand
// Expand-only helper (no match rule): 64-bit rotate right by one, in place.
// Instantiated by rorL_rReg_i1.
9528 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9529 %{
9530   effect(USE_DEF dst, KILL cr);
9531 
9532   format %{ "rorq    $dst" %}
9533   opcode(0xD1, 0x1); /* D1 /1 */
9534   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9535   ins_pipe(ialu_reg);
9536 %}
9537 
// Expand-only helper (no match rule): 64-bit rotate right by an 8-bit
// immediate count, in place. Instantiated by rorL_rReg_i8.
9538 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9539 %{
9540   effect(USE_DEF dst, USE shift, KILL cr);
9541 
9542   format %{ "rorq    $dst, $shift" %}
9543   opcode(0xC1, 0x1); /* C1 /1 ib */
9544   ins_encode(reg_opc_imm_wide(dst, shift));
9545   ins_pipe(ialu_reg);
9546 %}
9547 
// Expand-only helper (no match rule): 64-bit rotate right by a variable
// count held in RCX. dst is restricted to non-RCX registers so that it
// cannot be assigned the count register.
9548 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9549 %{
9550   effect(USE_DEF dst, USE shift, KILL cr);
9551 
9552   format %{ "rorq    $dst, $shift" %}
9553   opcode(0xD3, 0x1); /* D3 /1 */
9554   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9555   ins_pipe(ialu_reg_reg);
9556 %}
9557 // end of ROR expand
9558 
9559 // Rotate Right by one
// Recognizes (dst >>> rshift) | (dst << lshift) on longs, with rshift of
// class immI1 and lshift of class immI_M1 (presumably the constants 1 and
// -1; operand classes are defined elsewhere), and expands to one rotate.
9560 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9561 %{
9562   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9563 
9564   expand %{
9565     rorL_rReg_imm1(dst, cr);
9566   %}
9567 %}
9568 
9569 // Rotate Right by 8-bit immediate
// Recognizes (dst >>> rshift) | (dst << lshift) on longs with constant
// counts and expands to a rotate right by rshift.
9570 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9571 %{
       // Only a rotate when the two counts sum to a multiple of 64.
9572   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9573   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9574 
9575   expand %{
9576     rorL_rReg_imm8(dst, rshift, cr);
9577   %}
9578 %}
9579 
9580 // Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (0 - shift)) on longs and expands to
// a rotate right by the count in RCX. The count arithmetic is int (SubI).
9581 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9582 %{
9583   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9584 
9585   expand %{
9586     rorL_rReg_CL(dst, shift, cr);
9587   %}
9588 %}
9589 
9590 // Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (c64 - shift)) on longs, with c64 of
// class immI_64, and expands to a rotate right by the count in RCX.
9591 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9592 %{
9593   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9594 
9595   expand %{
9596     rorL_rReg_CL(dst, shift, cr);
9597   %}
9598 %}
9599 
9600 // Logical Instructions
9601 
9602 // Integer Logical Instructions
9603 
9604 // And Instructions
9605 // And Register with Register (32-bit).
// Two-address form: matches (AndI dst src) and writes the result to dst.
9606 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9607 %{
9608   match(Set dst (AndI dst src));
9609   effect(KILL cr);
9610 
9611   format %{ "andl    $dst, $src\t# int" %}
9612   opcode(0x23);
9613   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9614   ins_pipe(ialu_reg_reg);
9615 %}
9616 
9617 // And Register with Immediate 255
// Implemented as a zero-extending byte move of dst onto itself instead of
// an andl; this rule takes no flags operand and declares no flags kill.
9618 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9619 %{
9620   match(Set dst (AndI dst src));
9621 
9622   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9623   opcode(0x0F, 0xB6);
       // Source and destination are the same register.
9624   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9625   ins_pipe(ialu_reg);
9626 %}
9627 
9628 // And Register with Immediate 255 and promote to long
// Folds the mask and the ConvI2L into one zero-extending byte move from src
// into the long destination register.
9629 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9630 %{
9631   match(Set dst (ConvI2L (AndI src mask)));
9632 
9633   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9634   opcode(0x0F, 0xB6);
9635   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9636   ins_pipe(ialu_reg);
9637 %}
9638 
9639 // And Register with Immediate 65535
// Implemented as a zero-extending word move of dst onto itself instead of
// an andl; this rule takes no flags operand and declares no flags kill.
9640 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9641 %{
9642   match(Set dst (AndI dst src));
9643 
9644   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9645   opcode(0x0F, 0xB7);
       // Source and destination are the same register.
9646   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9647   ins_pipe(ialu_reg);
9648 %}
9649 
// (long)(src & 65535): the mask and the int-to-long conversion are folded
// into a single zero-extending word move from src into dst.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9660 
// Int AND, register with a general int immediate: dst = dst & src.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9672 
// Int AND, register with a loaded memory operand: dst = dst & [src].
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9685 
// Int AND performed directly on memory: [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9698 
// Int AND of a memory operand with an immediate: [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9712 
9713 // Or Instructions
// Int OR, register with register: dst = dst | src.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9725 
// Int OR, register with an int immediate: dst = dst | src.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9737 
// Int OR, register with a loaded memory operand: dst = dst | [src].
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9750 
// Int OR performed directly on memory: [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9763 
// Int OR of a memory operand with an immediate: [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9777 
9778 // Xor Instructions
// Int XOR, register with register: dst = dst ^ src.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9790 
// Int XOR with the immI_M1 constant (-1), emitted as a bitwise NOT of dst.
// No flags operand is declared for this rule.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  // The mnemonic names the 32-bit form that the encoding below actually
  // emits (notl), mirroring the "notq" text used by the long variant.
  format %{ "notl   $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9801 
// Int XOR, register with an int immediate: dst = dst ^ src.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9813 
// Int XOR, register with a loaded memory operand: dst = dst ^ [src].
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9826 
// Int XOR performed directly on memory: [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9839 
// Int XOR of a memory operand with an immediate: [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9853 
9854 
9855 // Long Logical Instructions
9856 
9857 // And Instructions
// Long AND, register with register: dst = dst & src.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9869 
// Long AND with the immL_255 constant, emitted as a zero-extending byte
// move of dst onto itself.  No flags operand is declared for this rule.
instruct andL_rReg_imm255(rRegL dst, immL_255 src) %{
  match(Set dst (AndL dst src));

  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9880 
// Long AND with the immL_65535 constant, emitted as a zero-extending word
// move of dst onto itself.  No flags operand is declared for this rule.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src) %{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9891 
// Long AND, register with an immL32 immediate: dst = dst & src.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9903 
// Long AND, register with a loaded memory operand: dst = dst & [src].
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9916 
// Long AND performed directly on memory: [dst] = [dst] & src.
// Matches a load / AND / store of the same memory operand.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9929 
// Long AND of a memory operand with an immL32 immediate:
// [dst] = [dst] & src.  Matches a load / AND / store of the same operand.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9943 
9944 // Or Instructions
// Long OR, register with register: dst = dst | src.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9956 
// Long OR of dst with a pointer reinterpreted as an integer (CastP2X).
// src is declared any_RegP so that R15 (the TLS register) can be matched
// without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9967 
9968 
// Long OR, register with an immL32 immediate: dst = dst | src.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9980 
// Long OR, register with a loaded memory operand: dst = dst | [src].
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9993 
// Long OR performed directly on memory: [dst] = [dst] | src.
// Matches a load / OR / store of the same memory operand.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10006 
// Long OR of a memory operand with an immL32 immediate:
// [dst] = [dst] | src.  Matches a load / OR / store of the same operand.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10020 
10021 // Xor Instructions
// Long XOR, register with register: dst = dst ^ src.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10033 
// Long XOR with the immL_M1 constant (-1), emitted as a bitwise NOT of
// dst.  No flags operand is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
    // 64-bit NOT applied in place to the destination register.
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10044 
// Long XOR, register with an immL32 immediate: dst = dst ^ src.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10056 
// Long XOR, register with a loaded memory operand: dst = dst ^ [src].
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10069 
// Long XOR performed directly on memory: [dst] = [dst] ^ src.
// Matches a load / XOR / store of the same memory operand.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10082 
// Long XOR of a memory operand with an immL32 immediate:
// [dst] = [dst] ^ src.  Matches a load / XOR / store of the same operand.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10096 
// Conv2B on an int source.  Per the emitted sequence: test src against
// itself, set dst's low byte when the result is non-zero, then zero-extend
// that byte to the full int.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src),            // testl
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),                   // setnz
             REX_reg_breg(dst, dst),           // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10112 
// Conv2B on a pointer source.  Same sequence as the int form, except the
// test uses the wide (64-bit) REX prefix so the whole pointer is examined.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src),       // testq
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),                   // setnz
             REX_reg_breg(dst, dst),           // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10128 
// CmpLTMask of two int registers.  Per the emitted sequence: compare p with
// q, set dst's low byte on signed less-than, zero-extend it, then negate,
// so dst ends up all ones when p < q and zero otherwise.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(REX_reg_reg(p, q),                // cmpl
             opc_reg_reg(0x3B, p, q),
             setLT_reg(dst),                   // setlt
             REX_reg_breg(dst, dst),           // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst),
             neg_reg(dst));                    // negl
  ins_pipe(pipe_slow);
%}
10146 
// CmpLTMask against the immI0 constant: a single arithmetic right shift of
// dst by 31 replicates the sign bit across the register.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode(reg_opc_imm(dst, 0x1F));  // 0x1F == 31, the shift count
  ins_pipe(ialu_reg);
%}
10158 
10159 
// Fused form of  p = ((p < q ? -1 : 0) & y) + (p - q).
// The subtract leaves its borrow in the carry flag; sbbl of tmp with itself
// turns that borrow into an all-ones / zero mask, which then gates y.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_cost(400); // XXX
  ins_encode %{
    __ subl($p$$Register, $q$$Register);      // p -= q, borrow -> CF
    __ sbbl($tmp$$Register, $tmp$$Register);  // tmp = mask from the borrow
    __ andl($tmp$$Register, $y$$Register);    // tmp = mask & y
    __ addl($p$$Register, $tmp$$Register);    // p += tmp
  %}
  ins_pipe(pipe_cmplt);
%}
10182 
10183 //---------- FP Instructions------------------------------------------------
10184 
// Float compare of two registers into an rFlagsRegU result.  The ucomiss
// is followed by the cmpfp_fixup sequence shown in the format, which
// rewrites the saved flags when the parity flag reports a NaN operand.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10201 
// Float compare of two registers into an rFlagsRegUCF result: a bare
// ucomiss with no NaN fix-up sequence after it.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  // Costed at 100 like the other rFlagsRegUCF compare rules (memCF, immCF
  // and the double forms).  This rule emits a single instruction, so it
  // must not be priced the same as the fix-up variant (145).
  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10212 
// Float compare of a register with a loaded memory operand into an
// rFlagsRegU result, followed by the cmpfp_fixup sequence shown in the
// format (flags rewritten when the parity flag reports a NaN operand).
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10229 
// Float compare of a register with a loaded memory operand into an
// rFlagsRegUCF result: a bare ucomiss with no fix-up sequence.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2" %}
  ins_cost(100);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10239 
// Float compare of a register with a constant-table entry into an
// rFlagsRegU result, followed by the NaN fix-up emitted by
// emit_cmpfp_fixup (sequence shown in the format).
instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode %{
    // Compare against the constant in place, then patch the flags.
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10256 
// Float compare of a register with a constant-table entry into an
// rFlagsRegUCF result: a bare ucomiss with no fix-up sequence.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10266 
// Double compare of two registers into an rFlagsRegU result.  The ucomisd
// is followed by the cmpfp_fixup sequence shown in the format, which
// rewrites the saved flags when the parity flag reports a NaN operand.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10283 
// Double compare of two registers into an rFlagsRegUCF result: a bare
// ucomisd with no NaN fix-up sequence after it.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The format previously ended in a stray " test" token (debugging
  // residue); the text now shows just the instruction that is emitted.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10294 
// Double compare of a register with a loaded memory operand into an
// rFlagsRegU result, followed by the cmpfp_fixup sequence shown in the
// format (flags rewritten when the parity flag reports a NaN operand).
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10311 
// Double compare of a register with a loaded memory operand into an
// rFlagsRegUCF result: a bare ucomisd with no fix-up sequence.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2" %}
  ins_cost(100);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10321 
// Double compare of a register with a constant-table entry into an
// rFlagsRegU result, followed by the NaN fix-up emitted by
// emit_cmpfp_fixup (sequence shown in the format).
instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  ins_encode %{
    // Compare against the constant in place, then patch the flags.
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10338 
// Double compare of a register with a constant-table entry into an
// rFlagsRegUCF result: a bare ucomisd with no fix-up sequence.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10348 
// Three-way float compare (CmpF3) of two registers into -1, 0 or 1.
// Per the format: dst is preloaded with -1 and kept when the compare is
// unordered (parity) or below; otherwise setne/movzbl produce 0 or 1.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10369 
// Three-way float compare (CmpF3) of a register with a loaded memory
// operand into -1, 0 or 1.  Per the format: dst is preloaded with -1 and
// kept on parity or below; otherwise setne/movzbl produce 0 or 1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10390 
// Three-way float compare (CmpF3) of a register with a constant-table
// entry into -1, 0 or 1.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode %{
    Register result = $dst$$Register;
    Label finished;

    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    // Start from -1; that value stands if the compare is unordered or below.
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    // Otherwise 0 for equal, 1 for not-equal (i.e. greater).
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10417 
// Three-way double compare (CmpD3) of two registers into -1, 0 or 1.
// Per the format: dst is preloaded with -1 and kept when the compare is
// unordered (parity) or below; otherwise setne/movzbl produce 0 or 1.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10438 
// Three-way double compare (CmpD3) of a register with a loaded memory
// operand into -1, 0 or 1.  Per the format: dst is preloaded with -1 and
// kept on parity or below; otherwise setne/movzbl produce 0 or 1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10459 
// Three-way double compare (CmpD3) of a register with a constant-table
// entry into -1, 0 or 1.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);  // the flags operand is declared clobbered

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);
  ins_encode %{
    Label finished;
    Register result = $dst$$Register;

    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    // Start from -1; that value stands if the compare is unordered or below.
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    // Otherwise 0 for equal, 1 for not-equal (i.e. greater).
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10486 
// Scalar float add, register with register: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10497 
// Scalar float add, register with a loaded memory operand:
// dst = dst + [src].
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10508 
// Scalar float add of a constant, with the constant read from the
// constant table.
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10518 
// Scalar double add, register with register: dst = dst + src.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10529 
// Scalar double add, register with a loaded memory operand:
// dst = dst + [src].
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10540 
// Scalar double add of a constant, with the constant read from the
// constant table.
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10550 
// Scalar float subtract, register with register: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10561 
// Scalar float subtract, register with a loaded memory operand:
// dst = dst - [src].
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10572 
// Scalar float subtract of a constant, with the constant read from the
// constant table.
instruct subF_imm(regF dst, immF con)
%{
  match(Set dst (SubF dst con));

  ins_cost(150); // XXX
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10582 
// Scalar double subtract, register with register: dst = dst - src.
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10593 
// Scalar double subtract, register with a loaded memory operand:
// dst = dst - [src].
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10604 
// Scalar double subtract of a constant, with the constant read from the
// constant table.
instruct subD_imm(regD dst, immD con)
%{
  match(Set dst (SubD dst con));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10614 
// Scalar float multiply, register with register: dst = dst * src.
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10625 
// Scalar float multiply, register with a loaded memory operand:
// dst = dst * [src].
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10636 
// Single-precision multiply by a float constant: matches (MulF dst con).
// The encoding calls mulss with $constantaddress($con) as its second operand,
// which the format string describes as a constant-table load.
10637 instruct mulF_imm(regF dst, immF con) %{
10638   match(Set dst (MulF dst con));
10639   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10640   ins_cost(150); // XXX
10641   ins_encode %{
10642     __ mulss($dst$$XMMRegister, $constantaddress($con));
10643   %}
10644   ins_pipe(pipe_slow);
10645 %}
10646 
// Double-precision multiply, register-register form: matches (MulD dst src)
// and writes the result back to dst. Printed as "mulsd"; opcode bytes F2 0F 59.
10647 instruct mulD_reg(regD dst, regD src)
10648 %{
10649   match(Set dst (MulD dst src));
10650
10651   format %{ "mulsd   $dst, $src" %}
10652   ins_cost(150); // XXX
10653   opcode(0xF2, 0x0F, 0x59);
10654   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10655   ins_pipe(pipe_slow);
10656 %}
10657 
// Double-precision multiply with the second factor folded in from memory:
// matches (MulD dst (LoadD src)). Opcode bytes F2 0F 59, reg/mem encoding.
10658 instruct mulD_mem(regD dst, memory src)
10659 %{
10660   match(Set dst (MulD dst (LoadD src)));
10661
10662   format %{ "mulsd   $dst, $src" %}
10663   ins_cost(150); // XXX
10664   opcode(0xF2, 0x0F, 0x59);
10665   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10666   ins_pipe(pipe_slow);
10667 %}
10668 
// Double-precision multiply by a double constant: matches (MulD dst con).
// The encoding calls mulsd with $constantaddress($con) as its second operand,
// which the format string describes as a constant-table load.
10669 instruct mulD_imm(regD dst, immD con) %{
10670   match(Set dst (MulD dst con));
10671   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10672   ins_cost(150); // XXX
10673   ins_encode %{
10674     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10675   %}
10676   ins_pipe(pipe_slow);
10677 %}
10678 
// Single-precision divide, register-register form: matches (DivF dst src)
// and writes the result back to dst. Printed as "divss"; opcode bytes F3 0F 5E.
10679 instruct divF_reg(regF dst, regF src)
10680 %{
10681   match(Set dst (DivF dst src));
10682
10683   format %{ "divss   $dst, $src" %}
10684   ins_cost(150); // XXX
10685   opcode(0xF3, 0x0F, 0x5E);
10686   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10687   ins_pipe(pipe_slow);
10688 %}
10689 
// Single-precision divide with the divisor folded in from memory:
// matches (DivF dst (LoadF src)). Opcode bytes F3 0F 5E, reg/mem encoding.
10690 instruct divF_mem(regF dst, memory src)
10691 %{
10692   match(Set dst (DivF dst (LoadF src)));
10693
10694   format %{ "divss   $dst, $src" %}
10695   ins_cost(150); // XXX
10696   opcode(0xF3, 0x0F, 0x5E);
10697   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10698   ins_pipe(pipe_slow);
10699 %}
10700 
// Single-precision divide by a float constant: matches (DivF dst con).
// The encoding calls divss with $constantaddress($con) as its second operand,
// which the format string describes as a constant-table load.
10701 instruct divF_imm(regF dst, immF con) %{
10702   match(Set dst (DivF dst con));
10703   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10704   ins_cost(150); // XXX
10705   ins_encode %{
10706     __ divss($dst$$XMMRegister, $constantaddress($con));
10707   %}
10708   ins_pipe(pipe_slow);
10709 %}
10710 
// Double-precision divide, register-register form: matches (DivD dst src)
// and writes the result back to dst. Printed as "divsd"; opcode bytes F2 0F 5E.
10711 instruct divD_reg(regD dst, regD src)
10712 %{
10713   match(Set dst (DivD dst src));
10714
10715   format %{ "divsd   $dst, $src" %}
10716   ins_cost(150); // XXX
10717   opcode(0xF2, 0x0F, 0x5E);
10718   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10719   ins_pipe(pipe_slow);
10720 %}
10721 
// Double-precision divide with the divisor folded in from memory:
// matches (DivD dst (LoadD src)). Opcode bytes F2 0F 5E, reg/mem encoding.
10722 instruct divD_mem(regD dst, memory src)
10723 %{
10724   match(Set dst (DivD dst (LoadD src)));
10725
10726   format %{ "divsd   $dst, $src" %}
10727   ins_cost(150); // XXX
10728   opcode(0xF2, 0x0F, 0x5E);
10729   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10730   ins_pipe(pipe_slow);
10731 %}
10732 
// Double-precision divide by a double constant: matches (DivD dst con).
// The encoding calls divsd with $constantaddress($con) as its second operand,
// which the format string describes as a constant-table load.
10733 instruct divD_imm(regD dst, immD con) %{
10734   match(Set dst (DivD dst con));
10735   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10736   ins_cost(150); // XXX
10737   ins_encode %{
10738     __ divsd($dst$$XMMRegister, $constantaddress($con));
10739   %}
10740   ins_pipe(pipe_slow);
10741 %}
10742 
// Single-precision square root, register form. There is no float sqrt node in
// the match rule; instead the whole tree (ConvD2F (SqrtD (ConvF2D src))) --
// widen to double, take the root, narrow back -- is matched and replaced by
// one "sqrtss" (opcode bytes F3 0F 51). dst and src are separate operands.
10743 instruct sqrtF_reg(regF dst, regF src)
10744 %{
10745   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10746
10747   format %{ "sqrtss  $dst, $src" %}
10748   ins_cost(150); // XXX
10749   opcode(0xF3, 0x0F, 0x51);
10750   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10751   ins_pipe(pipe_slow);
10752 %}
10753 
// Single-precision square root with the operand loaded from memory: matches
// (ConvD2F (SqrtD (ConvF2D (LoadF src)))) and replaces the whole tree with one
// "sqrtss" using a memory source (opcode bytes F3 0F 51).
10754 instruct sqrtF_mem(regF dst, memory src)
10755 %{
10756   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10757
10758   format %{ "sqrtss  $dst, $src" %}
10759   ins_cost(150); // XXX
10760   opcode(0xF3, 0x0F, 0x51);
10761   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10762   ins_pipe(pipe_slow);
10763 %}
10764 
// Single-precision square root of a float constant: matches
// (ConvD2F (SqrtD (ConvF2D con))). The encoding calls sqrtss with
// $constantaddress($con) as the source, which the format string describes as
// a constant-table load.
10765 instruct sqrtF_imm(regF dst, immF con) %{
10766   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10767   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10768   ins_cost(150); // XXX
10769   ins_encode %{
10770     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10771   %}
10772   ins_pipe(pipe_slow);
10773 %}
10774 
// Double-precision square root, register form: matches (SqrtD src) with a
// separate destination register. Printed as "sqrtsd"; opcode bytes F2 0F 51.
10775 instruct sqrtD_reg(regD dst, regD src)
10776 %{
10777   match(Set dst (SqrtD src));
10778
10779   format %{ "sqrtsd  $dst, $src" %}
10780   ins_cost(150); // XXX
10781   opcode(0xF2, 0x0F, 0x51);
10782   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10783   ins_pipe(pipe_slow);
10784 %}
10785 
// Double-precision square root with the operand loaded from memory: matches
// (SqrtD (LoadD src)). Opcode bytes F2 0F 51, reg/mem encoding.
10786 instruct sqrtD_mem(regD dst, memory src)
10787 %{
10788   match(Set dst (SqrtD (LoadD src)));
10789
10790   format %{ "sqrtsd  $dst, $src" %}
10791   ins_cost(150); // XXX
10792   opcode(0xF2, 0x0F, 0x51);
10793   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10794   ins_pipe(pipe_slow);
10795 %}
10796 
// Double-precision square root of a double constant: matches (SqrtD con).
// The encoding calls sqrtsd with $constantaddress($con) as the source, which
// the format string describes as a constant-table load.
10797 instruct sqrtD_imm(regD dst, immD con) %{
10798   match(Set dst (SqrtD con));
10799   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10800   ins_cost(150); // XXX
10801   ins_encode %{
10802     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10803   %}
10804   ins_pipe(pipe_slow);
10805 %}
10806 
// Float absolute value, in place: matches (AbsF dst). The format string
// documents the technique as an "andps" with the mask 0x7fffffff (clearing the
// sign bit); the actual bytes come from the absF_encoding enc_class, which is
// defined outside this section. No explicit ins_cost is given.
10807 instruct absF_reg(regF dst)
10808 %{
10809   match(Set dst (AbsF dst));
10810
10811   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10812   ins_encode(absF_encoding(dst));
10813   ins_pipe(pipe_slow);
10814 %}
10815 
// Double absolute value, in place: matches (AbsD dst). The format string
// documents the technique as an "andpd" with the mask 0x7fffffffffffffff
// (clearing the sign bit); the actual bytes come from the absD_encoding
// enc_class, which is defined outside this section.
10816 instruct absD_reg(regD dst)
10817 %{
10818   match(Set dst (AbsD dst));
10819
10820   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10821             "# abs double by sign masking" %}
10822   ins_encode(absD_encoding(dst));
10823   ins_pipe(pipe_slow);
10824 %}
10825 
// Float negation, in place: matches (NegF dst). The format string documents
// the technique as an "xorps" with 0x80000000 (flipping the sign bit); the
// actual bytes come from the negF_encoding enc_class, defined outside this
// section.
10826 instruct negF_reg(regF dst)
10827 %{
10828   match(Set dst (NegF dst));
10829
10830   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10831   ins_encode(negF_encoding(dst));
10832   ins_pipe(pipe_slow);
10833 %}
10834 
// Double negation, in place: matches (NegD dst). The format string documents
// the technique as an "xorpd" with 0x8000000000000000 (flipping the sign bit);
// the actual bytes come from the negD_encoding enc_class, defined outside this
// section.
10835 instruct negD_reg(regD dst)
10836 %{
10837   match(Set dst (NegD dst));
10838
10839   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10840             "# neg double by sign flipping" %}
10841   ins_encode(negD_encoding(dst));
10842   ins_pipe(pipe_slow);
10843 %}
10844 
10845 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place: matches (CosD dst). The encoding brackets the raw
// opcode bytes D9 FF (the x87 fcos encoding) between Push_SrcXD(dst) and
// Push_ResultXD(dst); those enc_classes are defined elsewhere and presumably
// transfer the XMM value to and from the x87 stack -- TODO confirm.
// NOTE(review): the format string ends with a dangling "\n\t".
10846 instruct cosD_reg(regD dst) %{
10847   match(Set dst (CosD dst));
10848
10849   format %{ "dcos   $dst\n\t" %}
10850   opcode(0xD9, 0xFF);
10851   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10852   ins_pipe( pipe_slow );
10853 %}
10854 
// Double sine, in place: matches (SinD dst). The encoding brackets the raw
// opcode bytes D9 FE (the x87 fsin encoding) between Push_SrcXD(dst) and
// Push_ResultXD(dst); those enc_classes are defined elsewhere and presumably
// transfer the XMM value to and from the x87 stack -- TODO confirm.
// NOTE(review): the format string ends with a dangling "\n\t".
10855 instruct sinD_reg(regD dst) %{
10856   match(Set dst (SinD dst));
10857
10858   format %{ "dsin   $dst\n\t" %}
10859   opcode(0xD9, 0xFE);
10860   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10861   ins_pipe( pipe_slow );
10862 %}
10863 
// Double tangent, in place: matches (TanD dst). Between Push_SrcXD(dst) and
// Push_ResultXD(dst) the encoding emits two raw byte pairs, annotated in the
// source as fptan (D9 F2) followed by "fstp st" (DD D8). The extra fstp is
// there because fptan leaves an additional value (1.0) on top of the x87 stack
// that has to be discarded before the result is read back.
// NOTE(review): the format string ends with a dangling "\n\t".
10864 instruct tanD_reg(regD dst) %{
10865   match(Set dst (TanD dst));
10866
10867   format %{ "dtan   $dst\n\t" %}
10868   ins_encode( Push_SrcXD(dst),
10869               Opcode(0xD9), Opcode(0xF2),   //fptan
10870               Opcode(0xDD), Opcode(0xD8),   //fstp st
10871               Push_ResultXD(dst) );
10872   ins_pipe( pipe_slow );
10873 %}
10874 
// Base-10 logarithm of a double, in place: matches (Log10D dst).
// Emission order matters: the constant is pushed first (fldlg2), then the
// argument (Push_SrcXD), so that fyl2x sees the constant below the argument
// and produces constant * log_2(x); Push_ResultXD then returns the value to
// dst. Push_SrcXD/Push_ResultXD are enc_classes defined elsewhere.
10875 instruct log10D_reg(regD dst) %{
10876   // The source and result Double operands in XMM registers
10877   match(Set dst (Log10D dst));
10878   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10879   // fyl2x        ; compute log_10(2) * log_2(x)
10880   format %{ "fldlg2\t\t\t#Log10\n\t"
10881             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10882          %}
10883    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10884               Push_SrcXD(dst),
10885               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10886               Push_ResultXD(dst));
10887
10888   ins_pipe( pipe_slow );
10889 %}
10890 
// Natural logarithm of a double, in place: matches (LogD dst).
// Emission order matters: the constant is pushed first (fldln2), then the
// argument (Push_SrcXD), so that fyl2x sees the constant below the argument
// and produces constant * log_2(x); Push_ResultXD then returns the value to
// dst. Push_SrcXD/Push_ResultXD are enc_classes defined elsewhere.
10891 instruct logD_reg(regD dst) %{
10892   // The source and result Double operands in XMM registers
10893   match(Set dst (LogD dst));
10894   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10895   // fyl2x        ; compute log_e(2) * log_2(x)
10896   format %{ "fldln2\t\t\t#Log_e\n\t"
10897             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10898          %}
10899   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10900               Push_SrcXD(dst),
10901               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10902               Push_ResultXD(dst));
10903   ins_pipe( pipe_slow );
10904 %}
10905 
10906 
10907 
10908 //----------Arithmetic Conversion Instructions---------------------------------
10909 
// RoundFloat is a no-op here: the rule matches (RoundFloat dst) in place,
// costs 0, has an empty encoding (no bytes emitted) and uses the empty pipe
// class. There is no format string, so nothing is printed for it either.
10910 instruct roundFloat_nop(regF dst)
10911 %{
10912   match(Set dst (RoundFloat dst));
10913
10914   ins_cost(0);
10915   ins_encode();
10916   ins_pipe(empty);
10917 %}
10918 
// RoundDouble is a no-op here: the rule matches (RoundDouble dst) in place,
// costs 0, has an empty encoding (no bytes emitted) and uses the empty pipe
// class. There is no format string, so nothing is printed for it either.
10919 instruct roundDouble_nop(regD dst)
10920 %{
10921   match(Set dst (RoundDouble dst));
10922
10923   ins_cost(0);
10924   ins_encode();
10925   ins_pipe(empty);
10926 %}
10927 
// Float -> double widening, register form: matches (ConvF2D src).
// Printed as "cvtss2sd"; opcode bytes F3 0F 5A. No explicit ins_cost.
10928 instruct convF2D_reg_reg(regD dst, regF src)
10929 %{
10930   match(Set dst (ConvF2D src));
10931
10932   format %{ "cvtss2sd $dst, $src" %}
10933   opcode(0xF3, 0x0F, 0x5A);
10934   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10935   ins_pipe(pipe_slow); // XXX
10936 %}
10937 
// Float -> double widening with the float loaded from memory: matches
// (ConvF2D (LoadF src)). Printed as "cvtss2sd"; opcode bytes F3 0F 5A.
10938 instruct convF2D_reg_mem(regD dst, memory src)
10939 %{
10940   match(Set dst (ConvF2D (LoadF src)));
10941
10942   format %{ "cvtss2sd $dst, $src" %}
10943   opcode(0xF3, 0x0F, 0x5A);
10944   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10945   ins_pipe(pipe_slow); // XXX
10946 %}
10947 
// Double -> float narrowing, register form: matches (ConvD2F src).
// Printed as "cvtsd2ss"; opcode bytes F2 0F 5A. No explicit ins_cost.
10948 instruct convD2F_reg_reg(regF dst, regD src)
10949 %{
10950   match(Set dst (ConvD2F src));
10951
10952   format %{ "cvtsd2ss $dst, $src" %}
10953   opcode(0xF2, 0x0F, 0x5A);
10954   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10955   ins_pipe(pipe_slow); // XXX
10956 %}
10957 
// Double -> float narrowing with the double loaded from memory: matches
// (ConvD2F (LoadD src)). Printed as "cvtsd2ss"; opcode bytes F2 0F 5A.
10958 instruct convD2F_reg_mem(regF dst, memory src)
10959 %{
10960   match(Set dst (ConvD2F (LoadD src)));
10961
10962   format %{ "cvtsd2ss $dst, $src" %}
10963   opcode(0xF2, 0x0F, 0x5A);
10964   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10965   ins_pipe(pipe_slow); // XXX
10966 %}
10967 
10968 // XXX do mem variants
// Float -> int conversion: matches (ConvF2I src) and kills the flags register.
// Two parts are emitted: the truncating convert (opcode bytes F3 0F 2C,
// "cvttss2sil") and the f2i_fixup enc_class (defined elsewhere). According to
// the format string, the fixup compares the result with 0x80000000 -- the
// value the convert instruction produces when it cannot represent the input --
// and only in that case spills src to the stack, calls the f2i_fixup stub and
// pops the corrected result into dst.
10969 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10970 %{
10971   match(Set dst (ConvF2I src));
10972   effect(KILL cr);
10973
10974   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10975             "cmpl    $dst, #0x80000000\n\t"
10976             "jne,s   done\n\t"
10977             "subq    rsp, #8\n\t"
10978             "movss   [rsp], $src\n\t"
10979             "call    f2i_fixup\n\t"
10980             "popq    $dst\n"
10981     "done:   "%}
10982   opcode(0xF3, 0x0F, 0x2C);
10983   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10984              f2i_fixup(dst, src));
10985   ins_pipe(pipe_slow);
10986 %}
10987 
// Float -> long conversion: matches (ConvF2L src) and kills the flags register.
// Same opcode bytes as the int form (F3 0F 2C) but with the wide REX encoding
// class (REX_reg_reg_wide), giving the 64-bit "cvttss2siq". According to the
// format string, the f2l_fixup enc_class (defined elsewhere) compares the
// result with 0x8000000000000000 and only on a match spills src, calls the
// f2l_fixup stub and pops the corrected result into dst.
10988 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10989 %{
10990   match(Set dst (ConvF2L src));
10991   effect(KILL cr);
10992
10993   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10994             "cmpq    $dst, [0x8000000000000000]\n\t"
10995             "jne,s   done\n\t"
10996             "subq    rsp, #8\n\t"
10997             "movss   [rsp], $src\n\t"
10998             "call    f2l_fixup\n\t"
10999             "popq    $dst\n"
11000     "done:   "%}
11001   opcode(0xF3, 0x0F, 0x2C);
11002   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11003              f2l_fixup(dst, src));
11004   ins_pipe(pipe_slow);
11005 %}
11006 
// Double -> int conversion: matches (ConvD2I src) and kills the flags register.
// Emits the truncating convert (opcode bytes F2 0F 2C, "cvttsd2sil") followed
// by the d2i_fixup enc_class (defined elsewhere). According to the format
// string, the fixup compares the result with 0x80000000 and only on a match
// spills src, calls the d2i_fixup stub and pops the corrected result into dst.
11007 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11008 %{
11009   match(Set dst (ConvD2I src));
11010   effect(KILL cr);
11011
11012   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11013             "cmpl    $dst, #0x80000000\n\t"
11014             "jne,s   done\n\t"
11015             "subq    rsp, #8\n\t"
11016             "movsd   [rsp], $src\n\t"
11017             "call    d2i_fixup\n\t"
11018             "popq    $dst\n"
11019     "done:   "%}
11020   opcode(0xF2, 0x0F, 0x2C);
11021   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11022              d2i_fixup(dst, src));
11023   ins_pipe(pipe_slow);
11024 %}
11025 
// Double -> long conversion: matches (ConvD2L src) and kills the flags
// register. Same opcode bytes as the int form (F2 0F 2C) but with the wide REX
// encoding class, giving the 64-bit "cvttsd2siq". According to the format
// string, the d2l_fixup enc_class (defined elsewhere) compares the result with
// 0x8000000000000000 and only on a match spills src, calls the d2l_fixup stub
// and pops the corrected result into dst.
11026 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11027 %{
11028   match(Set dst (ConvD2L src));
11029   effect(KILL cr);
11030
11031   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11032             "cmpq    $dst, [0x8000000000000000]\n\t"
11033             "jne,s   done\n\t"
11034             "subq    rsp, #8\n\t"
11035             "movsd   [rsp], $src\n\t"
11036             "call    d2l_fixup\n\t"
11037             "popq    $dst\n"
11038     "done:   "%}
11039   opcode(0xF2, 0x0F, 0x2C);
11040   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11041              d2l_fixup(dst, src));
11042   ins_pipe(pipe_slow);
11043 %}
11044 
// Int -> float conversion from a general register: matches (ConvI2F src).
// Only selected when UseXmmI2F is off; convXI2F_reg matches the same tree
// under the opposite predicate. Printed as "cvtsi2ssl"; opcode bytes F3 0F 2A.
11045 instruct convI2F_reg_reg(regF dst, rRegI src)
11046 %{
11047   predicate(!UseXmmI2F);
11048   match(Set dst (ConvI2F src));
11049
11050   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11051   opcode(0xF3, 0x0F, 0x2A);
11052   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11053   ins_pipe(pipe_slow); // XXX
11054 %}
11055 
// Int -> float conversion with the int loaded from memory: matches
// (ConvI2F (LoadI src)). Unlike the register form, this rule carries no
// UseXmmI2F predicate. Printed as "cvtsi2ssl"; opcode bytes F3 0F 2A.
11056 instruct convI2F_reg_mem(regF dst, memory src)
11057 %{
11058   match(Set dst (ConvI2F (LoadI src)));
11059
11060   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11061   opcode(0xF3, 0x0F, 0x2A);
11062   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11063   ins_pipe(pipe_slow); // XXX
11064 %}
11065 
// Int -> double conversion from a general register: matches (ConvI2D src).
// Only selected when UseXmmI2D is off; convXI2D_reg matches the same tree
// under the opposite predicate. Printed as "cvtsi2sdl"; opcode bytes F2 0F 2A.
11066 instruct convI2D_reg_reg(regD dst, rRegI src)
11067 %{
11068   predicate(!UseXmmI2D);
11069   match(Set dst (ConvI2D src));
11070
11071   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11072   opcode(0xF2, 0x0F, 0x2A);
11073   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11074   ins_pipe(pipe_slow); // XXX
11075 %}
11076 
// Int -> double conversion with the int loaded from memory: matches
// (ConvI2D (LoadI src)). Unlike the register form, this rule carries no
// UseXmmI2D predicate. Printed as "cvtsi2sdl"; opcode bytes F2 0F 2A.
11077 instruct convI2D_reg_mem(regD dst, memory src)
11078 %{
11079   match(Set dst (ConvI2D (LoadI src)));
11080
11081   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11082   opcode(0xF2, 0x0F, 0x2A);
11083   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11084   ins_pipe(pipe_slow); // XXX
11085 %}
11086 
// Int -> float conversion done entirely in the XMM unit: matches (ConvI2F src)
// when UseXmmI2F is on (convI2F_reg_reg covers the flag-off case). The int is
// first copied into dst with movdl, then converted in place with cvtdq2ps.
// NOTE(review): the format string prints "cvtdq2psl" while the assembler call
// is cvtdq2ps -- the trailing 'l' looks like a leftover; confirm before
// relying on the printed mnemonic.
11087 instruct convXI2F_reg(regF dst, rRegI src)
11088 %{
11089   predicate(UseXmmI2F);
11090   match(Set dst (ConvI2F src));
11091
11092   format %{ "movdl $dst, $src\n\t"
11093             "cvtdq2psl $dst, $dst\t# i2f" %}
11094   ins_encode %{
11095     __ movdl($dst$$XMMRegister, $src$$Register);
11096     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11097   %}
11098   ins_pipe(pipe_slow); // XXX
11099 %}
11100 
// Int -> double conversion done entirely in the XMM unit: matches
// (ConvI2D src) when UseXmmI2D is on (convI2D_reg_reg covers the flag-off
// case). The int is first copied into dst with movdl, then converted in place
// with cvtdq2pd.
// NOTE(review): the format string prints "cvtdq2pdl" while the assembler call
// is cvtdq2pd -- the trailing 'l' looks like a leftover; confirm before
// relying on the printed mnemonic.
11101 instruct convXI2D_reg(regD dst, rRegI src)
11102 %{
11103   predicate(UseXmmI2D);
11104   match(Set dst (ConvI2D src));
11105
11106   format %{ "movdl $dst, $src\n\t"
11107             "cvtdq2pdl $dst, $dst\t# i2d" %}
11108   ins_encode %{
11109     __ movdl($dst$$XMMRegister, $src$$Register);
11110     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11111   %}
11112   ins_pipe(pipe_slow); // XXX
11113 %}
11114 
// Long -> float conversion from a general register: matches (ConvL2F src).
// Same opcode bytes as the int form (F3 0F 2A) but with the wide REX encoding
// class, giving the 64-bit "cvtsi2ssq".
11115 instruct convL2F_reg_reg(regF dst, rRegL src)
11116 %{
11117   match(Set dst (ConvL2F src));
11118
11119   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11120   opcode(0xF3, 0x0F, 0x2A);
11121   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11122   ins_pipe(pipe_slow); // XXX
11123 %}
11124 
// Long -> float conversion with the long loaded from memory: matches
// (ConvL2F (LoadL src)). Opcode bytes F3 0F 2A with the wide reg/mem REX
// encoding class; printed as "cvtsi2ssq".
11125 instruct convL2F_reg_mem(regF dst, memory src)
11126 %{
11127   match(Set dst (ConvL2F (LoadL src)));
11128
11129   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11130   opcode(0xF3, 0x0F, 0x2A);
11131   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11132   ins_pipe(pipe_slow); // XXX
11133 %}
11134 
// Long -> double conversion from a general register: matches (ConvL2D src).
// Same opcode bytes as the int form (F2 0F 2A) but with the wide REX encoding
// class, giving the 64-bit "cvtsi2sdq".
11135 instruct convL2D_reg_reg(regD dst, rRegL src)
11136 %{
11137   match(Set dst (ConvL2D src));
11138
11139   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11140   opcode(0xF2, 0x0F, 0x2A);
11141   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11142   ins_pipe(pipe_slow); // XXX
11143 %}
11144 
// Long -> double conversion with the long loaded from memory: matches
// (ConvL2D (LoadL src)). Opcode bytes F2 0F 2A with the wide reg/mem REX
// encoding class; printed as "cvtsi2sdq".
11145 instruct convL2D_reg_mem(regD dst, memory src)
11146 %{
11147   match(Set dst (ConvL2D (LoadL src)));
11148
11149   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11150   opcode(0xF2, 0x0F, 0x2A);
11151   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11152   ins_pipe(pipe_slow); // XXX
11153 %}
11154 
// Int -> long sign extension: matches (ConvI2L src) and emits a single movslq
// through the macro assembler. Cost 125, so the zero-extending rules that fold
// an AndL mask are separate instructs with their own match trees.
11155 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11156 %{
11157   match(Set dst (ConvI2L src));
11158
11159   ins_cost(125);
11160   format %{ "movslq  $dst, $src\t# i2l" %}
11161   ins_encode %{
11162     __ movslq($dst$$Register, $src$$Register);
11163   %}
11164   ins_pipe(ialu_reg_reg);
11165 %}
11166 
11167 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11168 // %{
11169 //   match(Set dst (ConvI2L src));
11170 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11171 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11172 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11173 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11174 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11175 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11176 
11177 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11178 //   ins_encode(enc_copy(dst, src));
11179 // //   opcode(0x63); // needs REX.W
11180 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11181 //   ins_pipe(ialu_reg_reg);
11182 // %}
11183 
11184 // Zero-extend convert int to long, register source.
// Matches the combined tree (AndL (ConvI2L src) mask), where mask is an
// immL_32bits operand (presumably the constant 0xFFFFFFFF -- the operand is
// defined elsewhere), and replaces it with the enc_copy encoding, printed as
// a 32-bit "movl".
// NOTE(review): the format string ends with a dangling "\n\t".
11185 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11186 %{
11187   match(Set dst (AndL (ConvI2L src) mask));
11188
11189   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11190   ins_encode(enc_copy(dst, src));
11191   ins_pipe(ialu_reg_reg);
11192 %}
11193 
11194 // Zero-extend convert int to long, memory source.
// Matches (AndL (ConvI2L (LoadI src)) mask) with an immL_32bits mask operand
// (defined elsewhere), folding load, conversion and mask into one 32-bit load:
// opcode byte 8B with the non-wide reg/mem REX class, printed as "movl".
// NOTE(review): the format string ends with a dangling "\n\t".
11195 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11196 %{
11197   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11198
11199   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11200   opcode(0x8B);
11201   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11202   ins_pipe(ialu_reg_mem);
11203 %}
11204 
// Zero-extend the low half of a long: matches (AndL src mask) with an
// immL_32bits mask operand (defined elsewhere) and replaces the AND with the
// enc_copy_always encoding, printed as a 32-bit "movl". Note this uses
// enc_copy_always rather than enc_copy; presumably the move must be emitted
// even when dst and src are the same register -- TODO confirm against the
// enc_class definitions.
11205 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11206 %{
11207   match(Set dst (AndL src mask));
11208
11209   format %{ "movl    $dst, $src\t# zero-extend long" %}
11210   ins_encode(enc_copy_always(dst, src));
11211   ins_pipe(ialu_reg_reg);
11212 %}
11213 
// Long -> int truncation: matches (ConvL2I src) and uses the enc_copy_always
// encoding (defined elsewhere), printed as a 32-bit "movl".
11214 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11215 %{
11216   match(Set dst (ConvL2I src));
11217
11218   format %{ "movl    $dst, $src\t# l2i" %}
11219   ins_encode(enc_copy_always(dst, src));
11220   ins_pipe(ialu_reg_reg);
11221 %}
11222 
11223 
// MoveF2I, stack -> general register: matches (MoveF2I src) where the float
// lives in a stack slot. Implemented as a plain 32-bit load (opcode byte 8B,
// "movl"), so the bits are copied without any conversion instruction.
// Cost 125.
11224 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11225   match(Set dst (MoveF2I src));
11226   effect(DEF dst, USE src);
11227
11228   ins_cost(125);
11229   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11230   opcode(0x8B);
11231   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11232   ins_pipe(ialu_reg_mem);
11233 %}
11234 
// MoveI2F, stack -> XMM register: matches (MoveI2F src) where the int lives in
// a stack slot. Implemented as a scalar float load (opcode bytes F3 0F 10,
// "movss"), so the bits are copied without any conversion instruction.
// Cost 125.
11235 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11236   match(Set dst (MoveI2F src));
11237   effect(DEF dst, USE src);
11238
11239   ins_cost(125);
11240   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11241   opcode(0xF3, 0x0F, 0x10);
11242   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11243   ins_pipe(pipe_slow);
11244 %}
11245 
// MoveD2L, stack -> general register: matches (MoveD2L src) where the double
// lives in a stack slot. Implemented as a plain 64-bit load (opcode byte 8B
// with the wide REX class, "movq"), copying the bits unchanged. Cost 125.
11246 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11247   match(Set dst (MoveD2L src));
11248   effect(DEF dst, USE src);
11249
11250   ins_cost(125);
11251   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11252   opcode(0x8B);
11253   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11254   ins_pipe(ialu_reg_mem);
11255 %}
11256 
// MoveL2D, stack -> XMM register, variant used when UseXmmLoadAndClearUpper is
// off: matches (MoveL2D src) and loads with "movlpd" (opcode bytes 66 0F 12).
// MoveL2D_stack_reg handles the same tree when the flag is on. Cost 125.
11257 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11258   predicate(!UseXmmLoadAndClearUpper);
11259   match(Set dst (MoveL2D src));
11260   effect(DEF dst, USE src);
11261
11262   ins_cost(125);
11263   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11264   opcode(0x66, 0x0F, 0x12);
11265   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11266   ins_pipe(pipe_slow);
11267 %}
11268 
// MoveL2D, stack -> XMM register, variant used when UseXmmLoadAndClearUpper is
// on: matches (MoveL2D src) and loads with "movsd" (opcode bytes F2 0F 10).
// MoveL2D_stack_reg_partial handles the same tree when the flag is off.
// Cost 125.
11269 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11270   predicate(UseXmmLoadAndClearUpper);
11271   match(Set dst (MoveL2D src));
11272   effect(DEF dst, USE src);
11273
11274   ins_cost(125);
11275   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11276   opcode(0xF2, 0x0F, 0x10);
11277   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11278   ins_pipe(pipe_slow);
11279 %}
11280 
11281 
// MoveF2I, XMM register -> stack: matches (MoveF2I src) with an int stack slot
// as destination. Implemented as a scalar float store (opcode bytes F3 0F 11,
// "movss"). The encoding classes are called with (src, dst): the register
// comes first and the stack slot is the memory operand. Cost 95.
11282 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11283   match(Set dst (MoveF2I src));
11284   effect(DEF dst, USE src);
11285
11286   ins_cost(95); // XXX
11287   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11288   opcode(0xF3, 0x0F, 0x11);
11289   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11290   ins_pipe(pipe_slow);
11291 %}
11292 
// MoveI2F, general register -> stack: matches (MoveI2F src) with a float stack
// slot as destination. Implemented as a plain 32-bit store (opcode byte 89,
// "movl"). The encoding classes are called with (src, dst): the register
// comes first and the stack slot is the memory operand. Cost 100.
11293 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11294   match(Set dst (MoveI2F src));
11295   effect(DEF dst, USE src);
11296
11297   ins_cost(100);
11298   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11299   opcode(0x89);
11300   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11301   ins_pipe( ialu_mem_reg );
11302 %}
11303 
// MoveD2L, XMM register -> stack: matches (MoveD2L src) with a long stack slot
// as destination. Implemented as a scalar double store (opcode bytes F2 0F 11,
// "movsd"). The encoding classes are called with (src, dst): the register
// comes first and the stack slot is the memory operand. Cost 95.
// The format tag now names this rule; it previously read "MoveL2D_reg_stack",
// which made the printed assembly indistinguishable from the real
// MoveL2D_reg_stack rule.
11304 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11305   match(Set dst (MoveD2L src));
11306   effect(DEF dst, USE src);
11307
11308   ins_cost(95); // XXX
11309   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11310   opcode(0xF2, 0x0F, 0x11);
11311   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11312   ins_pipe(pipe_slow);
11313 %}
11314 
// MoveL2D, general register -> stack: matches (MoveL2D src) with a double
// stack slot as destination. Implemented as a plain 64-bit store (opcode byte
// 89 with the wide REX class, "movq"). The encoding classes are called with
// (src, dst): the register comes first and the stack slot is the memory
// operand. Cost 100.
11315 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11316   match(Set dst (MoveL2D src));
11317   effect(DEF dst, USE src);
11318
11319   ins_cost(100);
11320   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11321   opcode(0x89);
11322   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11323   ins_pipe(ialu_mem_reg);
11324 %}
11325 
// MoveF2I, XMM register -> general register, without going through memory:
// matches (MoveF2I src) and emits the assembler's movdl. Cost 85, which is
// lower than both the stack-load (125) and stack-store (95) variants.
11326 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11327   match(Set dst (MoveF2I src));
11328   effect(DEF dst, USE src);
11329   ins_cost(85);
11330   format %{ "movd    $dst,$src\t# MoveF2I" %}
11331   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11332   ins_pipe( pipe_slow );
11333 %}
11334 
// MoveD2L, XMM register -> general register, without going through memory:
// matches (MoveD2L src) and emits the assembler's movdq (the format string
// prints it as "movd"). Cost 85, which is lower than both the stack-load (125)
// and stack-store (95) variants.
11335 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11336   match(Set dst (MoveD2L src));
11337   effect(DEF dst, USE src);
11338   ins_cost(85);
11339   format %{ "movd    $dst,$src\t# MoveD2L" %}
11340   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11341   ins_pipe( pipe_slow );
11342 %}
11343 
11344 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F, general register -> XMM register, without going through memory:
// matches (MoveI2F src) and emits the assembler's movdl. Cost 300, well above
// the stack-based variants (100/125), so the matcher prefers those.
11345 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11346   match(Set dst (MoveI2F src));
11347   effect(DEF dst, USE src);
11348   ins_cost(300);
11349   format %{ "movd    $dst,$src\t# MoveI2F" %}
11350   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11351   ins_pipe( pipe_slow );
11352 %}
11353 
// MoveL2D, general register -> XMM register, without going through memory:
// matches (MoveL2D src) and emits the assembler's movdq (the format string
// prints it as "movd"). Cost 300, well above the stack-based variants
// (100/125), so the matcher prefers those.
11354 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11355   match(Set dst (MoveL2D src));
11356   effect(DEF dst, USE src);
11357   ins_cost(300);
11358   format %{ "movd    $dst,$src\t# MoveL2D" %}
11359   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11360   ins_pipe( pipe_slow );
11361 %}
11362 
11363 // Replicate scalar to packed byte (1 byte) values in xmm
// XMM-source form: matches (Replicate8B src). The whole sequence listed in the
// format string (MOVDQA, PUNPCKLBW, PSHUFLW) is produced by the single
// pshufd_8x8 enc_class, which is defined outside this section.
11364 instruct Repl8B_reg(regD dst, regD src) %{
11365   match(Set dst (Replicate8B src));
11366   format %{ "MOVDQA  $dst,$src\n\t"
11367             "PUNPCKLBW $dst,$dst\n\t"
11368             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11369   ins_encode( pshufd_8x8(dst, src));
11370   ins_pipe( pipe_slow );
11371 %}
11372 
11373 // Replicate scalar to packed byte (1 byte) values in xmm
// General-register-source form: matches (Replicate8B src). The value is first
// moved into dst with the mov_i2x enc_class, then pshufd_8x8(dst, dst)
// replicates it in place (both enc_classes are defined elsewhere).
11374 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11375   match(Set dst (Replicate8B src));
11376   format %{ "MOVD    $dst,$src\n\t"
11377             "PUNPCKLBW $dst,$dst\n\t"
11378             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11379   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11380   ins_pipe( pipe_slow );
11381 %}
11382 
11383 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Matches (Replicate8B zero) for the immI0 operand and simply clears dst by
// XOR-ing it with itself (pxor enc_class), so no shuffle is needed.
11384 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11385   match(Set dst (Replicate8B zero));
11386   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11387   ins_encode( pxor(dst, dst));
11388   ins_pipe( fpu_reg_reg );
11389 %}
11390 
11391 // Replicate scalar to packed short (2 byte) values in xmm
// XMM-source form: matches (Replicate4S src) and uses the pshufd_4x16
// enc_class (defined elsewhere), printed as PSHUFLW with selector 0x00.
11392 instruct Repl4S_reg(regD dst, regD src) %{
11393   match(Set dst (Replicate4S src));
11394   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11395   ins_encode( pshufd_4x16(dst, src));
11396   ins_pipe( fpu_reg_reg );
11397 %}
11398 
11399 // Replicate scalar to packed short (2 byte) values in xmm
// General-register-source form: matches (Replicate4S src). The value is first
// moved into dst with mov_i2x, then pshufd_4x16(dst, dst) replicates it in
// place (both enc_classes are defined elsewhere).
11400 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11401   match(Set dst (Replicate4S src));
11402   format %{ "MOVD    $dst,$src\n\t"
11403             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11404   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11405   ins_pipe( fpu_reg_reg );
11406 %}
11407 
11408 // Replicate scalar zero to packed short (2 byte) values in xmm
// Matches (Replicate4S zero) for the immI0 operand and simply clears dst by
// XOR-ing it with itself (pxor enc_class), so no shuffle is needed.
11409 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11410   match(Set dst (Replicate4S zero));
11411   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11412   ins_encode( pxor(dst, dst));
11413   ins_pipe( fpu_reg_reg );
11414 %}
11415 
11416 // Replicate scalar to packed char (2 byte) values in xmm
// XMM-source form: matches (Replicate4C src) and uses the same pshufd_4x16
// enc_class as the short variant, printed as PSHUFLW with selector 0x00.
11417 instruct Repl4C_reg(regD dst, regD src) %{
11418   match(Set dst (Replicate4C src));
11419   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11420   ins_encode( pshufd_4x16(dst, src));
11421   ins_pipe( fpu_reg_reg );
11422 %}
11423 
11424 // Replicate scalar to packed char (2 byte) values in xmm
// General-register-source form: matches (Replicate4C src). The value is first
// moved into dst with mov_i2x, then pshufd_4x16(dst, dst) replicates it in
// place (both enc_classes are defined elsewhere).
11425 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11426   match(Set dst (Replicate4C src));
11427   format %{ "MOVD    $dst,$src\n\t"
11428             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11429   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11430   ins_pipe( fpu_reg_reg );
11431 %}
11432 
11433 // Replicate scalar zero to packed char (2 byte) values in xmm
// Matches (Replicate4C zero) for the immI0 operand and simply clears dst by
// XOR-ing it with itself (pxor enc_class), so no shuffle is needed.
11434 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11435   match(Set dst (Replicate4C zero));
11436   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11437   ins_encode( pxor(dst, dst));
11438   ins_pipe( fpu_reg_reg );
11439 %}
11440 
11441 // Replicate scalar to packed integer (4 byte) values in xmm
// XMM-source form: matches (Replicate2I src) and uses the pshufd enc_class
// with selector 0x00, printed as PSHUFD.
11442 instruct Repl2I_reg(regD dst, regD src) %{
11443   match(Set dst (Replicate2I src));
11444   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11445   ins_encode( pshufd(dst, src, 0x00));
11446   ins_pipe( fpu_reg_reg );
11447 %}
11448 
11449 // Replicate scalar to packed integer (4 byte) values in xmm
// General-register-source form: matches (Replicate2I src). The value is first
// moved into dst with mov_i2x, then pshufd(dst, dst, 0x00) replicates it in
// place.
11450 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11451   match(Set dst (Replicate2I src));
11452   format %{ "MOVD   $dst,$src\n\t"
11453             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11454   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11455   ins_pipe( fpu_reg_reg );
11456 %}
11457 
11458 // Replicate scalar zero to packed integer (4 byte) values in xmm
// Matches (Replicate2I zero) for the immI0 operand and simply clears dst by
// XOR-ing it with itself (pxor enc_class), so no shuffle is needed.
11459 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11460   match(Set dst (Replicate2I zero));
11461   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11462   ins_encode( pxor(dst, dst));
11463   ins_pipe( fpu_reg_reg );
11464 %}
11465 
11466 // Replicate scalar to packed single precision floating point values in xmm
// regD-source form: matches (Replicate2F src) and uses the pshufd enc_class
// with selector 0xe0 (not 0x00 as in the integer variant), printed as PSHUFD.
11467 instruct Repl2F_reg(regD dst, regD src) %{
11468   match(Set dst (Replicate2F src));
11469   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11470   ins_encode( pshufd(dst, src, 0xe0));
11471   ins_pipe( fpu_reg_reg );
11472 %}
11473 
11474 // Replicate scalar to packed single precision floating point values in xmm
// regF-source form: identical to Repl2F_reg except that src is typed as a
// float register (regF). Matches (Replicate2F src), pshufd selector 0xe0.
11475 instruct Repl2F_regF(regD dst, regF src) %{
11476   match(Set dst (Replicate2F src));
11477   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11478   ins_encode( pshufd(dst, src, 0xe0));
11479   ins_pipe( fpu_reg_reg );
11480 %}
11481 
11482 // Replicate scalar zero to packed single precision floating point values in xmm
// Matches (Replicate2F zero) for the immF0 operand and simply clears dst by
// XOR-ing it with itself (pxor enc_class), so no shuffle is needed.
11483 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11484   match(Set dst (Replicate2F zero));
11485   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11486   ins_encode( pxor(dst, dst));
11487   ins_pipe( fpu_reg_reg );
11488 %}
11489 
11490 
11491 // =======================================================================
11492 // fast clearing of an array
// Matches (ClearArray cnt base). The operand classes pin the count to rcx, the
// base address to rdi and the zero value to rax, as required by the string
// store instruction. cnt and base are consumed and destroyed (USE_KILL), rax
// and the flags are clobbered (KILL). The result operand is a Universe dummy
// because the node's effect is on memory rather than on a register.
// Emitted bytes: "xorl eax, eax" (opcode 0x33 via opc_reg_reg) followed by
// F3 48 AB, i.e. a REX.W-prefixed "rep stos" that stores 8 bytes per count.
11493 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11494                   rFlagsReg cr)
11495 %{
11496   match(Set dummy (ClearArray cnt base));
11497   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11498
11499   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11500             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11501   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11502              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11503   ins_pipe(pipe_slow);
11504 %}
11505 
// String comparison intrinsic: matches
// (StrComp (Binary str1 cnt1) (Binary str2 cnt2)) and delegates to the macro
// assembler's string_compare. Operands are pinned to fixed registers by their
// operand classes (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax); all
// four inputs are destroyed (USE_KILL), tmp1 is an XMM scratch register (TEMP)
// and the flags are clobbered. Note the call passes both string pointers
// first, then both counts, which differs from the operand declaration order.
11506 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11507                         rax_RegI result, regD tmp1, rFlagsReg cr)
11508 %{
11509   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11510   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11511
11512   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11513   ins_encode %{
11514     __ string_compare($str1$$Register, $str2$$Register,
11515                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11516                       $tmp1$$XMMRegister);
11517   %}
11518   ins_pipe( pipe_slow );
11519 %}
11520 
11521 // fast search of substring with known size.
// Substring search where the substring length is a compile-time constant
// (int_cnt2 is an immI). Only available when UseSSE42Intrinsics is set.
// Matches (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)).
// Register usage: str1=rdi, cnt1=rdx and str2=rsi are inputs that get
// destroyed (USE_KILL); cnt2 (rax) and tmp (rcx) are not inputs of the match
// tree but are clobbered scratch registers (KILL); vec is an XMM temporary;
// the result is returned in rbx.
// The constant selects the macro-assembler routine: string_indexofC8 for
// substrings of at least 8 elements, string_indexof otherwise.
11522 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11523                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11524 %{
11525   predicate(UseSSE42Intrinsics);
11526   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11527   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11528
11529   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11530   ins_encode %{
11531     int icnt2 = (int)$int_cnt2$$constant;
11532     if (icnt2 >= 8) {
11533       // IndexOf for constant substrings with size >= 8 elements
11534       // which don't need to be loaded through stack.
11535       __ string_indexofC8($str1$$Register, $str2$$Register,
11536                           $cnt1$$Register, $cnt2$$Register,
11537                           icnt2, $result$$Register,
11538                           $vec$$XMMRegister, $tmp$$Register);
11539     } else {
11540       // Small strings are loaded through stack if they cross page boundary.
11541       __ string_indexof($str1$$Register, $str2$$Register,
11542                         $cnt1$$Register, $cnt2$$Register,
11543                         icnt2, $result$$Register,
11544                         $vec$$XMMRegister, $tmp$$Register);
11545     }
11546   %}
11547   ins_pipe( pipe_slow );
11548 %}
11549 
// Substring search (StrIndexOf) for a pattern whose length is only known at
// run time: cnt2 is a register operand here rather than an immediate.
// Only selected when UseSSE42Intrinsics is set.  All four inputs are
// USE_KILL, vec is an XMM temporary, tmp and the flags are killed.
// The helper string_indexof() is called with (-1) in the position where
// string_indexof_con passes its constant length -- presumably the marker for
// "length not constant"; confirm against the helper's definition.
11550 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11551                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11552 %{
11553   predicate(UseSSE42Intrinsics);
11554   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11555   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11556 
11557   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11558   ins_encode %{
11559     __ string_indexof($str1$$Register, $str2$$Register,
11560                       $cnt1$$Register, $cnt2$$Register,
11561                       (-1), $result$$Register,
11562                       $vec$$XMMRegister, $tmp$$Register);
11563   %}
11564   ins_pipe( pipe_slow );
11565 %}
11566 
11567 // fast string equals
// Matches the ideal StrEquals node on (str1, str2) with a shared element
// count cnt and defines result.  str1, str2 and cnt are USE_KILL; tmp1/tmp2
// are XMM temporaries; tmp3 and the flags are killed.
// Code comes from the assembler helper char_arrays_equals(), called with
// `false` as its first argument (array_equals passes `true`) -- presumably an
// "operands are array oops" switch; confirm against the helper.
11568 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11569                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11570 %{
11571   match(Set result (StrEquals (Binary str1 str2) cnt));
11572   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11573 
11574   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11575   ins_encode %{
11576     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11577                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11578                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11579   %}
11580   ins_pipe( pipe_slow );
11581 %}
11582 
11583 // fast array equals
// Matches the ideal AryEq node on (ary1, ary2) and defines result.
// Unlike string_equals there is no count input: tmp3 (rcx) is passed in the
// helper's count position and is merely killed, so the helper presumably
// derives the length itself -- confirm against char_arrays_equals().
// ary1/ary2 are USE_KILL, tmp1/tmp2 are XMM temporaries, tmp3, tmp4 and the
// flags are killed.  The helper is called with `true` as its first argument.
11584 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11585                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11586 %{
11587   match(Set result (AryEq ary1 ary2));
11588   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11589   //ins_cost(300);
11590 
11591   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11592   ins_encode %{
11593     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11594                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11595                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11596   %}
11597   ins_pipe( pipe_slow );
11598 %}
11599 
11600 //----------Control Flow Instructions------------------------------------------
11601 // Signed compare Instructions
11602 
11603 // XXX more variants!!
// Signed 32-bit compare of two int registers: matches CmpI op1 op2 and
// defines the flags operand cr.  Emitted as `cmpl op1, op2` (opcode 3B /r).
// The explicit effect() restates what the match rule already implies
// (cr is defined, op1/op2 are only read).
11604 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11605 %{
11606   match(Set cr (CmpI op1 op2));
11607   effect(DEF cr, USE op1, USE op2);
11608 
11609   format %{ "cmpl    $op1, $op2" %}
11610   opcode(0x3B);  /* Opcode 3B /r */
11611   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11612   ins_pipe(ialu_cr_reg_reg);
11613 %}
11614 
// Signed 32-bit compare of an int register with an immediate: matches
// CmpI op1 op2 where op2 is an immI constant.  Emitted as `cmpl op1, imm`
// (opcode 81 /7); judging by the encoding-class names (OpcSErm, Con8or32) a
// sign-extended 8-bit immediate form is used when the constant fits --
// confirm against the enc_class definitions.
11615 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11616 %{
11617   match(Set cr (CmpI op1 op2));
11618 
11619   format %{ "cmpl    $op1, $op2" %}
11620   opcode(0x81, 0x07); /* Opcode 81 /7 */
11621   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11622   ins_pipe(ialu_cr_reg_imm);
11623 %}
11624 
// Signed 32-bit compare of an int register with an int loaded from memory:
// matches CmpI op1 (LoadI op2), folding the load into `cmpl op1, [mem]`
// (opcode 3B /r).  Carries an explicit cost of 500 (marked XXX by the
// original author, i.e. the value is provisional).
11625 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11626 %{
11627   match(Set cr (CmpI op1 (LoadI op2)));
11628 
11629   ins_cost(500); // XXX
11630   format %{ "cmpl    $op1, $op2" %}
11631   opcode(0x3B); /* Opcode 3B /r */
11632   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11633   ins_pipe(ialu_cr_reg_mem);
11634 %}
11635 
// Compare of an int register against the constant zero: matches
// CmpI src zero (zero is an immI0 operand) and emits `testl src, src`
// (opcode 85) instead of a compare with an immediate.
11636 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11637 %{
11638   match(Set cr (CmpI src zero));
11639 
11640   format %{ "testl   $src, $src" %}
11641   opcode(0x85);
11642   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11643   ins_pipe(ialu_cr_reg_imm);
11644 %}
11645 
// Fuses "(src & con) compared with zero" into one instruction: matches
// CmpI (AndI src con) zero and emits `testl src, con` (opcode F7 /0 with a
// 32-bit immediate), so the AND result is never materialised.
11646 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11647 %{
11648   match(Set cr (CmpI (AndI src con) zero));
11649 
11650   format %{ "testl   $src, $con" %}
11651   opcode(0xF7, 0x00);
11652   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11653   ins_pipe(ialu_cr_reg_imm);
11654 %}
11655 
// Fuses "(src & int loaded from mem) compared with zero": matches
// CmpI (AndI src (LoadI mem)) zero and emits `testl src, [mem]` (opcode 85),
// folding both the load and the AND into the test.
11656 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11657 %{
11658   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11659 
11660   format %{ "testl   $src, $mem" %}
11661   opcode(0x85);
11662   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11663   ins_pipe(ialu_cr_reg_mem);
11664 %}
11665 
11666 // Unsigned compare Instructions; really, same as signed except they
11667 // produce an rFlagsRegU instead of rFlagsReg.
// compU_rReg: matches CmpU op1 op2 on two int registers and emits the same
// `cmpl op1, op2` (opcode 3B /r) as compI_rReg; only the flags operand class
// differs.
11668 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11669 %{
11670   match(Set cr (CmpU op1 op2));
11671 
11672   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11673   opcode(0x3B); /* Opcode 3B /r */
11674   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11675   ins_pipe(ialu_cr_reg_reg);
11676 %}
11677 
// Unsigned compare of an int register with an immediate: matches
// CmpU op1 op2 with op2 an immI constant and defines an rFlagsRegU.
// Same encoding as compI_rReg_imm (opcode 81 /7).
11678 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11679 %{
11680   match(Set cr (CmpU op1 op2));
11681 
11682   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11683   opcode(0x81,0x07); /* Opcode 81 /7 */
11684   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11685   ins_pipe(ialu_cr_reg_imm);
11686 %}
11687 
// Unsigned compare of an int register with an int loaded from memory:
// matches CmpU op1 (LoadI op2) and defines an rFlagsRegU.  Same encoding as
// compI_rReg_mem (opcode 3B /r) and the same provisional cost of 500.
11688 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11689 %{
11690   match(Set cr (CmpU op1 (LoadI op2)));
11691 
11692   ins_cost(500); // XXX
11693   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11694   opcode(0x3B); /* Opcode 3B /r */
11695   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11696   ins_pipe(ialu_cr_reg_mem);
11697 %}
11698 
11699 // // // Cisc-spilled version of cmpU_rReg
11700 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11701 // //%{
11702 // //  match(Set cr (CmpU (LoadI op1) op2));
11703 // //
11704 // //  format %{ "CMPu   $op1,$op2" %}
11705 // //  ins_cost(500);
11706 // //  opcode(0x39);  /* Opcode 39 /r */
11707 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11708 // //%}
11709 
// Unsigned compare of an int register against zero: matches CmpU src zero
// and emits `testl src, src` (opcode 85), defining an rFlagsRegU.
11710 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11711 %{
11712   match(Set cr (CmpU src zero));
11713 
11714   format %{ "testl  $src, $src\t# unsigned" %}
11715   opcode(0x85);
11716   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11717   ins_pipe(ialu_cr_reg_imm);
11718 %}
11719 
// Pointer compare of two pointer registers: matches CmpP op1 op2 and defines
// an unsigned flags operand (rFlagsRegU).  Emitted as 64-bit `cmpq op1, op2`
// (opcode 3B /r with the "wide" REX prefix encoding class).
11720 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11721 %{
11722   match(Set cr (CmpP op1 op2));
11723 
11724   format %{ "cmpq    $op1, $op2\t# ptr" %}
11725   opcode(0x3B); /* Opcode 3B /r */
11726   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11727   ins_pipe(ialu_cr_reg_reg);
11728 %}
11729 
// Pointer compare of a pointer register with a pointer loaded from memory:
// matches CmpP op1 (LoadP op2), folding the load into `cmpq op1, [mem]`
// (opcode 3B /r, wide).  Cost 500 is provisional (XXX).  compP_mem_rReg
// matches the same tree without the cost but with a raw-pointer predicate.
11730 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11731 %{
11732   match(Set cr (CmpP op1 (LoadP op2)));
11733 
11734   ins_cost(500); // XXX
11735   format %{ "cmpq    $op1, $op2\t# ptr" %}
11736   opcode(0x3B); /* Opcode 3B /r */
11737   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11738   ins_pipe(ialu_cr_reg_mem);
11739 %}
11740 
11741 // // // Cisc-spilled version of cmpP_rReg
11742 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11743 // //%{
11744 // //  match(Set cr (CmpP (LoadP op1) op2));
11745 // //
11746 // //  format %{ "CMPu   $op1,$op2" %}
11747 // //  ins_cost(500);
11748 // //  opcode(0x39);  /* Opcode 39 /r */
11749 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11750 // //%}
11751 
11752 // XXX this is generalized by compP_rReg_mem???
11753 // Compare raw pointer (used in out-of-heap check).
11754 // Only works because non-oop pointers must be raw pointers
11755 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but without the explicit
// ins_cost and guarded by a predicate: the type of n->in(2)->in(2) must not
// be an oop pointer.  (Given the match shape, n->in(2) is presumably the
// LoadP and its in(2) the address input -- confirm against the ideal graph
// node layout.)  Despite the "mem_rReg" name, the register is still the
// first operand of the emitted cmpq.
11756 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11757 %{
11758   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11759   match(Set cr (CmpP op1 (LoadP op2)));
11760 
11761   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11762   opcode(0x3B); /* Opcode 3B /r */
11763   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11764   ins_pipe(ialu_cr_reg_mem);
11765 %}
11766 
11767 // This will generate a signed flags result. This should be OK since
11768 // any compare to a zero should be eq/neq.
// Null check of a pointer register: matches CmpP src zero (zero is an immP0
// operand) and emits 64-bit `testq src, src` (opcode 85, wide).
11769 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11770 %{
11771   match(Set cr (CmpP src zero));
11772 
11773   format %{ "testq   $src, $src\t# ptr" %}
11774   opcode(0x85);
11775   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11776   ins_pipe(ialu_cr_reg_imm);
11777 %}
11778 
11779 // This will generate a signed flags result. This should be OK since
11780 // any compare to a zero should be eq/neq.
// Null check of a pointer held in memory: matches CmpP (LoadP op) zero.
// Selected when compressed oops are off or the narrow-oop base is non-NULL;
// testP_mem_reg0 covers the complementary case.  Emits opcode F7 /0 with the
// 32-bit immediate 0xFFFFFFFF under a wide REX prefix; the format string
// shows this as a test against a 64-bit all-ones mask.
11781 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11782 %{
11783   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11784   match(Set cr (CmpP (LoadP op) zero));
11785 
11786   ins_cost(500); // XXX
11787   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11788   opcode(0xF7); /* Opcode F7 /0 */
11789   ins_encode(REX_mem_wide(op),
11790              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11791   ins_pipe(ialu_cr_reg_imm);
11792 %}
11793 
// Null check of a pointer held in memory for the configuration where
// compressed oops are on and the narrow-oop base is NULL.  Per the format
// string, R12 holds the heap base, which is zero in this configuration, so
// `cmpq r12, [mem]` compares the loaded pointer with zero without needing an
// immediate.
11794 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11795 %{
11796   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11797   match(Set cr (CmpP (LoadP mem) zero));
11798 
11799   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11800   ins_encode %{
11801     __ cmpq(r12, $mem$$Address);
11802   %}
11803   ins_pipe(ialu_cr_reg_mem);
11804 %}
11805 
// Compare of two compressed (narrow) oop registers: matches CmpN op1 op2,
// defines an rFlagsRegU and emits a 32-bit `cmpl op1, op2` via the assembler.
11806 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11807 %{
11808   match(Set cr (CmpN op1 op2));
11809 
11810   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11811   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11812   ins_pipe(ialu_cr_reg_reg);
11813 %}
11814 
// Compare of a compressed oop register with a compressed oop loaded from
// memory: matches CmpN src (LoadN mem) and emits `cmpl src, [mem]`, folding
// the load into the compare.
11815 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11816 %{
11817   match(Set cr (CmpN src (LoadN mem)));
11818 
11819   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11820   ins_encode %{
11821     __ cmpl($src$$Register, $mem$$Address);
11822   %}
11823   ins_pipe(ialu_cr_reg_mem);
11824 %}
11825 
// Compare of a compressed oop register with a compressed oop constant:
// matches CmpN op1 op2 with op2 an immN operand.  The constant is handed to
// the assembler helper cmp_narrow_oop() as a jobject.
11826 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11827   match(Set cr (CmpN op1 op2));
11828 
11829   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11830   ins_encode %{
11831     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11832   %}
11833   ins_pipe(ialu_cr_reg_imm);
11834 %}
11835 
// Compare of a compressed oop constant with a compressed oop loaded from
// memory: matches CmpN src (LoadN mem) with src an immN operand and emits a
// compare of the memory operand with the constant via cmp_narrow_oop().
// NOTE(review): the emitted instruction has the memory operand first while
// the match rule has the constant first, so the operand order is swapped
// relative to the ideal node.  That is harmless for eq/ne tests only --
// confirm that no ordered (lt/gt) consumer can use these flags.
11836 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11837 %{
11838   match(Set cr (CmpN src (LoadN mem)));
11839 
11840   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11841   ins_encode %{
11842     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11843   %}
11844   ins_pipe(ialu_cr_reg_mem);
11845 %}
11846 
// Null check of a compressed oop register: matches CmpN src zero (zero is an
// immN0 operand) and emits a 32-bit `testl src, src`.
11847 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11848   match(Set cr (CmpN src zero));
11849 
11850   format %{ "testl   $src, $src\t# compressed ptr" %}
11851   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11852   ins_pipe(ialu_cr_reg_imm);
11853 %}
11854 
// Null check of a compressed oop held in memory: matches
// CmpN (LoadN mem) zero.  Selected when the narrow-oop base is non-NULL;
// testN_mem_reg0 covers the NULL-base case.
// The value is compared against 0.  Comparing against 0xFFFFFFFF instead
// (as a literal reading of a "testl mem, -1" mask might suggest) would set ZF
// only when the loaded value is all ones, which is not the "equals null"
// condition this rule stands for.  `cmpl mem, 0` leaves ZF/SF/CF/OF exactly
// as `testl mem, 0xffffffff` would.
11855 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11856 %{
11857   predicate(Universe::narrow_oop_base() != NULL);
11858   match(Set cr (CmpN (LoadN mem) zero));
11859 
11860   ins_cost(500); // XXX
11861   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11862   ins_encode %{
11863     __ cmpl($mem$$Address, 0);
11864   %}
11865   ins_pipe(ialu_cr_reg_mem);
11866 %}
11867 
// Null check of a compressed oop held in memory for the configuration where
// the narrow-oop base is NULL.  Per the format string, R12 holds the heap
// base, which is zero here, so `cmpl r12, [mem]` compares the loaded value
// with zero without an immediate.
11868 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11869 %{
11870   predicate(Universe::narrow_oop_base() == NULL);
11871   match(Set cr (CmpN (LoadN mem) zero));
11872 
11873   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11874   ins_encode %{
11875     __ cmpl(r12, $mem$$Address);
11876   %}
11877   ins_pipe(ialu_cr_reg_mem);
11878 %}
11879 
11880 // Yanked all unsigned pointer compare operations.
11881 // Pointer compares are done with CmpP which is already unsigned.
11882 
// Signed 64-bit compare of two long registers: matches CmpL op1 op2 and
// emits `cmpq op1, op2` (opcode 3B /r with the wide REX encoding class).
11883 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11884 %{
11885   match(Set cr (CmpL op1 op2));
11886 
11887   format %{ "cmpq    $op1, $op2" %}
11888   opcode(0x3B);  /* Opcode 3B /r */
11889   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11890   ins_pipe(ialu_cr_reg_reg);
11891 %}
11892 
// Signed 64-bit compare of a long register with a constant: matches
// CmpL op1 op2 where op2 is an immL32 operand (going by the class name, a
// long constant representable as a 32-bit immediate).  Emitted as
// `cmpq op1, imm` (opcode 81 /7, wide variant of the encoding class).
11893 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11894 %{
11895   match(Set cr (CmpL op1 op2));
11896 
11897   format %{ "cmpq    $op1, $op2" %}
11898   opcode(0x81, 0x07); /* Opcode 81 /7 */
11899   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11900   ins_pipe(ialu_cr_reg_imm);
11901 %}
11902 
// Signed 64-bit compare of a long register with a long loaded from memory:
// matches CmpL op1 (LoadL op2) and emits `cmpq op1, [mem]` (opcode 3B /r,
// wide).  Unlike the int and pointer variants, no explicit ins_cost is set.
11903 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11904 %{
11905   match(Set cr (CmpL op1 (LoadL op2)));
11906 
11907   format %{ "cmpq    $op1, $op2" %}
11908   opcode(0x3B); /* Opcode 3B /r */
11909   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11910   ins_pipe(ialu_cr_reg_mem);
11911 %}
11912 
// Compare of a long register against zero: matches CmpL src zero (zero is an
// immL0 operand) and emits 64-bit `testq src, src` (opcode 85, wide).
11913 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11914 %{
11915   match(Set cr (CmpL src zero));
11916 
11917   format %{ "testq   $src, $src" %}
11918   opcode(0x85);
11919   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11920   ins_pipe(ialu_cr_reg_imm);
11921 %}
11922 
// Fuses "(src & con) compared with zero" for longs: matches
// CmpL (AndL src con) zero with con an immL32 operand and emits
// `testq src, con` (opcode F7 /0 with a 32-bit immediate, wide REX).
11923 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11924 %{
11925   match(Set cr (CmpL (AndL src con) zero));
11926 
11927   format %{ "testq   $src, $con\t# long" %}
11928   opcode(0xF7, 0x00);
11929   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11930   ins_pipe(ialu_cr_reg_imm);
11931 %}
11932 
// Fuses "(src & long loaded from mem) compared with zero": matches
// CmpL (AndL src (LoadL mem)) zero and emits `testq src, [mem]`
// (opcode 85, wide).
11933 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11934 %{
11935   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11936 
11937   format %{ "testq   $src, $mem" %}
11938   opcode(0x85);
11939   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11940   ins_pipe(ialu_cr_reg_mem);
11941 %}
11942 
11943 // Manifest a CmpL result in an integer register.  Very painful.
11944 // This is the test to avoid.
// Matches CmpL3 src1 src2 and defines the int register dst; the flags are
// killed.  According to the format string the sequence is: compare, preset
// dst to -1, skip the rest if src1 < src2, otherwise set dst to 1 when the
// operands differ and to 0 when they are equal.  The actual bytes come from
// the cmpl3_flag encoding class (defined outside this section).
11945 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11946 %{
11947   match(Set dst (CmpL3 src1 src2));
11948   effect(KILL flags);
11949 
11950   ins_cost(275); // XXX
11951   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11952             "movl    $dst, -1\n\t"
11953             "jl,s    done\n\t"
11954             "setne   $dst\n\t"
11955             "movzbl  $dst, $dst\n\t"
11956     "done:" %}
11957   ins_encode(cmpl3_flag(src1, src2, dst));
11958   ins_pipe(pipe_slow);
11959 %}
11960 
11961 //----------Max and Min--------------------------------------------------------
11962 // Min Instructions
11963 
// Helper instruction without a match rule; it is only instantiated from the
// expand block of minI_rReg.  Conditionally moves src into dst when the
// flags in cr say "greater" (opcode 0F 4F), so dst is both read and written
// (USE_DEF).
11964 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11965 %{
11966   effect(USE_DEF dst, USE src, USE cr);
11967 
11968   format %{ "cmovlgt $dst, $src\t# min" %}
11969   opcode(0x0F, 0x4F);
11970   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11971   ins_pipe(pipe_cmov_reg);
11972 %}
11973 
11974 
// Integer minimum, in place: matches MinI dst src with the result in dst.
// Expands into two instructions using a fresh flags operand: compare dst
// with src (compI_rReg), then replace dst by src if dst was greater
// (cmovI_reg_g).
11975 instruct minI_rReg(rRegI dst, rRegI src)
11976 %{
11977   match(Set dst (MinI dst src));
11978 
11979   ins_cost(200);
11980   expand %{
11981     rFlagsReg cr;
11982     compI_rReg(cr, dst, src);
11983     cmovI_reg_g(dst, src, cr);
11984   %}
11985 %}
11986 
// Helper instruction without a match rule; it is only instantiated from the
// expand block of maxI_rReg.  Conditionally moves src into dst when the
// flags in cr say "less" (opcode 0F 4C); dst is USE_DEF.
11987 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11988 %{
11989   effect(USE_DEF dst, USE src, USE cr);
11990 
11991   format %{ "cmovllt $dst, $src\t# max" %}
11992   opcode(0x0F, 0x4C);
11993   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11994   ins_pipe(pipe_cmov_reg);
11995 %}
11996 
11997 
// Integer maximum, in place: matches MaxI dst src with the result in dst.
// Expands into compI_rReg(cr, dst, src) followed by cmovI_reg_l, i.e. dst is
// replaced by src if dst was less.
11998 instruct maxI_rReg(rRegI dst, rRegI src)
11999 %{
12000   match(Set dst (MaxI dst src));
12001 
12002   ins_cost(200);
12003   expand %{
12004     rFlagsReg cr;
12005     compI_rReg(cr, dst, src);
12006     cmovI_reg_l(dst, src, cr);
12007   %}
12008 %}
12009 
12010 // ============================================================================
12011 // Branch Instructions
12012 
12013 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional long jump: matches the ideal Goto node.  Declared as exactly
// 5 bytes (opcode E9 followed by the label displacement emitted by the Lbl
// encoding class) and flagged pc-relative.
12014 instruct jmpDir(label labl)
12015 %{
12016   match(Goto);
12017   effect(USE labl);
12018 
12019   ins_cost(300);
12020   format %{ "jmp     $labl" %}
12021   size(5);
12022   opcode(0xE9);
12023   ins_encode(OpcP, Lbl(labl));
12024   ins_pipe(pipe_jmp);
12025   ins_pc_relative(1);
12026 %}
12027 
12028 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump on signed flags: matches the ideal If node with a
// cmpOp condition and an rFlagsReg.  Declared as exactly 6 bytes (two-byte
// opcode 0F 80 combined with the condition by the Jcc encoding class, plus
// the displacement) and flagged pc-relative.
12029 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12030 %{
12031   match(If cop cr);
12032   effect(USE labl);
12033 
12034   ins_cost(300);
12035   format %{ "j$cop     $labl" %}
12036   size(6);
12037   opcode(0x0F, 0x80);
12038   ins_encode(Jcc(cop, labl));
12039   ins_pipe(pipe_jcc);
12040   ins_pc_relative(1);
12041 %}
12042 
12043 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-edge branch on signed flags: same encoding as jmpCon (6 bytes,
// opcode 0F 80 via Jcc) but matching the ideal CountedLoopEnd node.
12044 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12045 %{
12046   match(CountedLoopEnd cop cr);
12047   effect(USE labl);
12048 
12049   ins_cost(300);
12050   format %{ "j$cop     $labl\t# loop end" %}
12051   size(6);
12052   opcode(0x0F, 0x80);
12053   ins_encode(Jcc(cop, labl));
12054   ins_pipe(pipe_jcc);
12055   ins_pc_relative(1);
12056 %}
12057 
12058 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-edge branch on unsigned flags: matches CountedLoopEnd with a
// cmpOpU condition and an rFlagsRegU.  Same 6-byte Jcc encoding as
// jmpLoopEnd.
12059 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12060   match(CountedLoopEnd cop cmp);
12061   effect(USE labl);
12062 
12063   ins_cost(300);
12064   format %{ "j$cop,u   $labl\t# loop end" %}
12065   size(6);
12066   opcode(0x0F, 0x80);
12067   ins_encode(Jcc(cop, labl));
12068   ins_pipe(pipe_jcc);
12069   ins_pc_relative(1);
12070 %}
12071 
// Loop back-edge branch for the cmpOpUCF / rFlagsRegUCF operand pair (both
// defined outside this section).  Same 6-byte Jcc encoding as jmpLoopEndU,
// but with a lower cost (200 instead of 300).
12072 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12073   match(CountedLoopEnd cop cmp);
12074   effect(USE labl);
12075 
12076   ins_cost(200);
12077   format %{ "j$cop,u   $labl\t# loop end" %}
12078   size(6);
12079   opcode(0x0F, 0x80);
12080   ins_encode(Jcc(cop, labl));
12081   ins_pipe(pipe_jcc);
12082   ins_pc_relative(1);
12083 %}
12084 
12085 // Jump Direct Conditional - using unsigned comparison
// Matches the ideal If node with a cmpOpU condition and an rFlagsRegU.
// Same 6-byte Jcc encoding (opcode 0F 80) as jmpCon.
12086 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12087   match(If cop cmp);
12088   effect(USE labl);
12089 
12090   ins_cost(300);
12091   format %{ "j$cop,u  $labl" %}
12092   size(6);
12093   opcode(0x0F, 0x80);
12094   ins_encode(Jcc(cop, labl));
12095   ins_pipe(pipe_jcc);
12096   ins_pc_relative(1);
12097 %}
12098 
// Conditional long jump for the cmpOpUCF / rFlagsRegUCF operand pair (both
// defined outside this section): matches the ideal If node.  Same 6-byte Jcc
// encoding as jmpConU, with a lower cost (200 instead of 300).
12099 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12100   match(If cop cmp);
12101   effect(USE labl);
12102 
12103   ins_cost(200);
12104   format %{ "j$cop,u  $labl" %}
12105   size(6);
12106   opcode(0x0F, 0x80);
12107   ins_encode(Jcc(cop, labl));
12108   ins_pipe(pipe_jcc);
12109   ins_pc_relative(1);
12110 %}
12111 
// Conditional long jump that needs two branch instructions: matches the
// ideal If node with a cmpOpUCF2 condition on an rFlagsRegUCF.  Only the
// conditions `equal` and `notEqual` are supported; anything else reaches
// ShouldNotReachHere().
//   notEqual: jump to $labl if the parity flag is set, then jump to $labl on
//             the condition itself.
//   equal:    jump over the second branch if the parity flag is set
//             (displacement 6 = size of the second jump), then jump to $labl
//             on the condition.
// Each jump is a 6-byte "0F 8x rel32" instruction, hence size(12).  Every
// displacement is computed relative to the end of the 4-byte field about to
// be written (cbuf.insts_size() + 4), so the statement order matters.
12112 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12113   match(If cop cmp);
12114   effect(USE labl);
12115 
12116   ins_cost(200);
12117   format %{ $$template
12118     if ($cop$$cmpcode == Assembler::notEqual) {
12119       $$emit$$"jp,u   $labl\n\t"
12120       $$emit$$"j$cop,u   $labl"
12121     } else {
12122       $$emit$$"jp,u   done\n\t"
12123       $$emit$$"j$cop,u   $labl\n\t"
12124       $$emit$$"done:"
12125     }
12126   %}
12127   size(12);
12128   opcode(0x0F, 0x80);
12129   ins_encode %{
12130     Label* l = $labl$$label;
12131     assert(l != NULL, "need Label");
12132     $$$emit8$primary;
12133     emit_cc(cbuf, $secondary, Assembler::parity);
12134     int parity_disp = -1;
12135     if ($cop$$cmpcode == Assembler::notEqual) {
12136        // both jumps go to the same label; they sit 6 bytes apart, so their displacements differ by 6 too
12137        parity_disp = l->loc_pos() - (cbuf.insts_size() + 4);
12138     } else if ($cop$$cmpcode == Assembler::equal) {
12139        parity_disp = 6;
12140     } else {
12141        ShouldNotReachHere();
12142     }
12143     emit_d32(cbuf, parity_disp);
12144     $$$emit8$primary;
12145     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12146     int disp = l->loc_pos() - (cbuf.insts_size() + 4);
12147     emit_d32(cbuf, disp);
12148   %}
12149   ins_pipe(pipe_jcc);
12150   ins_pc_relative(1);
12151 %}
12152 
12153 // ============================================================================
12154 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12155 // superklass array for an instance of the superklass.  Set a hidden
12156 // internal cache on a hit (cache is checked with exposed code in
12157 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12158 // encoding ALSO sets flags.
12159 
// Slow half of the subtype check with the outcome delivered in a register:
// matches PartialSubtypeCheck sub super and defines result (rdi).  rcx and
// the flags are killed.  The format string lists the emitted sequence: load
// the secondary-supers array of $sub, scan it with `repne scasq` for $super
// (rax), and on a hit update the secondary-super cache and zero the result.
// opcode(0x1) asks the shared encoding class to emit the final XOR of RDI;
// partialSubtypeCheck_vs_Zero uses opcode(0x0) to leave it out.
12160 instruct partialSubtypeCheck(rdi_RegP result,
12161                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12162                              rFlagsReg cr)
12163 %{
12164   match(Set result (PartialSubtypeCheck sub super));
12165   effect(KILL rcx, KILL cr);
12166 
12167   ins_cost(1100);  // slightly larger than the next version
12168   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12169             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12170             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12171             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12172             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12173             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12174             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12175     "miss:\t" %}
12176 
12177   opcode(0x1); // Force a XOR of RDI
12178   ins_encode(enc_PartialSubtypeCheck());
12179   ins_pipe(pipe_slow);
12180 %}
12181 
// Slow half of the subtype check when the result is only compared with
// null: matches CmpP (PartialSubtypeCheck sub super) zero and defines the
// flags directly.  rcx and the scratch register result (rdi) are killed.
// Uses the same encoding class as partialSubtypeCheck but with opcode(0x0),
// which omits the final XOR of RDI because only the flags are consumed.
// Cost 1000 makes it slightly cheaper than partialSubtypeCheck (1100).
12182 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12183                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12184                                      immP0 zero,
12185                                      rdi_RegP result)
12186 %{
12187   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12188   effect(KILL rcx, KILL result);
12189 
12190   ins_cost(1000);
12191   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12192             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12193             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12194             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12195             "jne,s   miss\t\t# Missed: flags nz\n\t"
12196             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12197     "miss:\t" %}
12198 
12199   opcode(0x0); // No need to XOR RDI
12200   ins_encode(enc_PartialSubtypeCheck());
12201   ins_pipe(pipe_slow);
12202 %}
12203 
12204 // ============================================================================
12205 // Branch Instructions -- short offset versions
12206 //
12207 // These instructions are used to replace jumps of a long offset (the default
12208 // match) with jumps of a shorter offset.  These instructions are all tagged
12209 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12210 // match rules in general matching.  Instead, the ADLC generates a conversion
12211 // method in the MachNode which can be used to do in-place replacement of the
12212 // long variant with the shorter variant.  The compiler will determine if a
12213 // branch can be taken by the is_short_branch_offset() predicate in the machine
12214 // specific code section of the file.
12215 
12216 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same Goto match rule, but tagged
// ins_short_branch(1) and declared as 2 bytes (opcode EB plus the
// displacement emitted by the LblShort encoding class).
12217 instruct jmpDir_short(label labl) %{
12218   match(Goto);
12219   effect(USE labl);
12220 
12221   ins_cost(300);
12222   format %{ "jmp,s   $labl" %}
12223   size(2);
12224   opcode(0xEB);
12225   ins_encode(OpcP, LblShort(labl));
12226   ins_pipe(pipe_jmp);
12227   ins_pc_relative(1);
12228   ins_short_branch(1);
12229 %}
12230 
12231 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same If match rule on signed flags,
// tagged ins_short_branch(1) and declared as 2 bytes (opcode base 70
// combined with the condition by the JccShort encoding class, plus the
// displacement).
12232 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12233   match(If cop cr);
12234   effect(USE labl);
12235 
12236   ins_cost(300);
12237   format %{ "j$cop,s   $labl" %}
12238   size(2);
12239   opcode(0x70);
12240   ins_encode(JccShort(cop, labl));
12241   ins_pipe(pipe_jcc);
12242   ins_pc_relative(1);
12243   ins_short_branch(1);
12244 %}
12245 
12246 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same CountedLoopEnd match rule on
// signed flags, tagged ins_short_branch(1), 2 bytes, JccShort encoding.
12247 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12248   match(CountedLoopEnd cop cr);
12249   effect(USE labl);
12250 
12251   ins_cost(300);
12252   format %{ "j$cop,s   $labl\t# loop end" %}
12253   size(2);
12254   opcode(0x70);
12255   ins_encode(JccShort(cop, labl));
12256   ins_pipe(pipe_jcc);
12257   ins_pc_relative(1);
12258   ins_short_branch(1);
12259 %}
12260 
12261 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: same CountedLoopEnd match rule
// on unsigned flags, tagged ins_short_branch(1), 2 bytes, JccShort encoding.
12262 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12263   match(CountedLoopEnd cop cmp);
12264   effect(USE labl);
12265 
12266   ins_cost(300);
12267   format %{ "j$cop,us  $labl\t# loop end" %}
12268   size(2);
12269   opcode(0x70);
12270   ins_encode(JccShort(cop, labl));
12271   ins_pipe(pipe_jcc);
12272   ins_pc_relative(1);
12273   ins_short_branch(1);
12274 %}
12275 
// Short-offset replacement for jmpLoopEndUCF: same CountedLoopEnd match rule
// on the cmpOpUCF / rFlagsRegUCF pair, tagged ins_short_branch(1), 2 bytes,
// JccShort encoding.  Note the cost here is 300 while the long form uses 200.
12276 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12277   match(CountedLoopEnd cop cmp);
12278   effect(USE labl);
12279 
12280   ins_cost(300);
12281   format %{ "j$cop,us  $labl\t# loop end" %}
12282   size(2);
12283   opcode(0x70);
12284   ins_encode(JccShort(cop, labl));
12285   ins_pipe(pipe_jcc);
12286   ins_pc_relative(1);
12287   ins_short_branch(1);
12288 %}
12289 
12290 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: same If match rule on unsigned
// flags, tagged ins_short_branch(1), 2 bytes, JccShort encoding.
12291 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12292   match(If cop cmp);
12293   effect(USE labl);
12294 
12295   ins_cost(300);
12296   format %{ "j$cop,us  $labl" %}
12297   size(2);
12298   opcode(0x70);
12299   ins_encode(JccShort(cop, labl));
12300   ins_pipe(pipe_jcc);
12301   ins_pc_relative(1);
12302   ins_short_branch(1);
12303 %}
12304 
// Short-offset replacement for jmpConUCF: same If match rule on the
// cmpOpUCF / rFlagsRegUCF pair, tagged ins_short_branch(1), 2 bytes,
// JccShort encoding.  Note the cost here is 300 while the long form uses 200.
12305 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12306   match(If cop cmp);
12307   effect(USE labl);
12308 
12309   ins_cost(300);
12310   format %{ "j$cop,us  $labl" %}
12311   size(2);
12312   opcode(0x70);
12313   ins_encode(JccShort(cop, labl));
12314   ins_pipe(pipe_jcc);
12315   ins_pc_relative(1);
12316   ins_short_branch(1);
12317 %}
12318 
// Short-offset replacement for jmpConUCF2: the same two-branch scheme using
// 2-byte "7x rel8" jumps, hence size(4).  Only `equal` and `notEqual` are
// supported; anything else reaches ShouldNotReachHere().
//   notEqual: jump to $labl on parity, then jump to $labl on the condition.
//   equal:    jump over the second branch on parity (displacement 2 = size
//             of the second jump), then jump to $labl on the condition.
// Each displacement is computed relative to the end of the 1-byte field
// about to be written (cbuf.insts_size() + 1), so the statement order
// matters.  The range asserts run after the bytes have been emitted.
12319 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12320   match(If cop cmp);
12321   effect(USE labl);
12322 
12323   ins_cost(300);
12324   format %{ $$template
12325     if ($cop$$cmpcode == Assembler::notEqual) {
12326       $$emit$$"jp,u,s   $labl\n\t"
12327       $$emit$$"j$cop,u,s   $labl"
12328     } else {
12329       $$emit$$"jp,u,s   done\n\t"
12330       $$emit$$"j$cop,u,s  $labl\n\t"
12331       $$emit$$"done:"
12332     }
12333   %}
12334   size(4);
12335   opcode(0x70);
12336   ins_encode %{
12337     Label* l = $labl$$label;
12338     assert(l != NULL, "need Label");
12339     emit_cc(cbuf, $primary, Assembler::parity);
12340     int parity_disp = -1;
12341     if ($cop$$cmpcode == Assembler::notEqual) {
12342       parity_disp = l->loc_pos() - (cbuf.insts_size() + 1);
12343     } else if ($cop$$cmpcode == Assembler::equal) {
12344       parity_disp = 2;
12345     } else {
12346       ShouldNotReachHere();
12347     }
12348     emit_d8(cbuf, parity_disp);
12349     emit_cc(cbuf, $primary, $cop$$cmpcode);
12350     int disp = l->loc_pos() - (cbuf.insts_size() + 1);
12351     emit_d8(cbuf, disp);
12352     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12353     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12354   %}
12355   ins_pipe(pipe_jcc);
12356   ins_pc_relative(1);
12357   ins_short_branch(1);
12358 %}
12359 
12360 // ============================================================================
12361 // inlined locking and unlocking
12362 
// Inlined monitor-enter fast path: matches FastLock object box and defines
// the flags operand cr.  tmp (an rax_RegI operand) and scr are declared as
// TEMP scratch registers.  The code is produced by the Fast_Lock encoding
// class, which is defined outside this section.
12363 instruct cmpFastLock(rFlagsReg cr,
12364                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12365 %{
12366   match(Set cr (FastLock object box));
12367   effect(TEMP tmp, TEMP scr);
12368 
12369   ins_cost(300);
12370   format %{ "fastlock $object,$box,$tmp,$scr" %}
12371   ins_encode(Fast_Lock(object, box, tmp, scr));
12372   ins_pipe(pipe_slow);
12373   ins_pc_relative(1);
12374 %}
12375 
// Inlined monitor-exit fast path: matches FastUnlock object box and defines
// the flags operand cr.  box uses the rax_RegP operand class and tmp is a
// TEMP scratch register.  The code is produced by the Fast_Unlock encoding
// class, which is defined outside this section.
12376 instruct cmpFastUnlock(rFlagsReg cr,
12377                        rRegP object, rax_RegP box, rRegP tmp)
12378 %{
12379   match(Set cr (FastUnlock object box));
12380   effect(TEMP tmp);
12381 
12382   ins_cost(300);
12383   format %{ "fastunlock $object, $box, $tmp" %}
12384   ins_encode(Fast_Unlock(object, box, tmp));
12385   ins_pipe(pipe_slow);
12386   ins_pc_relative(1);
12387 %}
12388 
12389 
12390 // ============================================================================
12391 // Safepoint Instructions
// Safepoint poll for the case where the polling page is not "far"
// (predicate !Assembler::is_polling_page_far()).  Matches the SafePoint node
// without inputs and emits `testl rax, <polling page>` using an
// AddressLiteral tagged with the poll_type relocation; the format string
// shows it as a rip-relative access.  The flags are killed.
12392 instruct safePoint_poll(rFlagsReg cr)
12393 %{
12394   predicate(!Assembler::is_polling_page_far());
12395   match(SafePoint);
12396   effect(KILL cr);
12397 
12398   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12399             "# Safepoint: poll for GC" %}
12400   ins_cost(125);
12401   ins_encode %{
12402     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12403     __ testl(rax, addr);
12404   %}
12405   ins_pipe(ialu_reg_mem);
12406 %}
12407 
// Safepoint poll for the case where the polling page is "far"
// (predicate Assembler::is_polling_page_far()).  Here the SafePoint node
// carries the polling-page address as an input in the register poll; the
// encoder records a poll_type relocation and then emits
// `testl rax, [poll]`.  The flags are killed.
12408 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12409 %{
12410   predicate(Assembler::is_polling_page_far());
12411   match(SafePoint poll);
12412   effect(KILL cr, USE poll);
12413 
12414   format %{ "testl  rax, [$poll]\t"
12415             "# Safepoint: poll for GC" %}
12416   ins_cost(125);
12417   ins_encode %{
12418     __ relocate(relocInfo::poll_type);
12419     __ testl(rax, Address($poll$$Register, 0));
12420   %}
12421   ins_pipe(ialu_reg_mem);
12422 %}
12423 
12424 // ============================================================================
12425 // Procedure Call/Return Instructions
12426 // Call Java Static Instruction
12427 // Note: If this code changes, the corresponding ret_addr_offset() and
12428 //       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava nodes that are NOT method-handle invokes (see the
// predicate; CallStaticJavaHandle takes the other case).  Emits the call via
// the Java_Static_Call encoding class (opcode E8) followed by call_epilog,
// and requests 4-byte instruction alignment.
12429 instruct CallStaticJavaDirect(method meth) %{
12430   match(CallStaticJava);
12431   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12432   effect(USE meth);
12433 
12434   ins_cost(300);
12435   format %{ "call,static " %}
12436   opcode(0xE8); /* E8 cd */
12437   ins_encode(Java_Static_Call(meth), call_epilog);
12438   ins_pipe(pipe_slow);
12439   ins_pc_relative(1);
12440   ins_alignment(4);
12441 %}
12442 
12443 // Call Java Static Instruction (method handle version)
12444 // Note: If this code changes, the corresponding ret_addr_offset() and
12445 //       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava nodes that ARE method-handle invokes.  Identical to
// CallStaticJavaDirect except that the call is bracketed by the preserve_SP
// and restore_SP encoding classes and an extra rbp_RegP operand
// (rbp_mh_SP_save) is declared for that purpose.
12446 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12447   match(CallStaticJava);
12448   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12449   effect(USE meth);
12450   // RBP is saved by all callees (for interpreter stack correction).
12451   // We use it here for a similar purpose, in {preserve,restore}_SP.
12452 
12453   ins_cost(300);
12454   format %{ "call,static/MethodHandle " %}
12455   opcode(0xE8); /* E8 cd */
12456   ins_encode(preserve_SP,
12457              Java_Static_Call(meth),
12458              restore_SP,
12459              call_epilog);
12460   ins_pipe(pipe_slow);
12461   ins_pc_relative(1);
12462   ins_alignment(4);
12463 %}
12464 
// Call Java Dynamic Instruction
// Selected for CallDynamicJava nodes.  Encoded with primary opcode 0xE8
// (E8 cd) by the Java_Dynamic_Call encoding class, followed by call_epilog;
// the format shows a movq of Universe::non_oop_word() into rax ahead of
// the call.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_Dynamic_Call(meth),
             call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
  ins_pc_relative(1);
%}
12482 
// Call Runtime Instruction
// Selected for CallRuntime nodes.  Encoded with primary opcode 0xE8 (E8 cd)
// by the Java_To_Runtime encoding class; no call_epilog and no alignment
// attribute are listed for this form.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pc_relative(1);
  ins_pipe(pipe_slow);
%}
12496 
// Call runtime without safepoint
// Selected for CallLeaf nodes.  Encoded with primary opcode 0xE8 (E8 cd)
// by the Java_To_Runtime encoding class.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pc_relative(1);
  ins_pipe(pipe_slow);
%}
12510 
// Call runtime without safepoint (CallLeafNoFP variant)
// Selected for CallLeafNoFP nodes.  Encoded with primary opcode 0xE8
// (E8 cd) by the Java_To_Runtime encoding class.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pc_relative(1);
  ins_pipe(pipe_slow);
%}
12524 
// Return Instruction
// Remove the return address & jump to it.  Selected for Return nodes and
// encoded as primary opcode 0xC3 through OpcP.
// NOTE(review): this comment used to state that a nop is always emitted
// after the ret to leave room for safepoint patching.  The encoding listed
// here names only OpcP, so confirm whether any such padding still exists
// before relying on it.
instruct Ret() %{
  match(Return);

  opcode(0xC3);
  format %{ "ret" %}
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}
12538 
// Tail Call: an 'interprocedural jump' from a runtime stub to Java code.
// The return address is left in place, so the target of the jump will
// eventually return to the caller (TailJump below removes the return
// address instead).
// Encoded as an indirect jmp (FF /4) through $jump_target.  method_oop uses
// the rbx_RegP operand class; the format notes that rbx holds the method oop.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop) %{
  match(TailCall jump_target method_oop);

  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
  ins_cost(300);
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(REX_reg(jump_target),
             OpcP,
             reg_opc(jump_target));
  ins_pipe(pipe_jmp);
%}
12553 
// Tail Jump: remove the return address, then jump to the target.
// (TailCall above leaves the return address around.)
// The return address is popped into rdx (opcode 0x5a) and is followed by an
// indirect jmp (FF /4) through $jump_target.  ex_oop uses the rax_RegP
// operand class.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);

  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_cost(300);
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(Opcode(0x5a),          // popq rdx
             REX_reg(jump_target),  // REX prefix for the jump target register
             OpcP,                  // primary opcode 0xFF
             reg_opc(jump_target)); // register byte with secondary opcode /4
  ins_pipe(pipe_jmp);
%}
12568 
// Create exception oop.
// The oop itself is produced by stack-crawling runtime code and is set up
// just prior to jumping to this handler, so it is already available here.
// The instruction has size 0 and an empty encoding: no code is emitted.
// ex_oop uses the rax_RegP operand class.
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  // Format text only; nothing is encoded for this node.
  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
12582 
// Rethrow exception.
// The exception oop arrives in the first argument position; control is then
// transferred with a JUMP (not a call) to the rethrow stub code.  The
// encoding is supplied by the enc_rethrow encoding class.
instruct RethrowException() %{
  match(Rethrow);

  format %{ "jmp     rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}
12595 
12596 
12597 //----------PEEPHOLE RULES-----------------------------------------------------
12598 // These must follow all instruction definitions as they use the names
12599 // defined in the instructions definitions.
12600 //
12601 // peepmatch ( root_instr_name [preceding_instruction]* );
12602 //
12603 // peepconstraint %{
12604 // (instruction_number.operand_name relational_op instruction_number.operand_name
12605 //  [, ...] );
12606 // // instruction numbers are zero-based using left to right order in peepmatch
12607 //
12608 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12609 // // provide an instruction_number.operand_name for each operand that appears
12610 // // in the replacement instruction's match rule
12611 //
12612 // ---------VM FLAGS---------------------------------------------------------
12613 //
12614 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12615 //
12616 // Each peephole rule is given an identifying number starting with zero and
12617 // increasing by one in the order seen by the parser.  An individual peephole
12618 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12619 // on the command-line.
12620 //
12621 // ---------CURRENT LIMITATIONS----------------------------------------------
12622 //
12623 // Only match adjacent instructions in same basic block
12624 // Only equality constraints
12625 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12626 // Only one replacement instruction
12627 //
12628 // ---------EXAMPLE----------------------------------------------------------
12629 //
12630 // // pertinent parts of existing instructions in architecture description
12631 // instruct movI(rRegI dst, rRegI src)
12632 // %{
12633 //   match(Set dst (CopyI src));
12634 // %}
12635 //
12636 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12637 // %{
12638 //   match(Set dst (AddI dst src));
12639 //   effect(KILL cr);
12640 // %}
12641 //
12642 // // Change (inc mov) to lea
12643 // peephole %{
//   // increment preceded by register-register move
12645 //   peepmatch ( incI_rReg movI );
12646 //   // require that the destination register of the increment
12647 //   // match the destination register of the move
12648 //   peepconstraint ( 0.dst == 1.dst );
12649 //   // construct a replacement instruction that sets
12650 //   // the destination to ( move's source register + one )
12651 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12652 // %}
12653 //
12654 
12655 // Implementation no longer uses movX instructions since
12656 // machine-independent system no longer uses CopyX nodes.
12657 //
12658 // peephole
12659 // %{
12660 //   peepmatch (incI_rReg movI);
12661 //   peepconstraint (0.dst == 1.dst);
12662 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12663 // %}
12664 
12665 // peephole
12666 // %{
12667 //   peepmatch (decI_rReg movI);
12668 //   peepconstraint (0.dst == 1.dst);
12669 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12670 // %}
12671 
12672 // peephole
12673 // %{
12674 //   peepmatch (addI_rReg_imm movI);
12675 //   peepconstraint (0.dst == 1.dst);
12676 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12677 // %}
12678 
12679 // peephole
12680 // %{
12681 //   peepmatch (incL_rReg movL);
12682 //   peepconstraint (0.dst == 1.dst);
12683 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12684 // %}
12685 
12686 // peephole
12687 // %{
12688 //   peepmatch (decL_rReg movL);
12689 //   peepconstraint (0.dst == 1.dst);
12690 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12691 // %}
12692 
12693 // peephole
12694 // %{
12695 //   peepmatch (addL_rReg_imm movL);
12696 //   peepconstraint (0.dst == 1.dst);
12697 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12698 // %}
12699 
12700 // peephole
12701 // %{
12702 //   peepmatch (addP_rReg_imm movP);
12703 //   peepconstraint (0.dst == 1.dst);
12704 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12705 // %}
12706 
12707 // // Change load of spilled value to only a spill
12708 // instruct storeI(memory mem, rRegI src)
12709 // %{
12710 //   match(Set mem (StoreI mem src));
12711 // %}
12712 //
12713 // instruct loadI(rRegI dst, memory mem)
12714 // %{
12715 //   match(Set dst (LoadI mem));
12716 // %}
12717 //
12718 
// Peephole: a loadI that directly follows a storeI of the same register to
// the same memory operand is replaced by the store alone.  Instruction
// numbers are zero-based in peepmatch order: 0 is the root loadI, 1 is the
// preceding storeI.
peephole
%{
  // root instruction first, then the instruction that precedes it
  peepmatch (loadI storeI);
  // the store's source must equal the load's destination, and both
  // instructions must name the same memory operand
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  // the replacement storeI is built only from the store's own operands
  peepreplace (storeI(1.mem 1.mem 1.src));
%}
12725 
// Peephole: a loadL that directly follows a storeL of the same register to
// the same memory operand is replaced by the store alone.  Instruction
// numbers are zero-based in peepmatch order: 0 is the root loadL, 1 is the
// preceding storeL.
peephole
%{
  // root instruction first, then the instruction that precedes it
  peepmatch (loadL storeL);
  // the store's source must equal the load's destination, and both
  // instructions must name the same memory operand
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  // the replacement storeL is built only from the store's own operands
  peepreplace (storeL(1.mem 1.mem 1.src));
%}
12732 
12733 //----------SMARTSPILL RULES---------------------------------------------------
12734 // These must follow all instruction definitions as they use the names
12735 // defined in the instructions definitions.